tame/tamer/src/parse/lower.rs

// Lowering operation between parsers
//
//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! IR lowering operation between [`Parser`]s.

use super::{
    state::ClosedParseState, FinalizeError, FinalizedParser, NoContext, Object,
    ParseError, ParseState, Parsed, ParsedResult, Parser, Token,
    TransitionResult,
};
use crate::diagnose::Diagnostic;
use std::{fmt::Display, iter, marker::PhantomData};

#[cfg(doc)]
use super::TokenStream;

/// An IR lowering operation that pipes the output of one [`Parser`] to the
///   input of another while propagating errors via a common
///   [`WidenedError`] type `E`.
///
/// The type parameters are:
///   - `S`, the source [`ParseState`] whose objects are to be lowered;
///   - `I`, the iterator yielding the (already widened) results of `S`;
///   - `LS`, the [`ParseState`] being lowered into,
///       whose tokens are the objects of `S`; and
///   - `E`, the error type shared by both ends of the pipeline.
///
/// This is produced by [`Lower`] methods.
pub struct LowerIter<'a, S, I, LS, E>
where
    S: ParseState,
    I: Iterator<Item = WidenedParsedResult<S, E>>,
    LS: ClosedParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
    E: WidenedError<S, LS>,
{
    /// A push [`Parser`].
    ///
    /// Its own token stream is [`iter::Empty`];
    ///   tokens are instead fed to it one at a time by this type's
    ///   [`Iterator`] implementation.
    lower: Parser<LS, iter::Empty<LS::Token>>,

    /// Source tokens from higher-level [`Parser`].
    ///
    /// The source is only borrowed (for `'a`),
    ///   so it remains owned by whoever created this iterator.
    toks: &'a mut I,

    /// `S` is used for its associated types only.
    _phantom: PhantomData<S>,
}

impl<'a, S, I, LS, E> LowerIter<'a, S, I, LS, E>
where
    S: ParseState,
    I: Iterator<Item = WidenedParsedResult<S, E>>,
    LS: ClosedParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
    E: WidenedError<S, LS>,
{
    /// Consume this iterator and finalize the inner lowering parser.
    ///
    /// On success the [`FinalizedParser`] for `LS` is returned.
    /// On failure only the [`FinalizeError`] is surfaced;
    ///   the other half of the inner parser's error pair is discarded.
    #[inline]
    fn finalize(self) -> Result<FinalizedParser<LS>, FinalizeError> {
        match self.lower.finalize() {
            Ok(finalized) => Ok(finalized),
            Err((_, err)) => Err(err),
        }
    }
}

/// Lowering operation from one [`ParseState`] to another.
///
/// Lowering is intended to be used between standalone [`ParseState`]s that
///   implement [`Default`].
///
/// It is expected that input tokens have already been widened into `EW`
///   (a [`WidenedError`]) by a previous lowering operation,
///   or by an introduction parser.
///
/// The type parameters are:
///   - `S`, the source [`ParseState`] whose objects are to be lowered;
///   - `LS`, the [`ParseState`] being lowered into,
///       whose tokens are the objects of `S`; and
///   - `EW`, the [`WidenedError`] common to both `S` and `LS`.
pub trait Lower<S, LS, EW>
where
    S: ParseState,
    LS: ClosedParseState<Token = S::Object> + Default,
    <S as ParseState>::Object: Token,
    EW: WidenedError<S, LS>,
{
    /// Lower the IR produced by this [`Parser`] into another IR by piping
    ///   the output to a new parser defined by the [`ParseState`] `LS`.
    ///
    /// This parser consumes tokens `S::Token` and produces the IR
    ///   `S::Object`.
    /// If there is some other [`ParseState`] `LS` such that
    ///   `LS::Token == S::Object`
    ///     (that is—the output of this parser is the input to another),
    ///     then this method will wire the two together into a new iterator
    ///       that produces `LS::Object`.
    ///
    /// Visually, we have,
    ///   within the provided closure `f`,
    ///   a [`LowerIter`] that acts as this pipeline:
    ///
    /// ```text
    /// (S::Token) -> (S::Object == LS::Token) -> (LS::Object)
    /// ```
    ///
    /// The new iterator is a [`LowerIter`],
    ///   and scoped to the provided closure `f`.
    /// A push parser,
    ///   rather than pulling tokens from a [`TokenStream`],
    ///   has tokens pushed into it;
    ///     this parser is created automatically for you.
    ///
    /// All errors from the parser `LS` are widened to the error type `EW`,
    ///   which is expected to be an aggregate error type
    ///     (such as a sum type)
    ///     shared by the already-widened `S`-derived input.
    /// Errors are propagated to the caller without lowering.
    ///
    /// The error type `E` is distinct from `EW`:
    ///   it is the error type of the closure `f` and of this method's
    ///   return value,
    ///     and must be constructible from a [`FinalizeError`].
    ///
    /// The lowering parser is given a default context,
    ///   and the context that results from finalizing it is discarded;
    ///     see [`Lower::lower_with_context`] to provide and retrieve it.
    #[inline]
    fn lower<U, E>(
        &mut self,
        f: impl FnOnce(&mut LowerIter<S, Self, LS, EW>) -> Result<U, E>,
    ) -> Result<U, E>
    where
        Self: Iterator<Item = WidenedParsedResult<S, EW>> + Sized,
        E: Diagnostic + From<FinalizeError>,
        <LS as ParseState>::Context: Default,
    {
        self.lower_with_context(<LS as ParseState>::Context::default(), f)
            .map(|(val, _ctx)| val)
    }

    /// Perform a lowering operation between two parsers where the context
    ///   is both received and returned.
    ///
    /// This allows state to be shared among parsers.
    ///
    /// The provided `ctx` is converted into `LS::PubContext` and handed to
    ///   the newly created lowering parser.
    /// If the closure `f` fails,
    ///   its error is returned immediately and the lowering parser is not
    ///   finalized.
    /// Otherwise the lowering parser is finalized and its context is
    ///   returned alongside the value produced by `f`;
    ///     a [`FinalizeError`] is converted into `E`.
    ///
    /// See [`Lower::lower`] and [`ParseState::parse_with_context`] for more
    ///   information.
    #[inline]
    fn lower_with_context<U, E>(
        &mut self,
        ctx: impl Into<LS::PubContext>,
        f: impl FnOnce(&mut LowerIter<S, Self, LS, EW>) -> Result<U, E>,
    ) -> Result<(U, LS::PubContext), E>
    where
        Self: Iterator<Item = WidenedParsedResult<S, EW>> + Sized,
        E: Diagnostic + From<FinalizeError>,
    {
        // The lowering parser has no token stream of its own;
        //   `LowerIter` pushes tokens into it as they are pulled from
        //   `self`.
        let lower = LS::parse_with_context(iter::empty(), ctx.into());
        let mut iter = LowerIter {
            lower,
            toks: self,
            _phantom: PhantomData,
        };
        let val = f(&mut iter)?;

        // TODO: Further propagate `FinalizedParser`
        iter.finalize()
            .map(FinalizedParser::into_context)
            .map(|ctx| (val, ctx))
            .map_err(E::from)
    }
}

// Blanket implementation:
//   every iterator yielding `WidenedParsedResult<S, E>` receives the
//   provided methods of `Lower`.
// The body is empty because all of the trait's methods have default
//   implementations.
impl<S, LS, E, I> Lower<S, LS, E> for I
where
    I: Iterator<Item = WidenedParsedResult<S, E>> + Sized,
    S: ParseState,
    LS: ClosedParseState<Token = S::Object> + Default,
    <S as ParseState>::Object: Token,
    E: WidenedError<S, LS>,
{
}

impl<'a, S, I, LS, E> Iterator for LowerIter<'a, S, I, LS, E>
where
    S: ParseState,
    I: Iterator<Item = WidenedParsedResult<S, E>>,
    LS: ClosedParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
    E: WidenedError<S, LS>,
{
    type Item = WidenedParsedResult<LS, E>;

    /// Produce the next lowered result.
    ///
    /// The token to lower is taken from the first of these sources that
    ///   has one available:
    ///
    ///   1. a lookahead token held by the lowering parser;
    ///   2. the higher-level source iterator; and
    ///   3. an EOF token offered by the lowering parser.
    ///
    /// That token is then pushed to the lowering parser and its result is
    ///   yielded,
    ///     with errors from `LS` widened into `E`.
    /// Errors and incomplete parses from the source are yielded as-is
    ///   without involving the lowering parser.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // TODO: This is a maintenance burden with Parser's Iterator impl;
        //   they can easily get out of sync,
        //     as evidenced by the commit introducing this comment.
        let pulled = if let Some(lookahead) = self.lower.take_lookahead_tok()
        {
            // Lookahead takes precedence over any new input.
            Some(Ok(Parsed::Object(lookahead)))
        } else if let Some(upstream) = self.toks.next() {
            Some(upstream)
        } else {
            // The source is exhausted,
            //   so the lowering parser gets a chance to offer an EOF
            //   token.
            self.lower.eof_tok().map(|eof| Ok(Parsed::Object(eof)))
        };

        // With no token from any source,
        //   iteration is complete.
        let lowered = match pulled? {
            // The previous lowering operation has already widened this
            //   error,
            //     so it only needs to be passed along.
            Err(widened) => Err(widened),

            // Nothing to lower yet.
            Ok(Parsed::Incomplete) => Ok(Parsed::Incomplete),

            // An object from the source is a token for the push parser
            //   `self.lower`.
            Ok(Parsed::Object(obj)) => {
                self.lower.feed_tok(obj).map_err(Into::into)
            }
        };

        Some(lowered)
    }
}

/// A [`Diagnostic`] error type common to both `S` and `LS`.
///
/// This error type must be able to accommodate error variants from all
///   associated lowering operations.
/// The most obvious example of such an error type is an enum acting as a
///   sum type,
///     where the errors of each lowering operation are contained within
///     separate variants.
///
/// This creates a common type that can be propagated through the lowering
///   pipeline all the way to the calling terminal parser,
///     which may then decide what to do
///       (e.g. report errors and permit recovery,
///         or terminate at the first sign of trouble).
///
/// Concretely,
///   this alias requires [`Diagnostic`] along with a [`From`] conversion
///   from the [`ParseError`] of `S` and from the [`ParseError`] of `LS`.
pub trait WidenedError<S: ParseState, LS: ParseState> = Diagnostic
    + From<ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>
    + From<ParseError<<LS as ParseState>::Token, <LS as ParseState>::Error>>;

/// Convenience trait for converting [`From`] a [`ParseError`] for the
///   provided [`ParseState`] `S`.
///
/// This allows specifying this type in terms of only the [`ParseState`]
///   that is almost certainly already utilized,
///     rather than having to either import more types or use the verbose
///     associated type.
///
/// That is,
///   `FromParseError<S>` stands in for
///   `From<ParseError<S::Token, S::Error>>`.
pub trait FromParseError<S: ParseState> =
    From<ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;

/// A [`ParsedResult`](super::ParsedResult) with a [`WidenedError`].
///
/// The success value is a [`Parsed`] object of `S` and the error is `E`.
/// Note that the alias itself places no bounds on `E`;
///   users of the alias are expected to constrain it.
pub type WidenedParsedResult<S, E> =
    Result<Parsed<<S as ParseState>::Object>, E>;

/// A source of a lowering operation.
///
/// This is an [`Iterator`] whose items are the [`ParsedResult`] of a
///   [`ParsedObject`] with token type `T`,
///     object type `O`,
///     and error type `E`.
/// It is the type returned by [`lowerable`].
pub trait LowerSource<T: Token, O: Object, E: Diagnostic + PartialEq> =
    Iterator<Item = ParsedResult<ParsedObject<T, O, E>>>;

/// Adapt the provided [`Iterator`] `iter` for use as the head of a
///   `Lower` pipeline.
///
/// The resulting iterator has the same item type as a [`Parser`] for
///   [`ParsedObject`],
///     which lets a source that is not itself a [`ParseState`] take part
///     in lowering.
/// Each successful item is wrapped in [`Parsed::Object`] and each error is
///   wrapped in [`ParseError::StateError`];
///     items are otherwise untouched.
///
/// See [`ParsedObject`] for more information.
///
/// This is the dual of [`terminal`].
pub fn lowerable<T: Token, O: Object, E: Diagnostic + PartialEq>(
    iter: impl Iterator<Item = Result<O, E>>,
) -> impl LowerSource<T, O, E> {
    iter.map(|item| match item {
        Ok(obj) => Ok(Parsed::Object(obj)),
        Err(err) => Err(ParseError::StateError(err)),
    })
}

/// Mark the end of a lowering pipeline and strip away the parse API.
///
/// This is the dual of [`lowerable`]:
///   it leaves the pipeline so that the final object stream can be
///   processed directly.
///
/// [`Parsed::Incomplete`] items are dropped,
///   [`Parsed::Object`] items are unwrapped into the object they hold,
///   and errors are yielded unchanged,
///     having already been lifted into `E` as expected by [`Lower`].
pub fn terminal<
    S: ParseState,
    E: Diagnostic + From<ParseError<S::Token, S::Error>>,
>(
    iter: impl Iterator<Item = Result<Parsed<S::Object>, E>>,
) -> impl Iterator<Item = Result<S::Object, E>> {
    iter.filter_map(|step| {
        // `Ok(None)` (an incomplete parse) transposes to `None`,
        //   which `filter_map` then discards.
        step.map(|parsed| match parsed {
            Parsed::Incomplete => None,
            Parsed::Object(obj) => Some(obj),
        })
        .transpose()
    })
}

/// Representation of a [`ParseState`] producing some type of [`Object`].
///
/// This is intended to be used not as a value,
///   but as a type for lowering operations.
/// This is useful when a parser does not make use of [`ParseState`] but
///   still wishes to participate in a lowering pipeline.
///
/// `T`, `O`, and `E` become the token, object, and error types of the
///   [`ParseState`] implementation respectively.
#[derive(Debug)]
pub struct ParsedObject<T: Token, O: Object, E: Diagnostic + PartialEq> {
    /// Carries the type parameters only;
    ///   no data is stored.
    _phantom: PhantomData<(T, O, E)>,
}

// All values are equal to one another,
//   since the only field is `PhantomData`.
// NOTE(review): presumably written by hand rather than derived to avoid
//   the additional bounds a derive would place on `T` and `O`; confirm.
impl<T: Token, O: Object, E: Diagnostic + PartialEq> PartialEq
    for ParsedObject<T, O, E>
{
    fn eq(&self, _other: &Self) -> bool {
        true
    }
}

// Marker impl accompanying the `PartialEq` impl for this type,
//   whose `eq` is unconditionally `true`.
impl<T: Token, O: Object, E: Diagnostic + PartialEq> Eq
    for ParsedObject<T, O, E>
{
}

impl<T: Token, O: Object, E: Diagnostic + PartialEq> Default
    for ParsedObject<T, O, E>
{
    fn default() -> Self {
        Self {
            _phantom: Default::default(),
        }
    }
}

impl<T: Token, O: Object, E: Diagnostic + PartialEq> Display
    for ParsedObject<T, O, E>
{
    /// Write a fixed placeholder,
    ///   since there is no data to describe.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<generic data>")
    }
}

// This implementation exists so that `ParsedObject` can supply the
//   associated types that lowering operations need.
// It is not a working parser:
//   both methods below panic if they are ever invoked.
impl<T: Token, O: Object, E: Diagnostic + PartialEq> ParseState
    for ParsedObject<T, O, E>
{
    type Token = T;
    type Object = O;
    type Error = E;

    /// Not supported;
    ///   panics unconditionally.
    fn parse_token(
        self,
        _tok: Self::Token,
        _: NoContext,
    ) -> TransitionResult<Self> {
        unreachable!("ParsedObject must be used for type information only")
    }

    /// Not supported;
    ///   panics unconditionally.
    fn is_accepting(&self, _: &Self::Context) -> bool {
        unreachable!("ParsedObject must be used for type information only")
    }
}

// See `super::test` for more information on why there are so few tests
//   here.
// The robust types are quite effective at demanding coherency in spite of
//   complexity.
#[cfg(test)]
mod test {
    use super::super::{
        parser::test::{StubError, StubObject, StubParseState, StubToken},
        Transition,
    };
    use super::*;

    /// Source parser for the pipeline under test,
    ///   producing each token it receives as its object.
    ///
    /// The two states track whether [`StubToken::Foo`] has been seen,
    ///   which determines whether an EOF token is still offered.
    #[derive(Debug, PartialEq, Eq, Default)]
    enum StubEchoParseState {
        #[default]
        PreEof,
        PostEof,
    }

    impl Display for StubEchoParseState {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(f, "StubEchoParseState")
        }
    }

    impl ParseState for StubEchoParseState {
        type Token = StubToken;
        type Object = StubToken;
        type Error = StubError;

        fn parse_token(
            self,
            tok: Self::Token,
            _: &mut Self::Context,
        ) -> TransitionResult<Self> {
            match tok {
                // `Foo` doubles as the EOF token (see `eof_tok`),
                //   so seeing it moves us past EOF.
                StubToken::Foo => Transition(Self::PostEof).ok(tok),
                _ => Transition(self).ok(tok),
            }
        }

        fn is_accepting(&self, _: &Self::Context) -> bool {
            true
        }

        // Only offer an EOF token while `Foo` has not yet been seen,
        //   otherwise one would be offered indefinitely.
        fn eof_tok(&self, _ctx: &Self::Context) -> Option<Self::Token> {
            matches!(self, Self::PreEof).then_some(StubToken::Foo)
        }
    }

    // Similar to tests in parse::parser::test.
    #[test]
    fn can_emit_object_with_lookahead_and_eof_for_lower_iter() {
        let given = 27; // some value
        let toks = vec![StubToken::YieldWithLookahead(given)];

        // `Lower::lower` requires that the closure's error type be
        //   constructible from a `FinalizeError`.
        impl From<FinalizeError> for StubError {
            fn from(_: FinalizeError) -> Self {
                unreachable!("not expected to be used by this test")
            }
        }

        Lower::<StubEchoParseState, StubParseState, _>::lower::<_, StubError>(
            &mut StubEchoParseState::parse(toks.into_iter()),
            |sut| {
                // We have a single token,
                //   and this consumes it,
                //   but it should introduce a lookahead token.
                assert_eq!(
                    sut.next(),
                    Some(Ok(Parsed::Object(StubObject::FromYield(given))))
                );

                // The token of lookahead should still be available to the parser,
                //   and this should consume it.
                assert_eq!(
                    sut.next(),
                    Some(Ok(Parsed::Object(StubObject::FromLookahead(given)))),
                    "lookahead token did not take effect"
                );

                // Prior to end,
                //   we give parsers the opportunity to emit an EOF token.
                assert_eq!(
                    sut.next(),
                    Some(Ok(Parsed::Object(StubObject::FromFoo))),
                    "EOF token was not emitted",
                );

                // And now this should be the end,
                //   provided that the lookahead token was actually consumed and not
                //   copied and retained.
                assert_eq!(
                    sut.next(),
                    None,
                    "expected end of both input stream and lookahead"
                );

                Ok(Ok::<(), StubError>(()))
            },
        )
        .unwrap()
        .unwrap();
    }
}