tame/tamer/src/parse.rs

// Basic streaming parsing framework
//
//  Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Basic streaming parser framework for lowering operations.
//!
//! _TODO: Some proper docs and examples!_

use crate::diagnose::{Annotate, AnnotatedSpan, Diagnostic};
use crate::iter::{TripIter, TrippableIterator};
use crate::span::{Span, UNKNOWN_SPAN};
use std::fmt::Debug;
use std::hint::unreachable_unchecked;
use std::iter::{self, Empty};
use std::mem::take;
use std::ops::{ControlFlow, Deref, DerefMut, FromResidual, Try};
use std::{convert::Infallible, error::Error, fmt::Display};

/// Result of applying a [`Token`] to a [`ParseState`],
///   with any error having been wrapped in a [`ParseError`].
///
/// This is [`ParseResult`] with its success type fixed to a [`Parsed`]
///   holding the [`ParseState::Object`] of `S`.
pub type ParsedResult<S> = ParseResult<S, Parsed<<S as ParseState>::Object>>;

/// Result of some non-parsing operation on a [`Parser`],
///   with any error having been wrapped in a [`ParseError`].
///
/// `T` is the success type;
///   the error type is a [`ParseError`] parameterized over the
///   [`ParseState::Token`] and [`ParseState::Error`] of `S`.
pub type ParseResult<S, T> =
    Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;

/// A single datum from a streaming IR with an associated [`Span`].
///
/// A token may be a lexeme with associated data,
///   or a more structured object having been lowered from other IRs.
///
/// Every token must additionally be printable ([`Display`] and [`Debug`])
///   and comparable ([`PartialEq`]).
pub trait Token: Display + Debug + PartialEq {
    /// Retrieve the [`Span`] representing the source location of the token.
    fn span(&self) -> Span;
}

impl<T: Token> From<T> for Span {
    fn from(tok: T) -> Self {
        tok.span()
    }
}

/// An IR object produced by a lowering operation on one or more [`Token`]s.
///
/// Note that an [`Object`] may also be a [`Token`] if it will be in turn
///   fed to another [`Parser`] for lowering.
///
/// This trait exists to disambiguate an otherwise unbounded type for
///   [`From`] conversions,
///     used in the [`Transition`] API to provide greater flexibility.
/// It declares no methods of its own;
///   it is a marker requiring only [`Debug`] and [`PartialEq`].
pub trait Object: Debug + PartialEq {}

/// An infallible [`Token`] stream.
///
/// This is a trait alias for any [`Iterator`] yielding `T` directly.
///
/// If the token stream originates from an operation that could potentially
///   fail and ought to be propagated,
///     use [`TokenResultStream`].
///
/// The name "stream" in place of "iterator" is intended to convey that this
///   type is expected to be processed in real-time as a stream,
///     not read into memory.
pub trait TokenStream<T: Token> = Iterator<Item = T>;

/// A [`Token`] stream that may encounter errors during parsing.
///
/// This is a trait alias for any [`Iterator`] yielding `Result<T, E>`.
///
/// If the stream cannot fail,
///   consider using [`TokenStream`].
pub trait TokenResultStream<T: Token, E: Error> = Iterator<Item = Result<T, E>>;

/// A [`ParseState`] capable of being automatically stitched together with
///   a parent [`ParseState`] `SP` to create a composite parser.
///
/// Conceptually,
///   this can be visualized as combining the state machines of multiple
///   parsers into one larger state machine.
///
/// The term _state stitching_ refers to a particular pattern able to be
///   performed automatically by this parsing framework;
///     it is not necessary for parser composition,
///       provided that you perform the necessary wiring yourself in absence
///       of state stitching.
///
/// A [`ParseState`] is stitchable with `SP` when:
///
///   1. Both consume the same [`ParseState::Token`] type;
///   2. Its [`ParseState::Object`] can be converted [`Into`] the
///        [`ParseState::Object`] of `SP`; and
///   3. Its [`ParseState::Error`] can be converted [`Into`] the
///        [`ParseState::Error`] of `SP`.
pub trait StitchableParseState<SP: ParseState> = ParseState
where
    SP: ParseState<Token = <Self as ParseState>::Token>,
    <Self as ParseState>::Object: Into<<SP as ParseState>::Object>,
    <Self as ParseState>::Error: Into<<SP as ParseState>::Error>;

/// A parsing automaton.
///
/// These states are utilized by a [`Parser`].
///
/// A [`ParseState`] is also responsible for storing data about the
///   accepted input,
///     and handling appropriate type conversions into the final type.
/// That is---an
///   automaton may store metadata that is subsequently emitted once an
///   accepting state has been reached.
/// Whatever the underlying automaton,
///   a `(state, token, context)` triple must uniquely determine the next
///   parser action.
pub trait ParseState: Default + PartialEq + Eq + Debug {
    /// Input tokens to the parser.
    type Token: Token;

    /// Objects produced by a parser utilizing these states.
    type Object: Object;

    /// Errors specific to this set of states.
    type Error: Debug + Diagnostic + PartialEq;

    /// Object provided to parser alongside each token.
    ///
    /// This may be used in situations where Rust/LLVM are unable to
    ///   optimize away moves of interior data associated with the
    ///   otherwise-immutable [`ParseState`].
    ///
    /// Defaults to [`EmptyContext`] if not otherwise specified.
    type Context: Debug = EmptyContext;

    /// Construct a parser.
    ///
    /// Whether this method is helpful or provides any clarity depends on
    ///   the context and the types that are able to be inferred.
    ///
    /// This requires that [`ParseState::Context`] implement [`Default`];
    ///   see [`ParseState::parse_with_context`] otherwise.
    fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
    where
        Self::Context: Default,
    {
        Parser::from(toks)
    }

    /// Construct a parser with a non-default [`ParseState::Context`].
    ///
    /// This is useful in two ways:
    ///
    ///   1. To allow for parsing using a context that does not implement
    ///        [`Default`],
    ///          or whose default is not sufficient; and
    ///   2. To re-use a context from a previous [`Parser`].
    ///
    /// If neither of these apply to your situation,
    ///   consider [`ParseState::parse`] instead.
    ///
    /// To retrieve a context from a parser for re-use,
    ///   see [`Parser::finalize`].
    fn parse_with_context<I: TokenStream<Self::Token>>(
        toks: I,
        ctx: Self::Context,
    ) -> Parser<Self, I> {
        Parser::from((toks, ctx))
    }

    /// Parse a single [`Token`] and optionally perform a state transition.
    ///
    /// The current state is represented by `self`.
    /// The result of a parsing operation is a state transition with
    ///   associated [`ParseStatus`] data.
    ///
    /// Note that `self` is owned,
    ///   for a couple primary reasons:
    ///
    ///   1. This forces the parser to explicitly consider and document all
    ///        state transitions,
    ///          rather than potentially missing unintended behavior through
    ///          implicit behavior; and
    ///   2. It allows for more natural functional composition of state,
    ///        which in turn makes it easier to compose parsers
    ///          (which conceptually involves stitching together state
    ///            machines).
    ///
    /// Since a [`ParseState`] produces a new version of itself with each
    ///   invocation,
    ///     it is functionally pure.
    /// Generally,
    ///   Rust/LLVM are able to optimize moves into direct assignments.
    /// However,
    ///   there are circumstances where this is _not_ the case,
    ///   in which case [`Context`] can be used to provide a mutable context
    ///     owned by the caller (e.g. [`Parser`]) to store additional
    ///     information that is not subject to Rust's move semantics.
    /// If this is not necessary,
    ///   see [`NoContext`].
    fn parse_token(
        self,
        tok: Self::Token,
        ctx: &mut Self::Context,
    ) -> TransitionResult<Self>;

    /// Whether the current state represents an accepting state.
    ///
    /// An accepting state represents a valid state to stop parsing.
    /// If parsing stops at a state that is _not_ accepting,
    ///   then the [`TokenStream`] has ended unexpectedly and should produce
    ///   a [`ParseError::UnexpectedEof`].
    ///
    /// It makes sense for there to exist multiple accepting states for a
    ///   parser.
    /// For example:
    ///   A parser that parses a list of attributes may be used to parse one
    ///   or more attributes,
    ///     or the entire list of attributes.
    ///   It is acceptable to attempt to parse just one of those attributes,
    ///     or it is acceptable to parse all the way until the end.
    fn is_accepting(&self) -> bool;

    /// Delegate parsing from a compatible, stitched [`ParseState`]~`SP`.
    ///
    /// This helps to combine two state machines that speak the same input
    ///   language
    ///   (share the same [`Self::Token`]),
    ///     handling the boilerplate of delegating [`Self::Token`] from a
    ///     parent state~`SP` to `Self`.
    ///
    /// Token delegation happens after [`Self`] has been entered from a
    ///   parent [`ParseState`] context~`SP`,
    ///     so stitching the start and accepting states must happen elsewhere
    ///     (for now).
    ///
    /// This assumes that no lookahead token from [`ParseStatus::Dead`] will
    ///   need to be handled by the parent state~`SP`.
    /// To handle a token of lookahead,
    ///   use [`Self::delegate_lookahead`] instead.
    ///
    /// The arguments are:
    ///
    ///   - `context`,
    ///       from which the [`Self::Context`] passed to
    ///       [`Self::parse_token`] is borrowed via [`AsMut`];
    ///   - `tok`,
    ///       the token to be parsed by `Self`; and
    ///   - `into`,
    ///       which maps the state of `Self` resulting from the parse into
    ///       the parent state~`SP`.
    ///
    /// In the result,
    ///   [`ParseStatus::Incomplete`] and [`ParseStatus::Dead`] are passed
    ///   through as-is,
    ///     while objects and errors are converted into those of~`SP`
    ///     using [`Into`].
    ///
    /// _TODO: More documentation once this is finalized._
    fn delegate<SP, C>(
        self,
        mut context: C,
        tok: <Self as ParseState>::Token,
        into: impl FnOnce(Self) -> SP,
    ) -> TransitionResult<SP>
    where
        Self: StitchableParseState<SP>,
        C: AsMut<<Self as ParseState>::Context>,
    {
        use ParseStatus::{Dead, Incomplete, Object as Obj};

        let (Transition(newst), result) =
            self.parse_token(tok, context.as_mut()).into();

        // This does not use `delegate_lookahead` so that we can have
        //   `into: impl FnOnce` instead of `Fn`.
        Transition(into(newst)).result(match result {
            Ok(Incomplete) => Ok(Incomplete),
            Ok(Obj(obj)) => Ok(Obj(obj.into())),
            Ok(Dead(tok)) => Ok(Dead(tok)),
            Err(e) => Err(e.into()),
        })
    }

    /// Delegate parsing from a compatible, stitched [`ParseState`]~`SP` with
    ///   support for a lookahead token.
    ///
    /// This does the same thing as [`Self::delegate`],
    ///   but allows for the handling of a lookahead token from [`Self`]
    ///   rather than simply proxying [`ParseStatus::Dead`].
    ///
    /// The arguments are the same as those of [`Self::delegate`].
    /// The result is one of:
    ///
    ///   - [`ControlFlow::Break`] with the transition for the parent
    ///       state~`SP` when `Self` yields [`ParseStatus::Incomplete`],
    ///         an object,
    ///         or an error; or
    ///   - [`ControlFlow::Continue`] with the new state of `Self`,
    ///       the lookahead token,
    ///       and the provided `context` when `Self` yields
    ///       [`ParseStatus::Dead`],
    ///         in which case `into` is never invoked and the caller is
    ///         expected to decide what to do with the token.
    ///
    /// _TODO: More documentation once this is finalized._
    fn delegate_lookahead<SP, C>(
        self,
        mut context: C,
        tok: <Self as ParseState>::Token,
        into: impl FnOnce(Self) -> SP,
    ) -> ControlFlow<TransitionResult<SP>, (Self, <Self as ParseState>::Token, C)>
    where
        Self: StitchableParseState<SP>,
        C: AsMut<<Self as ParseState>::Context>,
    {
        use ControlFlow::*;
        use ParseStatus::{Dead, Incomplete, Object as Obj};

        // NB: Rust/LLVM are generally able to elide these moves into direct
        //   assignments,
        //     but sometimes this does not work
        //       (e.g. XIRF's use of `ArrayVec`).
        // If your [`ParseState`] has a lot of `memcpy`s or other
        //   performance issues,
        //     move heavy objects into `context`.
        let (Transition(newst), result) =
            self.parse_token(tok, context.as_mut()).into();

        match result {
            Ok(Incomplete) => Break(Transition(into(newst)).incomplete()),
            Ok(Obj(obj)) => Break(Transition(into(newst)).ok(obj.into())),
            Ok(Dead(tok)) => Continue((newst, tok, context)),
            Err(e) => Break(Transition(into(newst)).err(e)),
        }
    }
}

/// Empty [`Context`] for [`ParseState`]s with pure functional
///   implementations with no mutable state.
///
/// Using this value means that a [`ParseState`] does not require a
///   context.
/// All [`Context`]s implement [`AsMut<EmptyContext>`](AsMut),
///   and so all pure [`ParseState`]s have contexts compatible with every
///   other parser for composition
///     (provided that the other invariants in [`StitchableParseState`] are
///       met).
///
/// This can be clearly represented in function signatures using
///   [`NoContext`].
#[derive(Debug, PartialEq, Eq, Default)]
pub struct EmptyContext;

/// An [`EmptyContext`] is trivially its own mutable context,
///   allowing it to be used wherever `AsMut<EmptyContext>` is expected.
impl AsMut<EmptyContext> for EmptyContext {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}

/// A [`ParseState`] does not require any mutable [`Context`].
///
/// A [`ParseState`] using this context is pure
///   (has no mutable state),
///     returning a new version of itself on each state change.
///
/// This type is intended to be self-documenting:
///   `_: NoContext` is nicer to readers than `_: &mut EmptyContext`.
///
/// See [`EmptyContext`] for more information.
pub type NoContext<'a> = &'a mut EmptyContext;

/// Mutable context for [`ParseState`].
///
/// [`ParseState`]s are immutable and pure---they
///   are invoked via [`ParseState::parse_token`] and return a new version
///   of themselves representing their new state.
/// Rust/LLVM are generally able to elide intermediate values and moves,
///   optimizing these parsers away into assignments.
///
/// However,
///   there are circumstances where moves may not be elided and may retain
///   their `memcpy` equivalents.
/// To work around this,
///   [`ParseState::parse_token`] accepts a mutable [`Context`] reference
///   which is held by the parent [`Parser`],
///     which can be mutated in-place without worrying about Rust's move
///     semantics.
///
/// Plainly: you should only use this if you have to.
/// This was added because certain parsers may be invoked millions of times
///   for each individual token in systems with many source packages,
///     which may otherwise result in millions of `memcpy`s.
///
/// When composing two [`ParseState`]s `A<B, C>`,
///   a [`Context<B, C>`](Context) must be contravariant over `B` and~`C`.
/// Concretely,
///   this means that [`AsMut<B::Context>`](AsMut) and
///   [`AsMut<C::Context>`](AsMut) must be implemented for `A::Context`.
/// This almost certainly means that `A::Context` is a product type.
/// Consequently,
///   a single [`Parser`] is able to hold a composite [`Context`] in a
///   single memory location.
///
/// [`Context<T>`](Context) implements [`Deref<T>`](Deref) for convenience.
///
/// The first field holds the wrapped value of type `T`;
///   the second is an [`EmptyContext`] that exists so that a mutable
///   reference to one can be handed out via
///   [`AsMut<EmptyContext>`](AsMut).
///
/// If your [`ParseState`] does not require a mutable [`Context`],
///   see [`NoContext`].
#[derive(Debug, Default)]
pub struct Context<T: Debug + Default>(T, EmptyContext);

impl<T: Debug + Default> AsMut<EmptyContext> for Context<T> {
    fn as_mut(&mut self) -> &mut EmptyContext {
        &mut self.1
    }
}

impl<T: Debug + Default> Deref for Context<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T: Debug + Default> DerefMut for Context<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<T: Debug + Default> From<T> for Context<T> {
    fn from(x: T) -> Self {
        Context(x, EmptyContext)
    }
}

/// Result of applying a [`Token`] to a [`ParseState`].
///
/// On success this holds a [`ParseStatus`];
///   on failure it holds the [`ParseState::Error`] of `S` directly,
///     _not_ wrapped in a [`ParseError`].
///
/// This is used by [`ParseState::parse_token`];
///   see that function for rationale.
pub type ParseStateResult<S> = Result<ParseStatus<S>, <S as ParseState>::Error>;

/// A state transition with associated data.
///
/// Conceptually,
///   imagine the act of a state transition producing data.
/// The first field is the [`Transition`] to the next state and the second
///   is the [`ParseStateResult`] produced while making that transition.
///
/// See [`Transition`] for convenience methods for producing this tuple.
#[derive(Debug, PartialEq)]
pub struct TransitionResult<S: ParseState>(
    pub Transition<S>,
    pub ParseStateResult<S>,
);

/// Denotes a state transition.
///
/// This newtype was created to produce clear, self-documenting code;
///   parsers can get confusing to read with all of the types involved,
///     so this provides a mental synchronization point.
///
/// This also provides some convenience methods to help remove boilerplate
///   and further improve code clarity.
#[derive(Debug, PartialEq, Eq)]
pub struct Transition<S: ParseState>(pub S);

impl<S: ParseState> Transition<S> {
    /// A state transition with corresponding data.
    ///
    /// This allows [`ParseState::parse_token`] to emit a parsed object and
    ///   corresponds to [`ParseStatus::Object`].
    pub fn ok<T>(self, obj: T) -> TransitionResult<S>
    where
        T: Into<ParseStatus<S>>,
    {
        TransitionResult(self, Ok(obj.into()))
    }

    /// A transition with corresponding error.
    ///
    /// This indicates a parsing failure.
    /// The state ought to be suitable for error recovery.
    pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
        TransitionResult(self, Err(err.into()))
    }

    /// A state transition with corresponding [`Result`].
    ///
    /// This translates the provided [`Result`] in a manner equivalent to
    ///   [`Transition::ok`] and [`Transition::err`].
    pub fn result<T, E>(self, result: Result<T, E>) -> TransitionResult<S>
    where
        T: Into<ParseStatus<S>>,
        E: Into<S::Error>,
    {
        TransitionResult(self, result.map(Into::into).map_err(Into::into))
    }

    /// A state transition indicating that more data is needed before an
    ///   object can be emitted.
    ///
    /// This corresponds to [`ParseStatus::Incomplete`].
    pub fn incomplete(self) -> TransitionResult<S> {
        TransitionResult(self, Ok(ParseStatus::Incomplete))
    }

    /// A dead state transition.
    ///
    /// This corresponds to [`ParseStatus::Dead`],
    ///   and a calling parser should use the provided [`Token`] as
    ///   lookahead.
    pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
        TransitionResult(self, Ok(ParseStatus::Dead(tok)))
    }
}

impl<S: ParseState> Into<(Transition<S>, ParseStateResult<S>)>
    for TransitionResult<S>
{
    fn into(self) -> (Transition<S>, ParseStateResult<S>) {
        (self.0, self.1)
    }
}

/// Allow the `?` operator to be applied to a [`TransitionResult`].
///
/// A transition carrying [`Err`] short-circuits;
///   one carrying [`Ok`] continues.
/// Both the output and the residual are the `(transition, result)` pair.
impl<S: ParseState> Try for TransitionResult<S> {
    type Output = (Transition<S>, ParseStateResult<S>);
    type Residual = (Transition<S>, ParseStateResult<S>);

    fn from_output(output: Self::Output) -> Self {
        let (transition, status) = output;
        Self(transition, status)
    }

    fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
        let Self(transition, status) = self;

        // The pair is forwarded untouched either way;
        //   only the direction of control flow depends on the result.
        if status.is_ok() {
            ControlFlow::Continue((transition, status))
        } else {
            ControlFlow::Break((transition, status))
        }
    }
}

/// Rebuild a [`TransitionResult`] from the `(transition, result)` pair
///   produced when `?` short-circuits on another [`TransitionResult`].
impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
    for TransitionResult<S>
{
    fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
        let (transition, status) = residual;
        Self(transition, status)
    }
}

/// Allow `?` to be applied to a [`Result`] whose error is itself a
///   [`TransitionResult`],
///     in which case the transition held by [`Err`] is returned as-is.
impl<S: ParseState> FromResidual<Result<Infallible, TransitionResult<S>>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: Result<Infallible, TransitionResult<S>>,
    ) -> Self {
        match residual {
            Err(e) => e,
            // `Infallible` has no values,
            //   so an empty match proves to the compiler that this arm
            //   can never be reached without resorting to `unsafe`.
            Ok(never) => match never {},
        }
    }
}

/// Allow `?` to be applied to a [`ControlFlow`] that breaks with a
///   [`TransitionResult`],
///     in which case the transition held by [`ControlFlow::Break`] is
///     returned as-is.
impl<S: ParseState> FromResidual<ControlFlow<TransitionResult<S>, Infallible>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: ControlFlow<TransitionResult<S>, Infallible>,
    ) -> Self {
        match residual {
            ControlFlow::Break(result) => result,
            // `Infallible` has no values,
            //   so an empty match proves to the compiler that this arm
            //   can never be reached without resorting to `unsafe`.
            ControlFlow::Continue(never) => match never {},
        }
    }
}

/// An object able to be used as data for a state [`Transition`].
///
/// This flips the usual order of things:
///   rather than using a method of [`Transition`] to provide data,
///     this starts with the data and produces a transition from it.
/// This is sometimes necessary to satisfy ownership/borrowing rules.
///
/// This trait simply removes boilerplate associated with storing
///   intermediate values and translating into the resulting type.
pub trait Transitionable<S: ParseState> {
    /// Perform a state transition to `S` using [`Self`] as the associated
    ///   data.
    ///
    /// `to` is the state being transitioned to,
    ///   and the result pairs that state with the data held by [`Self`].
    ///
    /// This may be necessary to satisfy ownership/borrowing rules when
    ///   state data from `S` is used to compute [`Self`].
    fn transition(self, to: S) -> TransitionResult<S>;
}

/// A [`Result`] holding a [`ParseStatus`] transitions with that status
///   on success,
///     or with its error converted into the error type of `S` on failure.
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S> {
        let next = Transition(to);

        match self {
            Ok(status) => next.ok(status),
            Err(err) => next.err(err),
        }
    }
}

/// A [`Result`] holding no data transitions with
///   [`ParseStatus::Incomplete`] on success,
///     or with its error converted into the error type of `S` on failure.
impl<S, E> Transitionable<S> for Result<(), E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S> {
        let next = Transition(to);

        match self {
            Ok(()) => next.incomplete(),
            Err(err) => next.err(err),
        }
    }
}

/// A streaming parser defined by a [`ParseState`] with exclusive
///   mutable access to an underlying [`TokenStream`].
///
/// This parser handles operations that are common among all types of
///   parsers,
///     such that specialized parsers need only implement logic that is
///     unique to their operation.
/// This also simplifies combinators,
///   since there is more uniformity among distinct parser types.
///
/// After you have finished with a parser,
///   if you have not consumed the entire iterator,
///   call [`finalize`](Parser::finalize) to ensure that parsing has
///     completed in an accepting state.
#[derive(Debug, PartialEq, Eq)]
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
    /// Source of tokens to be parsed.
    toks: I,

    /// Current state of the parser,
    ///   replaced after each token is parsed.
    state: S,

    /// Span of the most recently parsed token,
    ///   used for error reporting if the stream ends unexpectedly.
    last_span: Span,

    /// Mutable context provided to the [`ParseState`] alongside each
    ///   token,
    ///     yielded to the caller by [`Parser::finalize`].
    ctx: S::Context,
}

impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
    /// Indicate that no further parsing will take place using this parser,
    ///   retrieve any final aggregate state (the context),
    ///   and [`drop`] it.
    ///
    /// Invoking the method is equivalent to stating that the stream has
    ///   ended,
    ///     since the parser will have no later opportunity to continue
    ///     parsing.
    /// Consequently,
    ///   the caller should expect [`ParseError::UnexpectedEof`] if the
    ///   parser is not in an accepting state.
    ///
    /// To re-use the context returned by this method,
    ///   see [`ParseState::parse_with_context`].
    /// Note that whether the context is permitted to be reused,
    ///   or is useful independently to the caller,
    ///   is a decision made by the [`ParseState`].
    pub fn finalize(
        self,
    ) -> Result<S::Context, (Self, ParseError<S::Token, S::Error>)> {
        match self.assert_accepting() {
            Ok(()) => Ok(self.ctx),
            Err(err) => Err((self, err)),
        }
    }

    /// Return [`Ok`] if the parser is in an accepting state,
    ///   otherwise [`Err`] with [`ParseError::UnexpectedEof`].
    ///
    /// See [`finalize`](Self::finalize) for the public-facing method.
    fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
        if self.state.is_accepting() {
            Ok(())
        } else {
            let endpoints = self.last_span.endpoints();
            Err(ParseError::UnexpectedEof(
                endpoints.1.unwrap_or(endpoints.0),
            ))
        }
    }

    /// Feed an input token to the parser.
    ///
    /// This _pushes_ data into the parser,
    ///   rather than the typical pull system used by [`Parser`]'s
    ///   [`Iterator`] implementation.
    /// The pull system also uses this method to provided data to the
    ///   parser.
    ///
    /// This method is intentionally private,
    ///   since push parsers are currently supported only internally.
    /// The only thing preventing this being public is formalization and a
    ///   commitment to maintain it.
    fn feed_tok(&mut self, tok: S::Token) -> ParsedResult<S> {
        // Store the most recently encountered Span for error
        //   reporting in case we encounter an EOF.
        self.last_span = tok.span();

        let result;
        TransitionResult(Transition(self.state), result) =
            take(&mut self.state).parse_token(tok, &mut self.ctx);

        use ParseStatus::*;
        match result {
            // Nothing handled this dead state,
            //   and we cannot discard a lookahead token,
            //   so we have no choice but to produce an error.
            Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken(invalid)),

            Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
            Err(e) => Err(e.into()),
        }
    }

    /// Lower the IR produced by this [`Parser`] into another IR by piping
    ///   the output to a new parser defined by the [`ParseState`] `LS`.
    ///
    /// This parser consumes tokens `S::Token` and produces the IR
    ///   `S::Output`.
    /// If there is some other [`ParseState`] `LS` such that
    ///   `LS::Token == S::Output`
    ///     (that is—the output of this parser is the input to another),
    ///     then this method will wire the two together into a new iterator
    ///       that produces `LS::Output`.
    ///
    /// Visually, we have,
    ///   within the provided closure `f`,
    ///   a [`LowerIter`] that acts as this pipeline:
    ///
    /// ```text
    /// (S::Token) -> (S::Output == LS::Token) -> (LS::Output)
    /// ```
    ///
    /// The new iterator is a [`LowerIter`],
    ///   and scoped to the provided closure `f`.
    /// The outer [`Result`] of `Self`'s [`ParsedResult`] is stripped by
    ///   a [`TripIter`] before being provided as input to a new push
    ///   [`Parser`] utilizing `LS`.
    /// A push parser,
    ///   rather than pulling tokens from a [`TokenStream`],
    ///   has tokens pushed into it;
    ///     this parser is created automatically for you.
    ///
    /// _TODO_: There's no way to access the inner parser for error recovery
    ///   after tripping the [`TripIter`].
    /// Consequently,
    ///   this API (likely the return type) will change.
    #[inline]
    pub fn lower_while_ok<LS, U>(
        &mut self,
        f: impl FnOnce(&mut LowerIter<S, I, LS>) -> U,
    ) -> Result<U, ParseError<S::Token, S::Error>>
    where
        LS: ParseState<Token = S::Object>,
        <S as ParseState>::Object: Token,
        <LS as ParseState>::Context: Default,
    {
        self.while_ok(|toks| {
            // TODO: This parser is not accessible after error recovery!
            let lower = LS::parse(iter::empty());
            f(&mut LowerIter { lower, toks })
        })
    }
}

/// An IR lowering operation that pipes the output of one [`Parser`] to the
///   input of another.
///
/// `S` is the [`ParseState`] of the source parser and `LS` that of the
///   lowering parser,
///     whose tokens are the objects produced by `S`.
///
/// This is produced by [`Parser::lower_while_ok`].
pub struct LowerIter<'a, 'b, S, I, LS>
where
    S: ParseState,
    I: TokenStream<S::Token>,
    LS: ParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
{
    /// A push [`Parser`].
    ///
    /// Its own token stream is [`Empty`] because tokens are fed to it
    ///   from `toks` rather than being pulled.
    lower: Parser<LS, Empty<LS::Token>>,

    /// Source tokens from higher-level [`Parser`],
    ///   with the outer [`Result`] having been stripped by a [`TripIter`].
    toks: &'a mut TripIter<
        'b,
        Parser<S, I>,
        Parsed<S::Object>,
        ParseError<S::Token, S::Error>,
    >,
}

impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
where
    S: ParseState,
    I: TokenStream<S::Token>,
    LS: ParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
{
    type Item = ParsedResult<LS>;

    /// Pull a token through the higher-level [`Parser`],
    ///   push it to the lowering parser,
    ///   and yield the resulting [`ParsedResult`].
    ///
    /// If the higher-level parser has not yet produced an object,
    ///   [`Parsed::Incomplete`] is yielded without involving the lowering
    ///   parser.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let parsed = self.toks.next()?;

        Some(match parsed {
            Parsed::Object(obj) => self.lower.feed_tok(obj),
            Parsed::Incomplete => Ok(Parsed::Incomplete),
        })
    }
}

impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
    type Item = ParsedResult<S>;

    /// Pull the next [`Token`] from the underlying [`TokenStream`],
    ///   if available,
    ///   and parse it according to the current [`ParseState`].
    ///
    /// If the underlying [`TokenStream`] yields [`None`],
    ///   then the [`ParseState`] must be in an accepting state;
    ///     otherwise, [`ParseError::UnexpectedEof`] will occur.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        match self.toks.next() {
            Some(tok) => Some(self.feed_tok(tok)),

            // End of stream:
            //   iteration ends quietly only if the parser is accepting.
            None => self.assert_accepting().err().map(Err),
        }
    }
}

/// Common parsing errors produced by [`Parser`].
///
/// These errors are common enough that they are handled in a common way,
///   such that individual parsers needn't check for these situations
///   themselves.
///
/// Having a common type also allows combinators to handle error types in a
///   consistent way when composing parsers.
///
/// Parsers may return their own unique errors via the
///   [`StateError`][ParseError::StateError] variant.
#[derive(Debug, PartialEq)]
pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
    /// Token stream ended unexpectedly.
    ///
    /// This error means that the parser was expecting more input before
    ///   reaching an accepting state.
    /// This could represent a truncated file,
    ///   a malformed stream,
    ///   or maybe just a user that's not done typing yet
    ///     (e.g. in the case of an LSP implementation).
    ///
    /// When produced by [`Parser`],
    ///   the [`Span`] is derived from that of the most recently parsed
    ///   token.
    ///
    /// If no span is available,
    ///   then parsing has not even had the chance to begin.
    /// If this parser follows another,
    ///   then the combinator ought to substitute a missing span with
    ///   whatever span preceded this invocation.
    UnexpectedEof(Span),

    /// The parser reached an unhandled dead state.
    ///
    /// Once a parser returns [`ParseStatus::Dead`],
    ///   a parent context must use that provided token as a lookahead.
    /// If that does not occur,
    ///   [`Parser`] produces this error.
    ///
    /// In the future,
    ///   it may be desirable to be able to query [`ParseState`] for what
    ///   tokens are acceptable at this point,
    ///     to provide better error messages.
    UnexpectedToken(T),

    /// A parser-specific error associated with an inner
    ///   [`ParseState`].
    StateError(E),
}

impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
    pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
        self,
    ) -> ParseError<T, EB>
    where
        EA: Into<EB>,
    {
        use ParseError::*;
        match self {
            UnexpectedEof(x) => UnexpectedEof(x),
            UnexpectedToken(x) => UnexpectedToken(x),
            StateError(e) => StateError(e.into()),
        }
    }
}

impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
    fn from(e: E) -> Self {
        Self::StateError(e)
    }
}

impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
    /// Write a brief, span-free summary of the error.
    ///
    /// The common variants produce a fixed message,
    ///   whereas [`ParseError::StateError`] defers to the inner error's
    ///   own [`Display`] implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::StateError(inner) => Display::fmt(inner, f),
            Self::UnexpectedEof(_) => f.write_str("unexpected end of input"),
            Self::UnexpectedToken(_) => f.write_str("unexpected input"),
        }
    }
}

impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
    /// The underlying cause of this error,
    ///   which exists only for [`ParseError::StateError`]
    ///     (the inner parser-specific error).
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::StateError(inner) => Some(inner),
            Self::UnexpectedEof(_) | Self::UnexpectedToken(_) => None,
        }
    }
}

impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
    for ParseError<T, E>
{
    fn describe(&self) -> Vec<AnnotatedSpan> {
        use ParseError::*;

        match self {
            // TODO: More information from the underlying parser on what was expected.
            UnexpectedEof(span) => {
                span.error("unexpected end of input here").into()
            }

            UnexpectedToken(tok) => {
                tok.span().error("this was unexpected").into()
            }

            // TODO: Is there any additional useful context we can augment
            //   this with?
            StateError(e) => e.describe(),
        }
    }
}

impl<S, I> From<I> for Parser<S, I>
where
    S: ParseState,
    I: TokenStream<S::Token>,
    <S as ParseState>::Context: Default,
{
    /// Create a new parser with a default context.
    ///
    /// The parser begins in the default state with the default value of
    ///   its [`ParseState::Context`],
    ///     and with no known span for the most recently seen token.
    ///
    /// This can only be used if the associated [`ParseState::Context`]
    ///   implements [`Default`];
    ///     otherwise,
    ///       consider instantiating from a `(TokenStream, Context)` pair.
    /// See also [`ParseState::parse`] and
    ///   [`ParseState::parse_with_context`].
    fn from(toks: I) -> Self {
        Self {
            toks,
            state: Default::default(),
            last_span: UNKNOWN_SPAN,
            ctx: Default::default(),
        }
    }
}

impl<S, I, C> From<(I, C)> for Parser<S, I>
where
    S: ParseState<Context = C>,
    I: TokenStream<S::Token>,
{
    /// Create a new parser from a token stream paired with the context
    ///   that the parser is to use.
    ///
    /// The parser begins in the default state and with no known span for
    ///   the most recently seen token.
    ///
    /// For more information,
    ///   see [`ParseState::parse_with_context`].
    ///
    /// See also [`ParseState::parse`].
    fn from(pair: (I, C)) -> Self {
        let (toks, ctx) = pair;

        Self {
            toks,
            state: Default::default(),
            last_span: UNKNOWN_SPAN,
            ctx,
        }
    }
}

/// Result of a parsing operation,
///   as produced by a [`ParseState`].
///
/// This differs from [`Parsed`] in that it is able to represent a dead
///   state via [`ParseStatus::Dead`].
#[derive(Debug, PartialEq, Eq)]
pub enum ParseStatus<S: ParseState> {
    /// Additional tokens are needed to complete parsing of the next object.
    Incomplete,

    /// Parsing of an object is complete.
    ///
    /// This does not indicate that the parser is complete,
    ///   as more objects may be able to be emitted.
    Object(S::Object),

    /// Parser encountered a dead state relative to the given token.
    ///
    /// A dead state is an empty accepting state that has no state
    ///   transition for the given token.
    /// A state is empty if a [`ParseStatus::Object`] will not be lost if
    ///   parsing ends at this point
    ///     (that is---there is no partially-built object).
    /// This could simply mean that the parser has completed its job and
    ///   that control must be returned to a parent context.
    ///
    /// If a parser is _not_ in an accepting state,
    ///   then an error ought to occur rather than a dead state;
    ///     the difference between the two is that the token associated with
    ///       a dead state can be used as a lookahead token in order to
    ///       produce a state transition at a higher level,
    ///     whereas an error indicates that parsing has failed.
    /// Intuitively,
    ///   this means that a [`ParseStatus::Object`] had just been emitted
    ///   and that the token following it isn't something that can be
    ///   parsed.
    ///
    /// If there is no parent context to handle the token,
    ///   [`Parser`] must yield an error.
    Dead(S::Token),
}

impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
    fn from(obj: T) -> Self {
        Self::Object(obj)
    }
}

/// Result of a parsing operation.
///
/// Whereas [`ParseStatus`] is used by [`ParseState`] to influence parser
///   operation,
///     this type is public-facing and used by [`Parser`].
///
/// Unlike [`ParseStatus`],
///   this type has no variant representing a dead state.
/// The type parameter `O` is the type of object produced by parsing.
#[derive(Debug, PartialEq, Eq)]
pub enum Parsed<O> {
    /// Additional tokens are needed to complete parsing of the next object.
    Incomplete,

    /// Parsing of an object is complete.
    ///
    /// This does not indicate that the parser is complete,
    ///   as more objects may be able to be emitted.
    Object(O),
}

impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
    /// Convert a [`ParseStatus`] into its public-facing [`Parsed`]
    ///   counterpart.
    ///
    /// # Panics
    /// [`Parsed`] has no counterpart to [`ParseStatus::Dead`],
    ///   and so this panics if given one;
    ///     such a status is expected to have been handled before
    ///     conversion.
    fn from(status: ParseStatus<S>) -> Self {
        match status {
            ParseStatus::Dead(_) => {
                unreachable!("Dead status must be filtered by Parser")
            }
            ParseStatus::Object(obj) => Self::Object(obj),
            ParseStatus::Incomplete => Self::Incomplete,
        }
    }
}

#[cfg(test)]
pub mod test {
    use std::{assert_matches::assert_matches, iter::once};

    use super::*;
    use crate::{span::DUMMY_SPAN as DS, sym::GlobalSymbolIntern};

    /// Tokens used to drive [`EchoState`] through each of the outcomes
    ///   that a [`ParseState`] is able to produce.
    #[derive(Debug, PartialEq, Eq, Clone)]
    enum TestToken {
        /// Causes [`EchoState`] to produce an [`EchoStateError`].
        Close(Span),
        /// Echoed back as an object,
        ///   placing [`EchoState`] into its accepting state.
        MarkDone(Span),
        /// Causes [`EchoState`] to yield a dead state.
        Text(Span),
        /// Stores the given value in the [`StubContext`],
        ///   placing [`EchoState`] into its accepting state without
        ///   emitting an object.
        SetCtxVal(u8),
    }

    impl Display for TestToken {
        fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            unimplemented!("fmt::Display")
        }
    }

    impl Token for TestToken {
        fn span(&self) -> Span {
            use TestToken::*;
            match self {
                Close(span) | MarkDone(span) | Text(span) => *span,
                // This variant carries no span.
                // It is matched explicitly so that any variant added in
                //   the future must decide on its span here.
                SetCtxVal(_) => UNKNOWN_SPAN,
            }
        }
    }

    // Allows `TestToken` to be echoed back as the object of `EchoState`.
    impl Object for TestToken {}

    /// Simple parser that echoes certain tokens back as objects
    ///   and uses the others to trigger specific parser outcomes
    ///     (see [`TestToken`]).
    #[derive(Debug, PartialEq, Eq)]
    enum EchoState {
        /// Initial, non-accepting state.
        Empty,
        /// Accepting state.
        Done,
    }

    impl Default for EchoState {
        fn default() -> Self {
            Self::Empty
        }
    }

    /// Context whose value can be observed before and after parsing.
    #[derive(Debug, PartialEq, Default)]
    struct StubContext {
        val: u8,
    }

    impl ParseState for EchoState {
        type Token = TestToken;
        type Object = TestToken;
        type Error = EchoStateError;

        type Context = StubContext;

        fn parse_token(
            self,
            tok: TestToken,
            ctx: &mut StubContext,
        ) -> TransitionResult<Self> {
            match tok {
                TestToken::MarkDone(..) => Transition(Self::Done).ok(tok),
                TestToken::Close(..) => {
                    Transition(self).err(EchoStateError::InnerError(tok))
                }
                TestToken::Text(..) => Transition(self).dead(tok),
                TestToken::SetCtxVal(val) => {
                    ctx.val = val;
                    Transition(Self::Done).incomplete()
                }
            }
        }

        fn is_accepting(&self) -> bool {
            *self == Self::Done
        }
    }

    /// Error produced by [`EchoState`],
    ///   holding the token that triggered it.
    #[derive(Debug, PartialEq, Eq)]
    enum EchoStateError {
        InnerError(TestToken),
    }

    impl Display for EchoStateError {
        fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            unimplemented!()
        }
    }

    impl Error for EchoStateError {
        fn source(&self) -> Option<&(dyn Error + 'static)> {
            None
        }
    }

    impl Diagnostic for EchoStateError {
        fn describe(&self) -> Vec<AnnotatedSpan> {
            unimplemented!()
        }
    }

    /// System under test.
    type Sut<I> = Parser<EchoState, I>;

    #[test]
    fn successful_parse_in_accepting_state_with_spans() {
        // EchoState is placed into a Done state given MarkDone.
        let tok = TestToken::MarkDone(DS);
        let mut toks = once(tok.clone());

        let mut sut = Sut::from(&mut toks);

        // The first token should be processed normally.
        // EchoState proxies the token back.
        assert_eq!(Some(Ok(Parsed::Object(tok))), sut.next());

        // This is now the end of the token stream,
        //   which should be okay provided that the first token put us into
        //   a proper accepting state.
        assert_eq!(None, sut.next());

        // Further, finalizing should work in this state.
        assert!(sut.finalize().is_ok());
    }

    #[test]
    fn fails_on_end_of_stream_when_not_in_accepting_state() {
        let span = Span::new(10, 20, "ctx".intern());
        let mut toks = [TestToken::Close(span)].into_iter();

        let mut sut = Sut::from(&mut toks);

        // The first token is consumed only so that the parser observes a
        //   span;
        //     its result
        //       (a state error, since Close causes EchoState to fail)
        //       is deliberately ignored.
        sut.next();

        // Given that we have no further tokens,
        //   and that EchoState remains in its non-accepting default
        //     state,
        //   we must fail when we encounter the end of the stream.
        assert_eq!(
            Some(Err(ParseError::UnexpectedEof(span.endpoints().1.unwrap()))),
            sut.next()
        );
    }

    #[test]
    fn returns_state_specific_error() {
        // TestToken::Close causes EchoState to produce an error.
        let errtok = TestToken::Close(DS);
        let mut toks = [errtok.clone()].into_iter();

        let mut sut = Sut::from(&mut toks);

        assert_eq!(
            Some(Err(ParseError::StateError(EchoStateError::InnerError(
                errtok
            )))),
            sut.next()
        );

        // The token must have been consumed.
        // It is up to a recovery process to either bail out or provide
        //   recovery tokens;
        //     continuing without recovery is unlikely to make sense.
        assert_eq!(0, toks.len());
    }

    #[test]
    fn fails_when_parser_is_finalized_in_non_accepting_state() {
        let span = Span::new(10, 10, "ctx".intern());

        // Set up so that we have a single token that we can use for
        //   recovery as part of the same iterator.
        let recovery = TestToken::MarkDone(DS);
        let mut toks = [
            // Used purely to populate a Span.
            TestToken::Close(span),
            // Recovery token here:
            recovery.clone(),
        ]
        .into_iter();

        let mut sut = Sut::from(&mut toks);

        // Populate our most recently seen token's span.
        sut.next();

        // Attempting to finalize now in a non-accepting state should fail
        //   in the same way that encountering an end-of-stream does,
        //     since we're effectively saying "we're done with the stream"
        //     and the parser will have no further opportunity to reach an
        //     accepting state.
        let result = sut.finalize();
        assert_matches!(
            result,
            Err((_, ParseError::UnexpectedEof(s))) if s == span.endpoints().1.unwrap()
        );

        // The sut should have been re-returned,
        //   allowing for attempted error recovery if the caller can manage
        //   to produce a sequence of tokens that will be considered valid.
        // `toks` above is set up already for this,
        //   which allows us to assert that we received back the same `sut`.
        let mut sut = result.unwrap_err().0;
        assert_eq!(Some(Ok(Parsed::Object(recovery))), sut.next());

        // And so we should now be in an accepting state,
        //   able to finalize.
        assert!(sut.finalize().is_ok());
    }

    #[test]
    fn unhandled_dead_state_results_in_error() {
        // A Text will cause our parser to return Dead.
        let tok = TestToken::Text(DS);
        let mut toks = once(tok.clone());

        let mut sut = Sut::from(&mut toks);

        // Our parser returns a Dead status,
        //   which is unhandled by any parent context
        //     (since we're not composing parsers),
        //     which causes an error due to an unhandled Dead state.
        assert_eq!(Some(Err(ParseError::UnexpectedToken(tok))), sut.next());
    }

    // A context can be both retrieved from a finished parser and provided
    //   to a new one.
    #[test]
    fn provide_and_retrieve_context() {
        // First, verify that it's initialized to a default context.
        let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
        let mut sut = Sut::from(&mut toks);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, Default::default());

        // Next, verify that the context that is manipulated is the context
        //   that is returned to us.
        let val = 5;
        let mut toks = vec![TestToken::SetCtxVal(val)].into_iter();
        let mut sut = Sut::from(&mut toks);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, StubContext { val });

        // Finally, verify that the context provided is the context that is
        //   used.
        let val = 10;
        let given_ctx = StubContext { val };
        let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
        let mut sut = EchoState::parse_with_context(&mut toks, given_ctx);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, StubContext { val });
    }
}