tame/tamer/src/parse/state/transition.rs

// Parsing automaton
//
//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! State transitions for parser automata.

use super::{
    ClosedParseState, ParseState, ParseStateResult, ParseStatus,
    PartiallyStitchableParseState, StitchableParseState, Token,
};
use crate::{diagnose::Annotate, diagnostic_panic};
use std::{
    convert::Infallible,
    hint::unreachable_unchecked,
    ops::{ControlFlow, FromResidual},
};

#[cfg(doc)]
use super::Parser;

/// A state transition with associated data.
///
/// Conceptually,
///   imagine the act of a state transition producing data.
/// See [`Transition`] for convenience methods for producing this tuple.
///
/// Sometimes a parser is not able to complete the operation requested
///   based on the provided input token.
/// Since TAMER uses a streaming parsing framework that places strict
///   limits on control flow,
///     a single token can be returned as lookahead to indicate that the
///     token could not be parsed yet and should be provided once again
///     in place of the next token from the input stream.
/// This allows,
///   for example,
///   for multiple data to be emitted in response to a single token.
///
/// If a [`ParseState`] is not a [`ClosedParseState`],
///   the transition will be to its superstate ([`ParseState::Super`]);
///     this conversion is performed automatically by the [`Transition`]
///     methods that produce [`TransitionResult`]
///       (such as [`Transition::ok`]).
///
/// This struct is opaque to ensure that critical invariants involving
///   transitions and lookahead are properly upheld;
///     callers must use the appropriate parsing APIs.
/// To that end,
///   both fields are visible only within the `parse` module,
///     and any token of lookahead is carried by the [`TransitionData`]
///     held in the second field.
#[derive(Debug, PartialEq)]
pub struct TransitionResult<S: ParseState>(
    /// New parser state.
    pub(in super::super) Transition<S>,
    /// Result of the parsing operation,
    ///   including any token of lookahead.
    pub(in super::super) TransitionData<S>,
);

impl<S: ParseState> TransitionResult<S> {
    /// Lift this [`TransitionResult`] into one for the superstate
    ///   [`ParseState::Super`] of `S`.
    ///
    /// Both the inner [`Transition`] and its associated
    ///   [`TransitionData`] are converted.
    pub fn into_super(self) -> TransitionResult<S::Super> {
        let Self(transition, data) = self;

        TransitionResult(transition.into_super(), data.into_super())
    }

    /// Indicate that this transition include a single token of lookahead,
    ///   which should be provided back to the parser in place of the
    ///   next token from the input stream.
    ///
    /// Panics
    /// ======
    /// A critical invariant of this system is that lookahead tokens must
    ///   never be discarded without explicit handling.
    /// If this [`TransitionResult`] contains an existing token of lookahead,
    ///   the system will panic when attempting to overwrite it.
    /// This represents a bug in the system,
    ///   since parsers should never permit this to occur.
    ///
    /// Ideally this will be enforced using the type system in the future.
    pub fn with_lookahead<T: Into<S::Token>>(self, lookahead: T) -> Self {
        match self {
            Self(transition, TransitionData::Result(result, None)) => Self(
                transition,
                TransitionData::Result(
                    result,
                    Some(Lookahead(lookahead.into())),
                ),
            ),

            // This represents a problem with the parser;
            //   we should never specify a lookahead token more than once.
            // This could be enforced statically with the type system if
            //   ever such a thing is deemed to be worth doing.
            Self(
                ..,
                TransitionData::Result(_, Some(prev))
                | TransitionData::Dead(prev),
            ) => prev.overwrite_panic(
                lookahead.into(),
                "cannot overwrite unused lookahead token",
            ),
        }
    }

    /// Attach a token of lookahead to this transition only if one is
    ///   provided.
    ///
    /// When `lookahead` is [`None`],
    ///   `self` is returned unchanged.
    /// Otherwise this behaves exactly as
    ///   [`with_lookahead`](TransitionResult::with_lookahead),
    ///     including its panic when a token of lookahead is already
    ///     present.
    ///
    /// This is useful when working with the output of other parsers.
    pub(in super::super) fn maybe_with_lookahead(
        self,
        lookahead: Option<Lookahead<S::Token>>,
    ) -> Self {
        if let Some(Lookahead(tok)) = lookahead {
            self.with_lookahead(tok)
        } else {
            self
        }
    }

    /// Map over both halves of this [`TransitionResult`],
    ///   translating to another [`ParseState`] `SB`.
    ///
    /// `fst` receives the [`ParseState`] held by the inner
    ///   [`Transition`] directly;
    ///     the state is unwrapped and rewrapped here for convenience and
    ///     brevity,
    ///       despite the verbose convention of mandating the use of
    ///       [`Transition`] elsewhere.
    /// [`TransitionData`] is too complex of a structure for that
    ///   treatment,
    ///     so `fdata` receives it whole and decides how to map over it.
    ///
    /// `fst` is invoked before `fdata`.
    pub(in super::super) fn bimap<SB: ParseState>(
        self,
        fst: impl FnOnce(S) -> SB,
        fdata: impl FnOnce(TransitionData<S>) -> TransitionData<SB>,
    ) -> TransitionResult<SB> {
        let Self(Transition(st), data) = self;

        let st_mapped = fst(st);
        let data_mapped = fdata(data);

        TransitionResult(Transition(st_mapped), data_mapped)
    }

    /// Conditionally map to a [`TransitionResult`] based on whether the
    ///   inner [`TransitionData`] represents a dead state transition
    ///     ([`TransitionData::Dead`]).
    ///
    /// Inner values are unwrapped before applying one of `fdead` or
    ///   `falive`.
    ///
    /// Lookahead is automatically propagated to the resulting
    ///   [`TransitionResult`],
    ///     ensuring that the token cannot be lost.
    /// Consequently,
    ///   it is important that the [`TransitionResult`] returned by `fdead`
    ///   or `falive` _does not contain a token of lookahead_,
    ///     otherwise the system will panic,
    ///       since two tokens of lookahead cannot be accommodated.
    /// This is not as bad as it sounds in practice,
    ///   since no token of input is provided to either of the branches,
    ///   and so would have to be manufactured by
    ///     (or have been previously stored by)
    ///     a calling parser.
    ///
    /// Ownership and Branching
    /// =======================
    /// At the time of writing (2023),
    ///   Rust's borrow checker cannot understand that the arguments to
    ///   `fdead` and `falive` are utilized in exclusive branches;
    ///     the borrowing happens at the call to `branch_dead` itself.
    /// The causes ownership problems when both branches want to utilize the
    ///   same data.
    ///
    /// To work around this limitation,
    ///   this method accepts an arbitrary branching context `bctx` that
    ///   will be passed to either `fdead` or `falive`;
    ///     this can be utilized in place of closure.
    pub fn branch_dead<SB: ParseState, C>(
        self,
        fdead: impl FnOnce(S, C) -> TransitionResult<<SB as ParseState>::Super>,
        falive: impl FnOnce(
            S,
            ParseStateResult<S>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        bctx: C,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: PartiallyStitchableParseState<SB>,
    {
        self.branch_dead_la(
            |st, la, bctx| {
                fdead(st, bctx)
                    .maybe_with_lookahead(Some(la.into_super::<SB>()))
            },
            |st, result, la, bctx| {
                falive(st, result, bctx)
                    .maybe_with_lookahead(la.map(Lookahead::into_super::<SB>))
            },
            bctx,
        )
    }

    /// Conditionally map to a [`TransitionResult`] based on whether the
    ///   inner [`TransitionData`] represents a dead state transition
    ///     ([`TransitionData::Dead`]).
    ///
    /// This is like [`Self::branch_dead`],
    ///   but exposes the token of lookahead (if any) and therefore _puts
    ///   the onus on the caller to ensure that the token is not lost_.
    /// As such,
    ///   this method is private to the `parse` module.
    ///
    /// For information about the branch context `bctx`,
    ///   see the public-facing method [`Self::branch_dead`].
    pub(in super::super) fn branch_dead_la<SB: ParseState, C>(
        self,
        fdead: impl FnOnce(
            S,
            Lookahead<<S as ParseState>::Token>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        falive: impl FnOnce(
            S,
            ParseStateResult<S>,
            Option<Lookahead<<S as ParseState>::Token>>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        bctx: C,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: PartiallyStitchableParseState<SB>,
    {
        use TransitionData::{Dead, Result};

        let Self(Transition(st), data) = self;

        match data {
            Dead(la) => fdead(st, la, bctx),
            Result(result, la) => falive(st, result, la, bctx),
        }
    }
}

/// Token to use as a lookahead token in place of the next token from the
///   input stream.
///
/// The wrapped token is visible only within the `parse` module.
#[derive(Debug, PartialEq)]
pub struct Lookahead<T: Token>(pub(in super::super) T);

impl<T: Token> Lookahead<T> {
    /// Panic with diagnostic information about a lookahead token and its
    ///   attempted replacement.
    ///
    /// `self` is the token that would be lost and `other` is the token
    ///   that was to replace it;
    ///     the spans of both are annotated in the diagnostic output.
    /// `msg` is used as the panic message.
    ///
    /// A critical system invariant is that lookahead tokens must never be
    ///   lost without explicit handling.
    /// Since this is not yet enforced using the type system,
    ///   these checks must be performed at runtime.
    pub(in super::super) fn overwrite_panic(self, other: T, msg: &str) -> ! {
        let Self(lost) = self;

        let lost_note = lost
            .span()
            .note("this token of lookahead would be lost");

        let replacement_err = other.span().internal_error(
            "attempting to replace previous lookahead token \
                   with this one",
        );

        let annotations = vec![lost_note, replacement_err];

        diagnostic_panic!(annotations, "{msg}",)
    }

    /// Convert the inner [`Token`] of lookahead into another [`Token`]
    ///   type `U` using [`Into`].
    pub fn inner_into<U: Token>(self) -> Lookahead<U>
    where
        T: Into<U>,
    {
        let Self(tok) = self;

        Lookahead(tok.into())
    }

    /// Convert the inner [`Token`] of lookahead into the token expected by
    ///   the superstate [`S::Super`](ParseState::Super).
    ///
    /// This simply sets strict trait bounds to serve as a checkpoint where
    ///   we know for certain what types are involved;
    ///     there's a whole lot of types involved in the parsing framework
    ///     and it gets very difficult to understand when errors occur.
    pub fn into_super<S: ParseState>(
        self,
    ) -> Lookahead<<S::Super as ParseState>::Token>
    where
        T: Into<<S::Super as ParseState>::Token>,
    {
        self.inner_into::<<S::Super as ParseState>::Token>()
    }
}

/// Information about the state transition.
///
/// Every variant is able to carry a token of [`Lookahead`]:
///   it is optional for [`Result`](TransitionData::Result) and mandatory
///   for [`Dead`](TransitionData::Dead).
///
/// Note: Ideally a state wouldn't even be required for
///   [`Dead`](TransitionData::Dead),
///     but [`ParseState`] does not implement [`Default`] and [`Parser`]
///     requires _some_ state exist.
#[derive(Debug, PartialEq)]
pub(in super::super) enum TransitionData<S: ParseState> {
    /// State transition was successful or not attempted,
    ///   with an optional token of [`Lookahead`].
    ///
    /// Note that a successful state transition _does not_ imply a
    ///   successful [`ParseStateResult`]---the
    ///     parser may choose to successfully transition into an error
    ///     recovery state to accommodate future tokens.
    Result(ParseStateResult<S>, Option<Lookahead<S::Token>>),

    /// No valid state transition exists from the current state for the
    ///   given input token,
    ///     which is returned as a token of [`Lookahead`].
    ///
    /// A dead state is an accepting state that has no state transition for
    ///   the given token.
    /// This could simply mean that the parser has completed its job and
    ///   that control must be returned to a parent context.
    /// Note that this differs from an error state,
    ///   where a parser is unable to reach an accepting state because it
    ///   received unexpected input.
    ///
    /// The parser may still choose to perform a state transition for the
    ///   sake of error recovery,
    ///     but the dead state is generally interpreted to mean
    ///       "I have no further work that I am able to perform"
    ///       and may lead to finalization of the parser.
    /// If a parser intends to do additional work,
    ///   it should return an error instead via [`TransitionData::Result`].
    Dead(Lookahead<S::Token>),
}

impl<S: ParseState> TransitionData<S> {
    pub fn into_super(self) -> TransitionData<S::Super> {
        match self {
            Self::Result(st_result, ola) => TransitionData::Result(
                st_result.map(ParseStatus::into_super).map_err(|e| e.into()),
                ola.map(Lookahead::inner_into),
            ),
            Self::Dead(la) => TransitionData::Dead(la.inner_into()),
        }
    }

    /// Pair this [`TransitionData`] with a state transition `to` for a
    ///   [`ParseState`] `SB`,
    ///     translating the data from `S` if necessary.
    ///
    /// Both the transition and the data are lifted into the superstate of
    ///   `SB`.
    pub fn transition<SB: ParseState>(
        self,
        to: impl Into<Transition<SB>>,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: StitchableParseState<SB>,
    {
        let target: Transition<SB> = to.into();

        TransitionResult(target.into_super(), self.inner_into::<SB>())
    }

    /// Reference to the token of lookahead,
    ///   if any.
    pub(in super::super) fn lookahead_ref(
        &self,
    ) -> Option<&Lookahead<S::Token>> {
        match self {
            TransitionData::Dead(ref la)
            | TransitionData::Result(_, Some(ref la)) => Some(la),
            _ => None,
        }
    }

    /// Reference to parsed object,
    ///   if any.
    pub(in super::super) fn object_ref(&self) -> Option<&S::Object> {
        match self {
            TransitionData::Result(Ok(ParseStatus::Object(obj)), _) => {
                Some(obj)
            }
            _ => None,
        }
    }

    /// Reference to parsing error,
    ///   if any.
    pub(in super::super) fn err_ref(&self) -> Option<&S::Error> {
        match self {
            TransitionData::Result(Err(e), _) => Some(e),
            _ => None,
        }
    }

    /// Asserts a reflexive relationship between the [`TransitionData`] of
    ///   our own [`ParseState`] `S` and a target [`ParseState`] `SB`.
    ///
    /// This is intended not just for translating between types,
    ///   but also documentation,
    ///   as an affirmative way to state "these two [`ParseState`]s
    ///     represent the same underlying data".
    /// For example,
    ///   this may be appropriate when `SB` wraps `S`.
    ///
    /// This is a stronger statement than saying two [`ParseState`]s are
    ///   _compatible_ withe one-another in some way,
    ///     which is the assertion made by
    ///     [`StitchableParseState`](super::StitchableParseState) and may
    ///     require data to be translated.
    ///
    /// While this method refers to the mathematical reflexive relation,
    ///   its exact name originates from the Coq tactic.
    pub fn reflexivity<SB: ParseState>(self) -> TransitionData<SB>
    where
        SB: ParseState<
            Token = <S as ParseState>::Token,
            Object = <S as ParseState>::Object,
            Error = <S as ParseState>::Error,
        >,
    {
        use TransitionData::*;

        match self {
            Result(result, la) => {
                Result(result.map(ParseStatus::reflexivity), la)
            }
            Dead(la) => Dead(la),
        }
    }

    /// Transform inner types using [`Into`] such that they are compatible
    ///   with the superstate of `SB`.
    pub fn inner_into<SB: ParseState>(
        self,
    ) -> TransitionData<<SB as ParseState>::Super>
    where
        S: StitchableParseState<SB>,
    {
        use TransitionData::*;

        match self {
            Dead(la) => Dead(la.into_super::<SB>()),
            Result(result, la) => Result(
                match result {
                    Ok(status) => Ok(status.inner_into()),
                    // First convert the error into `SB::Error`,
                    //   and then `SP::Super::Error`
                    //     (which will be the same type if SB is closed).
                    Err(e) => Err(e.into().into()),
                },
                la.map(Lookahead::into_super::<SB>),
            ),
        }
    }
}

impl<S: ParseState> From<ParseStateResult<S>> for TransitionData<S> {
    fn from(result: ParseStateResult<S>) -> Self {
        Self::Result(result, None)
    }
}

/// A verb denoting a state transition.
///
/// This is typically instantiated directly by a [`ParseState`] to perform a
///   state transition in [`ParseState::parse_token`].
/// The inner state is public,
///   so a transition may be written simply as `Transition(state)`;
///     a [`From`] implementation is also provided.
///
/// This newtype was created to produce clear, self-documenting code;
///   parsers can get confusing to read with all of the types involved,
///     so this provides a mental synchronization point.
///
/// This also provides some convenience methods to help remove boilerplate
///   and further improve code clarity.
#[derive(Debug, PartialEq, Eq)]
pub struct Transition<S: ParseState>(pub S);

impl<S: ParseState> Transition<S> {
    /// Transform a [`Transition`] into a transition of its superstate
    ///   [`ParseState::Super`].
    ///
    /// This is needed because trait specialization does not yet have a path
    /// to stabilization as of the time of writing,
    ///   and so `From<Transition<S>> for Transition<S::Super>` cannot be
    ///   implemented because those types overlap.
    pub fn into_super(self) -> Transition<S::Super> {
        match self {
            Transition(st) => Transition(st.into()),
        }
    }

    /// A state transition with corresponding data.
    ///
    /// This allows [`ParseState::parse_token`] to emit a parsed object and
    ///   corresponds to [`ParseStatus::Object`].
    ///
    /// The resulting [`TransitionResult`] is for the superstate of `S`
    ///   and contains no token of lookahead.
    pub fn ok<T>(self, obj: T) -> TransitionResult<S::Super>
    where
        T: Into<ParseStatus<S::Super>>,
    {
        let next = self.into_super();
        let status: ParseStatus<S::Super> = obj.into();

        TransitionResult(next, TransitionData::Result(Ok(status), None))
    }

    /// A transition with corresponding error.
    ///
    /// This indicates a parsing failure.
    /// The state ought to be suitable for error recovery.
    pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S::Super> {
        // The first error conversion is into that expected by S,
        //   which will _then_ (below) be converted into S::Super
        //   (if they're not the same).
        let err_s: S::Error = err.into();

        TransitionResult(
            self.into_super(),
            TransitionData::Result(Err(err_s.into()), None),
        )
    }

    /// A state transition with corresponding [`Result`].
    ///
    /// This translates the provided [`Result`] in a manner equivalent to
    ///   [`Transition::ok`] and [`Transition::err`].
    pub fn result<T, E>(
        self,
        result: Result<T, E>,
    ) -> TransitionResult<S::Super>
    where
        T: Into<ParseStatus<S>>,
        E: Into<S::Error>,
    {
        TransitionResult(
            self.into_super(),
            TransitionData::Result(
                result
                    .map(Into::into)
                    .map(ParseStatus::into_super)
                    .map_err(Into::<S::Error>::into)
                    .map_err(Into::into),
                None,
            ),
        )
    }

    /// A state transition indicating that more data is needed before an
    ///   object can be emitted.
    ///
    /// This corresponds to [`ParseStatus::Incomplete`].
    pub fn incomplete(self) -> TransitionResult<S::Super> {
        TransitionResult(
            self.into_super(),
            TransitionData::Result(Ok(ParseStatus::Incomplete), None),
        )
    }

    /// A state transition could not be performed and parsing will not
    ///   continue.
    ///
    /// A dead state represents an _accepting state_ that has no edge to
    ///   another state for the given `tok`.
    /// Rather than throw an error,
    ///   a parser uses this status to indicate that it has completed
    ///   parsing and that the token should be utilized elsewhere;
    ///     the provided token will be used as a token of [`Lookahead`].
    ///
    /// If a parser is not prepared to be finalized and needs to yield an
    ///   object first,
    ///     use [`Transition::result`] or other methods along with a token
    ///     of [`Lookahead`].
    pub fn dead<T: Token + Into<<S::Super as ParseState>::Token>>(
        self,
        tok: T,
    ) -> TransitionResult<S::Super> {
        let next = self.into_super();

        // The unconsumed token is handed back as lookahead,
        //   expressed using the token type of the superstate.
        let la = Lookahead(tok).into_super::<S>();

        TransitionResult(next, TransitionData::Dead(la))
    }

    /// Produce a map over the inner [`ParseState`] `S` to another
    ///   [`ParseState`] `SB`.
    ///
    /// Note that this is a curried associated function,
    ///   not a method.
    /// The intent is to maintain self-documentation by invoking it
    ///   qualified as [`Transition::fmap`].
    pub fn fmap<SB: ParseState>(
        f: impl Fn(S) -> SB,
    ) -> impl Fn(Transition<S>) -> Transition<SB> {
        move |Self(st)| Transition(f(st))
    }
}

impl<S: ParseState> From<S> for Transition<S> {
    fn from(st: S) -> Self {
        Self(st)
    }
}

impl<S: ClosedParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
    for TransitionResult<S>
{
    fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
        match residual {
            (st, result) => Self(st, TransitionData::Result(result, None)),
        }
    }
}

/// A [`Result`] residual can only ever be an [`Err`],
///   whose [`TransitionResult`] is returned as-is.
impl<S: ParseState> FromResidual<Result<Infallible, TransitionResult<S>>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: Result<Infallible, TransitionResult<S>>,
    ) -> Self {
        match residual {
            Err(e) => e,
            // `Infallible` has no values,
            //   so this arm can never be reached.
            // Matching on the bound value with no arms lets the compiler
            //   verify that for us,
            //     which means that no `unsafe` is needed to dismiss the
            //     arm.
            Ok(never) => match never {},
        }
    }
}

/// A [`ControlFlow`] residual can only ever be a [`ControlFlow::Break`],
///   whose [`TransitionResult`] is returned as-is.
impl<S: ParseState> FromResidual<ControlFlow<TransitionResult<S>, Infallible>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: ControlFlow<TransitionResult<S>, Infallible>,
    ) -> Self {
        match residual {
            ControlFlow::Break(result) => result,
            // `Infallible` has no values,
            //   so this arm can never be reached.
            // Matching on the bound value with no arms lets the compiler
            //   verify that for us,
            //     which means that no `unsafe` is needed to dismiss the
            //     arm.
            ControlFlow::Continue(never) => match never {},
        }
    }
}

/// An object able to be used as data for a state [`Transition`].
///
/// This flips the usual order of things:
///   rather than using a method of [`Transition`] to provide data,
///     this starts with the data and produces a transition from it.
/// This is sometimes necessary to satisfy ownership/borrowing rules.
///
/// This trait simply removes boilerplate associated with storing
///   intermediate values and translating into the resulting type.
///
/// Implementations are provided in this module for [`ParseStatus`],
///   for [`Option`] of a parsed object,
///   and for [`Result`] of either a [`ParseStatus`] or `()`.
pub trait Transitionable<S: ParseState> {
    /// Perform a state transition to `S` using [`Self`] as the associated
    ///   data.
    ///
    /// The resulting [`TransitionResult`] is for the superstate of `S`
    ///   ([`ParseState::Super`]).
    ///
    /// This may be necessary to satisfy ownership/borrowing rules when
    ///   state data from `S` is used to compute [`Self`].
    fn transition(self, to: S) -> TransitionResult<S::Super>;
}

/// A [`Result`] holding a [`ParseStatus`] transitions in the same manner
///   as [`Transition::result`].
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let next = Transition(to);

        next.result(self)
    }
}

impl<S, E> Transitionable<S> for Result<(), E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        Transition(to).result(self.map(|_| ParseStatus::Incomplete))
    }
}

/// An optional object transitions with that object if present
///   (see [`Transition::ok`]),
///     and otherwise as [`Transition::incomplete`].
impl<S> Transitionable<S> for Option<S::Object>
where
    S: ParseState,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let next = Transition(to);

        if let Some(obj) = self {
            next.ok(obj)
        } else {
            next.incomplete()
        }
    }
}

/// A [`ParseStatus`] is lifted into the superstate of `S` and then used
///   as the data of a successful transition
///     (see [`Transition::ok`]).
impl<S: ParseState> Transitionable<S> for ParseStatus<S> {
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let status_super = self.into_super();

        Transition(to).ok(status_super)
    }
}