tame/tamer/src/parse/state/transition.rs

// Parsing automaton
//
//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! State transitions for parser automata.

use super::{
    ClosedParseState, ParseState, ParseStateResult, ParseStatus,
    PartiallyStitchableParseState, StitchableParseState, Token,
};
use crate::{diagnose::Annotate, diagnostic_panic};
use std::{
    convert::Infallible,
    hint::unreachable_unchecked,
    ops::{ControlFlow, FromResidual},
};

#[cfg(doc)]
use super::Parser;

/// A state transition with associated data.
///
/// Conceptually,
///   imagine the act of a state transition producing data.
/// See [`Transition`] for convenience methods for producing this tuple.
///
/// Sometimes a parser is not able to complete the operation requested
///   based on the provided input token.
/// Since TAMER uses a streaming parsing framework that places strict
///   limits on control flow,
///     a single token can be returned as lookahead to indicate that the
///     token could not be parsed yet and should be provided once again
///     in place of the next token from the input stream.
/// This allows,
///   for example,
///   for multiple data to be emitted in response to a single token.
///
/// If a [`ParseState`] is not a [`ClosedParseState`],
///   the transition will be to its superstate ([`ParseState::Super`]);
///     this conversion is performed automatically by the [`Transition`]
///     methods that produce [`TransitionResult`]
///       (such as [`Transition::ok`]).
///
/// This struct is opaque to ensure that critical invariants involving
///   transitions and lookahead are properly upheld;
///     callers must use the appropriate parsing APIs.
/// To that end,
///   both fields are visible only within the grandparent module
///   (`pub(in super::super)`).
#[derive(Debug, PartialEq)]
pub struct TransitionResult<S: ParseState>(
    /// State that the parser is transitioning to.
    pub(in super::super) Transition<S>,
    /// Data produced by the transition:
    ///   either the result of the parsing operation
    ///     (with an optional token of [`Lookahead`]),
    ///   or a dead state transition carrying its token of [`Lookahead`].
    pub(in super::super) TransitionData<S>,
);

impl<S: ParseState> TransitionResult<S> {
    /// Lift this result into the superstate [`ParseState::Super`],
    ///   converting both the [`Transition`] and its associated
    ///   [`TransitionData`].
    pub fn into_super(self) -> TransitionResult<S::Super> {
        let Self(transition, data) = self;

        TransitionResult(transition.into_super(), data.into_super())
    }

    /// Indicate that this transition include a single token of lookahead,
    ///   which should be provided back to the parser in place of the
    ///   next token from the input stream.
    ///
    /// Panics
    /// ======
    /// A critical invariant of this system is that lookahead tokens must
    ///   never be discarded without explicit handling.
    /// If this [`TransitionResult`] contains an existing token of lookahead,
    ///   the system will panic when attempting to overwrite it.
    /// This represents a bug in the system,
    ///   since parsers should never permit this to occur.
    ///
    /// Ideally this will be enforced using the type system in the future.
    pub fn with_lookahead<T: Into<S::Token>>(self, lookahead: T) -> Self {
        match self {
            Self(transition, TransitionData::Result(result, None)) => Self(
                transition,
                TransitionData::Result(
                    result,
                    Some(Lookahead(lookahead.into())),
                ),
            ),

            // This represents a problem with the parser;
            //   we should never specify a lookahead token more than once.
            // This could be enforced statically with the type system if
            //   ever such a thing is deemed to be worth doing.
            Self(
                ..,
                TransitionData::Result(_, Some(prev))
                | TransitionData::Dead(prev),
            ) => prev.overwrite_panic(
                lookahead.into(),
                "cannot overwrite unused lookahead token",
            ),
        }
    }

    /// Attach a token of lookahead to this transition only if one is
    ///   provided.
    ///
    /// When `lookahead` is [`None`],
    ///   `self` is returned unchanged;
    ///     otherwise this defers to
    ///     [`with_lookahead`](TransitionResult::with_lookahead),
    ///       including its panic on an existing token of lookahead.
    ///
    /// This is useful when working with the output of other parsers.
    pub(in super::super) fn maybe_with_lookahead(
        self,
        lookahead: Option<Lookahead<S::Token>>,
    ) -> Self {
        if let Some(Lookahead(tok)) = lookahead {
            self.with_lookahead(tok)
        } else {
            self
        }
    }

    /// Map over both components of this [`TransitionResult`],
    ///   translating to another [`ParseState`] `SB`.
    ///
    /// `fst` receives the [`ParseState`] held by the inner [`Transition`]
    ///   (already unwrapped, for convenience and brevity),
    ///     and its return value is wrapped in a new [`Transition`].
    /// `fdata` receives the [`TransitionData`] as a whole,
    ///   since that structure is too complex to map over generically;
    ///     deciding how to translate its contents is left to the caller.
    ///
    /// `fst` is invoked before `fdata`.
    pub(in super::super) fn bimap<SB: ParseState>(
        self,
        fst: impl FnOnce(S) -> SB,
        fdata: impl FnOnce(TransitionData<S>) -> TransitionData<SB>,
    ) -> TransitionResult<SB> {
        let Self(Transition(st), data) = self;

        let to = Transition(fst(st));

        TransitionResult(to, fdata(data))
    }

    /// Conditionally map to a [`TransitionResult`] based on whether the
    ///   inner [`TransitionData`] represents a dead state transition
    ///     ([`TransitionData::Dead`]).
    ///
    /// Inner values are unwrapped before applying one of `fdead` or
    ///   `falive`.
    ///
    /// Lookahead is automatically propagated to the resulting
    ///   [`TransitionResult`],
    ///     ensuring that the token cannot be lost.
    /// Consequently,
    ///   it is important that the [`TransitionResult`] returned by `fdead`
    ///   or `falive` _does not contain a token of lookahead_,
    ///     otherwise the system will panic,
    ///       since two tokens of lookahead cannot be accommodated.
    /// This is not as bad as it sounds in practice,
    ///   since no token of input is provided to either of the branches,
    ///   and so would have to be manufactured by
    ///     (or have been previously stored by)
    ///     a calling parser.
    ///
    /// Ownership and Branching
    /// =======================
    /// At the time of writing (2023),
    ///   Rust's borrow checker cannot understand that the arguments to
    ///   `fdead` and `falive` are utilized in exclusive branches;
    ///     the borrowing happens at the call to `branch_dead` itself.
    /// This causes ownership problems when both branches want to utilize
    ///   the same data.
    ///
    /// To work around this limitation,
    ///   this method accepts an arbitrary branching context `bctx` that
    ///   will be passed to either `fdead` or `falive`;
    ///     this can be utilized in place of a closure.
    pub fn branch_dead<SB: ParseState, C>(
        self,
        fdead: impl FnOnce(S, C) -> TransitionResult<<SB as ParseState>::Super>,
        falive: impl FnOnce(
            S,
            ParseStateResult<S>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        bctx: C,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: PartiallyStitchableParseState<SB>,
    {
        self.branch_dead_la(
            // Dead state:
            //   a token of lookahead is always present,
            //   so convert it into `SB`'s token type and attach it to
            //   whatever `fdead` produced.
            |st, Lookahead(la), bctx| {
                fdead(st, bctx)
                    .with_lookahead(<SB as ParseState>::Token::from(la))
            },
            // Any other transition:
            //   lookahead is optional,
            //   so it is converted and attached only if present.
            |st, result, la, bctx| {
                falive(st, result, bctx)
                    .maybe_with_lookahead(la.map(Lookahead::inner_into))
            },
            bctx,
        )
    }

    /// Conditionally map to a [`TransitionResult`] based on whether the
    ///   inner [`TransitionData`] represents a dead state transition
    ///     ([`TransitionData::Dead`]).
    ///
    /// This is like [`Self::branch_dead`],
    ///   but exposes the token of lookahead (if any) and therefore _puts
    ///   the onus on the caller to ensure that the token is not lost_.
    /// As such,
    ///   this method is private to the `parse` module.
    ///
    /// For information about the branch context `bctx`,
    ///   see the public-facing method [`Self::branch_dead`].
    pub(in super::super) fn branch_dead_la<SB: ParseState, C>(
        self,
        fdead: impl FnOnce(
            S,
            Lookahead<<S as ParseState>::Token>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        falive: impl FnOnce(
            S,
            ParseStateResult<S>,
            Option<Lookahead<<S as ParseState>::Token>>,
            C,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        bctx: C,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: PartiallyStitchableParseState<SB>,
    {
        use TransitionData::{Dead, Result};

        let Self(Transition(st), data) = self;

        match data {
            Dead(la) => fdead(st, la, bctx),
            Result(result, la) => falive(st, result, la, bctx),
        }
    }

    /// Conditionally map to a [`TransitionResult`] based on whether the
    ///   inner [`TransitionData`] represents an object.
    ///
    /// If the transition produced an object
    ///   ([`ParseStatus::Object`]),
    ///     then `fobj` is invoked with the [`Transition`],
    ///       the object,
    ///       and the optional token of lookahead;
    ///         the caller is responsible for ensuring that the token is
    ///         not lost.
    ///
    /// Otherwise,
    ///   `fother` maps the [`Transition`] to one for `SB`,
    ///     and the original outcome
    ///       (incomplete, error, or dead)
    ///     is reproduced on that new transition with any token of
    ///     lookahead carried over.
    pub(in super::super) fn branch_obj_la<SB: ParseState>(
        self,
        fobj: impl FnOnce(
            Transition<S>,
            <S as ParseState>::Object,
            Option<Lookahead<<S as ParseState>::Token>>,
        ) -> TransitionResult<<SB as ParseState>::Super>,
        fother: impl FnOnce(Transition<S>) -> Transition<SB>,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: PartiallyStitchableParseState<SB>,
    {
        use ParseStatus::{Incomplete, Object};
        use TransitionData::{Dead, Result};

        let Self(st, data) = self;

        match data {
            Result(Ok(Object(obj)), la) => fobj(st, obj, la).into_super(),

            // Can't use `TransitionData::inner_into` since we only have a
            //   `PartiallyStitchableParseState`,
            //     and `inner_into` requires being able to convert the inner
            //     object that we handled above.
            // Each remaining case is therefore rebuilt by hand on the
            //   transition produced by `fother`.
            Result(Ok(Incomplete), la) => fother(st)
                .incomplete()
                .maybe_with_lookahead(la.map(Lookahead::inner_into)),
            Result(Err(e), la) => fother(st)
                .err(e)
                .maybe_with_lookahead(la.map(Lookahead::inner_into)),
            Dead(Lookahead(la)) => fother(st).dead(la.into()),
        }
    }
}

/// Token to use as a lookahead token in place of the next token from the
///   input stream.
///
/// The wrapped token is visible only within the grandparent module
///   (`pub(in super::super)`).
#[derive(Debug, PartialEq)]
pub struct Lookahead<T: Token>(pub(in super::super) T);

impl<T: Token> Lookahead<T> {
    /// Panic with diagnostic information about a lookahead token and its
    ///   attempted replacement.
    ///
    /// `self` is the token of lookahead that would be lost,
    ///   `other` is the token that was to replace it,
    ///   and `msg` is used as the panic message.
    /// This function never returns.
    ///
    /// A critical system invariant is that lookahead tokens must never be
    ///   lost without explicit handling.
    /// Since this is not yet enforced using the type system,
    ///   these checks must be performed at runtime.
    pub(in super::super) fn overwrite_panic(self, other: T, msg: &str) -> ! {
        let Self(lost) = self;

        let lost_note =
            lost.span().note("this token of lookahead would be lost");
        let replacement_err = other.span().internal_error(
            "attempting to replace previous lookahead token \
             with this one",
        );

        let desc = vec![lost_note, replacement_err];

        diagnostic_panic!(desc, "{msg}",)
    }

    /// Convert the wrapped token into another [`Token`] type `U` using
    ///   [`Into`],
    ///     yielding a [`Lookahead`] over the converted token.
    pub fn inner_into<U: Token>(self) -> Lookahead<U>
    where
        T: Into<U>,
    {
        let Self(tok) = self;

        Lookahead(tok.into())
    }
}

/// Information about the state transition.
///
/// Note: Ideally a state wouldn't even be required for
///   [`Dead`](TransitionData::Dead),
///     but [`ParseState`] does not implement [`Default`] and [`Parser`]
///     requires that _some_ state exist.
#[derive(Debug, PartialEq)]
pub(in super::super) enum TransitionData<S: ParseState> {
    /// State transition was successful or not attempted,
    ///   with an optional token of [`Lookahead`].
    ///
    /// Note that a successful state transition _does not_ imply a
    ///   successful [`ParseStateResult`]---the
    ///     parser may choose to successfully transition into an error
    ///     recovery state to accommodate future tokens.
    Result(ParseStateResult<S>, Option<Lookahead<S::Token>>),

    /// No valid state transition exists from the current state for the
    ///   given input token,
    ///     which is returned as a token of [`Lookahead`].
    ///
    /// A dead state is an accepting state that has no state transition for
    ///   the given token.
    /// This could simply mean that the parser has completed its job and
    ///   that control must be returned to a parent context.
    /// Note that this differs from an error state,
    ///   where a parser is unable to reach an accepting state because it
    ///   received unexpected input.
    ///
    /// The parser may still choose to perform a state transition for the
    ///   sake of error recovery,
    ///     but the dead state is generally interpreted to mean
    ///       "I have no further work that I am able to perform"
    ///       and may lead to finalization of the parser.
    /// If a parser intends to do additional work,
    ///   it should return an error instead via [`TransitionData::Result`].
    Dead(Lookahead<S::Token>),
}

impl<S: ParseState> TransitionData<S> {
    pub fn into_super(self) -> TransitionData<S::Super> {
        match self {
            Self::Result(st_result, ola) => TransitionData::Result(
                st_result.map(ParseStatus::into_super).map_err(|e| e.into()),
                ola,
            ),
            Self::Dead(la) => TransitionData::Dead(la),
        }
    }

    /// Pair this [`TransitionData`] with a state transition `to` for a
    ///   [`ParseState`] `SB`,
    ///     translating the data from `S` if necessary.
    ///
    /// The transition is lifted into the superstate of `SB` and the data
    ///   is converted via [`TransitionData::inner_into`].
    pub fn transition<SB: ParseState>(
        self,
        to: impl Into<Transition<SB>>,
    ) -> TransitionResult<<SB as ParseState>::Super>
    where
        S: StitchableParseState<SB>,
    {
        let target: Transition<SB> = to.into();
        let super_target = target.into_super();

        TransitionResult(super_target, self.inner_into::<SB>())
    }

    /// Reference to the token of lookahead,
    ///   if any.
    pub(in super::super) fn lookahead_ref(
        &self,
    ) -> Option<&Lookahead<S::Token>> {
        match self {
            TransitionData::Dead(ref la)
            | TransitionData::Result(_, Some(ref la)) => Some(la),
            _ => None,
        }
    }

    /// Reference to parsed object,
    ///   if any.
    pub(in super::super) fn object_ref(&self) -> Option<&S::Object> {
        match self {
            TransitionData::Result(Ok(ParseStatus::Object(obj)), _) => {
                Some(obj)
            }
            _ => None,
        }
    }

    /// Reference to parsing error,
    ///   if any.
    pub(in super::super) fn err_ref(&self) -> Option<&S::Error> {
        match self {
            TransitionData::Result(Err(e), _) => Some(e),
            _ => None,
        }
    }

    /// Asserts a reflexive relationship between the [`TransitionData`] of
    ///   our own [`ParseState`] `S` and a target [`ParseState`] `SB`.
    ///
    /// This is intended not just for translating between types,
    ///   but also documentation,
    ///   as an affirmative way to state "these two [`ParseState`]s
    ///     represent the same underlying data".
    /// For example,
    ///   this may be appropriate when `SB` wraps `S`.
    ///
    /// This is a stronger statement than saying two [`ParseState`]s are
    ///   _compatible_ withe one-another in some way,
    ///     which is the assertion made by
    ///     [`StitchableParseState`](super::StitchableParseState) and may
    ///     require data to be translated.
    ///
    /// While this method refers to the mathematical reflexive relation,
    ///   its exact name originates from the Coq tactic.
    pub fn reflexivity<SB: ParseState>(self) -> TransitionData<SB>
    where
        SB: ParseState<
            Token = <S as ParseState>::Token,
            Object = <S as ParseState>::Object,
            Error = <S as ParseState>::Error,
        >,
    {
        use TransitionData::*;

        match self {
            Result(result, la) => {
                Result(result.map(ParseStatus::reflexivity), la)
            }
            Dead(la) => Dead(la),
        }
    }

    /// Transform inner types using [`Into`] such that they are compatible
    ///   with the superstate of `SB`.
    pub fn inner_into<SB: ParseState>(
        self,
    ) -> TransitionData<<SB as ParseState>::Super>
    where
        S: StitchableParseState<SB>,
    {
        use TransitionData::*;

        match self {
            Dead(la) => Dead(la.inner_into()),
            Result(result, la) => Result(
                match result {
                    Ok(status) => Ok(status.inner_into()),
                    // First convert the error into `SB::Error`,
                    //   and then `SP::Super::Error`
                    //     (which will be the same type if SB is closed).
                    Err(e) => Err(e.into().into()),
                },
                la.map(Lookahead::inner_into),
            ),
        }
    }
}

impl<S: ParseState> From<ParseStateResult<S>> for TransitionData<S> {
    fn from(result: ParseStateResult<S>) -> Self {
        Self::Result(result, None)
    }
}

/// A verb denoting a state transition.
///
/// This is typically instantiated directly by a [`ParseState`] to perform a
///   state transition in [`ParseState::parse_token`].
///
/// This newtype was created to produce clear, self-documenting code;
///   parsers can get confusing to read with all of the types involved,
///     so this provides a mental synchronization point.
///
/// This also provides some convenience methods to help remove boilerplate
///   and further improve code clarity.
///
/// The wrapped value is the [`ParseState`] being transitioned to,
///   and is publicly accessible.
#[derive(Debug, PartialEq, Eq)]
pub struct Transition<S: ParseState>(pub S);

impl<S: ParseState> Transition<S> {
    /// Transform a [`Transition`] into a transition of its superstate
    ///   [`ParseState::Super`].
    ///
    /// This is needed because trait specialization does not yet have a path
    /// to stabilization as of the time of writing,
    ///   and so `From<Transition<S>> for Transition<S::Super>` cannot be
    ///   implemented because those types overlap.
    pub fn into_super(self) -> Transition<S::Super> {
        match self {
            Transition(st) => Transition(st.into()),
        }
    }

    /// Transition while emitting data.
    ///
    /// This allows [`ParseState::parse_token`] to emit a parsed object and
    ///   corresponds to [`ParseStatus::Object`].
    /// `obj` may be anything convertible into a [`ParseStatus`] of the
    ///   superstate.
    /// The result carries no token of lookahead.
    pub fn ok<T>(self, obj: T) -> TransitionResult<S::Super>
    where
        T: Into<ParseStatus<S::Super>>,
    {
        let to = self.into_super();
        let status: ParseStatus<S::Super> = obj.into();

        TransitionResult(to, TransitionData::Result(Ok(status), None))
    }

    /// A transition with corresponding error.
    ///
    /// This indicates a parsing failure.
    /// The state ought to be suitable for error recovery.
    pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S::Super> {
        // The first error conversion is into that expected by S,
        //   which will _then_ (below) be converted into S::Super
        //   (if they're not the same).
        let err_s: S::Error = err.into();

        TransitionResult(
            self.into_super(),
            TransitionData::Result(Err(err_s.into()), None),
        )
    }

    /// A state transition with corresponding [`Result`].
    ///
    /// This translates the provided [`Result`] in a manner equivalent to
    ///   [`Transition::ok`] and [`Transition::err`].
    pub fn result<T, E>(
        self,
        result: Result<T, E>,
    ) -> TransitionResult<S::Super>
    where
        T: Into<ParseStatus<S>>,
        E: Into<S::Error>,
    {
        TransitionResult(
            self.into_super(),
            TransitionData::Result(
                result
                    .map(Into::into)
                    .map(ParseStatus::into_super)
                    .map_err(Into::<S::Error>::into)
                    .map_err(Into::into),
                None,
            ),
        )
    }

    /// Transition while indicating that more data is needed before an
    ///   object can be emitted.
    ///
    /// This corresponds to [`ParseStatus::Incomplete`].
    /// The result carries no token of lookahead.
    pub fn incomplete(self) -> TransitionResult<S::Super> {
        let to = self.into_super();
        let status = Ok(ParseStatus::Incomplete);

        TransitionResult(to, TransitionData::Result(status, None))
    }

    /// Indicate that a state transition could not be performed and that
    ///   parsing will not continue.
    ///
    /// A dead state represents an _accepting state_ that has no edge to
    ///   another state for the given `tok`.
    /// Rather than raising an error,
    ///   a parser uses this status to indicate that it has completed
    ///   parsing and that the token should be utilized elsewhere;
    ///     the provided token becomes a token of [`Lookahead`].
    ///
    /// If a parser is not prepared to be finalized and needs to yield an
    ///   object first,
    ///     use [`Transition::result`] or other methods along with a token
    ///     of [`Lookahead`].
    pub fn dead(self, tok: S::Token) -> TransitionResult<S::Super> {
        let to = self.into_super();
        let la = Lookahead(tok);

        TransitionResult(to, TransitionData::Dead(la))
    }

    /// Produce a map over the inner [`ParseState`] `S` to another
    ///   [`ParseState`] `SB`.
    ///
    /// Note that this is a curried associated function,
    ///   not a method.
    /// The intent is to maintain self-documentation by invoking it
    ///   qualified as [`Transition::fmap`].
    pub fn fmap<SB: ParseState>(
        f: impl Fn(S) -> SB,
    ) -> impl Fn(Transition<S>) -> Transition<SB> {
        move |Self(st)| Transition(f(st))
    }
}

impl<S: ParseState> From<S> for Transition<S> {
    fn from(st: S) -> Self {
        Self(st)
    }
}

impl<S: ClosedParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
    for TransitionResult<S>
{
    fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
        match residual {
            (st, result) => Self(st, TransitionData::Result(result, None)),
        }
    }
}

/// Build a [`TransitionResult`] from the residual of a [`Result`] whose
///   error is itself a [`TransitionResult`],
///     yielding that inner value.
impl<S: ParseState> FromResidual<Result<Infallible, TransitionResult<S>>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: Result<Infallible, TransitionResult<S>>,
    ) -> Self {
        if let Err(result) = residual {
            result
        } else {
            // SAFETY: The only remaining variant is `Ok`,
            //   which holds an `Infallible` and so can never be
            //   constructed.
            // Handling it doesn't seem to be required in
            //   core::result::Result's FromResidual implementation,
            //     but as of 1.61 nightly it is here.
            unsafe { unreachable_unchecked() }
        }
    }
}

/// Build a [`TransitionResult`] from the residual of a [`ControlFlow`]
///   that breaks with a [`TransitionResult`],
///     yielding that inner value.
impl<S: ParseState> FromResidual<ControlFlow<TransitionResult<S>, Infallible>>
    for TransitionResult<S>
{
    fn from_residual(
        residual: ControlFlow<TransitionResult<S>, Infallible>,
    ) -> Self {
        if let ControlFlow::Break(result) = residual {
            result
        } else {
            // SAFETY: The only remaining variant is `Continue`,
            //   which holds an `Infallible` and so can never be
            //   constructed.
            unsafe { unreachable_unchecked() }
        }
    }
}

/// An object able to be used as data for a state [`Transition`].
///
/// This flips the usual order of things:
///   rather than using a method of [`Transition`] to provide data,
///     this starts with the data and produces a transition from it.
/// This is sometimes necessary to satisfy ownership/borrowing rules.
///
/// This trait simply removes boilerplate associated with storing
///   intermediate values and translating into the resulting type.
pub trait Transitionable<S: ParseState> {
    /// Perform a state transition to `S` using [`Self`] as the associated
    ///   data.
    ///
    /// `to` is the state to transition to,
    ///   and the result is expressed in terms of the superstate
    ///   [`ParseState::Super`].
    ///
    /// This may be necessary to satisfy ownership/borrowing rules when
    ///   state data from `S` is used to compute [`Self`].
    fn transition(self, to: S) -> TransitionResult<S::Super>;
}

/// A [`Result`] holding a [`ParseStatus`] transitions as by
///   [`Transition::result`].
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let target = Transition(to);

        target.result(self)
    }
}

impl<S, E> Transitionable<S> for Result<(), E>
where
    S: ParseState,
    <S as ParseState>::Error: From<E>,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        Transition(to).result(self.map(|_| ParseStatus::Incomplete))
    }
}

/// An optional object transitions by emitting the object when present
///   ([`Transition::ok`]),
///     and as incomplete otherwise ([`Transition::incomplete`]).
impl<S> Transitionable<S> for Option<S::Object>
where
    S: ParseState,
{
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let target = Transition(to);

        if let Some(obj) = self {
            target.ok(obj)
        } else {
            target.incomplete()
        }
    }
}

/// A [`ParseStatus`] is lifted into the superstate and then emitted via
///   [`Transition::ok`].
impl<S: ParseState> Transitionable<S> for ParseStatus<S> {
    fn transition(self, to: S) -> TransitionResult<S::Super> {
        let super_status = self.into_super();

        Transition(to).ok(super_status)
    }
}
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								// Parsing automaton
 								//
-												Copyright year and name update

Ryan Specialty Group (RSG) rebranded to Ryan Specialty after its IPO.

											
										
										
											2023-01-17 23:09:25 -05:00
+								//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								//
 								//  This file is part of TAME.
 								//
 								//  This program is free software: you can redistribute it and/or modify
 								//  it under the terms of the GNU General Public License as published by
 								//  the Free Software Foundation, either version 3 of the License, or
 								//  (at your option) any later version.
 								//
 								//  This program is distributed in the hope that it will be useful,
 								//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 								//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								//  GNU General Public License for more details.
 								//
 								//  You should have received a copy of the GNU General Public License
 								//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 								//! State transitions for parser automata.
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								use super::{
-												tamer: parse::state: Begin to tame delegation methods

These delegation methods have been a pain in my ass for quite some time, and
their lack of generalization makes the introduction of new delegation
methods (in the general sense, not necessarily trait methods) very tedious
and prone to inconsistencies.

I'm going to progressively refactor them in separate commits so it's clear
what I'm doing, primarily for future me to reference if need be.

DEV-13156

											
										
										
											2022-11-15 22:16:30 -05:00
+								    ClosedParseState, ParseState, ParseStateResult, ParseStatus,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								    PartiallyStitchableParseState, StitchableParseState, Token,
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								};
-												tamer: parse::state::transition::TransitionResult::with_lookahead: {=>diagnostic_}panic!

As in previous commits, this continues to replace panics with
`diagnostic_panic!`, which provides much more useful information both for
debugging and to help the user possibly work around the problem.  And lets
the user know that it's not their fault, and it's a TAMER bug that should be
reported.

...am I going to rationalize it in each commit message?

DEV-13156

											
										
										
											2022-11-16 14:20:58 -05:00
+								use crate::{diagnose::Annotate, diagnostic_panic};
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								use std::{
 								    convert::Infallible,
 								    hint::unreachable_unchecked,
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								    ops::{ControlFlow, FromResidual},
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								};
 								#[cfg(doc)]
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								use super::Parser;
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
 								/// A state transition with associated data.
 								///
 								/// Conceptually,
 								///   imagine the act of a state transition producing data.
 								/// See [`Transition`] for convenience methods for producing this tuple.
 								///
 								/// Sometimes a parser is not able to complete the operation requested
 								///   based on the provided input token.
 								/// Since TAMER uses a streaming parsing framework that places strict
 								///   limits on control flow,
 								///     a single token can be returned as lookahead to indicate that the
 								///     token could not be parsed yet and should be provided once again
 								///     in place of the next token from the input stream.
 								/// This allows,
 								///   for example,
 								///   for multiple data to be emitted in response to a single token.
 								///
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								/// If a [`ParseState`] is not a [`ClosedParseState`],
 								///   the transition will be to its superstate ([`ParseState::Super`]);
 								///     this conversion is performed automatically by the [`Transition`]
 								///     methods that produce [`TransitionResult`],
 								///       (such as [`Transition::ok`]).
 								///
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								/// This struct is opaque to ensure that critical invariants involving
 								///   transitions and lookahead are properly upheld;
 								///     callers must use the appropriate parsing APIs.
 								#[derive(Debug, PartialEq)]
 								pub struct TransitionResult<S: ParseState>(
 								    /// New parser state.
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub(in super::super) Transition<S>,
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    /// Result of the parsing operation.
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								    pub(in super::super) TransitionData<S>,
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								);
 								impl<S: ParseState> TransitionResult<S> {
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn into_super(self) -> TransitionResult<S::Super> {
 								        // Lift both halves of this result into the superstate
 								        //   (`S::Super`):
 								        //     the transition and its associated data are each converted
 								        //     by their own `into_super` methods and then re-wrapped.
 								        match self {
 								            Self(t, data) => {
 								                TransitionResult(t.into_super(), data.into_super())
 								            }
 								        }
 								    }
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    /// Indicate that this transition includes a single token of lookahead,
 								    ///   which should be provided back to the parser in place of the
 								    ///   next token from the input stream.
-												tamer: parse: Abstract lookahead token replacement panic

There's no use in duplicating this in util::expand.

Lookahead tokens are one of the few invariants that I haven't taken the time
to enforce using the type system, because it'd be quite a bit of work that
I do not have time for, and may not be worth it with changes that may make
the system less ergonomic.  Nonetheless, I do hope to address it at some
point in the (possibly-far) future.

If ever you encounter this diagnostic message, ask yourself how stable TAMER
otherwise is and how many other issues like this have been entirely
prevented through compile-time proofs using the type system.

DEV-13156

											
										
										
											2022-11-16 15:25:52 -05:00
+								    ///
 								    /// Panics
 								    /// ======
 								    /// A critical invariant of this system is that lookahead tokens must
 								    ///   never be discarded without explicit handling.
 								    /// If this [`TransitionResult`] contains an existing token of lookahead,
 								    ///   the system will panic when attempting to overwrite it.
 								    /// This represents a bug in the system,
 								    ///   since parsers should never permit this to occur.
 								    ///
 								    /// Ideally this will be enforced using the type system in the future.
-												tamer: asg::air: Expression parser

This delegates expression parsing to `AirExprAggregate`, in an effort to
both begin to simplify the understanding and maintenance of `AirAggregate`;
and allow for parser composition for template parsing.

This utilizes the prior changes for token sum types to precisely define the
subset of AIR tokens supported by the expression parser.  This differs from
prior approaches which delegated until a dead state, relying on runtime
information to determine if a parser has finished.  This allows us to
determine that statically.

I do want to be able to eliminate the dead state from the parser so we can
get rid of the `unreachable!`, but I need to move on; that's something I had
tried to do in the past too, which ended up adding a bit of complexity, and
I'll have to consider my options in the future, including whether the dead
state transition can be entirely eliminated in favor of the combination of
these sum types and recovery; the parsing framework decisions were made
while recovery was still an open question, at least in practice.

DEV-13708

											
										
										
											2023-03-06 12:03:55 -05:00
+								    pub fn with_lookahead<T: Into<S::Token>>(self, lookahead: T) -> Self {
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								        match self {
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								            Self(transition, TransitionData::Result(result, None)) => Self(
 								                transition,
-												tamer: asg::air: Expression parser

This delegates expression parsing to `AirExprAggregate`, in an effort to
both begin to simplify the understanding and maintenance of `AirAggregate`;
and allow for parser composition for template parsing.

This utilizes the prior changes for token sum types to precisely define the
subset of AIR tokens supported by the expression parser.  This differs from
prior approaches which delegated until a dead state, relying on runtime
information to determine if a parser has finished.  This allows us to
determine that statically.

I do want to be able to eliminate the dead state from the parser so we can
get rid of the `unreachable!`, but I need to move on; that's something I had
tried to do in the past too, which ended up adding a bit of complexity, and
I'll have to consider my options in the future, including whether the dead
state transition can be entirely eliminated in favor of the combination of
these sum types and recovery; the parsing framework decisions were made
while recovery was still an open question, at least in practice.

DEV-13708

											
										
										
											2023-03-06 12:03:55 -05:00
+								                TransitionData::Result(
 								                    result,
 								                    Some(Lookahead(lookahead.into())),
 								                ),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								            ),
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
 								            // This represents a problem with the parser;
 								            //   we should never specify a lookahead token more than once.
 								            // This could be enforced statically with the type system if
 								            //   ever such a thing is deemed to be worth doing.
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								            Self(
 								                ..,
-												tamer: parse: Abstract lookahead token replacement panic

There's no use in duplicating this in util::expand.

Lookahead tokens are one of the few invariants that I haven't taken the time
to enforce using the type system, because it'd be quite a bit of work that
I do not have time for, and may not be worth it with changes that may make
the system less ergonomic.  Nonetheless, I do hope to address it at some
point in the (possibly-far) future.

If ever you encounter this diagnostic message, ask yourself how stable TAMER
otherwise is and how many other issues like this have been entirely
prevented through compile-time proofs using the type system.

DEV-13156

											
										
										
											2022-11-16 15:25:52 -05:00
+								                TransitionData::Result(_, Some(prev))
 								                | TransitionData::Dead(prev),
 								            ) => prev.overwrite_panic(
-												tamer: asg::air: Expression parser

This delegates expression parsing to `AirExprAggregate`, in an effort to
both begin to simplify the understanding and maintenance of `AirAggregate`;
and allow for parser composition for template parsing.

This utilizes the prior changes for token sum types to precisely define the
subset of AIR tokens supported by the expression parser.  This differs from
prior approaches which delegated until a dead state, relying on runtime
information to determine if a parser has finished.  This allows us to
determine that statically.

I do want to be able to eliminate the dead state from the parser so we can
get rid of the `unreachable!`, but I need to move on; that's something I had
tried to do in the past too, which ended up adding a bit of complexity, and
I'll have to consider my options in the future, including whether the dead
state transition can be entirely eliminated in favor of the combination of
these sum types and recovery; the parsing framework decisions were made
while recovery was still an open question, at least in practice.

DEV-13708

											
										
										
											2023-03-06 12:03:55 -05:00
+								                lookahead.into(),
-												tamer: parse: Abstract lookahead token replacement panic

There's no use in duplicating this in util::expand.

Lookahead tokens are one of the few invariants that I haven't taken the time
to enforce using the type system, because it'd be quite a bit of work that
I do not have time for, and may not be worth it with changes that may make
the system less ergonomic.  Nonetheless, I do hope to address it at some
point in the (possibly-far) future.

If ever you encounter this diagnostic message, ask yourself how stable TAMER
otherwise is and how many other issues like this have been entirely
prevented through compile-time proofs using the type system.

DEV-13156

											
										
										
											2022-11-16 15:25:52 -05:00
+								                "cannot overwrite unused lookahead token",
 								            ),
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								        }
 								    }
-												tamer: xir::parse::ele: Initial element parser generator concept

This begins generating parsers that are capable of parsing elements.  I need
to move on, so this abstraction isn't going to go as far as it could, but
let's see where it takes me.

This was the work that required the recent lookahead changes, which has been
detailed in previous commits.

This initial support is basic, but robust.  It supports parsing elements
with attributes and children, but it does not yet support the equivalent of
the Kleene star (`*`).  Such support will likely be added by supporting
parsers that are able to recurse on their own definition in tail position,
which will also require supporting parsers that do not add to the stack.

This generates parsers that, like all the other parsers, use enums to
provide a typed stack.  Stitched parsers produce a nested stack that is
always bounded in size.  Fortunately, expressions---which can nest
deeply---do not need to maintain ancestor context on the stack, and so this
should work fine; we can get away with this because XIRF ensures proper
nesting for us.  Statements that _do_ need to maintain such context are not
nested.

This also does not yet support emitting an object on closing tag, which
will be necessary for NIR, which will be a streaming IR that is "near" to
the source XML in structure.  This will then be used to lower into AIR for
the ASG, which gives structure needed for further analysis.

More information to come; I just want to get this committed to serve as a
mental synchronization point and clear my head, since I've been sitting on
these changes for so long and have to keep stashing them as I tumble down
rabbit holes covered in yak hair.

DEV-7145

											
										
										
											2022-07-13 13:55:32 -04:00
 								    /// Optionally attach a single token of lookahead to this transition.
 								    ///
 								    /// When `lookahead` is [`None`],
 								    ///   `self` is returned untouched;
 								    ///     otherwise the wrapped token is handed to
 								    ///     [`with_lookahead`](TransitionResult::with_lookahead).
 								    ///
 								    /// This is convenient when forwarding the output of other parsers.
 								    /// See [`with_lookahead`](TransitionResult::with_lookahead) for more
 								    ///   information.
 								    pub(in super::super) fn maybe_with_lookahead(
 								        self,
 								        lookahead: Option<Lookahead<S::Token>>,
 								    ) -> Self {
 								        if let Some(Lookahead(token)) = lookahead {
 								            self.with_lookahead(token)
 								        } else {
 								            self
 								        }
 								    }
-												tamer: parse::state: EchoState and TransitionResult constituent primitives

This begins to introduce more primitive operations to `TransitionResult` and
its components so that I can actually work with them without having to write
a bunch of concrete, boilerplate implementations.  This is demonstrated in
part by `EchoState` (which is nearly all boilerplate, but whose correctness
should be verifiable at a glance), which will be used going forward as a
basis for default implementations for parsers (e.g. expansion delegation).

DEV-13156

											
										
										
											2022-11-15 16:50:11 -05:00
 								    /// Translate this result into one for another [`ParseState`] `SB` by
 								    ///   mapping over both of its components:
 								    ///     the [`Transition`] and its associated [`TransitionData`].
 								    ///
 								    /// `fst` receives the [`ParseState`] held by the inner [`Transition`]
 								    ///   rather than the [`Transition`] itself,
 								    ///     which is a concession to convenience and brevity despite the
 								    ///     verbose convention of mandating [`Transition`] elsewhere.
 								    /// [`TransitionData`],
 								    ///   on the other hand,
 								    ///   is too complex of a structure to map over generically,
 								    ///     and so `fdata` is given the data whole and decides how to
 								    ///     translate it.
 								    pub(in super::super) fn bimap<SB: ParseState>(
 								        self,
 								        fst: impl FnOnce(S) -> SB,
 								        fdata: impl FnOnce(TransitionData<S>) -> TransitionData<SB>,
 								    ) -> TransitionResult<SB> {
 								        let Self(Transition(state), data) = self;

 								        TransitionResult(Transition(fst(state)), fdata(data))
 								    }
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
 								    /// Conditionally map to a [`TransitionResult`] based on whether the
 								    ///   inner [`TransitionData`] represents a dead state transition
 								    ///     ([`TransitionData::Dead`]).
 								    ///
 								    /// Inner values are unwrapped before applying one of `fdead` or
 								    ///   `falive`.
 								    ///
 								    /// Lookahead is automatically propagated to the resulting
 								    ///   [`TransitionResult`],
 								    ///     ensuring that the token cannot be lost.
 								    /// Consequently,
 								    ///   it is important that the [`TransitionResult`] returned by `fdead`
 								    ///   or `falive` _does not contain a token of lookahead_,
 								    ///     otherwise the system will panic,
 								    ///       since two tokens of lookahead cannot be accommodated.
 								    /// This is not as bad as it sounds in practice,
 								    ///   since no token of input is provided to either of the branches,
 								    ///   and so would have to be manufactured by
 								    ///     (or have been previously stored by)
 								    ///     a calling parser.
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								    ///
 								    /// Ownership and Branching
 								    /// =======================
 								    /// At the time of writing (2023),
 								    ///   Rust's borrow checker cannot understand that the arguments to
 								    ///   `fdead` and `falive` are utilized in exclusive branches;
 								    ///     the borrowing happens at the call to `branch_dead` itself.
 								    /// This causes ownership problems when both branches want to utilize the
 								    ///   same data.
 								    ///
 								    /// To work around this limitation,
 								    ///   this method accepts an arbitrary branching context `bctx` that
 								    ///   will be passed to either `fdead` or `falive`;
 								    ///     this can be utilized in place of a closure.
 								    pub fn branch_dead<SB: ParseState, C>(
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        self,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								        fdead: impl FnOnce(S, C) -> TransitionResult<<SB as ParseState>::Super>,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        falive: impl FnOnce(
 								            S,
 								            ParseStateResult<S>,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            C,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        ) -> TransitionResult<<SB as ParseState>::Super>,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								        bctx: C,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								    ) -> TransitionResult<<SB as ParseState>::Super>
 								    where
 								        S: PartiallyStitchableParseState<SB>,
 								    {
 								        self.branch_dead_la(
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            |st, Lookahead(la), bctx| {
 								                fdead(st, bctx)
 								                    .with_lookahead(<SB as ParseState>::Token::from(la))
-												tamer: asg::air: Expression parser

This delegates expression parsing to `AirExprAggregate`, in an effort to
both begin to simplify the understanding and maintenance of `AirAggregate`;
and allow for parser composition for template parsing.

This utilizes the prior changes for token sum types to precisely define the
subset of AIR tokens supported by the expression parser.  This differs from
prior approaches which delegated until a dead state, relying on runtime
information to determine if a parser has finished.  This allows us to
determine that statically.

I do want to be able to eliminate the dead state from the parser so we can
get rid of the `unreachable!`, but I need to move on; that's something I had
tried to do in the past too, which ended up adding a bit of complexity, and
I'll have to consider my options in the future, including whether the dead
state transition can be entirely eliminated in favor of the combination of
these sum types and recovery; the parsing framework decisions were made
while recovery was still an open question, at least in practice.

DEV-13708

											
										
										
											2023-03-06 12:03:55 -05:00
+								            },
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            |st, result, la, bctx| {
 								                falive(st, result, bctx)
-												tamer: parse: SP::Token: From<Self::Token>

Of course I would run into integration issues.  My foresight is lacking.

The purpose of this is to allow for type narrowing before passing data to a
more specialized ParseState, so that the other ParseState doesn't need to
concern itself with the entire domain of inputs that it doesn't need, and
repeat unnecessary narrowing.

For example, consider XIRF: it has an `Attr` variant, which holds an `Attr`
object.  We'll want to desugar that object.  It does not make sense to
require that the desugaring process accept `XirfToken` when we've already
narrowed it to an `Attr`---we should accept an Attr.

However, we run into a problem immediately: what happens with tokens that
bubble back up due to lookahead or errors?  Those tokens need to be
converted _back_ (widened).  Fortunately, widening is a much easier process
than narrowing---we can simply use `From`, as we do today so many other
places.

So, this still keeps the onus of narrowing on the caller, but for now that
seems most appropriate.  I suspect Rust would optimize away duplicate
checks, but that still leaves the maintenance concern---the two narrowings
could get out of sync, and that's not acceptable.

Unfortunately, this is just one of the problems with integration...

DEV-13156

											
										
										
											2022-11-17 23:52:28 -05:00
+								                    .maybe_with_lookahead(la.map(Lookahead::inner_into))
 								            },
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            bctx,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        )
 								    }
 								    /// Conditionally map to a [`TransitionResult`] based on whether the
 								    ///   inner [`TransitionData`] represents a dead state transition
 								    ///     ([`TransitionData::Dead`]).
 								    ///
 								    /// This is like [`Self::branch_dead`],
 								    ///   but exposes the token of lookahead (if any) and therefore _puts
 								    ///   the onus on the caller to ensure that the token is not lost_.
 								    /// As such,
 								    ///   this method is private to the `parse` module.
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								    ///
 								    /// For information about the branch context `bctx`,
 								    ///   see the public-facing method [`Self::branch_dead`].
 								    pub(in super::super) fn branch_dead_la<SB: ParseState, C>(
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        self,
 								        fdead: impl FnOnce(
 								            S,
 								            Lookahead<<S as ParseState>::Token>,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            C,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        ) -> TransitionResult<<SB as ParseState>::Super>,
 								        falive: impl FnOnce(
 								            S,
 								            ParseStateResult<S>,
 								            Option<Lookahead<<S as ParseState>::Token>>,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            C,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        ) -> TransitionResult<<SB as ParseState>::Super>,
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								        bctx: C,
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								    ) -> TransitionResult<<SB as ParseState>::Super>
 								    where
 								        S: PartiallyStitchableParseState<SB>,
 								    {
 								        use TransitionData::{Dead, Result};
 								        let Self(Transition(st), data) = self;
 								        match data {
-												parse::state::transition::TransitionResult::branch_dead: Add branch context

This works around limitations of Rust's borrow checker as of the time of
writing.  See the provided documentation for more information.

The branch context is not yet exposed to the `delegate` family of methods;
it will be added only as needed in the future.

DEV-13708

											
										
										
											2023-03-07 10:44:28 -05:00
+								            Dead(la) => fdead(st, la, bctx),
 								            Result(result, la) => falive(st, result, la, bctx),
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								        }
 								    }
-												tamer: parse::util::expand::StitchExpansion: Began transition from ParseState to method

My initial plan with expansion was to wrap a `PasteState` in another that
unwraps `Expansion` and converts into a `Dead` state, so that existing
`TransitionResult` stitching methods (`delegate`, specifically) could be
used.

But the desire to use that existing method was primarily because stitching
was a complex operation that was abstracted away _as part of the `delegate`
method_, which made writing new ones verbose and difficult.  Thus began the
previous commits to begin to move that responsibility elsewhere so that it
could be more composable.

This continues with that, introducing a new trait that will culminate in the
removal of a wrapping `ParseState` in favor of a stitching method.  The old
`StitchableExpansionState` is still used for tests, which demonstrates that
the boilerplate problem still exists despite improvements made here.  These
will become more generalized in the future as I have time (and the
functional aspects of the code more formalized too, now that they're taking
shape).

The benefit of this is that we avoid having to warp our abstractions in ways
that don't make sense (use of a dead state transition) just to satisfy
existing APIs.  It also means that we do not need the boilerplate of a
`ParseState` any time we want to introduce this type of
stitching/delegation.  It also means that those methods can eventually be
extracted into more general traits in the future as well.

Ultimately, though, the two would have accomplished the same thing.  But the
difference is most emphasized in the _parent_---the actual stitching still
has to take place for desugaring in the attribute parser, and I'd like for
that abstraction to still be in terms of expansion.  But if I utilized
`StitchableExpansionState`, which converted into a dead state, I'd have to
either forego the expansion abstraction---which would make the parser even
more confusing---or I'd have to create _another_ abstraction around the dead
state, which would mean that I stripped one abstraction just to introduce
another one that's essentially the same thing.  It didn't feel right, but it
would have worked.

The use of `PhantomData` in `StitchableExpansionState` was also a sign that
something wasn't quite right, in terms of how the abstractions were
integrating with one-another.

And so here we are, as I struggle to wade my way through all of the yak
shavings and make any meaningful progress on this project, while others
continue to suffer due to slow build times.

I'm sorry.  Even if the system is improving.

DEV-13156

											
										
										
											2022-11-17 14:41:49 -05:00
 								    /// Conditionally map to a [`TransitionResult`] based on whether the
 								    ///   inner [`TransitionData`] represents an object.
 								    pub(in super::super) fn branch_obj_la<SB: ParseState>(
 								        self,
 								        fobj: impl FnOnce(
 								            Transition<S>,
 								            <S as ParseState>::Object,
 								            Option<Lookahead<<S as ParseState>::Token>>,
 								        ) -> TransitionResult<<SB as ParseState>::Super>,
 								        fother: impl FnOnce(Transition<S>) -> Transition<SB>,
 								    ) -> TransitionResult<<SB as ParseState>::Super>
 								    where
 								        S: PartiallyStitchableParseState<SB>,
 								    {
 								        use ParseStatus::{Incomplete, Object};
 								        use TransitionData::{Dead, Result};
 								        let Self(st, data) = self;
 								        match data {
 								            Result(Ok(Object(obj)), la) => fobj(st, obj, la).into_super(),
 								            // Can't use `TransitionData::inner_into` since we only have a
 								            //   `PartiallyStitchableParseState`,
 								            //     and `inner_into` requires being able to convert the inner
 								            //     object that we handled above.
-												tamer: parse: SP::Token: From<Self::Token>

Of course I would run into integration issues.  My foresight is lacking.

The purpose of this is to allow for type narrowing before passing data to a
more specialized ParseState, so that the other ParseState doesn't need to
concern itself with the entire domain of inputs that it doesn't need, and
repeat unnecessary narrowing.

For example, consider XIRF: it has an `Attr` variant, which holds an `Attr`
object.  We'll want to desugar that object.  It does not make sense to
require that the desugaring process accept `XirfToken` when we've already
narrowed it to an `Attr`---we should accept an Attr.

However, we run into a problem immediately: what happens with tokens that
bubble back up due to lookahead or errors?  Those tokens need to be
converted _back_ (widened).  Fortunately, widening is a much easier process
than narrowing---we can simply use `From`, as we do today so many other
places.

So, this still keeps the onus of narrowing on the caller, but for now that
seems most appropriate.  I suspect Rust would optimize away duplicate
checks, but that still leaves the maintenance concern---the two narrowings
could get out of sync, and that's not acceptable.

Unfortunately, this is just one of the problems with integration...

DEV-13156

											
										
										
											2022-11-17 23:52:28 -05:00
+								            Result(Ok(Incomplete), la) => fother(st)
 								                .incomplete()
 								                .maybe_with_lookahead(la.map(Lookahead::inner_into)),
 								            Result(Err(e), la) => fother(st)
 								                .err(e)
 								                .maybe_with_lookahead(la.map(Lookahead::inner_into)),
 								            Dead(Lookahead(la)) => fother(st).dead(la.into()),
-												tamer: parse::util::expand::StitchExpansion: Began transition from ParseState to method

My initial plan with expansion was to wrap a `PasteState` in another that
unwraps `Expansion` and converts into a `Dead` state, so that existing
`TransitionResult` stitching methods (`delegate`, specifically) could be
used.

But the desire to use that existing method was primarily because stitching
was a complex operation that was abstracted away _as part of the `delegate`
method_, which made writing new ones verbose and difficult.  Thus began the
previous commits to begin to move that responsibility elsewhere so that it
could be more composable.

This continues with that, introducing a new trait that will culminate in the
removal of a wrapping `ParseState` in favor of a stitching method.  The old
`StitchableExpansionState` is still used for tests, which demonstrates that
the boilerplate problem still exists despite improvements made here.  These
will become more generalized in the future as I have time (and the
functional aspects of the code more formalized too, now that they're taking
shape).

The benefit of this is that we avoid having to warp our abstractions in ways
that don't make sense (use of a dead state transition) just to satisfy
existing APIs.  It also means that we do not need the boilerplate of a
`ParseState` any time we want to introduce this type of
stitching/delegation.  It also means that those methods can eventually be
extracted into more general traits in the future as well.

Ultimately, though, the two would have accomplished the same thing.  But the
difference is most emphasized in the _parent_---the actual stitching still
has to take place for desugaring in the attribute parser, and I'd like for
that abstraction to still be in terms of expansion.  But if I utilized
`StitchableExpansionState`, which converted into a dead state, I'd have to
either forego the expansion abstraction---which would make the parser even
more confusing---or I'd have to create _another_ abstraction around the dead
state, which would mean that I stripped one abstraction just to introduce
another one that's essentially the same thing.  It didn't feel right, but it
would have worked.

The use of `PhantomData` in `StitchableExpansionState` was also a sign that
something wasn't quite right, in terms of how the abstractions were
integrating with one-another.

And so here we are, as I struggle to wade my way through all of the yak
shavings and make any meaningful progress on this project, while others
continue to suffer due to slow build times.

I'm sorry.  Even if the system is improving.

DEV-13156

											
										
										
											2022-11-17 14:41:49 -05:00
+								        }
 								    }
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								}
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								/// Token to use as a lookahead token in place of the next token from the
 								///   input stream.
 								#[derive(Debug, PartialEq)]
-												tamer: parse::transition::Lookahead: ParseState=>Token type param

Having the lookahead token generic over the `ParseState` was a pain in the
ass for stitching, since they shared the same token type but not the same
parser.  I don't expect there to be any need to be able to infer other
parser-related types for a token of lookahead, so I'd rather just make my
life easier until such a thing is needed.

DEV-7145

											
										
										
											2022-07-13 10:13:35 -04:00
+								pub struct Lookahead<T: Token>(pub(in super::super) T);
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
-												tamer: parse: Abstract lookahead token replacement panic

There's no use in duplicating this in util::expand.

Lookahead tokens are one of the few invariants that I haven't taken the time
to enforce using the type system, because it'd be quite a bit of work that
I do not have time for, and may not be worth it with changes that may make
the system less ergonomic.  Nonetheless, I do hope to address it at some
point in the (possibly-far) future.

If ever you encounter this diagnostic message, ask yourself how stable TAMER
otherwise is and how many other issues like this have been entirely
prevented through compile-time proofs using the type system.

DEV-13156

											
										
										
											2022-11-16 15:25:52 -05:00
+								impl<T: Token> Lookahead<T> {
 								    /// Abort with a diagnostic panic describing a lookahead token and the
 								    ///   token that was about to replace it.
 								    ///
 								    /// Tokens of lookahead must never be silently discarded.
 								    /// That invariant is not yet enforced by the type system,
 								    ///   so it has to be checked at runtime instead.
 								    ///
 								    /// The span of the existing token is annotated with a note and the
 								    ///   span of `other` is annotated as an internal error;
 								    ///     `msg` is forwarded to `diagnostic_panic!` as its message.
 								    pub(in super::super) fn overwrite_panic(self, other: T, msg: &str) -> ! {
 								        // The token currently held as lookahead.
 								        let would_be_lost =
 								            self.0.span().note("this token of lookahead would be lost");
 								        // The token that was about to take its place.
 								        let replacement = other.span().internal_error(
 								            "attempting to replace previous lookahead token \
 								               with this one",
 								        );
 								        let desc = vec![would_be_lost, replacement];
 								        diagnostic_panic!(desc, "{msg}",)
 								    }
-												tamer: parse: SP::Token: From<Self::Token>

Of course I would run into integration issues.  My foresight is lacking.

The purpose of this is to allow for type narrowing before passing data to a
more specialized ParseState, so that the other ParseState doesn't need to
concern itself with the entire domain of inputs that it doesn't need, and
repeat unnecessary narrowing.

For example, consider XIRF: it has an `Attr` variant, which holds an `Attr`
object.  We'll want to desugar that object.  It does not make sense to
require that the desugaring process accept `XirfToken` when we've already
narrowed it to an `Attr`---we should accept an Attr.

However, we run into a problem immediately: what happens with tokens that
bubble back up due to lookahead or errors?  Those tokens need to be
converted _back_ (widened).  Fortunately, widening is a much easier process
than narrowing---we can simply use `From`, as we do today in so many other
places.

So, this still keeps the onus of narrowing on the caller, but for now that
seems most appropriate.  I suspect Rust would optimize away duplicate
checks, but that still leaves the maintenance concern---the two narrowings
could get out of sync, and that's not acceptable.

Unfortunately, this is just one of the problems with integration...

DEV-13156

											
										
										
											2022-11-17 23:52:28 -05:00
 								    /// Convert the wrapped token into another token type `U`,
 								    ///   keeping it wrapped as a token of [`Lookahead`].
 								    pub fn inner_into<U: Token>(self) -> Lookahead<U>
 								    where
 								        T: Into<U>,
 								    {
 								        // `Lookahead` has exactly one field,
 								        //   so this destructuring is irrefutable.
 								        let Self(tok) = self;
 								        Lookahead(tok.into())
 								    }
-												tamer: parse: Abstract lookahead token replacement panic

There's no use in duplicating this in util::expand.

Lookahead tokens are one of the few invariants that I haven't taken the time
to enforce using the type system, because it'd be quite a bit of work that
I do not have time for, and may not be worth it with changes that may make
the system less ergonomic.  Nonetheless, I do hope to address it at some
point in the (possibly-far) future.

If ever you encounter this diagnostic message, ask yourself how stable TAMER
otherwise is and how many other issues like this have been entirely
prevented through compile-time proofs using the type system.

DEV-13156

											
										
										
											2022-11-16 15:25:52 -05:00
+								}
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								/// Information about the state transition.
 								///
 								/// Note: Ideally a state wouldn't even be required for
 								///   [`Dead`](TransitionData::Dead),
 								///     but [`ParseState`] does not implement [`Default`] and [`Parser`]
 								///     requires _some_ state exist.
 								#[derive(Debug, PartialEq)]
 								pub(in super::super) enum TransitionData<S: ParseState> {
 								    /// State transition was successful or not attempted,
 								    ///   with an optional token of [`Lookahead`].
 								    ///
 								    /// Note that a successful state transition _does not_ imply a
 								    ///   successful [`ParseStateResult`]---the
 								    ///     parser may choose to successfully transition into an error
 								    ///     recovery state to accommodate future tokens.
-												tamer: parse::transition::Lookahead: ParseState=>Token type param

Having the lookahead token generic over the `ParseState` was a pain in the
ass for stitching, since they shared the same token type but not the same
parser.  I don't expect there to be any need to be able to infer other
parser-related types for a token of lookahead, so I'd rather just make my
life easier until such a thing is needed.

DEV-7145

											
										
										
											2022-07-13 10:13:35 -04:00
+								    Result(ParseStateResult<S>, Option<Lookahead<S::Token>>),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
 								    /// No valid state transition exists from the current state for the
 								    ///   given input token,
 								    ///     which is returned as a token of [`Lookahead`].
 								    ///
 								    /// A dead state is an accepting state that has no state transition for
 								    ///   the given token.
 								    /// This could simply mean that the parser has completed its job and
 								    ///   that control must be returned to a parent context.
 								    /// Note that this differs from an error state,
 								    ///   where a parser is unable to reach an accepting state because it
 								    ///   received unexpected input.
 								    ///
 								    /// Note that the parser may still choose to perform a state transition
 								    ///   for the sake of error recovery,
 								    ///     but note that the dead state is generally interpreted to mean
 								    ///       "I have no further work that I am able to perform"
 								    ///       and may lead to finalization of the parser.
 								    /// If a parser intends to do additional work,
 								    ///   it should return an error instead via [`TransitionData::Result`].
-												tamer: parse::transition::Lookahead: ParseState=>Token type param

Having the lookahead token generic over the `ParseState` was a pain in the
ass for stitching, since they shared the same token type but not the same
parser.  I don't expect there to be any need to be able to infer other
parser-related types for a token of lookahead, so I'd rather just make my
life easier until such a thing is needed.

DEV-7145

											
										
										
											2022-07-13 10:13:35 -04:00
+								    Dead(Lookahead<S::Token>),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								}
-												tamer: parser::Parser: cfg(test) tracing

This produces useful parse traces that are output as part of a failing test
case.  The parser generator macros can be a bit confusing to deal with when
things go wrong, so this helps to clarify matters.

This is _not_ intended to be machine-readable, but it does show that it
would be possible to generate machine-readable output to visualize the
entire lowering pipeline.  Perhaps something for the future.

I left these inline in Parser::feed_tok because they help to elucidate what
is going on, just by reading what the trace would output---that is, it helps
to make the method more self-documenting, albeit a tad bit more
verbose.  But with that said, it should probably be extracted at some point;
I don't want this to set a precedent where composition is feasible.

Here's an example from test cases:

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is parsing attributes for `package`.
  |   |  Attrs_(SutAttrsState_ { ___ctx: (QName(None, LocalPart(NCName(SymbolId(46 "package")))), OpenSpan(Span { len: 0, offset: 0, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10)), ___done: false })
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |   |  Lookahead: Some(Lookahead(Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))))
  = note: this trace was output as a debugging aid because `cfg(test)`.

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is attempting to recover by ignoring element with unexpected name `unexpected` (expected `classify`).
  |   |  ChildA(RecoverEleIgnore_(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1)))
  |   |  Lookahead: None
  = note: this trace was output as a debugging aid because `cfg(test)`.

DEV-7145

											
										
										
											2022-07-18 14:32:34 -04:00
+								impl<S: ParseState> TransitionData<S> {
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    /// Convert transition data for `S` into transition data for its
 								    ///   superstate ([`ParseState::Super`]).
 								    ///
 								    /// A successful status is lifted with [`ParseStatus::into_super`] and
 								    ///   an error is converted using [`Into`];
 								    ///     any token of lookahead is carried over unchanged.
 								    pub fn into_super(self) -> TransitionData<S::Super> {
 								        match self {
 								            // A dead state carries only its lookahead token.
 								            Self::Dead(la) => TransitionData::Dead(la),
 								            Self::Result(st_result, ola) => {
 								                let super_result: ParseStateResult<S::Super> = st_result
 								                    .map(ParseStatus::into_super)
 								                    .map_err(|e| e.into());
 								                TransitionData::Result(super_result, ola)
 								            }
 								        }
 								    }
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								    /// Associate this [`TransitionData`] with a state transition for a
 								    ///   [`ParseState`] `SB`,
 								    ///     translating from `S` if necessary.
 								    pub fn transition<SB: ParseState>(
 								        self,
 								        to: impl Into<Transition<SB>>,
 								    ) -> TransitionResult<<SB as ParseState>::Super>
 								    where
 								        S: StitchableParseState<SB>,
 								    {
 								        // Resolve the destination as a transition for `SB` before
 								        //   lifting it into `SB`'s superstate.
 								        let dest: Transition<SB> = to.into();
 								        // The data is translated independently of the destination state.
 								        TransitionResult(dest.into_super(), self.inner_into())
 								    }
-												tamer: parser::Parser: cfg(test) tracing

This produces useful parse traces that are output as part of a failing test
case.  The parser generator macros can be a bit confusing to deal with when
things go wrong, so this helps to clarify matters.

This is _not_ intended to be machine-readable, but it does show that it
would be possible to generate machine-readable output to visualize the
entire lowering pipeline.  Perhaps something for the future.

I left these inline in Parser::feed_tok because they help to elucidate what
is going on, just by reading what the trace would output---that is, it helps
to make the method more self-documenting, albeit a tad bit more
verbose.  But with that said, it should probably be extracted at some point;
I don't want this to set a precedent where composition is feasible.

Here's an example from test cases:

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is parsing attributes for `package`.
  |   |  Attrs_(SutAttrsState_ { ___ctx: (QName(None, LocalPart(NCName(SymbolId(46 "package")))), OpenSpan(Span { len: 0, offset: 0, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10)), ___done: false })
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |   |  Lookahead: Some(Lookahead(Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))))
  = note: this trace was output as a debugging aid because `cfg(test)`.

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is attempting to recover by ignoring element with unexpected name `unexpected` (expected `classify`).
  |   |  ChildA(RecoverEleIgnore_(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1)))
  |   |  Lookahead: None
  = note: this trace was output as a debugging aid because `cfg(test)`.

DEV-7145

											
										
										
											2022-07-18 14:32:34 -04:00
+								    /// Reference to the token of lookahead,
 								    ///   if any.
 								    pub(in super::super) fn lookahead_ref(
 								        &self,
 								    ) -> Option<&Lookahead<S::Token>> {
 								        match self {
 								            TransitionData::Dead(ref la)
 								            | TransitionData::Result(_, Some(ref la)) => Some(la),
 								            _ => None,
 								        }
 								    }
-												tamer: parse::parser::Parser: Include errors in parse trace

Because of recovery, the trace otherwise paints a really confusing-looking
picture when given unexpected input.

This is large enough now that it really ought to be extracted from
`feed_tok`, but I'll wait to see how this evolves further.  I considered
adding color too, but it's not yet clear to me that the visual noise will be
all that helpful.

DEV-7145

											
										
										
											2022-07-21 22:32:58 -04:00
-												tamer: parse::parser: Include object in parser trace

This information is likely redundant in a lowering pipeline, but is more
useful outside of such a pipeline.  It's also more clear.

`Object` does not implement `Display`, though, because that's too burdensome
for how it's currently used.  Many `Object`s are also `Token`s though and,
if fed to another `Parser` for lowering, it'll get `Display::fmt`'d.

DEV-7145

											
										
										
											2022-07-22 12:51:05 -04:00
+								    /// Borrow the object produced by a successful parse,
 								    ///   if any.
 								    ///
 								    /// Only a successful result holding [`ParseStatus::Object`] yields
 								    ///   a reference;
 								    ///     every other shape of transition data yields [`None`].
 								    pub(in super::super) fn object_ref(&self) -> Option<&S::Object> {
 								        if let TransitionData::Result(Ok(ParseStatus::Object(parsed)), _) =
 								            self
 								        {
 								            Some(parsed)
 								        } else {
 								            None
 								        }
 								    }
-												tamer: parse::parser::Parser: Include errors in parse trace

Because of recovery, the trace otherwise paints a really confusing-looking
picture when given unexpected input.

This is large enough now that it really ought to be extracted from
`feed_tok`, but I'll wait to see how this evolves further.  I considered
adding color too, but it's not yet clear to me that the visual noise will be
all that helpful.

DEV-7145

											
										
										
											2022-07-21 22:32:58 -04:00
+								    /// Reference to parsing error,
 								    ///   if any.
 								    pub(in super::super) fn err_ref(&self) -> Option<&S::Error> {
 								        match self {
 								            TransitionData::Result(Err(e), _) => Some(e),
 								            _ => None,
 								        }
 								    }
-												tamer: parse::util: Introduce StitchableExpansionState

This parser really just allows me to continue developing the NIR
interpolation system using `Expansion` terminology, and avoid having to use
dead states in tests.  This allows for the appropriate level of abstraction
to be used in isolation, and then only be stripped when stitching is
necessary.

Future commits will show how this is actually integrated and may introduce
additional abstraction to help.

DEV-13156

											
										
										
											2022-11-14 16:15:52 -05:00
-												tamer: parse::state: EchoState and TransitionResult constituent primitives

This begins to introduce more primitive operations to `TransitionResult` and
its components so that I can actually work with them without having to write
a bunch of concrete, boilerplate implementations.  This is demonstrated in
part by `EchoState` (which is nearly all boilerplate, but whose correctness
should be verifiable at a glance), which will be used going forward as a
basis for default implementations for parsers (e.g. expansion delegation).

DEV-13156

											
										
										
											2022-11-15 16:50:11 -05:00
+								    /// Assert that the [`TransitionData`] of our own [`ParseState`] `S`
 								    ///   and that of a target [`ParseState`] `SB` are reflexively
 								    ///   related.
 								    ///
 								    /// Beyond translating between the two types,
 								    ///   this serves as documentation:
 								    ///     it is an affirmative way of stating that "these two
 								    ///     [`ParseState`]s represent the same underlying data".
 								    /// This may be appropriate,
 								    ///   for example,
 								    ///   when `SB` wraps `S`.
 								    ///
 								    /// This is a stronger statement than saying that two
 								    ///   [`ParseState`]s are _compatible_ with one another in some way,
 								    ///     which is the assertion made by
 								    ///     [`StitchableParseState`](super::StitchableParseState) and
 								    ///     which may require data to be translated.
 								    ///
 								    /// The method refers to the mathematical reflexive relation,
 								    ///   but its exact name originates from the Coq tactic.
 								    pub fn reflexivity<SB: ParseState>(self) -> TransitionData<SB>
 								    where
 								        SB: ParseState<
 								            Token = <S as ParseState>::Token,
 								            Object = <S as ParseState>::Object,
 								            Error = <S as ParseState>::Error,
 								        >,
 								    {
 								        match self {
 								            // The token of lookahead has the same type for both states
 								            //   and so is moved across untouched.
 								            TransitionData::Dead(la) => TransitionData::Dead(la),
 								            // Only the status needs re-typing;
 								            //   errors and lookahead are shared between `S` and `SB`.
 								            TransitionData::Result(result, la) => TransitionData::Result(
 								                result.map(ParseStatus::reflexivity),
 								                la,
 								            ),
 								        }
 								    }
-												tamer: parse::state: Begin to tame delegation methods

These delegation methods have been a pain in my ass for quite some time, and
their lack of generalization makes the introduction of new delegation
methods (in the general sense, not necessarily trait methods) very tedious
and prone to inconsistencies.

I'm going to progressively refactor them in separate commits so it's clear
what I'm doing, primarily for future me to reference if need be.

DEV-13156

											
										
										
											2022-11-15 22:16:30 -05:00
 								    /// Transform inner types using [`Into`] such that they are compatible
 								    ///   with the superstate of `SB`.
 								    pub fn inner_into<SB: ParseState>(
 								        self,
 								    ) -> TransitionData<<SB as ParseState>::Super>
 								    where
 								        S: StitchableParseState<SB>,
 								    {
 								        use TransitionData::*;
 								        match self {
-												tamer: parse: SP::Token: From<Self::Token>

Of course I would run into integration issues.  My foresight is lacking.

The purpose of this is to allow for type narrowing before passing data to a
more specialized ParseState, so that the other ParseState doesn't need to
concern itself with the entire domain of inputs that it doesn't need, and
repeat unnecessary narrowing.

For example, consider XIRF: it has an `Attr` variant, which holds an `Attr`
object.  We'll want to desugar that object.  It does not make sense to
require that the desugaring process accept `XirfToken` when we've already
narrowed it to an `Attr`---we should accept an Attr.

However, we run into a problem immediately: what happens with tokens that
bubble back up due to lookahead or errors?  Those tokens need to be
converted _back_ (widened).  Fortunately, widening is a much easier process
than narrowing---we can simply use `From`, as we do today so many other
places.

So, this still keeps the onus of narrowing on the caller, but for now that
seems most appropriate.  I suspect Rust would optimize away duplicate
checks, but that still leaves the maintenance concern---the two narrowings
could get out of sync, and that's not acceptable.

Unfortunately, this is just one of the problems with integration...

DEV-13156

											
										
										
											2022-11-17 23:52:28 -05:00
+								            Dead(la) => Dead(la.inner_into()),
-												tamer: parse::state: Begin to tame delegation methods

These delegation methods have been a pain in my ass for quite some time, and
their lack of generalization makes the introduction of new delegation
methods (in the general sense, not necessarily trait methods) very tedious
and prone to inconsistencies.

I'm going to progressively refactor them in separate commits so it's clear
what I'm doing, primarily for future me to reference if need be.

DEV-13156

											
										
										
											2022-11-15 22:16:30 -05:00
+								            Result(result, la) => Result(
 								                match result {
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								                    Ok(status) => Ok(status.inner_into()),
-												tamer: parse::state: Begin to tame delegation methods

These delegation methods have been a pain in my ass for quite some time, and
their lack of generalization makes the introduction of new delegation
methods (in the general sense, not necessarily trait methods) very tedious
and prone to inconsistencies.

I'm going to progressively refactor them in separate commits so it's clear
what I'm doing, primarily for future me to reference if need be.

DEV-13156

											
										
										
											2022-11-15 22:16:30 -05:00
+								                    // First convert the error into `SB::Error`,
 								                    //   and then `SP::Super::Error`
 								                    //     (which will be the same type if SB is closed).
 								                    Err(e) => Err(e.into().into()),
 								                },
-												tamer: parse: SP::Token: From<Self::Token>

Of course I would run into integration issues.  My foresight is lacking.

The purpose of this is to allow for type narrowing before passing data to a
more specialized ParseState, so that the other ParseState doesn't need to
concern itself with the entire domain of inputs that it doesn't need, and
repeat unnecessary narrowing.

For example, consider XIRF: it has an `Attr` variant, which holds an `Attr`
object.  We'll want to desugar that object.  It does not make sense to
require that the desugaring process accept `XirfToken` when we've already
narrowed it to an `Attr`---we should accept an Attr.

However, we run into a problem immediately: what happens with tokens that
bubble back up due to lookahead or errors?  Those tokens need to be
converted _back_ (widened).  Fortunately, widening is a much easier process
than narrowing---we can simply use `From`, as we do today so many other
places.

So, this still keeps the onus of narrowing on the caller, but for now that
seems most appropriate.  I suspect Rust would optimize away duplicate
checks, but that still leaves the maintenance concern---the two narrowings
could get out of sync, and that's not acceptable.

Unfortunately, this is just one of the problems with integration...

DEV-13156

											
										
										
											2022-11-17 23:52:28 -05:00
+								                la.map(Lookahead::inner_into),
-												tamer: parse::state: Begin to tame delegation methods

These delegation methods have been a pain in my ass for quite some time, and
their lack of generalization makes the introduction of new delegation
methods (in the general sense, not necessarily trait methods) very tedious
and prone to inconsistencies.

I'm going to progressively refactor them in separate commits so it's clear
what I'm doing, primarily for future me to reference if need be.

DEV-13156

											
										
										
											2022-11-15 22:16:30 -05:00
+								            ),
 								        }
 								    }
-												tamer: parser::Parser: cfg(test) tracing

This produces useful parse traces that are output as part of a failing test
case.  The parser generator macros can be a bit confusing to deal with when
things go wrong, so this helps to clarify matters.

This is _not_ intended to be machine-readable, but it does show that it
would be possible to generate machine-readable output to visualize the
entire lowering pipeline.  Perhaps something for the future.

I left these inline in Parser::feed_tok because they help to elucidate what
is going on, just by reading what the trace would output---that is, it helps
to make the method more self-documenting, albeit a tad bit more
verbose.  But with that said, it should probably be extracted at some point;
I don't want this to set a precedent where composition is feasible.

Here's an example from test cases:

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is parsing attributes for `package`.
  |   |  Attrs_(SutAttrsState_ { ___ctx: (QName(None, LocalPart(NCName(SymbolId(46 "package")))), OpenSpan(Span { len: 0, offset: 0, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10)), ___done: false })
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |   |  Lookahead: Some(Lookahead(Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))))
  = note: this trace was output as a debugging aid because `cfg(test)`.

  [Parser::feed_tok] (input IR: XIRF)
  |  ==> Parser before tok is expecting opening tag `<classify>`.
  |   |  ChildA(Expecting_)
  |
  |  ==> XIRF tok: `<unexpected>`
  |   |  Open(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1))
  |
  |  ==> Parser after tok is attempting to recover by ignoring element with unexpected name `unexpected` (expected `classify`).
  |   |  ChildA(RecoverEleIgnore_(QName(None, LocalPart(NCName(SymbolId(82 "unexpected")))), OpenSpan(Span { len: 0, offset: 1, ctx: Context(SymbolId(1 "#!DUMMY")) }, 10), Depth(1)))
  |   |  Lookahead: None
  = note: this trace was output as a debugging aid because `cfg(test)`.

DEV-7145

											
										
										
											2022-07-18 14:32:34 -04:00
+								}
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								/// Lift a bare [`ParseStateResult`] into [`TransitionData`] that carries
 								///   no token of lookahead.
 								impl<S: ParseState> From<ParseStateResult<S>> for TransitionData<S> {
 								    /// Wrap `result` as [`TransitionData::Result`] without lookahead.
 								    fn from(result: ParseStateResult<S>) -> Self {
 								        TransitionData::Result(result, None)
 								    }
 								}
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								/// A verb denoting a state transition.
 								///
 								/// This is typically instantiated directly by a [`ParseState`] to perform a
 								///   state transition in [`ParseState::parse_token`].
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								///
 								/// This newtype was created to produce clear, self-documenting code;
 								///   parsers can get confusing to read with all of the types involved,
 								///     so this provides a mental synchronization point.
 								///
 								/// This also provides some convenience methods to help remove boilerplate
 								///   and further improve code clarity.
 								#[derive(Debug, PartialEq, Eq)]
 								pub struct Transition<S: ParseState>(pub S);
 								impl<S: ParseState> Transition<S> {
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								    /// Convert this [`Transition`] into a transition of the superstate
 								    ///   [`ParseState::Super`].
 								    ///
 								    /// This exists as an inherent method because,
 								    ///   as of the time of writing,
 								    ///   trait specialization has no path to stabilization;
 								    ///     `From<Transition<S>> for Transition<S::Super>` therefore
 								    ///     cannot be implemented,
 								    ///       since those types overlap.
 								    pub fn into_super(self) -> Transition<S::Super> {
 								        let Transition(state) = self;
 								        Transition(state.into())
 								    }
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    /// A state transition with corresponding data.
 								    ///
 								    /// This allows [`ParseState::parse_token`] to emit a parsed object and
 								    ///   corresponds to [`ParseStatus::Object`].
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn ok<T>(self, obj: T) -> TransitionResult<S::Super>
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    where
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								        T: Into<ParseStatus<S::Super>>,
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    {
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								        TransitionResult(
 								            self.into_super(),
 								            TransitionData::Result(Ok(obj.into()), None),
 								        )
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    }
 								    /// A transition with corresponding error.
 								    ///
 								    /// This indicates a parsing failure.
 								    /// The state ought to be suitable for error recovery.
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S::Super> {
 								        // The first error conversion is into that expected by S,
 								        //   which will _then_ (below) be converted into S::Super
 								        //   (if they're not the same).
 								        let err_s: S::Error = err.into();
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								        TransitionResult(
 								            self.into_super(),
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								            TransitionData::Result(Err(err_s.into()), None),
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								        )
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    }
 								    /// A state transition with corresponding [`Result`].
 								    ///
 								    /// This translates the provided [`Result`] in a manner equivalent to
 								    ///   [`Transition::ok`] and [`Transition::err`].
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn result<T, E>(
 								        self,
 								        result: Result<T, E>,
 								    ) -> TransitionResult<S::Super>
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    where
 								        T: Into<ParseStatus<S>>,
 								        E: Into<S::Error>,
 								    {
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								        TransitionResult(
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								            self.into_super(),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								            TransitionData::Result(
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								                result
 								                    .map(Into::into)
 								                    .map(ParseStatus::into_super)
 								                    .map_err(Into::<S::Error>::into)
 								                    .map_err(Into::into),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								                None,
 								            ),
 								        )
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    }
 								    /// A state transition indicating that more data is needed before an
 								    ///   object can be emitted.
 								    ///
 								    /// This corresponds to [`ParseStatus::Incomplete`].
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn incomplete(self) -> TransitionResult<S::Super> {
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								        TransitionResult(
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								            self.into_super(),
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								            TransitionData::Result(Ok(ParseStatus::Incomplete), None),
 								        )
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    }
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								    /// A state transition could not be performed and parsing will not
 								    ///   continue.
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    ///
-												tamer: Replace ParseStatus::Dead with generic lookahead

Oh what a tortured journey.  I had originally tried to avoid formalizing
lookahead for all parsers by pretending that it was only needed for dead
state transitions (that is---states that have no transitions for a given
input token), but then I needed to yield information for aggregation.  So I
added the ability to override the token for `Dead` to yield that, in
addition to the token.  But then I also needed to yield lookahead for error
conditions.  It was a mess that didn't make sense.

This eliminates `ParseStatus::Dead` entirely and fully integrates the
lookahead token in `Parser` that was previously implemented.

Notably, the lookahead token is encapsulated in `TransitionResult` and
unavailable to `ParseState` implementations, forcing them to rely on
`Parser` for recursion.  This not only prevents `ParseState` from recursing,
but also simplifies delegation by removing the need to manually handle
tokens of lookahead.

The awkward case here is XIRT, which does not follow the streaming parsing
convention, because it was conceived before the parsing framework.  It needs
to go away, but doing so right now would be a lot of work, so it has to
stick around for a little bit longer until the new parser generators can be
used instead.  It is a persistent thorn in my side, going against the grain.

`Parser` will immediately recurse if it sees a token of lookahead with an
incomplete parse.  This is because stitched parsers will frequently yield a
dead state indication when they're done parsing, and there's no use in
propagating an `Incomplete` status down the entire lowering pipeline.  But,
that does mean that the toplevel is not the only thing recursing.  _But_,
the behavior doesn't really change, in the sense that it would infinitely
recurse down the entire lowering stack (though there'd be an opportunity to
detect that).  This should never happen with a correct parser, but it's not
worth the effort right now to try to force such a thing with Rust's type
system.  Something like TLA+ is better suited here as an aid, but it
shouldn't be necessary with clear implementations and proper test
cases.  Parser generators will also ensure such a thing cannot occur.

I had hoped to remove ParseStatus entirely in favor of Parsed, but there's a
lot of type inference that happens based on the fact that `ParseStatus` has
a `ParseState` type parameter; `Parsed` has only `Object`.  It is desirable
for a public-facing `Parsed` to not be tied to `ParseState`, since consumers
need not be concerned with such a heavy type; however, we _do_ want that
heavy type internally, as it carries a lot of useful information that allows
for significant and powerful type inference, which in turn creates
expressive and convenient APIs.

DEV-7145

											
										
										
											2022-07-11 23:49:57 -04:00
+								    /// A dead state represents an _accepting state_ that has no edge to
 								    ///   another state for the given `tok`.
 								    /// Rather than throw an error,
 								    ///   a parser uses this status to indicate that it has completed
 								    ///   parsing and that the token should be utilized elsewhere;
 								    ///     the provided token will be used as a token of [`Lookahead`].
 								    ///
 								    /// If a parser is not prepared to be finalized and needs to yield an
 								    ///   object first,
 								    ///     use [`Transition::result`] or other methods along with a token
 								    ///     of [`Lookahead`].
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    pub fn dead(self, tok: S::Token) -> TransitionResult<S::Super> {
-												tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation for now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145

											
										
										
											2022-08-03 12:53:50 -04:00
+								        TransitionResult(
 								            self.into_super(),
 								            TransitionData::Dead(Lookahead(tok)),
 								        )
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    }
-												tamer: parse::state: EchoState and TransitionResult constituent primitives

This begins to introduce more primitive operations to `TransitionResult` and
its components so that I can actually work with them without having to write
a bunch of concrete, boilerplate implementations.  This is demonstrated in
part by `EchoState` (which is nearly all boilerplate, but whose correctness
should be verifiable at a glance), which will be used going forward as a
basis for default implementations for parsers (e.g. expansion delegation).

DEV-13156

											
										
										
											2022-11-15 16:50:11 -05:00
-												tamer: parse::util::expand::StitchExpansion: Began transition from ParseState to method

My initial plan with expansion was to wrap a `PasteState` in another that
unwraps `Expansion` and converts into a `Dead` state, so that existing
`TransitionResult` stitching methods (`delegate`, specifically) could be
used.

But the desire to use that existing method was primarily because stitching
was a complex operation that was abstracted away _as part of the `delegate`
method_, which made writing new ones verbose and difficult.  Thus began the
previous commits to begin to move that responsibility elsewhere so that it
could be more composable.

This continues with that, introducing a new trait that will culminate in the
removal of a wrapping `ParseState` in favor of a stitching method.  The old
`StitchableExpansionState` is still used for tests, which demonstrates that
the boilerplate problem still exists despite improvements made here.  These
will become more generalized in the future as I have time (and the
functional aspects of the code more formalized too, now that they're taking
shape).

The benefit of this is that we avoid having to warp our abstractions in ways
that don't make sense (use of a dead state transition) just to satisfy
existing APIs.  It also means that we do not need the boilerplate of a
`ParseState` any time we want to introduce this type of
stitching/delegation.  It also means that those methods can eventually be
extracted into more general traits in the future as well.

Ultimately, though, the two would have accomplished the same thing.  But the
difference is most emphasized in the _parent_---the actual stitching still
has to take place for desugaring in the attribute parser, and I'd like for
that abstraction to still be in terms of expansion.  But if I utilized
`StitchableExpansionState`, which converted into a dead state, I'd have to
either forego the expansion abstraction---which would make the parser even
more confusing---or I'd have to create _another_ abstraction around the dead
state, which would mean that I stripped one abstraction just to introduce
another one that's essentially the same thing.  It didn't feel right, but it
would have worked.

The use of `PhantomData` in `StitchableExpansionState` was also a sign that
something wasn't quite right, in terms of how the abstractions were
integrating with one-another.

And so here we are, as I struggle to wade my way through all of the yak
shavings and make any meaningful progress on this project, while others
continue to suffer due to slow build times.

I'm sorry.  Even if the system is improving.

DEV-13156

											
										
										
											2022-11-17 14:41:49 -05:00
+								    /// Produce a map over the inner [`ParseState`] `S` to another
-												tamer: parse::state: EchoState and TransitionResult constituent primitives

This begins to introduce more primitive operations to `TransitionResult` and
its components so that I can actually work with them without having to write
a bunch of concrete, boilerplate implementations.  This is demonstrated in
part by `EchoState` (which is nearly all boilerplate, but whose correctness
should be verifiable at a glance), which will be used going forward as a
basis for default implementations for parsers (e.g. expansion delegation).

DEV-13156

											
										
										
											2022-11-15 16:50:11 -05:00
+								    ///   [`ParseState`] `SB`.
 								    ///
-												tamer: parse::util::expand::StitchExpansion: Began transition from ParseState to method

My initial plan with expansion was to wrap a `PasteState` in another that
unwraps `Expansion` and converts into a `Dead` state, so that existing
`TransitionResult` stitching methods (`delegate`, specifically) could be
used.

But the desire to use that existing method was primarily because stitching
was a complex operation that was abstracted away _as part of the `delegate`
method_, which made writing new ones verbose and difficult.  Thus began the
previous commits to begin to move that responsibility elsewhere so that it
could be more composable.

This continues with that, introducing a new trait that will culminate in the
removal of a wrapping `ParseState` in favor of a stitching method.  The old
`StitchableExpansionState` is still used for tests, which demonstrates that
the boilerplate problem still exists despite improvements made here.  These
will become more generalized in the future as I have time (and the
functional aspects of the code more formalized too, now that they're taking
shape).

The benefit of this is that we avoid having to warp our abstractions in ways
that don't make sense (use of a dead state transition) just to satisfy
existing APIs.  It also means that we do not need the boilerplate of a
`ParseState` any time we want to introduce this type of
stitching/delegation.  It also means that those methods can eventually be
extracted into more general traits in the future as well.

Ultimately, though, the two would have accomplished the same thing.  But the
difference is most emphasized in the _parent_---the actual stitching still
has to take place for desugaring in the attribute parser, and I'd like for
that abstraction to still be in terms of expansion.  But if I utilized
`StitchableExpansionState`, which converted into a dead state, I'd have to
either forego the expansion abstraction---which would make the parser even
more confusing---or I'd have to create _another_ abstraction around the dead
state, which would mean that I stripped one abstraction just to introduce
another one that's essentially the same thing.  It didn't feel right, but it
would have worked.

The use of `PhantomData` in `StitchableExpansionState` was also a sign that
something wasn't quite right, in terms of how the abstractions were
integrating with one-another.

And so here we are, as I struggle to wade my way through all of the yak
shavings and make any meaningful progress on this project, while others
continue to suffer due to slow build times.

I'm sorry.  Even if the system is improving.

DEV-13156

											
										
										
											2022-11-17 14:41:49 -05:00
+								    /// Note that this is a curried associated function,
 								    ///   not a method.
 								    /// The intent is to maintain self-documentation by invoking it
 								    ///   qualified as [`Transition::fmap`].
 								    pub fn fmap<SB: ParseState>(
 								        f: impl Fn(S) -> SB,
 								    ) -> impl Fn(Transition<S>) -> Transition<SB> {
 								        move |Self(st)| Transition(f(st))
-												tamer: parse::state: EchoState and TransitionResult constituent primitives

This begins to introduce more primitive operations to `TransitionResult` and
its components so that I can actually work with them without having to write
a bunch of concrete, boilerplate implementations.  This is demonstrated in
part by `EchoState` (which is nearly all boilerplate, but whose correctness
should be verifiable at a glance), which will be used going forward as a
basis for default implementations for parsers (e.g. expansion delegation).

DEV-13156

											
										
										
											2022-11-15 16:50:11 -05:00
+								    }
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								}
-												tamer: parse::state: Further generalize ParseState::delegate

This moves enough of the handling of complex type conversions into the
various components of `TransitionResult` (and itself), which simplifies
delegation and opens up the possibility of having specialized
delegation/stitching methods implemented atop of `TransitionResult`.

DEV-13156

											
										
										
											2022-11-16 10:34:04 -05:00
+								impl<S: ParseState> From<S> for Transition<S> {
 								    /// Wrap the provided [`ParseState`] in a [`Transition`],
 								    ///   without any associated data.
 								    fn from(state: S) -> Self {
 								        Transition(state)
 								    }
 								}
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								impl<S: ClosedParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								    for TransitionResult<S>
 								{
 								    fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
 								        match residual {
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								            (st, result) => Self(st, TransitionData::Result(result, None)),
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								        }
 								    }
 								}
 								impl<S: ParseState> FromResidual<Result<Infallible, TransitionResult<S>>>
 								    for TransitionResult<S>
 								{
 								    /// Recover the [`TransitionResult`] carried by the `Err` variant of
 								    ///   a [`Result`] residual.
 								    ///
 								    /// The `Ok` variant holds an [`Infallible`],
 								    ///   which has no values,
 								    ///     and so only `Err` can ever be observed.
 								    fn from_residual(
 								        residual: Result<Infallible, TransitionResult<S>>,
 								    ) -> Self {
 								        match residual {
 								            Err(e) => e,
 								            // This match arm doesn't seem to be required in
 								            //   core::result::Result's FromResidual implementation,
 								            //     but as of 1.61 nightly it is here.
 								            // Since `Infallible` is uninhabited,
 								            //   an empty match proves to the compiler that this
 								            //   cannot occur without resorting to `unsafe`.
 								            Ok(never) => match never {},
 								        }
 								    }
 								}
 								impl<S: ParseState> FromResidual<ControlFlow<TransitionResult<S>, Infallible>>
 								    for TransitionResult<S>
 								{
 								    /// Recover the [`TransitionResult`] carried by the `Break` variant
 								    ///   of a [`ControlFlow`] residual.
 								    ///
 								    /// The `Continue` variant holds an [`Infallible`],
 								    ///   which has no values,
 								    ///     and so only `Break` can ever be observed.
 								    fn from_residual(
 								        residual: ControlFlow<TransitionResult<S>, Infallible>,
 								    ) -> Self {
 								        match residual {
 								            ControlFlow::Break(result) => result,
 								            // `Infallible` is uninhabited,
 								            //   so an empty match proves to the compiler that this
 								            //   cannot be hit without resorting to `unsafe`.
 								            ControlFlow::Continue(never) => match never {},
 								        }
 								    }
 								}
 								/// An object able to be used as data for a state [`Transition`].
 								///
 								/// This flips the usual order of things:
 								///   rather than using a method of [`Transition`] to provide data,
 								///     this starts with the data and produces a transition from it.
 								/// This is sometimes necessary to satisfy ownership/borrowing rules.
 								///
 								/// This trait simply removes boilerplate associated with storing
 								///   intermediate values and translating into the resulting type.
 								pub trait Transitionable<S: ParseState> {
 								    /// Perform a state transition to `S` using [`Self`] as the associated
 								    ///   data.
 								    ///
 								    /// This may be necessary to satisfy ownership/borrowing rules when
 								    ///   state data from `S` is used to compute [`Self`].
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    fn transition(self, to: S) -> TransitionResult<S::Super>;
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								}
 								impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
 								where
 								    S: ParseState,
 								    <S as ParseState>::Error: From<E>,
 								{
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    fn transition(self, to: S) -> TransitionResult<S::Super> {
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								        Transition(to).result(self)
 								    }
 								}
 								impl<S, E> Transitionable<S> for Result<(), E>
 								where
 								    S: ParseState,
 								    <S as ParseState>::Error: From<E>,
 								{
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    fn transition(self, to: S) -> TransitionResult<S::Super> {
-												tamer: parse::state::transition: Extract module into own file

That's it.  Just preparing for changes that will change how lookaheads and
dead state transitions will work.

DEV-7145

											
										
										
											2022-07-07 12:47:31 -04:00
+								        Transition(to).result(self.map(|_| ParseStatus::Incomplete))
 								    }
 								}
-												tamer: xir::parse::ele: Initial Close mapping support

Since the parsers produce streaming IRs, we need to be able to emit tokens
representing closing delimiters, where they are important.

This notably doesn't use spans; I'll add those next, since they're also
needed for the previous work.

DEV-7145

											
										
										
											2022-07-13 15:02:46 -04:00
-												tamer: asg::graph::xmli: Extract xmli generation from parse_token

This begins to develop a pattern for doing these transformations.  I had
tried a number of things using iterators, but I wasn't satisfied with either
how they were turning out; had to fight too much with the type system; or
had to resort to heap allocations.  Sticking with an explicit
`push`/`push_all` for now works just fine.

Almost done cleaning up `AsgTreeToXirf::parse_token`, and then I can move on
to introducing more objects.

DEV-13708

											
										
										
											2023-02-24 13:17:16 -05:00
+								impl<S: ParseState> Transitionable<S> for Option<S::Object> {
 								    /// Transition to `to` using an optional object as the data.
 								    ///
 								    /// A `Some` object is handed to [`Transition::ok`],
 								    ///   whereas `None` results in [`Transition::incomplete`].
 								    fn transition(self, to: S) -> TransitionResult<S::Super> {
 								        let next = Transition(to);
 								
 								        if let Some(object) = self {
 								            next.ok(object)
 								        } else {
 								            next.incomplete()
 								        }
 								    }
 								}
-												tamer: xir::parse::ele: Initial Close mapping support

Since the parsers produce streaming IRs, we need to be able to emit tokens
representing closing delimiters, where they are important.

This notably doesn't use spans; I'll add those next, since they're also
needed for the previous work.

DEV-7145

											
										
										
											2022-07-13 15:02:46 -04:00
+								impl<S: ParseState> Transitionable<S> for ParseStatus<S> {
-												tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145

											
										
										
											2022-08-10 00:21:45 -04:00
+								    fn transition(self, to: S) -> TransitionResult<S::Super> {
 								        Transition(to).ok(self.into_super())
-												tamer: xir::parse::ele: Initial Close mapping support

Since the parsers produce streaming IRs, we need to be able to emit tokens
representing closing delimiters, where they are important.

This notably doesn't use spans; I'll add those next, since they're also
needed for the previous work.

DEV-7145

											
										
										
											2022-07-13 15:02:46 -04:00
+								    }
 								}