tamer: parse::state::ParseState::Super: Superstate concept

I'm disappointed that I keep having to implement features that I had hoped
to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be
a sum type that is able to delegate to stitched `ParseState`s.  This then
allows a `ParseState` to transition directly to another `ParseState` and
have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME
XML document, where certain statements can be nested (like `<section>`), and
where expressions can be nested.  I had gotten away with composition-based
delegation until now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures.  The
typical naive solution is boxing, which I cannot do, because not only is
this on an extremely hot code path, but I require that Rust be able to
deeply introspect and optimize away the lowering pipeline as much as
possible.

Many months ago, I figured that such a solution would require a trampoline,
as it typically does in stack-based languages, but I was hoping to avoid
it.  Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that
sum type, rather than introducing it as yet another feature to `Parser`; the
latter would provide a more convenient API, but it would continue to bloat
`Parser` itself.  Right now, only the element parser generator will require
use of this, so if it's needed beyond that, then I'll debate whether it's
worth providing a better abstraction.  For now, the intent will be to use
the `Context` to store a stack that it can pop off of to restore the
previous `ParseState` before delegation.

DEV-7145
main
Mike Gerwitz 2022-08-03 12:53:50 -04:00
parent 7a5f731cac
commit 53a689741b
9 changed files with 126 additions and 51 deletions

View File

@ -24,8 +24,8 @@ use crate::{
num::{Dim, Dtype},
obj::xmlo::SymType,
parse::{
self, EmptyContext, NoContext, ParseState, Token, Transition,
TransitionResult, Transitionable,
self, ClosedParseState, EmptyContext, NoContext, ParseState, Token,
Transition, TransitionResult, Transitionable,
},
span::Span,
sym::{st::raw, SymbolId},
@ -140,11 +140,12 @@ impl Display for XmloToken {
}
/// A parser capable of being composed with [`XmloReader`].
pub trait XmloState = ParseState<Token = Xirf<Text>, Context = EmptyContext>
where
Self: Default,
<Self as ParseState>::Error: Into<XmloError>,
<Self as ParseState>::Object: Into<XmloToken>;
pub trait XmloState =
ClosedParseState<Token = Xirf<Text>, Context = EmptyContext>
where
Self: Default,
<Self as ParseState>::Error: Into<XmloError>,
<Self as ParseState>::Object: Into<XmloToken>;
#[derive(Debug, Default, PartialEq, Eq)]
pub enum XmloReader<

View File

@ -32,8 +32,8 @@ pub use lower::{Lower, LowerIter, ParsedObject};
pub use parser::{Parsed, ParsedResult, Parser};
pub use state::{
context::{Context, Empty as EmptyContext, NoContext},
ParseResult, ParseState, ParseStatus, Transition, TransitionResult,
Transitionable,
ClosedParseState, ParseResult, ParseState, ParseStatus, Transition,
TransitionResult, Transitionable,
};
use crate::span::{Span, UNKNOWN_SPAN};

View File

@ -20,8 +20,8 @@
//! IR lowering operation between [`Parser`]s.
use super::{
NoContext, Object, ParseError, ParseState, Parsed, ParsedResult, Parser,
Token, TransitionResult, UnknownToken,
state::ClosedParseState, NoContext, Object, ParseError, ParseState, Parsed,
ParsedResult, Parser, Token, TransitionResult, UnknownToken,
};
use crate::{
diagnose::Diagnostic,
@ -40,7 +40,7 @@ pub struct LowerIter<'a, 'b, S, I, LS>
where
S: ParseState,
I: Iterator<Item = ParsedResult<S>>,
LS: ParseState<Token = S::Object>,
LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
{
/// A push [`Parser`].
@ -60,7 +60,7 @@ impl<'a, 'b, S, I, LS> LowerIter<'a, 'b, S, I, LS>
where
S: ParseState,
I: Iterator<Item = ParsedResult<S>>,
LS: ParseState<Token = S::Object>,
LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
{
/// Consume inner parser and yield its context.
@ -77,7 +77,7 @@ where
pub trait Lower<S, LS>
where
S: ParseState,
LS: ParseState<Token = S::Object> + Default,
LS: ClosedParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
{
/// Lower the IR produced by this [`Parser`] into another IR by piping
@ -164,7 +164,7 @@ impl<S, LS, I> Lower<S, LS> for I
where
I: Iterator<Item = ParsedResult<S>> + Sized,
S: ParseState,
LS: ParseState<Token = S::Object> + Default,
LS: ClosedParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
{
}
@ -173,7 +173,7 @@ impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
where
S: ParseState,
I: Iterator<Item = ParsedResult<S>>,
LS: ParseState<Token = S::Object>,
LS: ClosedParseState<Token = S::Object>,
<S as ParseState>::Object: Token,
{
type Item = ParsedResult<LS>;

View File

@ -20,6 +20,7 @@
//! High-level parsing abstraction.
use super::{
state::ClosedParseState,
trace::{self, ParserTrace},
ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition,
TransitionResult,
@ -77,7 +78,7 @@ impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
/// call [`finalize`](Parser::finalize) to ensure that parsing has
/// completed in an accepting state.
#[derive(Debug, PartialEq)]
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
pub struct Parser<S: ClosedParseState, I: TokenStream<S::Token>> {
/// Input token stream to be parsed by the [`ParseState`] `S`.
toks: I,
@ -137,7 +138,7 @@ pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
tracer: trace::VoidTrace,
}
impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Create a parser with a pre-initialized [`ParseState`].
///
/// If the provided [`ParseState`] does not require context
@ -313,7 +314,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
match result {
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
Err(e) => Err(e.into()),
Err(e) => Err(ParseError::from(e)),
}
}
}
@ -368,7 +369,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
}
}
impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
type Item = ParsedResult<S>;
/// Parse a single [`Token`] according to the current
@ -400,7 +401,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
impl<S, I> From<I> for Parser<S, I>
where
S: ParseState + Default,
S: ClosedParseState + Default,
I: TokenStream<S::Token>,
<S as ParseState>::Context: Default,
{
@ -429,7 +430,7 @@ where
impl<S, I, C> From<(I, C)> for Parser<S, I>
where
S: ParseState<Context = C> + Default,
S: ClosedParseState<Context = C> + Default,
I: TokenStream<S::Token>,
{
/// Create a new parser with a provided context.

View File

@ -53,6 +53,15 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
}
}
/// A [`ParseState`] that transitions only to itself
/// (is closed under transition).
///
/// Concretely,
/// this is an alias for any [`ParseState`] whose associated
/// [`ParseState::Super`] type is `Self`.
///
/// These are the only [`ParseState`]s that can be used directly by
/// [`Parser`],
/// since [`Parser`] must be able to both handle every provided
/// [`Transition`] and know how to delegate to inner [`ParseState`]s.
pub trait ClosedParseState = ParseState<Super = Self>;
/// A parsing automaton.
///
/// These states are utilized by a [`Parser`].
@ -74,7 +83,10 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
/// but is not necessarily true for smaller, specialized parsers intended
/// for use as components of a larger parser
/// (in a spirit similar to parser combinators).
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized
where
Self: Into<Self::Super>,
{
/// Input tokens to the parser.
type Token: Token;
@ -82,7 +94,33 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
type Object: Object;
/// Errors specific to this set of states.
type Error: Debug + Diagnostic + PartialEq;
type Error: Debug
+ Diagnostic
+ PartialEq
+ Into<<Self::Super as ParseState>::Error>;
/// Superstate (parent state).
///
/// This is applicable only if the [`ParseState`] is capable of
/// transitioning to a state outside of its own.
/// It was initially introduced for implementing trampolines in place of
/// composition-based delegation,
/// the latter of which would otherwise require boxing on
/// (extremely) hot code paths for otherwise-recursive data
/// structures.
///
/// Intuitively,
/// the superstate represents a sum type of the pool of all possible
/// [`ParseState`]s that we can request transfer of control to.
/// This is the same concept as [`StitchableParseState`],
/// but operating in reverse
/// (delegation via trampoline instead of direct function call).
type Super: ClosedParseState<
Token = Self::Token,
Object = Self::Object,
Error = Self::Error,
Context = Self::Context,
> = Self;
/// Object provided to parser alongside each token.
///
@ -97,7 +135,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
/// the context and the types that are able to be inferred.
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
where
Self: Default,
Self: ClosedParseState + Default,
Self::Context: Default,
{
Parser::from(toks)
@ -123,7 +161,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
ctx: Self::Context,
) -> Parser<Self, I>
where
Self: Default,
Self: ClosedParseState + Default,
{
Parser::from((toks, ctx))
}
@ -217,7 +255,9 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self,
tok: <Self as ParseState>::Token,
mut context: C,
into: impl FnOnce(Self) -> Transition<SP>,
into: impl FnOnce(
<Self as ParseState>::Super,
) -> Transition<<SP as ParseState>::Super>,
dead: impl FnOnce() -> Transition<SP>,
) -> TransitionResult<SP>
where
@ -270,10 +310,12 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
self,
tok: <Self as ParseState>::Token,
mut context: C,
into: impl FnOnce(Self) -> Transition<SP>,
into: impl FnOnce(
<Self as ParseState>::Super,
) -> Transition<<SP as ParseState>::Super>,
_dead: impl FnOnce() -> Transition<SP>,
objf: impl FnOnce(
Self,
<Self as ParseState>::Super,
<Self as ParseState>::Object,
) -> TransitionResult<SP>,
) -> TransitionResult<SP>
@ -322,10 +364,10 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
mut context: C,
env: X,
into: impl FnOnce(
Self,
<Self as ParseState>::Super,
Option<<Self as ParseState>::Object>,
X,
) -> Transition<SP>,
) -> Transition<<SP as ParseState>::Super>,
dead: impl FnOnce(X) -> Transition<SP>,
) -> TransitionResult<SP>
where

View File

@ -27,7 +27,7 @@ use std::{
};
#[cfg(doc)]
use super::Parser;
use super::{ClosedParseState, Parser};
/// A state transition with associated data.
///
@ -46,13 +46,19 @@ use super::Parser;
/// for example,
/// for multiple data to be emitted in response to a single token.
///
/// If a [`ParseState`] is not a [`ClosedParseState`],
/// the transition will be to its superstate ([`ParseState::Super`]);
/// this conversion is performed automatically by the [`Transition`]
/// methods that produce [`TransitionResult`]
/// (such as [`Transition::ok`]).
///
/// This struct is opaque to ensure that critical invariants involving
/// transitions and lookahead are properly upheld;
/// callers must use the appropriate parsing APIs.
#[derive(Debug, PartialEq)]
pub struct TransitionResult<S: ParseState>(
/// New parser state.
pub(in super::super) Transition<S>,
pub(in super::super) Transition<S::Super>,
/// Result of the parsing operation.
pub(in super::super) TransitionData<S>,
);
@ -195,6 +201,19 @@ impl<S: ParseState> TransitionData<S> {
pub struct Transition<S: ParseState>(pub S);
impl<S: ParseState> Transition<S> {
/// Lift this [`Transition`] into a transition to the superstate
/// [`ParseState::Super`] of `S`.
///
/// The wrapped state is converted using its [`Into`] implementation
/// and re-wrapped in a new [`Transition`].
///
/// This exists as an explicit method because
/// `From<Transition<S>> for Transition<S::Super>` cannot be implemented:
/// those types overlap,
/// and trait specialization has no path to stabilization as of the
/// time of writing.
pub fn into_super(self) -> Transition<S::Super> {
    let Transition(inner) = self;
    Transition(inner.into())
}
/// A state transition with corresponding data.
///
/// This allows [`ParseState::parse_token`] to emit a parsed object and
@ -203,7 +222,10 @@ impl<S: ParseState> Transition<S> {
where
T: Into<ParseStatus<S>>,
{
TransitionResult(self, TransitionData::Result(Ok(obj.into()), None))
TransitionResult(
self.into_super(),
TransitionData::Result(Ok(obj.into()), None),
)
}
/// A transition with corresponding error.
@ -211,7 +233,10 @@ impl<S: ParseState> Transition<S> {
/// This indicates a parsing failure.
/// The state ought to be suitable for error recovery.
pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
TransitionResult(self, TransitionData::Result(Err(err.into()), None))
TransitionResult(
self.into_super(),
TransitionData::Result(Err(err.into()), None),
)
}
/// A state transition with corresponding [`Result`].
@ -224,7 +249,7 @@ impl<S: ParseState> Transition<S> {
E: Into<S::Error>,
{
TransitionResult(
self,
self.into_super(),
TransitionData::Result(
result.map(Into::into).map_err(Into::into),
None,
@ -238,7 +263,7 @@ impl<S: ParseState> Transition<S> {
/// This corresponds to [`ParseStatus::Incomplete`].
pub fn incomplete(self) -> TransitionResult<S> {
TransitionResult(
self,
self.into_super(),
TransitionData::Result(Ok(ParseStatus::Incomplete), None),
)
}
@ -258,7 +283,10 @@ impl<S: ParseState> Transition<S> {
/// use [`Transition::result`] or other methods along with a token
/// of [`Lookahead`].
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
TransitionResult(self, TransitionData::Dead(Lookahead(tok)))
TransitionResult(
self.into_super(),
TransitionData::Dead(Lookahead(tok)),
)
}
}
@ -267,7 +295,9 @@ impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
{
fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
match residual {
(st, result) => Self(st, TransitionData::Result(result, None)),
(st, result) => {
Self(st.into_super(), TransitionData::Result(result, None))
}
}
}
}

View File

@ -48,8 +48,8 @@ use super::{
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
parse::{
Context, Object, ParseState, ParsedResult, Token, Transition,
TransitionResult,
ClosedParseState, Context, Object, ParseState, ParsedResult, Token,
Transition, TransitionResult,
},
span::Span,
sym::{st::is_common_whitespace, GlobalSymbolResolve, SymbolId},
@ -285,7 +285,7 @@ impl From<Text> for RefinedText {
/// XIRF-compatible attribute parser.
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
ParseState<Token = XirToken, Object = Attr>
ClosedParseState<Token = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<XirToXirfError>,

View File

@ -40,7 +40,7 @@
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
fmt::ListDisplayWrapper,
parse::ParseState,
parse::ClosedParseState,
span::Span,
xir::{attr::Attr, fmt::XmlAttrList, EleSpan, OpenSpan, QName},
};
@ -177,7 +177,7 @@ impl<S: AttrParseState> Diagnostic for AttrParseError<S> {
/// Attribute parsing automaton.
///
/// These parsers are generated by [`attr_parse!`](crate::attr_parse).
pub trait AttrParseState: ParseState {
pub trait AttrParseState: ClosedParseState {
/// Type of error for failed parsing of attribute values.
///
/// These originate from [`TryFrom`] conversions on the attribute

View File

@ -181,8 +181,8 @@ use super::{
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
parse::{
self, EmptyContext, NoContext, ParseError, ParseResult, ParseState,
ParsedResult, Transition, TransitionResult,
self, ClosedParseState, EmptyContext, NoContext, ParseError,
ParseResult, ParseState, ParsedResult, Transition, TransitionResult,
},
span::Span,
sym::SymbolId,
@ -504,11 +504,12 @@ where
Done,
}
pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<StackError>,
EmptyContext: AsMut<<Self as ParseState>::Context>;
pub trait StackAttrParseState =
ClosedParseState<Token = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<StackError>,
EmptyContext: AsMut<<Self as ParseState>::Context>;
impl<SA: StackAttrParseState> Default for Stack<SA> {
fn default() -> Self {