tamer: parse::state::ParseState::Super: Superstate concept
I'm disappointed that I keep having to implement features that I had hoped to avoid implementing.

This introduces a "superstate" feature, which is intended really just to be a sum type that is able to delegate to stitched `ParseState`s. This then allows a `ParseState` to transition directly to another `ParseState` and have the parent `ParseState` handle the delegation---a trampoline.

This issue naturally arises out of the recursive nature of parsing a TAME XML document, where certain statements can be nested (like `<section>`), and where expressions can be nested. I had gotten away with composition-based delegation until now because `xmlo` headers do not have such nesting.

The composition-based approach falls flat for recursive structures. The typical naive solution is boxing, which I cannot do, because not only is this on an extremely hot code path, but I require that Rust be able to deeply introspect and optimize away the lowering pipeline as much as possible.

Many months ago, I figured that such a solution would require a trampoline, as it typically does in stack-based languages, but I was hoping to avoid it. Well, no longer; let's just get on with it.

This intends to implement trampolining in a `ParseState` that serves as that sum type, rather than introducing it as yet another feature to `Parser`; the latter would provide a more convenient API, but it would continue to bloat `Parser` itself. Right now, only the element parser generator will require use of this, so if it's needed beyond that, then I'll debate whether it's worth providing a better abstraction.

For now, the intent will be to use the `Context` to store a stack that it can pop off of to restore the previous `ParseState` before delegation.

DEV-7145

main
parent
7a5f731cac
commit
53a689741b
|
@ -24,8 +24,8 @@ use crate::{
|
|||
num::{Dim, Dtype},
|
||||
obj::xmlo::SymType,
|
||||
parse::{
|
||||
self, EmptyContext, NoContext, ParseState, Token, Transition,
|
||||
TransitionResult, Transitionable,
|
||||
self, ClosedParseState, EmptyContext, NoContext, ParseState, Token,
|
||||
Transition, TransitionResult, Transitionable,
|
||||
},
|
||||
span::Span,
|
||||
sym::{st::raw, SymbolId},
|
||||
|
@ -140,11 +140,12 @@ impl Display for XmloToken {
|
|||
}
|
||||
|
||||
/// A parser capable of being composed with [`XmloReader`].
|
||||
pub trait XmloState = ParseState<Token = Xirf<Text>, Context = EmptyContext>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<XmloError>,
|
||||
<Self as ParseState>::Object: Into<XmloToken>;
|
||||
pub trait XmloState =
|
||||
ClosedParseState<Token = Xirf<Text>, Context = EmptyContext>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<XmloError>,
|
||||
<Self as ParseState>::Object: Into<XmloToken>;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub enum XmloReader<
|
||||
|
|
|
@ -32,8 +32,8 @@ pub use lower::{Lower, LowerIter, ParsedObject};
|
|||
pub use parser::{Parsed, ParsedResult, Parser};
|
||||
pub use state::{
|
||||
context::{Context, Empty as EmptyContext, NoContext},
|
||||
ParseResult, ParseState, ParseStatus, Transition, TransitionResult,
|
||||
Transitionable,
|
||||
ClosedParseState, ParseResult, ParseState, ParseStatus, Transition,
|
||||
TransitionResult, Transitionable,
|
||||
};
|
||||
|
||||
use crate::span::{Span, UNKNOWN_SPAN};
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
//! IR lowering operation between [`Parser`]s.
|
||||
|
||||
use super::{
|
||||
NoContext, Object, ParseError, ParseState, Parsed, ParsedResult, Parser,
|
||||
Token, TransitionResult, UnknownToken,
|
||||
state::ClosedParseState, NoContext, Object, ParseError, ParseState, Parsed,
|
||||
ParsedResult, Parser, Token, TransitionResult, UnknownToken,
|
||||
};
|
||||
use crate::{
|
||||
diagnose::Diagnostic,
|
||||
|
@ -40,7 +40,7 @@ pub struct LowerIter<'a, 'b, S, I, LS>
|
|||
where
|
||||
S: ParseState,
|
||||
I: Iterator<Item = ParsedResult<S>>,
|
||||
LS: ParseState<Token = S::Object>,
|
||||
LS: ClosedParseState<Token = S::Object>,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
/// A push [`Parser`].
|
||||
|
@ -60,7 +60,7 @@ impl<'a, 'b, S, I, LS> LowerIter<'a, 'b, S, I, LS>
|
|||
where
|
||||
S: ParseState,
|
||||
I: Iterator<Item = ParsedResult<S>>,
|
||||
LS: ParseState<Token = S::Object>,
|
||||
LS: ClosedParseState<Token = S::Object>,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
/// Consume inner parser and yield its context.
|
||||
|
@ -77,7 +77,7 @@ where
|
|||
pub trait Lower<S, LS>
|
||||
where
|
||||
S: ParseState,
|
||||
LS: ParseState<Token = S::Object> + Default,
|
||||
LS: ClosedParseState<Token = S::Object> + Default,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
/// Lower the IR produced by this [`Parser`] into another IR by piping
|
||||
|
@ -164,7 +164,7 @@ impl<S, LS, I> Lower<S, LS> for I
|
|||
where
|
||||
I: Iterator<Item = ParsedResult<S>> + Sized,
|
||||
S: ParseState,
|
||||
LS: ParseState<Token = S::Object> + Default,
|
||||
LS: ClosedParseState<Token = S::Object> + Default,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
}
|
||||
|
@ -173,7 +173,7 @@ impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
|
|||
where
|
||||
S: ParseState,
|
||||
I: Iterator<Item = ParsedResult<S>>,
|
||||
LS: ParseState<Token = S::Object>,
|
||||
LS: ClosedParseState<Token = S::Object>,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
type Item = ParsedResult<LS>;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
//! High-level parsing abstraction.
|
||||
|
||||
use super::{
|
||||
state::ClosedParseState,
|
||||
trace::{self, ParserTrace},
|
||||
ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition,
|
||||
TransitionResult,
|
||||
|
@ -77,7 +78,7 @@ impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
|
|||
/// call [`finalize`](Parser::finalize) to ensure that parsing has
|
||||
/// completed in an accepting state.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
|
||||
pub struct Parser<S: ClosedParseState, I: TokenStream<S::Token>> {
|
||||
/// Input token stream to be parsed by the [`ParseState`] `S`.
|
||||
toks: I,
|
||||
|
||||
|
@ -137,7 +138,7 @@ pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
|
|||
tracer: trace::VoidTrace,
|
||||
}
|
||||
|
||||
impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
||||
impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
||||
/// Create a parser with a pre-initialized [`ParseState`].
|
||||
///
|
||||
/// If the provided [`ParseState`] does not require context
|
||||
|
@ -313,7 +314,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
|
||||
match result {
|
||||
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
|
||||
Err(e) => Err(e.into()),
|
||||
Err(e) => Err(ParseError::from(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -368,7 +369,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
|
||||
impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
|
||||
type Item = ParsedResult<S>;
|
||||
|
||||
/// Parse a single [`Token`] according to the current
|
||||
|
@ -400,7 +401,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
|
|||
|
||||
impl<S, I> From<I> for Parser<S, I>
|
||||
where
|
||||
S: ParseState + Default,
|
||||
S: ClosedParseState + Default,
|
||||
I: TokenStream<S::Token>,
|
||||
<S as ParseState>::Context: Default,
|
||||
{
|
||||
|
@ -429,7 +430,7 @@ where
|
|||
|
||||
impl<S, I, C> From<(I, C)> for Parser<S, I>
|
||||
where
|
||||
S: ParseState<Context = C> + Default,
|
||||
S: ClosedParseState<Context = C> + Default,
|
||||
I: TokenStream<S::Token>,
|
||||
{
|
||||
/// Create a new parser with a provided context.
|
||||
|
|
|
@ -53,6 +53,15 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A [`ParseState`] that transitions only to itself
|
||||
/// (is closed under transition).
|
||||
///
|
||||
/// These are the only [`ParseState`]s that can be used directly by
|
||||
/// [`Parser`],
|
||||
/// since [`Parser`] must be able to both handle every provided
|
||||
/// [`Transition`] and know how to delegate to inner [`ParseState`]s.
|
||||
pub trait ClosedParseState = ParseState<Super = Self>;
|
||||
|
||||
/// A parsing automaton.
|
||||
///
|
||||
/// These states are utilized by a [`Parser`].
|
||||
|
@ -74,7 +83,10 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
|
|||
/// but is not necessarily true for smaller, specialized parsers intended
|
||||
/// for use as components of a larger parser
|
||||
/// (in a spirit similar to parser combinators).
|
||||
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
||||
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized
|
||||
where
|
||||
Self: Into<Self::Super>,
|
||||
{
|
||||
/// Input tokens to the parser.
|
||||
type Token: Token;
|
||||
|
||||
|
@ -82,7 +94,33 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
type Object: Object;
|
||||
|
||||
/// Errors specific to this set of states.
|
||||
type Error: Debug + Diagnostic + PartialEq;
|
||||
type Error: Debug
|
||||
+ Diagnostic
|
||||
+ PartialEq
|
||||
+ Into<<Self::Super as ParseState>::Error>;
|
||||
|
||||
/// Superstate (parent state).
|
||||
///
|
||||
/// This is applicable only if the [`ParseState`] is capable of
|
||||
/// transitioning to a state outside of its own.
|
||||
/// It was initially introduced for implementing trampolines in place of
|
||||
/// composition-based delegation,
|
||||
/// the latter of which would otherwise require boxing on
|
||||
/// (extremely) hot code paths for otherwise-recursive data
|
||||
/// structures.
|
||||
///
|
||||
/// Intuitively,
|
||||
/// the superstate represents a sum type of the pool of all possible
|
||||
/// [`ParseState`]s that we can request transfer of control to.
|
||||
/// This is the same concept as [`StitchableParseState`],
|
||||
/// but operating in reverse
|
||||
/// (delegation via trampoline instead of direct function call).
|
||||
type Super: ClosedParseState<
|
||||
Token = Self::Token,
|
||||
Object = Self::Object,
|
||||
Error = Self::Error,
|
||||
Context = Self::Context,
|
||||
> = Self;
|
||||
|
||||
/// Object provided to parser alongside each token.
|
||||
///
|
||||
|
@ -97,7 +135,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
/// the context and the types that are able to be inferred.
|
||||
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
|
||||
where
|
||||
Self: Default,
|
||||
Self: ClosedParseState + Default,
|
||||
Self::Context: Default,
|
||||
{
|
||||
Parser::from(toks)
|
||||
|
@ -123,7 +161,7 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
ctx: Self::Context,
|
||||
) -> Parser<Self, I>
|
||||
where
|
||||
Self: Default,
|
||||
Self: ClosedParseState + Default,
|
||||
{
|
||||
Parser::from((toks, ctx))
|
||||
}
|
||||
|
@ -217,7 +255,9 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
self,
|
||||
tok: <Self as ParseState>::Token,
|
||||
mut context: C,
|
||||
into: impl FnOnce(Self) -> Transition<SP>,
|
||||
into: impl FnOnce(
|
||||
<Self as ParseState>::Super,
|
||||
) -> Transition<<SP as ParseState>::Super>,
|
||||
dead: impl FnOnce() -> Transition<SP>,
|
||||
) -> TransitionResult<SP>
|
||||
where
|
||||
|
@ -270,10 +310,12 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
self,
|
||||
tok: <Self as ParseState>::Token,
|
||||
mut context: C,
|
||||
into: impl FnOnce(Self) -> Transition<SP>,
|
||||
into: impl FnOnce(
|
||||
<Self as ParseState>::Super,
|
||||
) -> Transition<<SP as ParseState>::Super>,
|
||||
_dead: impl FnOnce() -> Transition<SP>,
|
||||
objf: impl FnOnce(
|
||||
Self,
|
||||
<Self as ParseState>::Super,
|
||||
<Self as ParseState>::Object,
|
||||
) -> TransitionResult<SP>,
|
||||
) -> TransitionResult<SP>
|
||||
|
@ -322,10 +364,10 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
|||
mut context: C,
|
||||
env: X,
|
||||
into: impl FnOnce(
|
||||
Self,
|
||||
<Self as ParseState>::Super,
|
||||
Option<<Self as ParseState>::Object>,
|
||||
X,
|
||||
) -> Transition<SP>,
|
||||
) -> Transition<<SP as ParseState>::Super>,
|
||||
dead: impl FnOnce(X) -> Transition<SP>,
|
||||
) -> TransitionResult<SP>
|
||||
where
|
||||
|
|
|
@ -27,7 +27,7 @@ use std::{
|
|||
};
|
||||
|
||||
#[cfg(doc)]
|
||||
use super::Parser;
|
||||
use super::{ClosedParseState, Parser};
|
||||
|
||||
/// A state transition with associated data.
|
||||
///
|
||||
|
@ -46,13 +46,19 @@ use super::Parser;
|
|||
/// for example,
|
||||
/// for multiple data to be emitted in response to a single token.
|
||||
///
|
||||
/// If a [`ParseState`] is not a [`ClosedParseState`],
|
||||
/// the transition will be to its superstate ([`ParseState::Super`]);
|
||||
/// this conversion is performed automatically by the [`Transition`]
|
||||
/// methods that produce [`TransitionResult`],
|
||||
/// (such as [`Transition::ok`]).
|
||||
///
|
||||
/// This struct is opaque to ensure that critical invariants involving
|
||||
/// transitions and lookahead are properly upheld;
|
||||
/// callers must use the appropriate parsing APIs.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct TransitionResult<S: ParseState>(
|
||||
/// New parser state.
|
||||
pub(in super::super) Transition<S>,
|
||||
pub(in super::super) Transition<S::Super>,
|
||||
/// Result of the parsing operation.
|
||||
pub(in super::super) TransitionData<S>,
|
||||
);
|
||||
|
@ -195,6 +201,19 @@ impl<S: ParseState> TransitionData<S> {
|
|||
pub struct Transition<S: ParseState>(pub S);
|
||||
|
||||
impl<S: ParseState> Transition<S> {
|
||||
/// Transform a [`Transition`] into a transition of its superstate
|
||||
/// [`ParseState::Super`].
|
||||
///
|
||||
/// This is needed because trait specialization does not yet have a path
|
||||
/// to stabilization as of the time of writing,
|
||||
/// and so `From<Transition<S>> for Transition<S::Super>` cannot be
|
||||
/// implemented because those types overlap.
|
||||
pub fn into_super(self) -> Transition<S::Super> {
|
||||
match self {
|
||||
Transition(st) => Transition(st.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// A state transition with corresponding data.
|
||||
///
|
||||
/// This allows [`ParseState::parse_token`] to emit a parsed object and
|
||||
|
@ -203,7 +222,10 @@ impl<S: ParseState> Transition<S> {
|
|||
where
|
||||
T: Into<ParseStatus<S>>,
|
||||
{
|
||||
TransitionResult(self, TransitionData::Result(Ok(obj.into()), None))
|
||||
TransitionResult(
|
||||
self.into_super(),
|
||||
TransitionData::Result(Ok(obj.into()), None),
|
||||
)
|
||||
}
|
||||
|
||||
/// A transition with corresponding error.
|
||||
|
@ -211,7 +233,10 @@ impl<S: ParseState> Transition<S> {
|
|||
/// This indicates a parsing failure.
|
||||
/// The state ought to be suitable for error recovery.
|
||||
pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
|
||||
TransitionResult(self, TransitionData::Result(Err(err.into()), None))
|
||||
TransitionResult(
|
||||
self.into_super(),
|
||||
TransitionData::Result(Err(err.into()), None),
|
||||
)
|
||||
}
|
||||
|
||||
/// A state transition with corresponding [`Result`].
|
||||
|
@ -224,7 +249,7 @@ impl<S: ParseState> Transition<S> {
|
|||
E: Into<S::Error>,
|
||||
{
|
||||
TransitionResult(
|
||||
self,
|
||||
self.into_super(),
|
||||
TransitionData::Result(
|
||||
result.map(Into::into).map_err(Into::into),
|
||||
None,
|
||||
|
@ -238,7 +263,7 @@ impl<S: ParseState> Transition<S> {
|
|||
/// This corresponds to [`ParseStatus::Incomplete`].
|
||||
pub fn incomplete(self) -> TransitionResult<S> {
|
||||
TransitionResult(
|
||||
self,
|
||||
self.into_super(),
|
||||
TransitionData::Result(Ok(ParseStatus::Incomplete), None),
|
||||
)
|
||||
}
|
||||
|
@ -258,7 +283,10 @@ impl<S: ParseState> Transition<S> {
|
|||
/// use [`Transition::result`] or other methods along with a token
|
||||
/// of [`Lookahead`].
|
||||
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
|
||||
TransitionResult(self, TransitionData::Dead(Lookahead(tok)))
|
||||
TransitionResult(
|
||||
self.into_super(),
|
||||
TransitionData::Dead(Lookahead(tok)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -267,7 +295,9 @@ impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
|
|||
{
|
||||
fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
|
||||
match residual {
|
||||
(st, result) => Self(st, TransitionData::Result(result, None)),
|
||||
(st, result) => {
|
||||
Self(st.into_super(), TransitionData::Result(result, None))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,8 +48,8 @@ use super::{
|
|||
use crate::{
|
||||
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
|
||||
parse::{
|
||||
Context, Object, ParseState, ParsedResult, Token, Transition,
|
||||
TransitionResult,
|
||||
ClosedParseState, Context, Object, ParseState, ParsedResult, Token,
|
||||
Transition, TransitionResult,
|
||||
},
|
||||
span::Span,
|
||||
sym::{st::is_common_whitespace, GlobalSymbolResolve, SymbolId},
|
||||
|
@ -285,7 +285,7 @@ impl From<Text> for RefinedText {
|
|||
|
||||
/// XIRF-compatible attribute parser.
|
||||
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
|
||||
ParseState<Token = XirToken, Object = Attr>
|
||||
ClosedParseState<Token = XirToken, Object = Attr>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<XirToXirfError>,
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
use crate::{
|
||||
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
|
||||
fmt::ListDisplayWrapper,
|
||||
parse::ParseState,
|
||||
parse::ClosedParseState,
|
||||
span::Span,
|
||||
xir::{attr::Attr, fmt::XmlAttrList, EleSpan, OpenSpan, QName},
|
||||
};
|
||||
|
@ -177,7 +177,7 @@ impl<S: AttrParseState> Diagnostic for AttrParseError<S> {
|
|||
/// Attribute parsing automaton.
|
||||
///
|
||||
/// These parsers are generated by [`attr_parse!`](crate::attr_parse).
|
||||
pub trait AttrParseState: ParseState {
|
||||
pub trait AttrParseState: ClosedParseState {
|
||||
/// Type of error for failed parsing of attribute values.
|
||||
///
|
||||
/// These originate from [`TryFrom`] conversions on the attribute
|
||||
|
|
|
@ -181,8 +181,8 @@ use super::{
|
|||
use crate::{
|
||||
diagnose::{AnnotatedSpan, Diagnostic},
|
||||
parse::{
|
||||
self, EmptyContext, NoContext, ParseError, ParseResult, ParseState,
|
||||
ParsedResult, Transition, TransitionResult,
|
||||
self, ClosedParseState, EmptyContext, NoContext, ParseError,
|
||||
ParseResult, ParseState, ParsedResult, Transition, TransitionResult,
|
||||
},
|
||||
span::Span,
|
||||
sym::SymbolId,
|
||||
|
@ -504,11 +504,12 @@ where
|
|||
Done,
|
||||
}
|
||||
|
||||
pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<StackError>,
|
||||
EmptyContext: AsMut<<Self as ParseState>::Context>;
|
||||
pub trait StackAttrParseState =
|
||||
ClosedParseState<Token = XirToken, Object = Attr>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<StackError>,
|
||||
EmptyContext: AsMut<<Self as ParseState>::Context>;
|
||||
|
||||
impl<SA: StackAttrParseState> Default for Stack<SA> {
|
||||
fn default() -> Self {
|
||||
|
|
Loading…
Reference in New Issue