tamer: xir::parse::ele: Transition trampoline

This properly integrates the trampoline into `ele_parse!`.  The
implementation leaves some TODOs, most notably broken mixed text handling
since we can no longer intercept those tokens before passing to the
child.  That is temporarily marked as incomplete; see a future commit.

The introduced test `ParseState`s were to help me reason about the system
intuitively as I struggled to track down some type errors in the monstrosity
that is `ele_parse!`.  It will fail to compile if those invariants are
violated.  (In the end, the problems were pretty simple to resolve, and the
struggle was the type system doing its job in telling me that I needed to
step back and try to reason about the problem again until it was intuitive.)

This keeps around the NT states for now, which are quickly used to
transition to the next NT state, like a couple of bounces on a trampoline:

  NT -> Dead -> Parent -> Next NT

This could be optimized in the future, if it's worth doing.

This also makes no attempt to implement tail calls; that would have to come
after fixing mixed content and really isn't worth the added complexity
now.  I (desperately) need to move on, and still have a bunch of cleanup to
do.

I had hoped for a smaller commit, but that was too difficult to do with all
the types involved.

DEV-7145
main
Mike Gerwitz 2022-08-10 00:21:45 -04:00
parent 233fa7de6a
commit 15e04d63e2
6 changed files with 614 additions and 141 deletions

View File

@ -398,4 +398,238 @@ pub mod test {
let ctx = sut.finalize().unwrap();
assert_eq!(ctx, StubContext { val });
}
// This healthy block of mostly-boilerplate verifies that the practical
// use case of the trampoline system actually type-checks,
// and was used during development as a simpler example than having
// to content with the mammoth `ele_parse!`.
// There is no runtime test;
// it will fail to compile if there's a problem.
mod superst {
use crate::span::dummy::S1;
use super::*;
#[derive(Debug, PartialEq, Eq)]
enum Sup {
SubA(SubA),
SubB(SubB),
}
#[derive(Debug, PartialEq, Eq)]
enum SubA {
A,
}
#[derive(Debug, PartialEq, Eq)]
enum SubB {
B,
}
impl Display for Sup {
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
unimplemented!()
}
}
impl Display for SubA {
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
unimplemented!()
}
}
impl Display for SubB {
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
unimplemented!()
}
}
impl From<SubA> for Sup {
fn from(sub: SubA) -> Self {
Self::SubA(sub)
}
}
impl From<SubB> for Sup {
fn from(sub: SubB) -> Self {
Self::SubB(sub)
}
}
#[derive(Debug, PartialEq)]
enum SupError {
SubA(SubAError),
SubB(SubBError),
}
#[derive(Debug, PartialEq)]
enum SubAError {}
#[derive(Debug, PartialEq)]
enum SubBError {}
impl Error for SupError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}
impl Display for SupError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "SupError")
}
}
impl Diagnostic for SupError {
fn describe(&self) -> Vec<AnnotatedSpan> {
vec![]
}
}
impl Error for SubAError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}
impl Display for SubAError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "SubAError")
}
}
impl Diagnostic for SubAError {
fn describe(&self) -> Vec<AnnotatedSpan> {
vec![]
}
}
impl Error for SubBError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
None
}
}
impl Display for SubBError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "SubBError")
}
}
impl Diagnostic for SubBError {
fn describe(&self) -> Vec<AnnotatedSpan> {
vec![]
}
}
impl From<SubAError> for SupError {
fn from(sub: SubAError) -> Self {
Self::SubA(sub)
}
}
impl From<SubBError> for SupError {
fn from(sub: SubBError) -> Self {
Self::SubB(sub)
}
}
#[allow(dead_code)] // Used only for type checking atm.
#[derive(Debug, PartialEq, Eq)]
enum SupToken {
ToA,
ToB,
}
impl Token for SupToken {
fn ir_name() -> &'static str {
"SupTest"
}
fn span(&self) -> Span {
S1
}
}
impl Display for SupToken {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "SupToken")
}
}
#[derive(Debug, PartialEq, Eq)]
enum SupObject {
FromA(SupToken),
FromB(SupToken),
}
impl Object for SupObject {}
impl ParseState for Sup {
type Token = SupToken;
type Object = SupObject;
type Error = SupError;
fn parse_token(
self,
tok: Self::Token,
ctx: &mut Self::Context,
) -> TransitionResult<Self> {
match self {
Self::SubA(st) => st.parse_token(tok, ctx),
Self::SubB(st) => st.parse_token(tok, ctx),
}
}
fn is_accepting(&self) -> bool {
true
}
}
impl ParseState for SubA {
type Token = SupToken;
type Object = SupObject;
type Error = SubAError;
type Super = Sup;
fn parse_token(
self,
tok: Self::Token,
_ctx: &mut Self::Context,
) -> TransitionResult<Self::Super> {
match tok {
SupToken::ToA => Transition(self).ok(SupObject::FromA(tok)),
SupToken::ToB => {
Transition(SubB::B).ok(SupObject::FromA(tok))
}
}
}
fn is_accepting(&self) -> bool {
true
}
}
impl ParseState for SubB {
type Token = SupToken;
type Object = SupObject;
type Error = SubBError;
type Super = Sup;
fn parse_token(
self,
tok: Self::Token,
_ctx: &mut Self::Context,
) -> TransitionResult<Self::Super> {
match tok {
SupToken::ToA => Transition(self).ok(SupObject::FromB(tok)),
SupToken::ToB => {
Transition(SubA::A).ok(SupObject::FromB(tok))
}
}
}
fn is_accepting(&self) -> bool {
true
}
}
}
}

View File

@ -47,6 +47,15 @@ pub enum ParseStatus<S: ParseState> {
Object(S::Object),
}
impl<S: ParseState> ParseStatus<S> {
pub fn into_super(self) -> ParseStatus<S::Super> {
match self {
Self::Incomplete => ParseStatus::Incomplete,
Self::Object(obj) => ParseStatus::Object(obj),
}
}
}
impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
fn from(obj: T) -> Self {
Self::Object(obj)
@ -86,6 +95,7 @@ pub trait ClosedParseState = ParseState<Super = Self>;
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized
where
Self: Into<Self::Super>,
Self::Error: Into<<Self::Super as ParseState>::Error>,
{
/// Input tokens to the parser.
type Token: Token;
@ -205,7 +215,7 @@ where
self,
tok: Self::Token,
ctx: &mut Self::Context,
) -> TransitionResult<Self>;
) -> TransitionResult<Self::Super>;
/// Whether the current state represents an accepting state.
///
@ -254,11 +264,9 @@ where
self,
tok: <Self as ParseState>::Token,
mut context: C,
into: impl FnOnce(
<Self as ParseState>::Super,
) -> Transition<<SP as ParseState>::Super>,
into: impl FnOnce(<Self as ParseState>::Super) -> Transition<SP>,
dead: impl FnOnce() -> Transition<SP>,
) -> TransitionResult<SP>
) -> TransitionResult<<SP as ParseState>::Super>
where
Self: StitchableParseState<SP>,
C: AsMut<<Self as ParseState>::Context>,
@ -281,12 +289,15 @@ where
dead().incomplete().with_lookahead(lookahead)
}
TransitionData::Result(result, lookahead) => TransitionResult(
into(newst),
into(newst).into_super(),
TransitionData::Result(
match result {
Ok(Incomplete) => Ok(Incomplete),
Ok(Obj(obj)) => Ok(Obj(obj.into())),
Err(e) => Err(e.into()),
// First convert the error into `SP::Error`,
// and then `SP::Super::Error`
// (which will be the same type if SP is closed).
Err(e) => Err(e.into().into()),
},
lookahead,
),
@ -294,6 +305,37 @@ where
}
}
/// Delegate parsing of a token from our superstate
/// [`ParseState::Super`].
///
/// This operates just as [`ParseState::delegate`];
/// the API is simplified because [`TransitionResult`] already has
/// data mapped to the superstate.
/// `dead` indicates when the child (`self`) has finished parsing.
fn delegate_child<C>(
self,
tok: Self::Token,
mut context: C,
dead: impl FnOnce(Self::Token, C) -> TransitionResult<Self::Super>,
) -> TransitionResult<Self::Super>
where
C: AsMut<<Self as ParseState>::Context>,
{
let TransitionResult(Transition(newst), data) =
self.parse_token(tok, context.as_mut());
match data {
TransitionData::Dead(Lookahead(lookahead)) => {
dead(lookahead, context)
}
// Since this is child state,
// [`TransitionResult`] has already converted into the
// superstate for us.
_ => TransitionResult(Transition(newst), data),
}
}
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`
/// until this parser yields an [`Object`].
///
@ -309,15 +351,13 @@ where
self,
tok: <Self as ParseState>::Token,
mut context: C,
into: impl FnOnce(
<Self as ParseState>::Super,
) -> Transition<<SP as ParseState>::Super>,
into: impl FnOnce(<Self as ParseState>::Super) -> Transition<SP>,
_dead: impl FnOnce() -> Transition<SP>,
objf: impl FnOnce(
<Self as ParseState>::Super,
<Self as ParseState>::Object,
) -> TransitionResult<SP>,
) -> TransitionResult<SP>
) -> TransitionResult<<SP as ParseState>::Super>,
) -> TransitionResult<<SP as ParseState>::Super>
where
Self: PartiallyStitchableParseState<SP>,
C: AsMut<<Self as ParseState>::Context>,
@ -339,11 +379,14 @@ where
}
TransitionData::Result(result, lookahead) => TransitionResult(
into(newst),
into(newst).into_super(),
TransitionData::Result(
match result {
Ok(_) => Ok(Incomplete),
Err(e) => Err(e.into()),
// First convert the error into `SP::Error`,
// and then `SP::Super::Error`
// (which will be the same type if SP is closed).
Err(e) => Err(e.into().into()),
},
lookahead,
),
@ -356,21 +399,24 @@ where
///
/// See [`ParseState::delegate`] for more information.
/// This method exists for a XIRT and ought to be removed when it is no
/// longer needed.
/// longer needed;
/// as such,
/// it works only with [`ClosedParseState`].
fn delegate_with_obj<SP, C, X>(
self,
tok: <Self as ParseState>::Token,
mut context: C,
env: X,
into: impl FnOnce(
<Self as ParseState>::Super,
Self,
Option<<Self as ParseState>::Object>,
X,
) -> Transition<<SP as ParseState>::Super>,
) -> Transition<SP>,
dead: impl FnOnce(X) -> Transition<SP>,
) -> TransitionResult<SP>
where
Self: PartiallyStitchableParseState<SP>,
SP: ClosedParseState,
C: AsMut<<Self as ParseState>::Context>,
{
use ParseStatus::{Incomplete, Object as Obj};
@ -424,11 +470,19 @@ pub type ParseStateResult<S> = Result<ParseStatus<S>, <S as ParseState>::Error>;
/// it is not necessary for parser composition,
/// provided that you perform the necessary wiring yourself in absence
/// of state stitching.
///
/// A [`ParseState`] can only be stitched if it is capable of standing on
/// its own with a [`Parser`],
/// meaning it must be a [`ClosedParseState`].
/// Otherwise,
/// the parser must return a transition to [`ParseState::Super`],
/// and delegation from [`ParseState::Super`] itself can be performed with
/// [`ParseState::delegate_child`].
pub trait StitchableParseState<SP: ParseState> =
PartiallyStitchableParseState<SP>
where <Self as ParseState>::Object: Into<<SP as ParseState>::Object>;
pub trait PartiallyStitchableParseState<SP: ParseState> = ParseState
pub trait PartiallyStitchableParseState<SP: ParseState> = ClosedParseState
where
SP: ParseState<Token = <Self as ParseState>::Token>,
<Self as ParseState>::Error: Into<<SP as ParseState>::Error>;

View File

@ -19,7 +19,9 @@
//! State transitions for parser automata.
use super::{ParseState, ParseStateResult, ParseStatus, Token};
use super::{
ClosedParseState, ParseState, ParseStateResult, ParseStatus, Token,
};
use std::{
convert::Infallible,
hint::unreachable_unchecked,
@ -27,7 +29,7 @@ use std::{
};
#[cfg(doc)]
use super::{ClosedParseState, Parser};
use super::Parser;
/// A state transition with associated data.
///
@ -58,12 +60,20 @@ use super::{ClosedParseState, Parser};
#[derive(Debug, PartialEq)]
pub struct TransitionResult<S: ParseState>(
/// New parser state.
pub(in super::super) Transition<S::Super>,
pub(in super::super) Transition<S>,
/// Result of the parsing operation.
pub(in super::super) TransitionData<S>,
);
impl<S: ParseState> TransitionResult<S> {
pub fn into_super(self) -> TransitionResult<S::Super> {
match self {
Self(t, data) => {
TransitionResult(t.into_super(), data.into_super())
}
}
}
/// Indicate that this transition include a single token of lookahead,
/// which should be provided back to the parser in place of the
/// next token from the input stream.
@ -153,6 +163,16 @@ pub(in super::super) enum TransitionData<S: ParseState> {
}
impl<S: ParseState> TransitionData<S> {
pub fn into_super(self) -> TransitionData<S::Super> {
match self {
Self::Result(st_result, ola) => TransitionData::Result(
st_result.map(ParseStatus::into_super).map_err(|e| e.into()),
ola,
),
Self::Dead(la) => TransitionData::Dead(la),
}
}
/// Reference to the token of lookahead,
/// if any.
pub(in super::super) fn lookahead_ref(
@ -218,9 +238,9 @@ impl<S: ParseState> Transition<S> {
///
/// This allows [`ParseState::parse_token`] to emit a parsed object and
/// corresponds to [`ParseStatus::Object`].
pub fn ok<T>(self, obj: T) -> TransitionResult<S>
pub fn ok<T>(self, obj: T) -> TransitionResult<S::Super>
where
T: Into<ParseStatus<S>>,
T: Into<ParseStatus<S::Super>>,
{
TransitionResult(
self.into_super(),
@ -232,10 +252,15 @@ impl<S: ParseState> Transition<S> {
///
/// This indicates a parsing failure.
/// The state ought to be suitable for error recovery.
pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S::Super> {
// The first error conversion is into that expected by S,
// which will _then_ (below) be converted into S::Super
// (if they're not the same).
let err_s: S::Error = err.into();
TransitionResult(
self.into_super(),
TransitionData::Result(Err(err.into()), None),
TransitionData::Result(Err(err_s.into()), None),
)
}
@ -243,7 +268,10 @@ impl<S: ParseState> Transition<S> {
///
/// This translates the provided [`Result`] in a manner equivalent to
/// [`Transition::ok`] and [`Transition::err`].
pub fn result<T, E>(self, result: Result<T, E>) -> TransitionResult<S>
pub fn result<T, E>(
self,
result: Result<T, E>,
) -> TransitionResult<S::Super>
where
T: Into<ParseStatus<S>>,
E: Into<S::Error>,
@ -251,7 +279,11 @@ impl<S: ParseState> Transition<S> {
TransitionResult(
self.into_super(),
TransitionData::Result(
result.map(Into::into).map_err(Into::into),
result
.map(Into::into)
.map(ParseStatus::into_super)
.map_err(Into::<S::Error>::into)
.map_err(Into::into),
None,
),
)
@ -261,7 +293,7 @@ impl<S: ParseState> Transition<S> {
/// object can be emitted.
///
/// This corresponds to [`ParseStatus::Incomplete`].
pub fn incomplete(self) -> TransitionResult<S> {
pub fn incomplete(self) -> TransitionResult<S::Super> {
TransitionResult(
self.into_super(),
TransitionData::Result(Ok(ParseStatus::Incomplete), None),
@ -282,7 +314,7 @@ impl<S: ParseState> Transition<S> {
/// object first,
/// use [`Transition::result`] or other methods along with a token
/// of [`Lookahead`].
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
pub fn dead(self, tok: S::Token) -> TransitionResult<S::Super> {
TransitionResult(
self.into_super(),
TransitionData::Dead(Lookahead(tok)),
@ -290,14 +322,12 @@ impl<S: ParseState> Transition<S> {
}
}
impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
impl<S: ClosedParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
for TransitionResult<S>
{
fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
match residual {
(st, result) => {
Self(st.into_super(), TransitionData::Result(result, None))
}
(st, result) => Self(st, TransitionData::Result(result, None)),
}
}
}
@ -349,7 +379,7 @@ pub trait Transitionable<S: ParseState> {
///
/// This may be necessary to satisfy ownership/borrowing rules when
/// state data from `S` is used to compute [`Self`].
fn transition(self, to: S) -> TransitionResult<S>;
fn transition(self, to: S) -> TransitionResult<S::Super>;
}
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
@ -357,7 +387,7 @@ where
S: ParseState,
<S as ParseState>::Error: From<E>,
{
fn transition(self, to: S) -> TransitionResult<S> {
fn transition(self, to: S) -> TransitionResult<S::Super> {
Transition(to).result(self)
}
}
@ -367,13 +397,13 @@ where
S: ParseState,
<S as ParseState>::Error: From<E>,
{
fn transition(self, to: S) -> TransitionResult<S> {
fn transition(self, to: S) -> TransitionResult<S::Super> {
Transition(to).result(self.map(|_| ParseStatus::Incomplete))
}
}
impl<S: ParseState> Transitionable<S> for ParseStatus<S> {
fn transition(self, to: S) -> TransitionResult<S> {
Transition(to).ok(self)
fn transition(self, to: S) -> TransitionResult<S::Super> {
Transition(to).ok(self.into_super())
}
}

View File

@ -26,4 +26,4 @@ mod attr;
mod ele;
pub use attr::{parse_attrs, AttrParseError, AttrParseState};
pub use ele::{EleParseCfg, EleParseState};
pub use ele::{EleParseCfg, EleParseState, StateStack, StateStackContext};

View File

@ -19,7 +19,20 @@
//! Element parser generator for parsing of [XIRF](super::super::flat).
use crate::parse::ParseState;
use arrayvec::ArrayVec;
use crate::{
diagnose::{panic::DiagnosticPanic, Annotate},
diagnostic_panic,
fmt::{DisplayWrapper, TtQuote},
parse::{
ClosedParseState, Context, ParseState, Token, Transition,
TransitionResult,
},
};
#[cfg(doc)]
use crate::{ele_parse, parse::Parser};
/// A parser accepting a single element.
pub trait EleParseState: ParseState {}
@ -44,6 +57,143 @@ impl From<EleParseCfg> for () {
}
}
/// Maximum level of nesting for source XML trees.
///
/// Technically this is the maximum level of nesting for _parsing_ those
/// trees,
/// which may end up being less than this value.
///
/// This should be set to something reasonable,
/// but is not an alternative to coming up with code conventions that
/// disallow ridiculous levels of nesting.
/// TAME does have a lot of nesting with primitives,
/// but that nesting is easily abstracted with templates.
/// Templates may expand into ridiculous levels of nesting---this
/// has no impact on the template expansion phase.
///
/// Note that this is assuming that this parser is used only for TAME
/// sources.
/// If that's not the case,
/// this can be made to be configurable like XIRF.
pub const MAX_DEPTH: usize = 16;
/// Parser stack for trampoline.
///
/// This can be used as a call stack for parsers while avoiding creating
/// otherwise-recursive data structures with composition-based delegation.
/// However,
/// it is more similar to CPS,
/// in that the parser popped off the stack need not be the parser that
/// initiated the request and merely represents the next step in
/// a delayed computation.
/// If such a return context is unneeded,
/// a [`ParseState`] may implement tail calls by simply not pushing itself
/// onto the stack before requesting transfer to another [`ParseState`].
#[derive(Debug, Default)]
pub struct StateStack<S: ClosedParseState>(ArrayVec<S, MAX_DEPTH>);
pub type StateStackContext<S> = Context<StateStack<S>>;
// Note that public visibility is needed because `ele_parse` expands outside
// of this module.
impl<S: ClosedParseState> StateStack<S> {
/// Request a transfer to another [`ParseState`],
/// expecting that control be returned to `ret` after it has
/// completed.
///
/// This can be reasoned about like calling a thunk:
/// the return [`ParseState`] is put onto the stack,
/// the target [`ParseState`] is used for the state transition to
/// cause [`Parser`] to perform the call to it,
/// and when it is done
/// (e.g. a dead state),
/// `ret` will be pop'd from the stack and we'll transition back to
/// it.
/// Note that this method is not responsible for returning;
/// see [`Self::ret`] to perform a return.
///
/// However,
/// the calling [`ParseState`] is not responsible for its return,
/// unlike a typical function call.
/// Instead,
/// this _actually_ more closely resembles CPS
/// (continuation passing style),
/// and so [`ele_parse!`] must be careful to ensure that stack
/// operations are properly paired.
/// On the upside,
/// if something is erroneously `ret`'d,
/// the parser is guaranteed to be in a consistent state since the
/// entire state has been reified
/// (but the input would then be parsed incorrectly).
///
/// Note that tail calls can be implemented by transferring control
/// without pushing an entry on the stack to return to,
/// but that hasn't been formalized \[yet\] and requires extra care.
pub fn transfer_with_ret<SA, ST>(
&mut self,
Transition(ret): Transition<SA>,
target: TransitionResult<ST>,
) -> TransitionResult<ST>
where
SA: ParseState<Super = S::Super>,
ST: ParseState,
{
let Self(stack) = self;
// TODO: Global configuration to (hopefully) ensure that XIRF will
// actually catch this.
if stack.is_full() {
// TODO: We need some spans here and ideally convert the
// parenthetical error message into a diagnostic footnote.
// TODO: Or should we have a special error type that tells the
// parent `Parser` to panic with context?
diagnostic_panic!(
vec![],
"maximum parsing depth of {} exceeded while attempting \
to push return state {} \
(expected XIRF configuration to prevent this error)",
MAX_DEPTH,
TtQuote::wrap(ret),
);
}
stack.push(ret.into());
target
}
/// Return to a previous [`ParseState`] that transferred control away
/// from itself.
///
/// Conceptually,
/// this is like returning from a function call,
/// where the function was invoked using [`Self::transfer_with_ret`].
/// However,
/// this system is more akin to CPS
/// (continuation passing style);
/// see [`Self::transfer_with_ret`] for important information.
pub fn ret(&mut self, lookahead: S::Token) -> TransitionResult<S> {
let Self(stack) = self;
// This should certainly never happen unless there is a bug in the
// `ele_parse!` parser-generator,
// since it means that we're trying to return to a caller that
// does not exist.
let st = stack.pop().diagnostic_expect(
lookahead
.span()
.internal_error("while processing this token")
.with_help(
"this implies a bug in TAMER's `ele_parse` \
parser-generator",
)
.into(),
"missing expected return ParseState",
);
Transition(st).incomplete().with_lookahead(lookahead)
}
}
#[macro_export]
macro_rules! ele_parse {
(
@ -150,9 +300,9 @@ macro_rules! ele_parse {
-> {
@ ->
$(
($nt::$ntref) [$($ntref_cfg)?],
($nt::$ntref, $ntref) [$($ntref_cfg)?],
($nt::$ntref) ->
)* ($nt::ExpectClose_) [],
)* ($nt::ExpectClose_, ()) [],
}
}
};
@ -181,6 +331,30 @@ macro_rules! ele_parse {
}
};
// Delegation when the destination type is `()`,
// indicating that the next state is not a child NT
// (it is likely the state expecting a closing tag).
(@!ntref_delegate
$stack:ident, $ret:expr, (), $_target:expr, $done:expr
) => {
$done
};
// Delegate to a child parser by pushing self onto the stack and
// yielding to one of the child's states.
// This uses a trampoline,
// which avoids recursive data structures
// (due to `ParseState` composition/stitching)
// and does not grow the call stack.
(@!ntref_delegate
$stack:ident, $ret:expr, $ntnext_st:ty, $target:expr, $_done:expr
) => {
$stack.transfer_with_ret(
Transition($ret),
$target,
)
};
(@!ele_dfn_body <$objty:ty, $($evty:ty)?>
$vis:vis $super:ident $nt:ident $qname:ident ($($open_span:ident)?)
@ -212,9 +386,9 @@ macro_rules! ele_parse {
}
-> {
@ -> ($ntfirst:path) [$($ntfirst_cfg:tt)?],
@ -> ($ntfirst:path, $ntfirst_st:ty) [$($ntfirst_cfg:tt)?],
$(
($ntprev:path) -> ($ntnext:path) [$($ntnext_cfg:tt)?],
($ntprev:path) -> ($ntnext:path, $ntnext_st:ty) [$($ntnext_cfg:tt)?],
)*
}
) => {
@ -286,7 +460,6 @@ macro_rules! ele_parse {
crate::span::Span,
crate::xir::flat::Depth
),
$ntref
),
)*
ExpectClose_(
@ -295,7 +468,6 @@ macro_rules! ele_parse {
crate::span::Span,
crate::xir::flat::Depth
),
()
),
/// Closing tag found and parsing of the element is
/// complete.
@ -321,11 +493,11 @@ macro_rules! ele_parse {
/// Yield the expected depth of child elements,
/// if known.
#[allow(dead_code)] // used by text special form
fn child_depth(&self) -> Option<Depth> {
fn child_depth(&self) -> Option<crate::xir::flat::Depth> {
match self {
$ntfirst((_, _, depth), _) => Some(depth.child_depth()),
$ntfirst((_, _, depth)) => Some(depth.child_depth()),
$(
$ntnext((_, _, depth), _) => Some(depth.child_depth()),
$ntnext((_, _, depth)) => Some(depth.child_depth()),
)*
_ => None,
}
@ -363,7 +535,7 @@ macro_rules! ele_parse {
),
Self::Attrs_(_, sa) => std::fmt::Display::fmt(sa, f),
Self::ExpectClose_((_, _, depth), _) => write!(
Self::ExpectClose_((_, _, depth)) => write!(
f,
"expecting closing element {} at depth {depth}",
TtCloseXmlEle::wrap($qname)
@ -374,8 +546,13 @@ macro_rules! ele_parse {
TtQuote::wrap($qname)
),
$(
Self::$ntref(_, st) => {
std::fmt::Display::fmt(st, f)
// TODO: A better description.
Self::$ntref(_) => {
write!(
f,
"preparing to transition to \
parser for next child element(s)"
)
},
)*
}
@ -497,13 +674,15 @@ macro_rules! ele_parse {
>;
type Object = $objty;
type Error = [<$nt Error_>];
type Context = crate::xir::parse::StateStackContext<Self::Super>;
type Super = $super;
fn parse_token(
self,
tok: Self::Token,
_: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
#[allow(unused_variables)] // used only if child NTs
stack: &mut Self::Context,
) -> crate::parse::TransitionResult<Self::Super> {
use crate::{
parse::{EmptyContext, Transition, Transitionable},
xir::{
@ -513,6 +692,10 @@ macro_rules! ele_parse {
},
};
// Used only by _some_ expansions.
#[allow(unused_imports)]
use crate::xir::flat::Text;
use $nt::{
Attrs_, Expecting_, RecoverEleIgnore_,
CloseRecoverIgnore_, RecoverEleIgnoreClosed_,
@ -568,10 +751,10 @@ macro_rules! ele_parse {
}
(Attrs_(meta, sa), tok) => {
sa.delegate_until_obj(
sa.delegate_until_obj::<Self, _>(
tok,
EmptyContext,
|sa| Transition(Attrs_(meta, sa)).into_super(),
|sa| Transition(Attrs_(meta, sa)),
|| unreachable!("see ParseState::delegate_until_obj dead"),
|#[allow(unused_variables)] sa, attrs| {
let obj = match attrs {
@ -591,14 +774,24 @@ macro_rules! ele_parse {
},
};
Transition($ntfirst(
meta,
ele_parse!(@!ntref_cfg $($ntfirst_cfg)?).into(),
)).ok(obj)
// Lookahead is added by `delegate_until_obj`.
ele_parse!(@!ntref_delegate
stack,
$ntfirst(meta),
$ntfirst_st,
Transition(
Into::<$ntfirst_st>::into(
ele_parse!(@!ntref_cfg $($ntfirst_cfg)?)
)
).ok(obj),
Transition($ntfirst(meta)).ok(obj)
)
}
)
},
// TODO: This is partly broken by the trampoline
// implementation.
// Must come _after_ `Attrs_` above so that
// attributes are yielded before text that
// terminates attribute parsing.
@ -628,19 +821,17 @@ macro_rules! ele_parse {
)?
$(
($ntprev(meta, st_inner), tok) => {
st_inner.delegate(
tok,
EmptyContext,
// TODO: proper trampoline delegation;
// this is maintaining BC for now
|si| Transition($ntprev(meta, si.into())).into_super(),
|| {
Transition($ntnext(
meta,
ele_parse!(@!ntref_cfg $($ntnext_cfg)?).into()
))
},
($ntprev(meta), tok) => {
ele_parse!(@!ntref_delegate
stack,
$ntnext(meta),
$ntnext_st,
Transition(
Into::<$ntnext_st>::into(
ele_parse!(@!ntref_cfg $($ntnext_cfg)?)
)
).incomplete().with_lookahead(tok),
Transition($ntnext(meta)).incomplete().with_lookahead(tok)
)
},
)*
@ -648,7 +839,7 @@ macro_rules! ele_parse {
// XIRF ensures proper nesting,
// so we do not need to check the element name.
(
ExpectClose_((cfg, _, depth), ())
ExpectClose_((cfg, _, depth))
| CloseRecoverIgnore_((cfg, _, depth), _),
XirfToken::Close(_, span, tok_depth)
) if tok_depth == depth => {
@ -658,7 +849,7 @@ macro_rules! ele_parse {
$closemap.transition(Closed_(cfg, span.tag_span()))
},
(ExpectClose_(meta @ (_, otspan, _), ()), unexpected_tok) => {
(ExpectClose_(meta @ (_, otspan, _)), unexpected_tok) => {
use crate::parse::Token;
Transition(
CloseRecoverIgnore_(meta, unexpected_tok.span())
@ -718,9 +909,6 @@ macro_rules! ele_parse {
crate::xir::QName,
crate::xir::CloseSpan
),
$(
$ntref(crate::xir::parse::EleParseCfg, $ntref),
)*
/// Inner element has been parsed and is dead;
/// this indicates that this parser is also dead.
Done_,
@ -754,10 +942,6 @@ macro_rules! ele_parse {
given = TtQuote::wrap(name),
),
$(
Self::$ntref(_, st) => std::fmt::Display::fmt(st, f),
)*
Self::Done_ => write!(f, "done parsing {expected}"),
}
}
@ -847,13 +1031,14 @@ macro_rules! ele_parse {
>;
type Object = $objty;
type Error = [<$nt Error_>];
type Context = crate::xir::parse::StateStackContext<Self::Super>;
type Super = $super;
fn parse_token(
self,
tok: Self::Token,
_: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
stack: &mut Self::Context,
) -> crate::parse::TransitionResult<Self::Super> {
use crate::{
parse::Transition,
xir::{
@ -882,12 +1067,21 @@ macro_rules! ele_parse {
Expecting_(cfg),
XirfToken::Open(qname, span, depth)
) if qname == $ntref::qname() => {
$ntref::from(EleParseCfg::default()).delegate(
XirfToken::Open(qname, span, depth),
&mut Self::Context::default(),
// TODO: proper trampoline delegation
|si| Transition(Self::$ntref(cfg, si.into())).into_super(),
|| todo!("inner dead (should not happen here)"),
ele_parse!(@!ntref_delegate
stack,
match cfg.repeat {
true => Expecting_(cfg),
false => Done_,
},
$ntref,
Transition(
$ntref::from(
EleParseCfg::default()
)
).incomplete().with_lookahead(
XirfToken::Open(qname, span, depth)
),
unreachable!("TODO: remove me (ntref_delegate done)")
)
},
)*
@ -922,19 +1116,6 @@ macro_rules! ele_parse {
Transition(st).incomplete()
},
$(
(Self::$ntref(cfg, si), tok) => si.delegate(
tok,
&mut Self::Context::default(),
// TODO: proper trampoline delegation
|si| Transition(Self::$ntref(cfg, si.into())).into_super(),
|| match cfg.repeat {
true => Transition(Expecting_(cfg)),
false => Transition(Done_),
}
),
)*
(st @ Self::Done_, tok) => Transition(st).dead(tok),
todo => todo!("sum {todo:?}"),
@ -944,19 +1125,6 @@ macro_rules! ele_parse {
fn is_accepting(&self) -> bool {
match self {
Self::RecoverEleIgnoreClosed_(..) | Self::Done_ => true,
// Delegate entirely to the inner ParseState.
// It is desirable to maintain this state even after
// the inner parser is completed so that the inner
// state can accurately describe what took place.
// With that said,
// we will transition to `Done_` on an inner dead
// state,
// because of current `delegate` limitations.
$(
Self::$ntref(_, si) => si.is_accepting(),
)*
_ => false,
}
}
@ -1027,22 +1195,6 @@ macro_rules! ele_parse {
}
)*
// TODO: This is used only until we remove composition-based
// delegation in favor of trampolines---the
// composed parsers yield their superstate,
// which we have to convert back.
$(
impl From<$super> for $nt {
fn from(sup: $super) -> Self {
match sup {
$super::$nt(st) => st,
#[allow(unreachable_patterns)]
_ => unreachable!("From<Super> for NT mismatch"),
}
}
}
)*
impl std::fmt::Display for $super {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
@ -1101,21 +1253,23 @@ macro_rules! ele_parse {
>;
type Object = $objty;
type Error = [<$super Error_>];
type Context = crate::xir::parse::StateStackContext<Self>;
fn parse_token(
self,
tok: Self::Token,
_: &mut Self::Context,
stack: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
use crate::parse::Transition;
match self {
$(
Self::$nt(st) => st.delegate(
// Pass token directly to child until it reports
// a dead state,
// after which we return to the `ParseState`
// atop of the stack.
Self::$nt(st) => st.delegate_child(
tok,
&mut Self::Context::default(),
Transition,
|| todo!("DEAD super sum")
stack,
|tok, stack| stack.ret(tok),
),
)*
}

View File

@ -631,9 +631,9 @@ fn child_error_and_recovery() {
let err = sut.next().unwrap().unwrap_err();
assert_eq!(
// TODO: This references generated identifiers.
ParseError::StateError(SutError_::Root(RootError_::ChildA(
ParseError::StateError(SutError_::ChildA(
ChildAError_::UnexpectedEle_(unexpected, span.name_span())
))),
)),
err,
);
@ -1391,6 +1391,7 @@ fn sum_repetition() {
// element,
// meaning it'll preempt sum parser delegation to provide the desired
// behavior.
#[ignore] // TODO: Broken by introduction of superstate trampoline
#[test]
fn mixed_content_text_nodes() {
#[derive(Debug, PartialEq, Eq)]