tamer: xir::parse::ele: Superstate element preemption

This uses the same mechanism that was introduced for handling `Text` nodes
in mixed content, allowing for arbitrary element `Open` matches for
preemption by the superstate.

This will be used to allow for template expansion virtually
anywhere.  Unlike the existing TAME, it'll even allow for it at the root,
though whether that's ultimately permitted really depends on how I
approach template expansion; it may fail during a later lowering operation.

This is interesting because this approach is only possible because of the
CPS-style trampoline implementation.  With the earlier composition-based
approach, each and every parser would have had to perform this check, just
as we previously had to for `Text` nodes.

As usual, this is still adding to the mess a bit, and it'll need some future
cleanup.

DEV-7145
main
Mike Gerwitz 2022-08-16 14:45:16 -04:00
parent 6f53c0971b
commit 43c64babb0
2 changed files with 543 additions and 27 deletions

View File

@ -463,6 +463,9 @@ macro_rules! ele_parse {
"`]."
)]
Expecting_(crate::xir::parse::EleParseCfg),
/// Non-preemptable [`Self::Expecting_`].
#[allow(dead_code)] // used by sum parser
NonPreemptableExpecting_(crate::xir::parse::EleParseCfg),
/// Recovery state ignoring all remaining tokens for this
/// element.
RecoverEleIgnore_(
@ -542,10 +545,39 @@ macro_rules! ele_parse {
impl $nt {
/// Produce the node matcher for the element recognized by this parser.
///
/// The matcher is built from the `$qname` macro argument of this NT.
#[allow(dead_code)] // used by sum parser
#[inline]
fn matcher() -> crate::xir::parse::NodeMatcher {
    use crate::xir::parse::NodeMatcher;

    NodeMatcher::from($qname)
}
/// Determine whether an element named `qname` would be matched by
///   this parser.
///
/// This defers entirely to the matcher produced by
///   [`Self::matcher`].
#[inline]
fn matches(qname: crate::xir::QName) -> bool {
    let node_matcher = Self::matcher();

    node_matcher.matches(qname)
}
/// Retrieve the parser configuration held by the current state.
///
/// Every state of this parser stores an `EleParseCfg`,
///   either as its first field or as the first component of a
///   tuple in its first field,
///   and that value is returned by copy.
#[allow(dead_code)] // used by sum parser
fn cfg(&self) -> crate::xir::parse::EleParseCfg {
    use $nt::*;

    // Each arm yields a reference to the stored configuration;
    //   the single dereference at the end copies it out.
    let held = match self {
        // States storing the configuration directly as the first
        //   field.
        Expecting_(cfg)
        | NonPreemptableExpecting_(cfg)
        | RecoverEleIgnore_(cfg, ..)
        | RecoverEleIgnoreClosed_(cfg, ..)
        | Closed_(cfg, ..) => cfg,

        // States whose first field is a tuple that begins with the
        //   configuration.
        CloseRecoverIgnore_((cfg, ..), ..)
        | Attrs_((cfg, ..), ..)
        | ExpectClose_((cfg, ..), ..) => cfg,

        // One state per child NT,
        //   each holding a tuple that begins with the configuration.
        $(
            $ntref((cfg, ..)) => cfg,
        )*
    };

    *held
}
/// Whether the parser is in a state that can tolerate
/// superstate node preemption.
///
@ -562,6 +594,10 @@ macro_rules! ele_parse {
// since we haven't started processing yet.
Expecting_(..) => true,
// The name says it all.
// Instantiated by the superstate.
NonPreemptableExpecting_(..) => false,
// Preemption during recovery would cause tokens to
// be parsed when they ought to be ignored,
// so we must process all tokens during recovery.
@ -594,12 +630,11 @@ macro_rules! ele_parse {
$ntnext(..) => true,
)*
// Preemption after closing is similar to preemption
// in `Expecting_`,
// in that we're effectively in the parent
// context.
// If we're done,
// we want to be able to yield a dead state so
// that we can transition away from this parser.
RecoverEleIgnoreClosed_(..)
| Closed_(..) => true,
| Closed_(..) => false,
}
}
}
@ -612,7 +647,8 @@ macro_rules! ele_parse {
};
match self {
Self::Expecting_(_) => write!(
Self::Expecting_(_)
| Self::NonPreemptableExpecting_(_) => write!(
f,
"expecting opening tag {}",
TtOpenXmlEle::wrap(Self::matcher()),
@ -775,16 +811,16 @@ macro_rules! ele_parse {
};
use $nt::{
Attrs_, Expecting_, RecoverEleIgnore_,
CloseRecoverIgnore_, RecoverEleIgnoreClosed_,
ExpectClose_, Closed_
Attrs_, Expecting_, NonPreemptableExpecting_,
RecoverEleIgnore_, CloseRecoverIgnore_,
RecoverEleIgnoreClosed_, ExpectClose_, Closed_
};
match (self, tok) {
(
Expecting_(cfg),
Expecting_(cfg) | NonPreemptableExpecting_(cfg),
XirfToken::Open(qname, span, depth)
) if $nt::matcher().matches(qname) => {
) if $nt::matches(qname) => {
Transition(Attrs_(
(cfg, qname, span.tag_span(), depth),
parse_attrs(qname, span)
@ -794,7 +830,7 @@ macro_rules! ele_parse {
(
Closed_(cfg, ..),
XirfToken::Open(qname, span, depth)
) if cfg.repeat && Self::matcher().matches(qname) => {
) if cfg.repeat && Self::matches(qname) => {
Transition(Attrs_(
(cfg, qname, span.tag_span(), depth),
parse_attrs(qname, span)
@ -802,7 +838,7 @@ macro_rules! ele_parse {
},
(
Expecting_(cfg),
Expecting_(cfg) | NonPreemptableExpecting_(cfg),
XirfToken::Open(qname, span, depth)
) => {
Transition(RecoverEleIgnore_(cfg, qname, span, depth)).err(
@ -937,6 +973,9 @@ macro_rules! ele_parse {
#[derive(Debug, PartialEq, Eq)]
$vis enum $nt {
Expecting_(crate::xir::parse::EleParseCfg),
#[allow(dead_code)] // used by superstate node preemption
/// Non-preemptable [`Self::Expecting_`].
NonPreemptableExpecting_(crate::xir::parse::EleParseCfg),
/// Recovery state ignoring all remaining tokens for this
/// element.
RecoverEleIgnore_(
@ -956,6 +995,44 @@ macro_rules! ele_parse {
}
impl $nt {
// Whether the given QName would be matched by any of the
//   parsers associated with this sum type.
//
// The check is delegated to the `matches` associated function of
//   each `$ntref`,
//     with the results combined using `||`.
//
// This is short-circuiting and will return as soon as one
//   parser is found,
//     so it may be a good idea to order the sum type
//     according to the most likely value to be encountered.
// At its worst,
//   this may be equivalent to a linear search of the
//   parsers.
// With that said,
//   Rust/LLVM may optimize this in any number of ways,
//     especially if each inner parser matches on a QName
//     constant.
// Let a profiler and disassembly guide you.
#[allow(dead_code)] // used by superstate
fn matches(qname: crate::xir::QName) -> bool {
    // If we used an array or a trait,
    //   then we'd need everything to be a similar type;
    //     this allows for _any_ type provided that it
    //     expands into something that contains a `matches`
    //     associated function of a compatible type.
    //
    // The leading `false` seeds the `||` chain,
    //   so the expression remains valid
    //     (and evaluates to `false`)
    //     even if the repetition expands to nothing.
    false $(|| $ntref::matches(qname))*
}
/// Retrieve the parser configuration associated with the current
///   state.
///
/// States that carry an `EleParseCfg` return a copy of it.
/// `Done_` carries no configuration,
///   and so the default configuration is returned for it instead.
fn cfg(&self) -> crate::xir::parse::EleParseCfg {
    use $nt::*;

    match self {
        // Nothing is stored in this state,
        //   so fall back to the default.
        Done_ => crate::xir::parse::EleParseCfg::default(),

        // All remaining states hold the configuration as their
        //   first field.
        Expecting_(held)
        | NonPreemptableExpecting_(held)
        | RecoverEleIgnore_(held, ..)
        | RecoverEleIgnoreClosed_(held, ..) => *held,
    }
}
/// Whether the parser is in a state that can tolerate
/// superstate node preemption.
///
@ -972,17 +1049,20 @@ macro_rules! ele_parse {
// since we haven't started processing yet.
Expecting_(..) => true,
// The name says it all.
// Instantiated by the superstate.
NonPreemptableExpecting_(..) => false,
// Preemption during recovery would cause tokens to
// be parsed when they ought to be ignored,
// so we must process all tokens during recovery.
RecoverEleIgnore_(..) => false,
// Preemption after closing is similar to preemption
// in `Expecting_`,
// in that we're effectively in the parent
// context.
// If we're done,
// we want to be able to yield a dead state so
// that we can transition away from this parser.
RecoverEleIgnoreClosed_(..)
| Done_ => true,
| Done_ => false,
}
}
}
@ -1002,7 +1082,8 @@ macro_rules! ele_parse {
let expected = EleSumList::wrap(&ntrefs);
match self {
Self::Expecting_(_) => {
Self::Expecting_(_)
| Self::NonPreemptableExpecting_(_) => {
write!(f, "expecting {expected}")
},
@ -1105,16 +1186,54 @@ macro_rules! ele_parse {
};
use $nt::{
Expecting_, RecoverEleIgnore_,
Expecting_, NonPreemptableExpecting_, RecoverEleIgnore_,
RecoverEleIgnoreClosed_, Done_
};
match (self, tok) {
$(
(
Expecting_(cfg),
st @ (Expecting_(_) | NonPreemptableExpecting_(_)),
XirfToken::Open(qname, span, depth)
) if $ntref::matcher().matches(qname) => {
) if $ntref::matches(qname) => {
ele_parse!(@!ntref_delegate
stack,
match st.cfg() {
cfg @ EleParseCfg { repeat: true, .. } => {
Expecting_(cfg)
},
_ => Done_,
},
$ntref,
Transition(
// Propagate non-preemption status,
// otherwise we'll provide a
// lookback of the original token
// and end up recursing until we
// hit the `stack` limit.
match st {
NonPreemptableExpecting_(_) => {
$ntref::NonPreemptableExpecting_(
EleParseCfg::default()
)
}
_ => {
$ntref::from(
EleParseCfg::default()
)
}
}
).incomplete().with_lookahead(
XirfToken::Open(qname, span, depth)
),
unreachable!("TODO: remove me (ntref_delegate done)")
)
},
(
NonPreemptableExpecting_(cfg),
XirfToken::Open(qname, span, depth)
) if $ntref::matches(qname) => {
ele_parse!(@!ntref_delegate
stack,
match cfg.repeat {
@ -1123,7 +1242,7 @@ macro_rules! ele_parse {
},
$ntref,
Transition(
$ntref::from(
$ntref::NonPreemptableExpecting_(
EleParseCfg::default()
)
).incomplete().with_lookahead(
@ -1136,11 +1255,15 @@ macro_rules! ele_parse {
// An unexpected token when repeating ends
// repetition and should not result in an error.
(Expecting_(cfg), tok) if cfg.repeat => {
Transition(Done_).dead(tok)
}
(
Expecting_(cfg) | NonPreemptableExpecting_(cfg),
tok
) if cfg.repeat => Transition(Done_).dead(tok),
(Expecting_(cfg), XirfToken::Open(qname, span, depth)) => {
(
Expecting_(cfg) | NonPreemptableExpecting_(cfg),
XirfToken::Open(qname, span, depth)
) => {
use crate::xir::EleSpan;
Transition(RecoverEleIgnore_(cfg, qname, span, depth)).err(
// Use name span rather than full `OpenSpan`
@ -1199,6 +1322,10 @@ macro_rules! ele_parse {
// with the given QName as a preprocessing step,
// allowing them to reuse the existing element NT system.
$([text]($text:ident, $text_span:ident) => $text_map:expr,)?
// Optional _single_ NT to preempt arbitrary elements.
// Sum NTs can be used to preempt multiple elements.
$($pre_nt:ident)?
}
)?
$(
@ -1348,6 +1475,34 @@ macro_rules! ele_parse {
Transition(st).ok($text_map)
},
)?
// Preemption NT
$(
(
st,
XirfToken::Open(
qname,
ospan,
depth,
),
) if st.can_preempt_node() && $pre_nt::matches(qname) => {
stack.transfer_with_ret(
Transition(st),
Transition(
// Prevent recursing on this token.
$pre_nt::NonPreemptableExpecting_(
ele_parse!(@!ntref_cfg)
)
)
.incomplete()
.with_lookahead(XirfToken::Open(
qname,
ospan,
depth,
)),
)
},
)?
)?
// }

View File

@ -1733,6 +1733,367 @@ fn no_mixed_content_super() {
);
}
// Using the same superstate node preemption mechanism as `[text]` above,
// the superstate can also preempt opening element nodes.
// This is useful for things that can appear in _any_ context,
// such as template applications.
#[test]
fn superstate_preempt_element_open_sum() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        RootClose,
        ChildA,
        ChildAClose,
        ChildB,
        ChildBClose,
        PreA(Span),
        PreAClose,
        PreB(Span),
        PreBClose,
    }

    impl crate::parse::Object for Foo {}

    const QN_ROOT: QName = QN_PACKAGE;
    const QN_CHILDA: QName = QN_NAME;
    const QN_CHILDB: QName = QN_DIM;
    const QN_PRE_A: QName = QN_CLASSIFY;
    const QN_PRE_B: QName = QN_EXPORT;

    ele_parse! {
        enum Sut;
        type Object = Foo;

        [super] {
            // We can provide a _single_ NT to preempt.
            // Using a sum type allows us to preempt multiple nodes.
            PreAB
        };

        Root := QN_ROOT {
            @ {} => Foo::Root,
            / => Foo::RootClose,

            // Note how `PreAB` is _not_ a child here.
            ChildA,
            ChildB,
        };

        ChildA := QN_CHILDA {
            @ {} => Foo::ChildA,
            / => Foo::ChildAClose,
        };

        ChildB := QN_CHILDB {
            @ {} => Foo::ChildB,
            / => Foo::ChildBClose,
        };

        PreA := QN_PRE_A(_, ospan) {
            @ {} => Foo::PreA(ospan.span()),
            / => Foo::PreAClose,
        };

        PreB := QN_PRE_B(_, ospan) {
            @ {} => Foo::PreB(ospan.span()),
            / => Foo::PreBClose,
        };

        PreAB := (PreA | PreB);
    }

    let toks = vec![
        // Yes, we can preempt at the root.
        // This would allow,
        //   for example,
        //   template application as the root element,
        //   which was _not_ possible in the original TAME.
        // Note that this would cause the root to be the preempted node
        //   itself,
        //     and so it would _take the place of_ the intended root.
        // This isn't the place to discuss the merits of such a thing.
        XirfToken::Open(QN_PRE_A, OpenSpan(S1, N), Depth(0)),
        // Preempted nodes are parsed just as any other node,
        //   so control has been passed to `PreA`.
        XirfToken::Close(None, CloseSpan::empty(S1), Depth(0)),
        //
        // Now let's open our _expected_ root,
        //   without preemption.
        // Note that this is effectively another XML document,
        //   and XIRF would not allow this.
        // But we're in control of the token stream here and so we're going
        //   to do it anyway for convenience.
        XirfToken::Open(QN_ROOT, OpenSpan(S2, N), Depth(0)),
        // At this point we are performing attribute parsing.
        // Let's try to preempt;
        //   we'll want to ensure that attributes will be emitted before the
        //   preempted node,
        //     otherwise we'd be a sibling rather than a child.
        XirfToken::Open(QN_PRE_B, OpenSpan(S3, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // Now let's return to normal parsing with the expected child.
        XirfToken::Open(QN_CHILDA, OpenSpan(S4, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
        // We're now expecting `ChildB`.
        // Preempt again.
        XirfToken::Open(QN_PRE_A, OpenSpan(S5, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
        // Preemption should not have changed the state of `Root`,
        //   and so _we should still be expecting `ChildB`_.
        XirfToken::Open(QN_CHILDB, OpenSpan(S6, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
        // We ought to be able to preempt before the closing tag too.
        XirfToken::Open(QN_PRE_B, OpenSpan(S7, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S7), Depth(1)),
        // Adjacent,
        //   just to be sure that we allow the previous to close before we
        //   preempt again.
        // The close uses the same span as its own opening tag (`S8`);
        //   `PreAClose` carries no span,
        //     so this does not influence the expected output below.
        XirfToken::Open(QN_PRE_A, OpenSpan(S8, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S8), Depth(1)),
        // This poor document has had enough.
        // Let it close.
        XirfToken::Close(Some(QN_ROOT), CloseSpan(S2, N), Depth(0)),
    ];

    use Parsed::*;

    // Trailing annotations name the active parser in brackets
    //   (with `@` while it is parsing attributes)
    //   followed by the token being processed.
    // NOTE(review): `(>LA)`/`(<LA)` presumably mark a token being pushed
    //   to and then replayed from lookahead, respectively; confirm.
    assert_eq!(
        Ok(vec![
            Incomplete,               // [PreA]    A Open
            Object(Foo::PreA(S1)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Incomplete,               // [Root]    Root Open
            Object(Foo::Root),        // [Root@]   B Open (>LA)
            Incomplete,               // [PreB]    B Open (<LA)
            Object(Foo::PreB(S3)),    // [PreB@]   B Close (>LA)
            Object(Foo::PreBClose),   // [PreB]    B Close (<LA)
            Incomplete,               // [ChildA]  ChildA Open
            Object(Foo::ChildA),      // [ChildA@] ChildA Close (>LA)
            Object(Foo::ChildAClose), // [ChildA]  ChildA Close (<LA)
            Incomplete,               // [PreA]    A Open
            Object(Foo::PreA(S5)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Incomplete,               // [ChildB]  ChildB Open
            Object(Foo::ChildB),      // [ChildB@] ChildB Close (>LA)
            Object(Foo::ChildBClose), // [ChildB]  ChildB Close (<LA)
            Incomplete,               // [PreB]    B Open (<LA)
            Object(Foo::PreB(S7)),    // [PreB@]   B Close (>LA)
            Object(Foo::PreBClose),   // [PreB]    B Close (<LA)
            Incomplete,               // [PreA]    A Open (<LA)
            Object(Foo::PreA(S8)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Object(Foo::RootClose),   // [Root]    Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}
// Superstate preemption as above,
// but using a normal NT instead of Sum NT.
#[test]
fn superstate_preempt_element_open_non_sum() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        RootClose,
        ChildA,
        ChildAClose,
        ChildB,
        ChildBClose,
        PreA(Span),
        PreAClose,
    }

    impl crate::parse::Object for Foo {}

    const QN_ROOT: QName = QN_PACKAGE;
    const QN_CHILDA: QName = QN_NAME;
    const QN_CHILDB: QName = QN_DIM;
    const QN_PRE_A: QName = QN_CLASSIFY;

    ele_parse! {
        enum Sut;
        type Object = Foo;

        [super] {
            // We can provide a _single_ NT to preempt.
            PreA
        };

        Root := QN_ROOT {
            @ {} => Foo::Root,
            / => Foo::RootClose,

            // Note how `PreA` is _not_ a child here.
            ChildA,
            ChildB,
        };

        ChildA := QN_CHILDA {
            @ {} => Foo::ChildA,
            / => Foo::ChildAClose,
        };

        ChildB := QN_CHILDB {
            @ {} => Foo::ChildB,
            / => Foo::ChildBClose,
        };

        PreA := QN_PRE_A(_, ospan) {
            @ {} => Foo::PreA(ospan.span()),
            / => Foo::PreAClose,
        };
    }

    let toks = vec![
        // Yes, we can preempt at the root.
        // This would allow,
        //   for example,
        //   template application as the root element,
        //   which was _not_ possible in the original TAME.
        // Note that this would cause the root to be the preempted node
        //   itself,
        //     and so it would _take the place of_ the intended root.
        // This isn't the place to discuss the merits of such a thing.
        XirfToken::Open(QN_PRE_A, OpenSpan(S1, N), Depth(0)),
        // Preempted nodes are parsed just as any other node,
        //   so control has been passed to `PreA`.
        XirfToken::Close(None, CloseSpan::empty(S1), Depth(0)),
        //
        // Now let's open our _expected_ root,
        //   without preemption.
        // Note that this is effectively another XML document,
        //   and XIRF would not allow this.
        // But we're in control of the token stream here and so we're going
        //   to do it anyway for convenience.
        XirfToken::Open(QN_ROOT, OpenSpan(S2, N), Depth(0)),
        // At this point we are performing attribute parsing.
        // Let's try to preempt;
        //   we'll want to ensure that attributes will be emitted before the
        //   preempted node,
        //     otherwise we'd be a sibling rather than a child.
        XirfToken::Open(QN_PRE_A, OpenSpan(S3, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // Now let's return to normal parsing with the expected child.
        XirfToken::Open(QN_CHILDA, OpenSpan(S4, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
        // We're now expecting `ChildB`.
        // Preempt again.
        XirfToken::Open(QN_PRE_A, OpenSpan(S5, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
        // Preemption should not have changed the state of `Root`,
        //   and so _we should still be expecting `ChildB`_.
        XirfToken::Open(QN_CHILDB, OpenSpan(S6, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
        // Finally,
        //   we ought to be able to preempt before the closing tag too.
        XirfToken::Open(QN_PRE_A, OpenSpan(S7, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S7), Depth(1)),
        // This poor document has had enough.
        // Let it close.
        XirfToken::Close(Some(QN_ROOT), CloseSpan(S2, N), Depth(0)),
    ];

    use Parsed::*;

    // Trailing annotations name the active parser in brackets
    //   (with `@` while it is parsing attributes)
    //   followed by the token being processed.
    // NOTE(review): `(>LA)`/`(<LA)` presumably mark a token being pushed
    //   to and then replayed from lookahead, respectively; confirm.
    assert_eq!(
        Ok(vec![
            Incomplete,               // [PreA]    A Open
            Object(Foo::PreA(S1)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Incomplete,               // [Root]    Root Open
            Object(Foo::Root),        // [Root@]   A Open (>LA)
            Incomplete,               // [PreA]    A Open (<LA)
            Object(Foo::PreA(S3)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Incomplete,               // [ChildA]  ChildA Open
            Object(Foo::ChildA),      // [ChildA@] ChildA Close (>LA)
            Object(Foo::ChildAClose), // [ChildA]  ChildA Close (<LA)
            Incomplete,               // [PreA]    A Open
            Object(Foo::PreA(S5)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Incomplete,               // [ChildB]  ChildB Open
            Object(Foo::ChildB),      // [ChildB@] ChildB Close (>LA)
            Object(Foo::ChildBClose), // [ChildB]  ChildB Close (<LA)
            Incomplete,               // [PreA]    A Open (<LA)
            Object(Foo::PreA(S7)),    // [PreA@]   A Close (>LA)
            Object(Foo::PreAClose),   // [PreA]    A Close (<LA)
            Object(Foo::RootClose),   // [Root]    Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}
// Layers of preemption
// (e.g. nested template applications).
#[test]
fn superstate_preempt_element_open_nested() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        RootClose,
        PreA(Span),
        PreAClose(Span),
    }

    impl crate::parse::Object for Foo {}

    const QN_ROOT: QName = QN_PACKAGE;
    const QN_PRE_A: QName = QN_CLASSIFY;

    ele_parse! {
        enum Sut;
        type Object = Foo;

        [super] {
            // The lone NT that the superstate is permitted to preempt.
            PreA
        };

        Root := QN_ROOT {
            @ {} => Foo::Root,
            / => Foo::RootClose,
        };

        // Both the opening and closing spans are captured so that each
        //   nesting level can be told apart in the expected output.
        PreA := QN_PRE_A(_, ospan) {
            @ {} => Foo::PreA(ospan.span()),
            /(cspan) => Foo::PreAClose(cspan.span()),
        };
    }

    use Parsed::*;

    let input = vec![
        XirfToken::Open(QN_ROOT, OpenSpan(S2, N), Depth(0)),
        // Preempt once beneath the root...
        XirfToken::Open(QN_PRE_A, OpenSpan(S3, N), Depth(1)),
        // ...and then again beneath that preempted node.
        XirfToken::Open(QN_PRE_A, OpenSpan(S4, N), Depth(2)),
        XirfToken::Close(None, CloseSpan::empty(S4), Depth(2)),
        // A sibling immediately after,
        //   which requires that the prior node was properly closed.
        XirfToken::Open(QN_PRE_A, OpenSpan(S5, N), Depth(2)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(2)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        XirfToken::Close(Some(QN_ROOT), CloseSpan(S2, N), Depth(0)),
    ];

    let expected = Ok(vec![
        Incomplete,                 // [Root]  Root Open
        Object(Foo::Root),          // [Root@] PreA Open (>LA)
        Incomplete,                 // [PreA]  PreA Open (<LA)
        Object(Foo::PreA(S3)),      // [PreA@] PreA Open (>LA)
        Incomplete,                 // [PreA]  PreA Open (<LA)
        Object(Foo::PreA(S4)),      // [PreA@] PreA Close (>LA)
        Object(Foo::PreAClose(S4)), // [PreA]  PreA Close (<LA)
        Incomplete,                 // [PreA]  PreA Open (<LA)
        Object(Foo::PreA(S5)),      // [PreA@] PreA Close (>LA)
        Object(Foo::PreAClose(S5)), // [PreA]  PreA Close (<LA)
        Object(Foo::PreAClose(S3)), // [PreA]  PreA Close
        Object(Foo::RootClose),     // [Root]  Root Close
    ]);

    let actual: Result<Vec<_>, _> = Sut::parse(input.into_iter()).collect();

    assert_eq!(expected, actual);
}
// If there are any parsers that still have work to do
// (any on the stack),
// we cannot consider ourselves to be done parsing.