tamer: xir::parse::ele: Nonterminal repetition (Kleene star)

This allows an element to be repeated by the parent NT.  The easiest way I
saw to implement this for now was to abuse the Context to provide a runtime
configuration that would allow the state machine to reset after it has
completed parsing.

This also influences error recovery, in that if we're expecting zero or more
of something, we cannot provide an error for an unexpected name, and instead
must emit a dead state so that the caller can determine what to do.

DEV-7145
main
Mike Gerwitz 2022-07-19 12:15:59 -04:00
parent e73c223a55
commit 1ec9c963fd
4 changed files with 646 additions and 41 deletions

View File

@ -478,6 +478,12 @@ pub mod context {
}
}
/// Identity conversion:
///   a [`Context`] can be mutably borrowed as itself.
impl<T: Debug + Default> AsMut<Context<T>> for Context<T> {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
impl<T: Debug + Default> Deref for Context<T> {
type Target = T;

View File

@ -309,7 +309,7 @@ macro_rules! attr_parse {
Self { ___ctx: (ele, _), .. } => {
write!(
f,
"parsing attributes for element {}",
"expecting attributes for element {}",
TtQuote::wrap(ele)
)
}

View File

@ -24,6 +24,19 @@ use crate::parse::ParseState;
/// A parser accepting a single element.
pub trait EleParseState: ParseState {}
/// Element parser configuration.
///
/// This configuration is set on a nonterminal reference using square
///   brackets
///   (e.g. `Foo[*]`).
///
/// The default configuration disables every modifier.
#[derive(Debug, PartialEq, Default)]
pub struct EleParseCfg {
    /// Whether to allow zero-or-more repetition for this element.
    ///
    /// This is the Kleene star modifier (`*`).
    ///
    /// This field is public because code generated by `ele_parse!`
    ///   constructs this struct and reads this flag at the macro's
    ///   expansion site,
    ///     where field privacy is checked.
    pub repeat: bool,
}
#[macro_export]
macro_rules! ele_parse {
(type Object = $objty:ty; $($rest:tt)*) => {
@ -66,8 +79,10 @@ macro_rules! ele_parse {
$(/$(($close_span:ident))? => $closemap:expr,)?
// Nonterminal references are provided as a list.
// A configuration specifier can be provided,
// currently intended to support the Kleene star.
$(
$ntref:ident,
$ntref:ident $([$ntref_cfg:tt])?,
)*
) => {
ele_parse! {
@ -77,7 +92,7 @@ macro_rules! ele_parse {
<> {
$(
$ntref,
$ntref [$($ntref_cfg)?],
)*
}
@ -102,6 +117,19 @@ macro_rules! ele_parse {
crate::parse::ParseStatus::Object($close)
};
// NT[*] modifier.
(@!ntref_cfg *) => {
crate::parse::Context::from(crate::xir::parse::ele::EleParseCfg {
repeat: true,
..Default::default()
})
};
// No bracketed modifier following NT.
(@!ntref_cfg) => {
Self::Context::default()
};
(@!ele_dfn_body <$objty:ty> $nt:ident $qname:ident ($($open_span:ident)?)
// Attribute definition special form.
@ {
@ -121,7 +149,7 @@ macro_rules! ele_parse {
// Nonterminal references.
<> {
$(
$ntref:ident,
$ntref:ident [$($ntref_cfg:tt)?],
)*
}
@ -158,12 +186,20 @@ macro_rules! ele_parse {
// Recovery completed because end tag corresponding to the
// invalid element has been found.
RecoverEleIgnoreClosed_(crate::xir::QName, crate::xir::CloseSpan),
/// Recovery state ignoring all tokens when a `Close` is
/// expected.
///
/// This is token-agnostic---it
/// may be a child element,
/// but it may be text,
/// for example.
CloseRecoverIgnore_(Depth, crate::span::Span),
/// Parsing element attributes.
Attrs_([<$nt AttrsState_>]),
Attrs_(Depth, [<$nt AttrsState_>]),
$(
$ntref($ntref),
$ntref(Depth, $ntref),
)*
ExpectClose_(()),
ExpectClose_(Depth, ()),
/// Closing tag found and parsing of the element is
/// complete.
Closed_(crate::span::Span),
@ -202,20 +238,29 @@ macro_rules! ele_parse {
given = TtQuote::wrap(name),
expected = TtQuote::wrap($qname),
),
Self::Attrs_(sa) => std::fmt::Display::fmt(sa, f),
Self::ExpectClose_(_) => write!(
Self::CloseRecoverIgnore_(depth, _) => write!(
f,
"expecting closing element {}",
"attempting to recover by ignoring input \
until the expected end tag {expected} \
at depth {depth}",
expected = TtCloseXmlEle::wrap($qname),
),
Self::Attrs_(_, sa) => std::fmt::Display::fmt(sa, f),
Self::ExpectClose_(depth, _) => write!(
f,
"expecting closing element {} at depth {depth}",
TtCloseXmlEle::wrap($qname)
),
Self::Closed_(_) => write!(
f,
"element {} closed",
"done parsing element {}",
TtQuote::wrap($qname)
),
$(
Self::$ntref(st) => std::fmt::Display::fmt(st, f),
Self::$ntref(_, st) => {
std::fmt::Display::fmt(st, f)
},
)*
}
}
@ -223,7 +268,12 @@ macro_rules! ele_parse {
#[derive(Debug, PartialEq)]
enum [<$nt Error_>] {
/// An element was expected,
/// but the name of the element was unexpected.
UnexpectedEle_(crate::xir::QName, crate::span::Span),
/// Unexpected input while expecting an end tag for this
/// element.
CloseExpected_(crate::xir::flat::XirfToken),
Attrs_(crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>),
$(
$ntref([<$ntref Error_>]),
@ -258,14 +308,20 @@ macro_rules! ele_parse {
impl std::fmt::Display for [<$nt Error_>] {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use crate::{
fmt::DisplayWrapper,
xir::fmt::TtOpenXmlEle,
fmt::{DisplayWrapper, TtQuote},
xir::fmt::{TtOpenXmlEle, TtCloseXmlEle},
};
match self {
Self::UnexpectedEle_(name, _) => {
write!(f, "unexpected {}", TtOpenXmlEle::wrap(name))
}
Self::CloseExpected_(tok) => write!(
f,
"expected {}, but found {}",
TtCloseXmlEle::wrap($qname),
TtQuote::wrap(tok)
),
Self::Attrs_(e) => std::fmt::Display::fmt(e, f),
$(
Self::$ntref(e) => std::fmt::Display::fmt(e, f),
@ -284,11 +340,12 @@ macro_rules! ele_parse {
type Token = crate::xir::flat::XirfToken;
type Object = $objty;
type Error = [<$nt Error_>];
type Context = crate::parse::Context<crate::xir::parse::ele::EleParseCfg>;
fn parse_token(
self,
tok: Self::Token,
_: crate::parse::NoContext,
cfg: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
use crate::{
parse::{EmptyContext, Transition, Transitionable},
@ -300,12 +357,24 @@ macro_rules! ele_parse {
use $nt::{
Attrs_, Expecting_, RecoverEleIgnore_,
RecoverEleIgnoreClosed_, ExpectClose_, Closed_
CloseRecoverIgnore_, RecoverEleIgnoreClosed_,
ExpectClose_, Closed_
};
match (self, tok) {
(Expecting_, XirfToken::Open(qname, span, ..)) if qname == $qname => {
Transition(Attrs_(parse_attrs(qname, span)))
(
Expecting_,
XirfToken::Open(qname, span, depth)
) if qname == $qname => {
Transition(Attrs_(depth, parse_attrs(qname, span)))
.incomplete()
},
(
Closed_(..),
XirfToken::Open(qname, span, depth)
) if cfg.repeat && qname == $qname => {
Transition(Attrs_(depth, parse_attrs(qname, span)))
.incomplete()
},
@ -322,15 +391,11 @@ macro_rules! ele_parse {
Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
},
(st @ RecoverEleIgnore_(..), _) => {
Transition(st).incomplete()
},
(Attrs_(sa), tok) => {
(Attrs_(depth, sa), tok) => {
sa.delegate_until_obj(
tok,
EmptyContext,
|sa| Transition(Attrs_(sa)),
|sa| Transition(Attrs_(depth, sa)),
|| unreachable!("see ParseState::delegate_until_obj dead"),
|#[allow(unused_variables)] sa, attrs| {
let obj = match attrs {
@ -350,35 +415,53 @@ macro_rules! ele_parse {
},
};
Transition($ntfirst(Default::default())).ok(obj)
Transition($ntfirst(depth, Default::default()))
.ok(obj)
}
)
},
$(
($ntprev(st_inner), tok) => {
($ntprev(depth, st_inner), tok) => {
st_inner.delegate(
tok,
EmptyContext,
|si| Transition($ntprev(si)),
|| Transition($ntnext(Default::default()))
&mut ele_parse!(@!ntref_cfg $($ntref_cfg)?),
|si| Transition($ntprev(depth, si)),
|| Transition($ntnext(depth, Default::default()))
)
},
)*
// XIRF ensures proper nesting,
// so this must be our own closing tag.
(ExpectClose_(_), XirfToken::Close(_, span, _)) => {
// so we do not need to check the element name.
(
ExpectClose_(depth, ()) | CloseRecoverIgnore_(depth, _),
XirfToken::Close(_, span, tok_depth)
) if tok_depth == depth => {
$(
let $close_span = span;
)?
$closemap.transition(Closed_(span.tag_span()))
},
(ExpectClose_(depth, ()), unexpected_tok) => {
use crate::parse::Token;
Transition(
CloseRecoverIgnore_(depth, unexpected_tok.span())
).err([<$nt Error_>]::CloseExpected_(unexpected_tok))
}
// We're still in recovery,
// so this token gets thrown out.
(st @ (RecoverEleIgnore_(..) | CloseRecoverIgnore_(..)), _) => {
Transition(st).incomplete()
},
// TODO: Use `is_accepting` guard if we do not utilize
// exhaustiveness check.
(st @ (Closed_(..) | RecoverEleIgnoreClosed_(..)), tok) =>
Transition(st).dead(tok),
(st @ (Closed_(..) | RecoverEleIgnoreClosed_(..)), tok) => {
Transition(st).dead(tok)
}
todo => todo!("{todo:?}"),
}
@ -416,6 +499,9 @@ macro_rules! ele_parse {
$(
$ntref($ntref),
)*
/// Inner element has been parsed and is dead;
/// this indicates that this parser is also dead.
Done_,
}
impl std::fmt::Display for $nt {
@ -449,6 +535,8 @@ macro_rules! ele_parse {
$(
Self::$ntref(st) => std::fmt::Display::fmt(st, f),
)*
Self::Done_ => write!(f, "done parsing {expected}"),
}
}
}
@ -504,18 +592,22 @@ macro_rules! ele_parse {
type Token = crate::xir::flat::XirfToken;
type Object = $objty;
type Error = [<$nt Error_>];
type Context = crate::parse::Context<crate::xir::parse::ele::EleParseCfg>;
fn parse_token(
self,
tok: Self::Token,
_: crate::parse::NoContext,
cfg: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
use crate::{
parse::{EmptyContext, Transition},
parse::Transition,
xir::flat::XirfToken,
};
use $nt::{Expecting_, RecoverEleIgnore_, RecoverEleIgnoreClosed_};
use $nt::{
Expecting_, RecoverEleIgnore_,
RecoverEleIgnoreClosed_, Done_
};
match (self, tok) {
$(
@ -525,13 +617,19 @@ macro_rules! ele_parse {
) if qname == $ntref::qname() => {
$ntref::default().delegate(
XirfToken::Open(qname, span, depth),
EmptyContext,
&mut Self::Context::default(),
|si| Transition(Self::$ntref(si)),
|| todo!("inner dead (should not happen here)"),
)
},
)*
// An unexpected token when repeating ends
// repetition and should not result in an error.
(Expecting_, tok) if cfg.repeat => {
Transition(Done_).dead(tok)
}
(Expecting_, XirfToken::Open(qname, span, depth)) => {
Transition(RecoverEleIgnore_(qname, span, depth)).err(
// Use name span rather than full `OpenSpan`
@ -558,24 +656,33 @@ macro_rules! ele_parse {
$(
(Self::$ntref(si), tok) => si.delegate(
tok,
EmptyContext,
&mut Self::Context::default(),
|si| Transition(Self::$ntref(si)),
|| todo!("inner dead"),
|| match cfg.repeat {
true => Transition(Expecting_),
false => Transition(Done_),
}
),
)*
(st @ Self::Done_, tok) => Transition(st).dead(tok),
todo => todo!("sum {todo:?}"),
}
}
fn is_accepting(&self) -> bool {
match self {
Self::RecoverEleIgnoreClosed_(..) => true,
Self::RecoverEleIgnoreClosed_(..) | Self::Done_ => true,
// Delegate entirely to the inner ParseState.
// It is desirable to maintain this state even after
// the inner parser is completed so that the inner
// state can accurately describe what took place.
// With that said,
// we will transition to `Done_` on an inner dead
// state,
// because of current `delegate` limitations.
$(
Self::$ntref(si) => si.is_accepting(),
)*

View File

@ -24,6 +24,13 @@
//! and so testing of that parsing is not duplicated here.
//! A brief visual inspection of the implementation of `ele_parse`
//! should suffice to verify this claim.
//!
//! [`Parser`] is configured to output a parse trace to stderr for tests,
//! which is visible when a test fails;
//! this aids in debugging and study.
//! To force it to output on a successful test to observe the behavior of
//! the system,
//! simply force the test to panic at the end.
use crate::{
convert::ExpectInto,
@ -44,6 +51,8 @@ const S3: Span = S2.offset_add(1).unwrap();
const S4: Span = S3.offset_add(1).unwrap();
const S5: Span = S4.offset_add(1).unwrap();
const S6: Span = S5.offset_add(1).unwrap();
const S7: Span = S6.offset_add(1).unwrap();
const S8: Span = S7.offset_add(1).unwrap();
// Some number (value does not matter).
const N: EleNameLen = 10;
@ -522,6 +531,118 @@ fn child_error_and_recovery() {
);
}
// This differs from the above test in that we encounter unexpected elements
// when we expected to find the end tag.
// This means that the element _name_ is not in error,
// but the fact that an element exists _at all_ is.
#[test]
fn child_error_and_recovery_at_close() {
#[derive(Debug, PartialEq, Eq)]
enum Foo {
Open,
Close,
}
impl Object for Foo {}
// `Sut` declares no child nonterminals,
// so only its closing tag is valid once attribute parsing ends.
ele_parse! {
type Object = Foo;
Sut := QN_PACKAGE {
@ {} => Foo::Open,
/ => Foo::Close,
}
}
let unexpected_a = "unexpected a".unwrap_into();
let unexpected_b = "unexpected b".unwrap_into();
let span_a = OpenSpan(S2, N);
let span_b = OpenSpan(S4, N);
let toks = vec![
// The first token is the expected root.
XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
// Sut is now expecting either attributes
// (of which there are none),
// or a closing element.
// In either case,
// an opening element is entirely unexpected.
XirfToken::Open(unexpected_a, span_a, Depth(1)),
// And so we should ignore it up to this point.
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
// Let's do the same thing again.
// It may be ideal to have another error exposed for each individual
// element that is unexpected,
// but for now the parser is kept simple and we simply continue
// to ignore elements until we reach the close.
XirfToken::Open(unexpected_b, span_b, Depth(1)),
// This second unexpected element is likewise ignored through its
// close.
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
// Let's mix it up a bit with some text and make sure that is
// ignored too.
XirfToken::Text("unexpected text".unwrap_into(), S5),
// Having recovered from the above tokens,
// this will end parsing for `Sut` as expected.
XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
];
let mut sut = Sut::parse(toks.into_iter());
// The first token is expected,
// and we enter attribute parsing for `Sut`.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut] Open 0
// The second token _will_ be unexpected,
// but we're parsing attributes for `Sut`,
// so we don't know that yet.
// Instead,
// the `Open` ends attribute parsing and yields a token of lookahead.
assert_eq!(
Some(Ok(Parsed::Object(Foo::Open))), // [Sut@] Open 1 (>LA)
sut.next()
);
// The token of lookahead (`Open`) is unexpected for `Sut`,
// which is expecting `Close`.
// The token should be consumed and returned in the error,
// _not_ produced as a token of lookahead,
// since we do not want to reprocess bad input.
assert_eq!(
// TODO: This references generated identifiers.
Some(Err(ParseError::StateError(SutError_::CloseExpected_(
XirfToken::Open(unexpected_a, span_a, Depth(1)),
)))),
sut.next(),
);
// The recovery state must not be in an accepting state,
// because we didn't close at the root depth yet.
// A failed `finalize` hands the parser back so that we can continue.
let (mut sut, _) =
sut.finalize().expect_err("recovery must not be accepting");
// The next token is the self-closing `Close` for the unexpected opening
// tag.
// Since we are in recovery,
// it should be ignored.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut!] Close 1
// We are still in recovery,
// and so we should still be ignoring tokens.
// It may be more ideal to throw individual errors per unexpected
// element
// (though doing so may be noisy if there is a lot),
// but for now the parser is kept simple.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut!] Open 1
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut!] Close 1
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut!] Text
// Having recovered from the error,
// we should now be able to close successfully.
assert_eq!(Some(Ok(Parsed::Object(Foo::Close))), sut.next());
sut.finalize()
.expect("recovery must complete in an accepting state");
}
// A nonterminal of the form `(A | ... | Z)` should accept the element of
// any of the inner nonterminals.
#[test]
@ -581,6 +702,76 @@ fn sum_nonterminal_accepts_any_valid_element() {
});
}
// Compose sum NTs with a parent element.
//
// `Sut` expects the sum nonterminal `AB`
//   (matching either `A` or `B`)
//   followed by a `B`;
//     the input provides an `A` and then a `B`.
#[test]
fn sum_nonterminal_as_child_element() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Open(QName),
        Close(QName),
    }

    impl crate::parse::Object for Foo {}

    // QNames don't matter as long as they are unique.
    // Uniqueness is what lets the assertions below tell the root's
    //   objects apart from those of its children,
    //     since each object carries only the QName.
    const QN_ROOT: QName = QN_PACKAGE;
    const QN_A: QName = QN_DIM;
    const QN_B: QName = QN_CLASSIFY;

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::Open(QN_ROOT),
            / => Foo::Close(QN_ROOT),

            // A|B followed by a B.
            AB,
            B,
        }

        AB := (A | B);

        A := QN_A {
            @ {} => Foo::Open(QN_A),
            / => Foo::Close(QN_A),
        }

        B := QN_B {
            @ {} => Foo::Open(QN_B),
            / => Foo::Close(QN_B),
        }
    }

    let toks = vec![
        XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
        // A
        XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // B
        XirfToken::Open(QN_B, OpenSpan(S3, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
        XirfToken::Close(Some(QN_ROOT), CloseSpan(S5, N), Depth(0)),
    ];

    use Parsed::*;

    assert_eq!(
        Ok(vec![
            Incomplete,                  // [Sut]  Root Open
            Object(Foo::Open(QN_ROOT)),  // [Sut@] A Open (>LA)
            Incomplete,                  // [A]    A Open (<LA)
            Object(Foo::Open(QN_A)),     // [A@]   A Close (>LA)
            Object(Foo::Close(QN_A)),    // [A]    A Close (<LA)
            Incomplete,                  // [B]    B Open
            Object(Foo::Open(QN_B)),     // [B@]   B Close (>LA)
            Object(Foo::Close(QN_B)),    // [B]    B Close (<LA)
            Object(Foo::Close(QN_ROOT)), // [Sut]  Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}
#[test]
fn sum_nonterminal_error_recovery() {
#[derive(Debug, PartialEq, Eq)]
@ -669,3 +860,304 @@ fn sum_nonterminal_error_recovery() {
sut.finalize()
.expect("recovery must complete in an accepting state");
}
// A nonterminal reference suffixed with `[*]` may match its element any
// number of times before parsing moves on to the next nonterminal.
#[test]
fn child_repetition() {
#[derive(Debug, PartialEq, Eq)]
enum Foo {
RootOpen,
ChildOpen(QName),
ChildClose(QName),
RootClose,
}
impl crate::parse::Object for Foo {}
const QN_ROOT: QName = QN_PACKAGE;
const QN_A: QName = QN_DIM;
const QN_B: QName = QN_CLASSIFY;
const QN_C: QName = QN_EXPORT;
ele_parse! {
type Object = Foo;
Sut := QN_PACKAGE {
@ {} => Foo::RootOpen,
/ => Foo::RootClose,
// Two adjacent repeating followed by a non-repeating.
// While there's nothing inherently concerning here,
// this is just meant to test both types of following states.
ChildA[*],
ChildB[*],
ChildC,
}
ChildA := QN_A {
@ {} => Foo::ChildOpen(QN_A),
/ => Foo::ChildClose(QN_A),
}
ChildB := QN_B {
@ {} => Foo::ChildOpen(QN_B),
/ => Foo::ChildClose(QN_B),
}
ChildC := QN_C {
@ {} => Foo::ChildOpen(QN_C),
/ => Foo::ChildClose(QN_C),
}
}
let toks = vec![
XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
// ChildA (1)
XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
// ChildA (2)
XirfToken::Open(QN_A, OpenSpan(S3, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
// ChildB (1)
XirfToken::Open(QN_B, OpenSpan(S4, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
// ChildB (2)
XirfToken::Open(QN_B, OpenSpan(S5, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
// ChildC (only)
XirfToken::Open(QN_C, OpenSpan(S6, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S7), Depth(1)),
XirfToken::Close(Some(QN_ROOT), CloseSpan(S8, N), Depth(0)),
];
use Parsed::*;
// Note that we cannot observe the handoff after the repeating parsers
// below because `Parser` immediately recurs.
// For example,
// when ChildA has been closed,
// it awaits the next token to see if it should reset or if it should
// emit a dead state.
// If it receives `QN_A`,
// then it'll reset.
// However,
// `QN_B` will cause it to emit `dead` with the `Open` token as
// lookahead,
// which then gets turned into `Incomplete` with lookahead by
// `ParseState::delegate`,
// which then causes `Parser` to immediately recur,
// masking the `Incomplete` entirely.
// And so what we see below is a cleaner,
// albeit not entirely honest,
// script.
//
// (Also please note that the above description is true as of the time
// of writing,
// but it's possible that this comment has not been updated since
// then.)
assert_eq!(
Ok(vec![
Incomplete, // [Sut] Root Open
Object(Foo::RootOpen), // [Sut@] ChildA Open (>LA)
Incomplete, // [ChildA] ChildA Open (<LA)
Object(Foo::ChildOpen(QN_A)), // [ChildA@] ChildA Close (>LA)
Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (<LA)
Incomplete, // [ChildA] ChildA Open (<LA)
Object(Foo::ChildOpen(QN_A)), // [ChildA@] ChildA Close (>LA)
Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (<LA)
Incomplete, // [ChildB] ChildB Open (<LA)
Object(Foo::ChildOpen(QN_B)), // [ChildB@] ChildB Close (>LA)
Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (<LA)
Incomplete, // [ChildB] ChildB Open (<LA)
Object(Foo::ChildOpen(QN_B)), // [ChildB@] ChildB Close (>LA)
Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (<LA)
Incomplete, // [ChildC] ChildC Open (<LA)
Object(Foo::ChildOpen(QN_C)), // [ChildC@] ChildC Close (>LA)
Object(Foo::ChildClose(QN_C)), // [ChildC] ChildC Close (<LA)
Object(Foo::RootClose), // [Sut] Root Close
]),
Sut::parse(toks.into_iter()).collect(),
);
}
// An unexpected element following a repeating child must not produce an
// error from the child itself;
// the error is instead reported by the parent (`Sut`),
// which then recovers by ignoring input until its own closing tag.
#[test]
fn child_repetition_invalid_tok_dead() {
#[derive(Debug, PartialEq, Eq)]
enum Foo {
RootOpen,
ChildOpen,
ChildClose,
RootClose,
}
impl crate::parse::Object for Foo {}
// QNames don't matter as long as they are unique.
const QN_ROOT: QName = QN_PACKAGE;
const QN_CHILD: QName = QN_DIM;
let unexpected: QName = "unexpected".unwrap_into();
ele_parse! {
type Object = Foo;
Sut := QN_PACKAGE {
@ {} => Foo::RootOpen,
/ => Foo::RootClose,
Child[*],
}
Child := QN_CHILD {
@ {} => Foo::ChildOpen,
/ => Foo::ChildClose,
}
}
let toks = vec![
XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
// Child (success)
XirfToken::Open(QN_CHILD, OpenSpan(S2, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
// Repeat (unexpected)
XirfToken::Open(unexpected, OpenSpan(S2, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
XirfToken::Close(Some(QN_ROOT), CloseSpan(S8, N), Depth(0)),
];
let mut sut = Sut::parse(toks.into_iter());
use Parsed::*;
let mut next = || sut.next();
assert_eq!(next(), Some(Ok(Incomplete))); // [Sut] Open
assert_eq!(next(), Some(Ok(Object(Foo::RootOpen)))); // [Sut@] Open >
assert_eq!(next(), Some(Ok(Incomplete))); // [Child] Open <
assert_eq!(next(), Some(Ok(Object(Foo::ChildOpen)))); // [Child@] Close >
assert_eq!(next(), Some(Ok(Object(Foo::ChildClose)))); // [Child] Close <
// Intuitively,
// we may want to enter recovery and ignore the element.
// But the problem is that we need to emit a dead state so that other
// parsers can handle the input,
// because it may simply be the case that our repetition is over.
//
// Given that dead state and token of lookahead,
// `Parser` will immediately recurse to re-process the erroneous
// `Open`.
// Since the next token expected after the `Child` NT is `Close`,
// this will result in an error and trigger recovery _on `Sut`_,
// which will ignore the erroneous `Open`.
assert_eq!(
next(),
// TODO: This references generated identifiers.
Some(Err(ParseError::StateError(SutError_::CloseExpected_(
XirfToken::Open(unexpected, OpenSpan(S2, N), Depth(1)),
)))),
);
// This next token is also ignored as part of recovery.
assert_eq!(next(), Some(Ok(Incomplete))); // [Sut] Child Close
// Finally,
// `Sut` encounters its expected `Close` and ends recovery.
assert_eq!(next(), Some(Ok(Object(Foo::RootClose)))); // [Sut] Close
sut.finalize()
.expect("recovery must complete in an accepting state");
}
// Repetition on a nonterminal of the form `(A | ... | Z)` will allow any
// number of `A` through `Z` in any order.
// This is similar to the `child_repetition` test,
// except that the repeated nonterminal is a sum rather than a single
// element.
#[test]
fn sum_repetition() {
#[derive(Debug, PartialEq, Eq)]
enum Foo {
Open(QName),
Close(QName),
}
impl crate::parse::Object for Foo {}
const QN_ROOT: QName = QN_PACKAGE;
const QN_A: QName = QN_DIM;
const QN_B: QName = QN_CLASSIFY;
const QN_C: QName = QN_EXPORT;
ele_parse! {
type Object = Foo;
Sut := QN_PACKAGE {
@ {} => Foo::Open(QN_ROOT),
/ => Foo::Close(QN_ROOT),
// A|B|C in any order,
// any number of times.
ABC[*],
}
ABC := (A | B | C );
A := QN_A {
@ {} => Foo::Open(QN_A),
/ => Foo::Close(QN_A),
}
B := QN_B {
@ {} => Foo::Open(QN_B),
/ => Foo::Close(QN_B),
}
C := QN_C {
@ {} => Foo::Open(QN_C),
/ => Foo::Close(QN_C),
}
}
// The input deliberately interleaves the alternatives (A, A, B, C, B).
let toks = vec![
XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
// A (1)
XirfToken::Open(QN_A, OpenSpan(S1, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S2), Depth(1)),
// A (2)
XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
// B (1)
XirfToken::Open(QN_B, OpenSpan(S3, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S4), Depth(1)),
// C (1)
XirfToken::Open(QN_C, OpenSpan(S4, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
// B (2)
XirfToken::Open(QN_B, OpenSpan(S5, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S6), Depth(1)),
XirfToken::Close(Some(QN_ROOT), CloseSpan(S7, N), Depth(0)),
];
use Parsed::*;
// See notes on preceding repetition test `child_repetition` regarding
// the suppression of `Incomplete` for dead states.
assert_eq!(
Ok(vec![
Incomplete, // [Sut] Root Open
Object(Foo::Open(QN_ROOT)), // [Sut@] A Open (>LA)
Incomplete, // [A] A Open (<LA)
Object(Foo::Open(QN_A)), // [A@] A Close (>LA)
Object(Foo::Close(QN_A)), // [A] A Close (<LA)
Incomplete, // [A] A Open
Object(Foo::Open(QN_A)), // [A@] A Close (>LA)
Object(Foo::Close(QN_A)), // [A] A Close (<LA)
Incomplete, // [B] B Open
Object(Foo::Open(QN_B)), // [B@] B Close (>LA)
Object(Foo::Close(QN_B)), // [B] B Close (<LA)
Incomplete, // [C] C Open
Object(Foo::Open(QN_C)), // [C@] C Close (>LA)
Object(Foo::Close(QN_C)), // [C] C Close (<LA)
Incomplete, // [B] B Open
Object(Foo::Open(QN_B)), // [B@] B Close (>LA)
Object(Foo::Close(QN_B)), // [B] B Close (<LA)
Object(Foo::Close(QN_ROOT)), // [Sut] Root Close
]),
Sut::parse(toks.into_iter()).collect(),
);
}