tamer: xir::parse::ele: Initial element parser generator concept

This begins generating parsers that are capable of parsing elements.  I need
to move on, so this abstraction isn't going to go as far as it could, but
let's see where it takes me.

This was the work that required the recent lookahead changes, which have
been detailed in previous commits.

This initial support is basic, but robust.  It supports parsing elements
with attributes and children, but it does not yet support the equivalent of
the Kleene star (`*`).  Such support will likely be added by supporting
parsers that are able to recurse on their own definition in tail position,
which will also require supporting parsers that do not add to the stack.

This generates parsers that, like all the other parsers, use enums to
provide a typed stack.  Stitched parsers produce a nested stack that is
always bounded in size.  Fortunately, expressions---which can nest
deeply---do not need to maintain ancestor context on the stack, and so this
should work fine; we can get away with this because XIRF ensures proper
nesting for us.  Statements that _do_ need to maintain such context are not
nested.

This also does not yet support emitting an object on closing tag, which
will be necessary for NIR, which will be a streaming IR that is "near" to
the source XML in structure.  This will then be used to lower into AIR for
the ASG, which gives structure needed for further analysis.

More information to come; I just want to get this committed to serve as a
mental synchronization point and clear my head, since I've been sitting on
these changes for so long and have to keep stashing them as I tumble down
rabbit holes covered in yak hair.

DEV-7145
main
Mike Gerwitz 2022-07-13 13:55:32 -04:00
parent c9b3b84f90
commit 73efc59582
11 changed files with 923 additions and 33 deletions

View File

@ -181,6 +181,13 @@ pub type Delim<const LEFT: &'static str, const RIGHT: &'static str, W> =
/// so this is _not_ safe against format escapes.
pub type Tt<W> = Delim<"`", "`", W>;
/// Quote text that would conventionally be delimited in a teletypewriter
/// font.
///
/// This is a more terse alternative to [`Tt`] when formatter composition is
/// unneeded.
/// It is simply [`Tt`] applied to [`Raw`].
pub type TtQuote = Tt<Raw>;
/// Prefix with a single space.
pub type Sp<W> = Prefix<" ", W>;

View File

@ -255,6 +255,57 @@ pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
}
}
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`
///   until this parser yields an [`Object`].
///
/// This method is appropriate for [`ParseState`]s that yield an object
///   after they have completed parsing.
/// It is not suitable for [`ParseState`]s that yield multiple objects
///   during parsing,
///     which typically indicate completion with a dead state
///     (see [`ParseState::delegate`]).
///
/// The token `tok` is handed to this parser and the outcome is
///   translated for the parent `SP` as follows:
///
///   - If an object is produced,
///       it is given to `objf`,
///         which determines the parent's transition;
///       any token of lookahead is carried over onto that result.
///   - If parsing is otherwise incomplete or fails,
///       the new state is wrapped for the parent using `into`,
///         and any error is converted into the parent's error type.
///   - A dead state is not yet handled and will panic;
///       `_dead` is accepted but is currently unused.
///
/// _This method is still under development and has outstanding TODOs._
fn delegate_until_obj<SP, C>(
    self,
    tok: <Self as ParseState>::Token,
    mut context: C,
    into: impl FnOnce(Self) -> Transition<SP>,
    _dead: impl FnOnce() -> Transition<SP>,
    objf: impl FnOnce(<Self as ParseState>::Object) -> TransitionResult<SP>,
) -> TransitionResult<SP>
where
    Self: PartiallyStitchableParseState<SP>,
    C: AsMut<<Self as ParseState>::Context>,
{
    use ParseStatus::{Incomplete, Object as Obj};

    let TransitionResult(Transition(next_st), outcome) =
        self.parse_token(tok, context.as_mut());

    match outcome {
        TransitionData::Dead(Lookahead(_unused_tok)) => {
            // Or restrict this to certain types of ParseState
            todo!("expecting object, so what should we do on Dead?")
        }

        // The object ends delegation;
        //   the caller decides where the parent goes next.
        TransitionData::Result(Ok(Obj(parsed)), la) => {
            objf(parsed).maybe_with_lookahead(la)
        }

        // Anything else keeps us in the (wrapped) child state.
        TransitionData::Result(other, la) => {
            let parent_st = into(next_st);
            let status = match other {
                Ok(_) => Ok(Incomplete),
                Err(e) => Err(e.into()),
            };

            TransitionResult(parent_st, TransitionData::Result(status, la))
        }
    }
}
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`
/// while consuming objects during `SP` state transition.
///

View File

@ -81,6 +81,25 @@ impl<S: ParseState> TransitionResult<S> {
}
}
}
/// Attach a single token of lookahead to this transition,
///   if one is provided.
///
/// When `lookahead` is [`None`],
///   `self` is returned untouched.
///
/// This is convenient when forwarding the output of another parser,
///   which may or may not have produced a lookahead token.
/// See [`with_lookahead`](TransitionResult::with_lookahead) for more
///   information.
pub(in super::super) fn maybe_with_lookahead(
    self,
    lookahead: Option<Lookahead<S::Token>>,
) -> Self {
    if let Some(Lookahead(tok)) = lookahead {
        self.with_lookahead(tok)
    } else {
        self
    }
}
}
/// Token to use as a lookahead token in place of the next token from the

View File

@ -439,6 +439,7 @@ pub mod st {
L_BOOLEAN: cid "boolean",
L_CGEN: cid "cgen",
L_CLASS: cid "class",
L_CLASSIFY: cid "classify",
L_CONST: cid "const",
L_DEP: cid "dep",
L_DESC: cid "desc",
@ -447,6 +448,7 @@ pub mod st {
L_ELIG_CLASS_YIELDS: tid "elig-class-yields",
L_EMPTY: cid "empty",
L_EXEC: cid "exec",
L_EXPORT: cid "export",
L_EXTERN: cid "extern",
L_FALSE: cid "false",
L_FLOAT: cid "float",
@ -482,10 +484,10 @@ pub mod st {
L_SRC: cid "src",
L_STATIC: cid "static",
L_SYM: cid "sym",
L_SYM_DEPS: cid "sym-deps",
L_SYM_DEP: cid "sym-dep",
L_SYM_REF: cid "sym-ref",
L_SYMTABLE: cid "symtable",
L_SYM_DEP: cid "sym-dep",
L_SYM_DEPS: cid "sym-deps",
L_SYM_REF: cid "sym-ref",
L_TITLE: cid "title",
L_TPL: cid "tpl",
L_TRUE: cid "true",

View File

@ -147,6 +147,16 @@ impl Attr {
pub fn value(&self) -> SymbolId {
self.1
}
/// Borrow the [`AttrSpan`] associated with this attribute.
///
/// A number of different spans can be derived from the attribute span;
///   see [`AttrSpan`] for more information.
pub fn attr_span(&self) -> &AttrSpan {
    let Attr(.., span) = self;
    span
}
}
impl Token for Attr {

View File

@ -19,10 +19,17 @@
//! XIR formatting types for use with [`crate::fmt`]
use crate::fmt::{AndQualConjList, Prefix, Raw, Tt};
use crate::fmt::{AndQualConjList, Delim, Prefix, Raw, Tt};
/// Denote an XML attribute by prefixing the value with `@`.
pub type XmlAttr = Prefix<"@", Raw>;
/// A list of XML attributes [`Tt`]-quoted.
pub type XmlAttrList = AndQualConjList<"attribute", "attributes", Tt<XmlAttr>>;
/// Opening tag for XML element.
///
/// The wrapped value is delimited by `<` and `>`.
pub type OpenXmlEle = Delim<"<", ">", Raw>;
/// Opening tag for XML element as teletypewriter
/// (for use in sentences).
///
/// This is [`OpenXmlEle`] wrapped in [`Tt`].
pub type TtOpenXmlEle = Tt<OpenXmlEle>;

View File

@ -23,5 +23,6 @@
//! processing of the input XML to ensure well-formedness.
mod attr;
mod ele;
pub use attr::{AttrParseError, AttrParseState};

View File

@ -155,6 +155,16 @@ pub trait AttrParseState: ParseState {
fn required_missing(&self) -> Vec<QName>;
}
/// Begin parsing attributes for the element `ele` located at `span`.
///
/// This is a thin wrapper around [`AttrParseState::with_element`] that is
///   useful when the type of [`AttrParseState`] `S` can be inferred,
///     so that the expression reads more like natural language.
#[cfg(test)] // currently only used by tests; remove when ready
pub fn parse_attrs<S: AttrParseState>(ele: QName, span: Span) -> S {
    <S as AttrParseState>::with_element(ele, span)
}
#[macro_export]
macro_rules! attr_parse {
($(#[$sattr:meta])*
@ -165,22 +175,12 @@ macro_rules! attr_parse {
)*
}
) => {
use crate::{
parse,
xir::{
attr,
parse::{AttrParseError, AttrParseState},
flat,
QName,
}
};
$(
// This provides a nice error on $ty itself at the call site,
// rather than relying on `Into::into` to cause the error
// later on,
// which places the error inside the macro definition.
assert_impl_all!($ty: From<attr::Attr>);
assert_impl_all!($ty: From<crate::xir::attr::Attr>);
)*
#[doc=concat!("Parser producing [`", stringify!($struct_name), "`].")]
@ -196,7 +196,7 @@ macro_rules! attr_parse {
#[derive(Debug, PartialEq, Eq)]
$vis struct $state_name {
#[doc(hidden)]
___ctx: (QName, Span),
___ctx: (crate::xir::QName, Span),
#[doc(hidden)]
___done: bool,
$(
@ -204,8 +204,8 @@ macro_rules! attr_parse {
)*
}
impl AttrParseState for $state_name {
fn with_element(ele: QName, span: Span) -> Self {
impl crate::xir::parse::AttrParseState for $state_name {
fn with_element(ele: crate::xir::QName, span: Span) -> Self {
Self {
___ctx: (ele, span),
___done: false,
@ -215,7 +215,7 @@ macro_rules! attr_parse {
}
}
fn element_name(&self) -> QName {
fn element_name(&self) -> crate::xir::QName {
match self.___ctx {
(name, _) => name,
}
@ -229,12 +229,15 @@ macro_rules! attr_parse {
fn finalize_attr(
self,
) -> Result<Self::Object, AttrParseError<Self>> {
) -> Result<
Self::Object,
crate::xir::parse::AttrParseError<Self>,
> {
// Validate required fields before we start moving data.
$(
attr_parse!(@if_missing_req $($fmod)? self.$field {
$crate::attr_parse!(@if_missing_req $($fmod)? self.$field {
return Err(
AttrParseError::MissingRequired(
crate::xir::parse::AttrParseError::MissingRequired(
self,
)
)
@ -243,7 +246,7 @@ macro_rules! attr_parse {
let obj = $struct_name {
$(
$field: attr_parse!(
$field: $crate::attr_parse!(
@maybe_value $($fmod)? self.$field
),
)*
@ -252,12 +255,12 @@ macro_rules! attr_parse {
Ok(obj)
}
fn required_missing(&self) -> Vec<QName> {
fn required_missing(&self) -> Vec<crate::xir::QName> {
#[allow(unused_mut)]
let mut missing = vec![];
$(
attr_parse!(@if_missing_req $($fmod)? self.$field {
$crate::attr_parse!(@if_missing_req $($fmod)? self.$field {
missing.push($qname);
});
)*
@ -267,7 +270,9 @@ macro_rules! attr_parse {
}
impl $state_name {
fn done_with_element(ele: QName, span: Span) -> Self {
fn done_with_element(ele: crate::xir::QName, span: Span) -> Self {
use crate::xir::parse::attr::AttrParseState;
let mut new = Self::with_element(ele, span);
new.___done = true;
new
@ -289,9 +294,9 @@ macro_rules! attr_parse {
)*
}
impl parse::Object for $struct_name {}
impl crate::parse::Object for $struct_name {}
impl Display for $state_name {
impl std::fmt::Display for $state_name {
/// Additional error context shown in diagnostic messages for
/// certain variants of [`ParseError`].
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
@ -300,18 +305,24 @@ macro_rules! attr_parse {
}
}
impl parse::ParseState for $state_name {
type Token = flat::XirfToken;
impl crate::parse::ParseState for $state_name {
type Token = crate::xir::flat::XirfToken;
type Object = $struct_name;
type Error = AttrParseError<Self>;
type Error = crate::xir::parse::AttrParseError<Self>;
fn parse_token(
#[allow(unused_mut)]
mut self,
tok: Self::Token,
_: parse::NoContext,
_: crate::parse::NoContext,
) -> crate::parse::TransitionResult<Self> {
use crate::parse::{Transition, Transitionable, ParseStatus};
use crate::xir::attr::Attr;
use crate::xir::{
flat,
parse::{AttrParseError, AttrParseState}
};
#[allow(unused_imports)]
use crate::xir::attr::Attr; // unused if no attrs
match tok {
$(

View File

@ -0,0 +1,348 @@
// XIR element parser generator
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Element parser generator for parsing of [XIRF](super::super::flat).
#[macro_export]
macro_rules! ele_parse {
// Entry point.
//
// The object type is captured once and threaded through every internal
//   rule as `<$objty>`.
// The trailing semicolon allows the macro to be used in item position as
//   well as statement position.
(type Object = $objty:ty; $($rest:tt)*) => {
    $crate::ele_parse!(@!nonterm_decl <$objty> $($rest)*);
};

// Nonterminal declaration of the form `Nt := ...`.
(@!nonterm_decl <$objty:ty> $nt:ident := $($rest:tt)*) => {
    $crate::ele_parse!(@!nonterm_def <$objty> $nt $($rest)*);
};

// Element nonterminal:
//   `Nt := QNAME { ... }`.
//
// The body is expanded into a parser for the element and then the
//   remaining definitions are processed by re-entering at the top.
(@!nonterm_def <$objty:ty> $nt:ident $qname:ident
    { $($matches:tt)* } $($rest:tt)*
) => {
    $crate::ele_parse!(@!ele_expand_body <$objty> $nt $qname $($matches)*);

    $crate::ele_parse! {
        type Object = $objty;
        $($rest)*
    }
};

// Sum nonterminal:
//   `Nt := (A | B | ...),`.
//
// Every alternative
//   (the first one included)
//   is forwarded to `@!ele_dfn_sum`.
// The object type must be provided again when re-entering at the top,
//   otherwise the remaining definitions would not match any rule.
(@!nonterm_def <$objty:ty> $nt:ident
    ($ntreffirst:ident $(| $ntref:ident)+), $($rest:tt)*
) => {
    $crate::ele_parse!(@!ele_dfn_sum $nt [$ntreffirst $($ntref)*]);

    $crate::ele_parse! {
        type Object = $objty;
        $($rest)*
    }
};

// No definitions remain.
(@!nonterm_decl <$objty:ty>) => {};
// Expand the provided data to a more verbose form that provides the
//   context necessary for state transitions.
//
// For nonterminal references `A, B`,
//   the generated transition list is
//   `@ -> (Nt::A), (Nt::A) -> (Nt::B), (Nt::B) -> (Nt::ExpectClose_),`;
//     with no references it is simply `@ -> (Nt::ExpectClose_),`.
(@!ele_expand_body <$objty:ty> $nt:ident $qname:ident
    @ { $($attrbody:tt)* } => $attrmap:expr,

    // Nonterminal references are provided as a list.
    $(
        $ntref:ident,
    )*
) => {
    // `$crate::` ensures that the recursive invocation resolves
    //   regardless of which macros are in textual scope at the call site.
    $crate::ele_parse! {
        @!ele_dfn_body <$objty> $nt $qname
        @ { $($attrbody)* } => $attrmap,

        <> {
            $(
                $ntref,
            )*
        }

        // Generate state transitions of the form `(S) -> (S')`.
        // Each reference both terminates the preceding transition and
        //   begins the next,
        //     with `ExpectClose_` terminating the final one.
        -> {
            @ ->
            $(
                ($nt::$ntref),
                ($nt::$ntref) ->
            )* ($nt::ExpectClose_),
        }
    }
};
// Generate the parser for a single element nonterminal.
//
// This produces:
//   - an attribute parser (via `attr_parse!`);
//   - a state enum named `$nt`;
//   - an error enum named `<$nt>Error_`; and
//   - a `ParseState` implementation for `$nt`.
(@!ele_dfn_body <$objty:ty> $nt:ident $qname:ident
    // Attribute definition special form.
    @ {
        // We must lightly parse attributes here so that we can retrieve
        //   the field identifiers that may be later used as bindings in
        //   `$attrmap`.
        $(
            $(#[$fattr:meta])*
            $field:ident: ($fmatch:tt) => $fty:ty,
        )*
    } => $attrmap:expr,

    // Nonterminal references.
    <> {
        $(
            $ntref:ident,
        )*
    }

    // State transitions as generated by `@!ele_expand_body`.
    -> {
        @ -> ($ntfirst:path),
        $(
            ($ntprev:path) -> ($ntnext:path),
        )*
    }
) => {
    // TODO
    paste::paste! {
        crate::attr_parse! {
            struct [<$nt AttrsState_>] -> [<$nt Attrs_>] {
                $(
                    $(#[$fattr])*
                    $field: ($fmatch) => $fty,
                )*
            }
        }

        #[doc=concat!("Parser for element [`", stringify!($qname), "`].")]
        #[derive(Debug, PartialEq, Eq, Default)]
        enum $nt {
            #[doc=concat!(
                "Expecting opening tag for element [`",
                stringify!($qname),
                "`]."
            )]
            #[default]
            Expecting_,

            /// Recovery state ignoring all remaining tokens for this
            ///   element.
            // `Depth` is fully qualified so that callers of this macro
            //   need not import it themselves.
            RecoverEleIgnore_(
                crate::xir::QName,
                crate::xir::OpenSpan,
                crate::xir::flat::Depth,
            ),

            /// Recovery has completed because the ignored element has
            ///   been closed.
            RecoverEleIgnoreClosed_(crate::xir::QName, crate::xir::CloseSpan),

            /// Parsing element attributes.
            Attrs_([<$nt AttrsState_>]),

            $(
                /// Parsing has been delegated to a child nonterminal.
                $ntref($ntref),
            )*

            /// Expecting the closing tag for this element.
            ExpectClose_(()),

            /// Closing tag found and parsing of the element is
            ///   complete.
            Closed_(crate::span::Span),
        }

        impl std::fmt::Display for $nt {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                use crate::{
                    fmt::{DisplayWrapper, TtQuote},
                    xir::fmt::TtOpenXmlEle,
                };

                match self {
                    Self::Expecting_ => write!(
                        f,
                        "expecting opening tag {}",
                        TtOpenXmlEle::wrap($qname),
                    ),
                    Self::RecoverEleIgnore_(name, ..) => write!(
                        f,
                        "attempting to recover by ignoring element \
                         with unexpected name {given} \
                         (expected {expected})",
                        given = TtQuote::wrap(name),
                        expected = TtQuote::wrap($qname),
                    ),

                    Self::Attrs_(sa) => todo!("Attrs_ Display: {sa:?}"),
                    Self::Closed_(_) => write!(
                        f,
                        "element {} closed",
                        TtQuote::wrap($qname)
                    ),
                    // Child nonterminals describe themselves.
                    $(
                        Self::$ntref(st) => std::fmt::Display::fmt(st, f),
                    )*
                    // Remaining states have no description yet.
                    todo => todo!("other Display: {todo:?}"),
                }
            }
        }

        #[derive(Debug, PartialEq)]
        enum [<$nt Error_>] {
            /// An element was encountered with a name other than the
            ///   one expected by this parser.
            UnexpectedEle_(crate::xir::QName, crate::span::Span),

            /// Error while parsing this element's attributes.
            Attrs_(crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>),

            $(
                /// Error produced by a child nonterminal.
                $ntref([<$ntref Error_>]),
            )*
        }

        impl From<crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>>
            for [<$nt Error_>]
        {
            fn from(
                e: crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>
            ) -> Self {
                [<$nt Error_>]::Attrs_(e)
            }
        }

        $(
            impl From<[<$ntref Error_>]> for [<$nt Error_>] {
                fn from(e: [<$ntref Error_>]) -> Self {
                    [<$nt Error_>]::$ntref(e)
                }
            }
        )*

        impl std::error::Error for [<$nt Error_>] {
            fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
                // TODO
                None
            }
        }

        impl std::fmt::Display for [<$nt Error_>] {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                use crate::{
                    fmt::DisplayWrapper,
                    xir::fmt::TtOpenXmlEle,
                };

                match self {
                    Self::UnexpectedEle_(name, _) => {
                        write!(f, "unexpected {}", TtOpenXmlEle::wrap(name))
                    }
                    Self::Attrs_(e) => std::fmt::Display::fmt(e, f),
                    $(
                        Self::$ntref(e) => std::fmt::Display::fmt(e, f),
                    )*
                }
            }
        }

        impl crate::diagnose::Diagnostic for [<$nt Error_>] {
            fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
                todo!()
            }
        }

        impl crate::parse::ParseState for $nt {
            type Token = crate::xir::flat::XirfToken;
            type Object = $objty;
            type Error = [<$nt Error_>];

            fn parse_token(
                self,
                tok: Self::Token,
                _: crate::parse::NoContext,
            ) -> crate::parse::TransitionResult<Self> {
                use crate::{
                    parse::{EmptyContext, Transition},
                    xir::{
                        flat::XirfToken,
                        parse::attr::parse_attrs,
                    },
                };

                use $nt::{
                    Attrs_, Expecting_, RecoverEleIgnore_,
                    RecoverEleIgnoreClosed_, ExpectClose_, Closed_
                };

                match (self, tok) {
                    // The expected element:
                    //   begin parsing its attributes.
                    (Expecting_, XirfToken::Open(qname, span, ..)) if qname == $qname => {
                        Transition(Attrs_(parse_attrs(qname, span.tag_span())))
                            .incomplete()
                    },

                    // Any other element is an error;
                    //   recover by ignoring it up to its closing tag.
                    (Expecting_, XirfToken::Open(qname, span, depth)) => {
                        Transition(RecoverEleIgnore_(qname, span, depth)).err(
                            [<$nt Error_>]::UnexpectedEle_(qname, span.name_span())
                        )
                    },

                    // Only a close at the same depth as the offending
                    //   open ends recovery.
                    (
                        RecoverEleIgnore_(qname, _, depth_open),
                        XirfToken::Close(_, span, depth_close)
                    ) if depth_open == depth_close => {
                        Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
                    },

                    (st @ RecoverEleIgnore_(..), _) => {
                        Transition(st).incomplete()
                    },

                    // Delegate to the attribute parser until it yields
                    //   the parsed attributes,
                    //     which are then destructured into bindings for
                    //     use by `$attrmap`.
                    (Attrs_(sa), tok) => {
                        sa.delegate_until_obj(
                            tok,
                            EmptyContext,
                            |sa| Transition(Attrs_(sa)),
                            || unreachable!("see ParseState::delegate_until_obj dead"),
                            |attrs| {
                                let obj = match attrs {
                                    [<$nt Attrs_>] {
                                        $(
                                            $field,
                                        )*
                                    } => $attrmap,
                                };

                                Transition($ntfirst(Default::default())).ok(obj)
                            }
                        )
                    },

                    // Delegate to each child nonterminal in sequence,
                    //   moving on to the next state once the child is
                    //   dead.
                    $(
                        ($ntprev(st_inner), tok) => {
                            st_inner.delegate(
                                tok,
                                EmptyContext,
                                |si| Transition($ntprev(si)),
                                || Transition($ntnext(Default::default()))
                            )
                        },
                    )*

                    // XIRF ensures proper nesting,
                    //   so this must be our own closing tag.
                    (ExpectClose_(_), XirfToken::Close(_, span, _)) =>
                        Transition(Closed_(span.tag_span())).incomplete(),

                    // TODO: Use `is_accepting` guard if we do not utilize
                    //   exhaustiveness check.
                    (st @ (Closed_(..) | RecoverEleIgnoreClosed_(..)), tok) =>
                        Transition(st).dead(tok),

                    todo => todo!("{todo:?}"),
                }
            }

            fn is_accepting(&self) -> bool {
                matches!(*self, Self::Closed_(..) | Self::RecoverEleIgnoreClosed_(..))
            }
        }
    }
};
// Generate a sum type for a nonterminal that is a choice between other
// nonterminals.
//
// Each referenced nonterminal becomes a variant of the same name that
// wraps that nonterminal's type.
// Only the enum itself is generated by this rule.
(@!ele_dfn_sum $nt:ident [$($ntref:ident)*]) => {
#[derive(Debug, PartialEq, Eq)]
enum $nt {
$(
$ntref($ntref),
)*
}
};
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,431 @@
// XIR element parser generator tests
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Element parser generator tests.
//!
//! It is expected to be understood for these tests that `ele_parse`
//! directly invokes `attr_parse` to perform all attribute parsing,
//! and so testing of that parsing is not duplicated here.
//! A brief visual inspection of the implementation of `ele_parse`
//! should suffice to verify this claim.
use crate::{
convert::ExpectInto,
parse::{Object, ParseError, ParseState, Parsed},
span::{Span, DUMMY_SPAN},
sym::SymbolId,
xir::{
attr::{Attr, AttrSpan},
flat::{Depth, XirfToken},
st::qname::*,
CloseSpan, EleNameLen, EleSpan, OpenSpan,
},
};
// Spans used to tell tokens apart in the tests below;
// each is derived from the previous one using `offset_add(1)`.
const S1: Span = DUMMY_SPAN;
const S2: Span = S1.offset_add(1).unwrap();
const S3: Span = S2.offset_add(1).unwrap();
const S4: Span = S3.offset_add(1).unwrap();
const S5: Span = S4.offset_add(1).unwrap();
const S6: Span = S5.offset_add(1).unwrap();
// Some number (value does not matter).
const N: EleNameLen = 10;
#[test]
fn empty_element_no_attrs() {
    #[derive(Debug, PartialEq, Eq)]
    struct Foo;

    impl Object for Foo {}

    // A lone element having neither attributes nor children.
    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo,
        }
    }

    // The element name length (`N`) is arbitrary.
    let open = XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0));
    let close = XirfToken::Close(None, CloseSpan::empty(S2), Depth(0));
    let input = vec![open, close];

    let expected = vec![
        Parsed::Incomplete,  // Open
        Parsed::Object(Foo), // Close (store LA)
        Parsed::Incomplete,  // Close (take LA)
    ];

    assert_eq!(Ok(expected), Sut::parse(input.into_iter()).collect());
}
#[test]
fn empty_element_with_attr_bindings() {
    #[derive(Debug, PartialEq, Eq)]
    struct Foo(SymbolId, SymbolId, (Span, Span));

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        // A real parser would use an appropriate newtype rather than
        //   `Attr` itself,
        //     but `Attr` keeps this test simple.
        Sut := QN_PACKAGE {
            @ {
                name: (QN_NAME) => Attr,
                value: (QN_VALUE) => Attr,
            } => Foo(
                name.value(),
                value.value(),
                (name.attr_span().value_span(), value.attr_span().value_span())
            ),
        }
    }

    let sym_name = "bar".into();
    let sym_value = "baz".into();

    // The attributes are deliberately provided in the opposite order of
    //   their definition to show that order is irrelevant.
    let attr_value = Attr(QN_VALUE, sym_value, AttrSpan(S2, S3));
    let attr_name = Attr(QN_NAME, sym_name, AttrSpan(S4, S5));

    let input = vec![
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        XirfToken::Attr(attr_value),
        XirfToken::Attr(attr_name),
        XirfToken::Close(None, CloseSpan::empty(S6), Depth(0)),
    ];

    let expected = vec![
        Parsed::Incomplete,                                 // Open
        Parsed::Incomplete,                                 // Attr
        Parsed::Incomplete,                                 // Attr
        Parsed::Object(Foo(sym_name, sym_value, (S5, S3))), // Close
        Parsed::Incomplete,                                 // Close (LA)
    ];

    assert_eq!(Ok(expected), Sut::parse(input.into_iter()).collect());
}
// An unexpected element produces an error for the offending token and
// then employs a recovery strategy so that parsing may continue.
#[test]
fn unexpected_element() {
ele_parse! {
type Object = ();
Sut := QN_PACKAGE {
// symbol soup
@ {} => (),
}
}
let unexpected = "unexpected".unwrap_into();
let span = OpenSpan(S1, 3);
// Note that the depth is >0 just to ensure that we don't
// hard-code some assumption that `0` means "root".
const DEPTH_ROOT: Depth = Depth(5);
const DEPTH_CHILD: Depth = Depth(6);
// Implied here is that we have valid XIRF.
// This means that,
// in the context of the larger real-world system
// (not these test cases),
// even as we discard tokens,
// XIRF is still doing its job before feeding them to us,
// meaning that we get XIRF's parsing even though we've chosen
// to ignore further input for this element.
// In other words---our
// decision to skip tokens does not skip the validations that XIRF
// performs,
// such as ensuring proper nesting.
let toks = vec![
// Any name besides `QN_PACKAGE`
XirfToken::Open(unexpected, span, DEPTH_ROOT),
// From this point on we are in a recovery state,
// and will not emit tokens
// (or further errors)
// for these inputs.
XirfToken::Attr(Attr(QN_VALUE, "ignored".into(), AttrSpan(S2, S3))),
XirfToken::Open(QN_NAME, OpenSpan(S4, N), DEPTH_CHILD),
// This ensures that closing at a different depth will not count
// as the closing node for recovery.
XirfToken::Close(None, CloseSpan::empty(S5), DEPTH_CHILD),
// This final token closes the element that caused the error,
// and so brings us into an accepting state.
XirfToken::Close(Some(unexpected), CloseSpan(S6, 3), DEPTH_ROOT),
];
let mut sut = Sut::parse(toks.into_iter());
// The first token of input is the unexpected element,
// and so should result in an error.
// The referenced span should be the _name_ of the element,
// not the tag,
// since the error is referring not to the fact that an element
// was encountered
// (which was expected),
// but to the fact that the name was not the one expected.
assert_eq!(
// TODO: This references generated identifiers.
Some(Err(ParseError::StateError(SutError_::UnexpectedEle_(
unexpected,
span.name_span()
)))),
sut.next(),
);
// We should have now entered a recovery mode whereby we discard
// input until we close the element that introduced the error.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Attr
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Open (C)
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close (C)
// The recovery state must not be in an accepting state,
// because we didn't close at the root depth yet.
let (mut sut, _) =
sut.finalize().expect_err("recovery must not be accepting");
// The next token should close the element that is in error,
// and bring us into an accepting state.
// But since we are not emitting tokens,
// we'll still be marked as incomplete.
assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // Close (R)
sut.finalize()
.expect("recovery must complete in an accepting state");
}
#[test]
fn single_child_element() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        RootAttr,
        ChildAttr,
    }

    impl Object for Foo {}

    // A root element containing exactly one child element.
    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::RootAttr,

            Child,
        }

        Child := QN_CLASSIFY {
            @ {} => Foo::ChildAttr,
        }
    }

    let root_open = XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0));
    let child_open = XirfToken::Open(QN_CLASSIFY, OpenSpan(S2, N), Depth(1));
    let child_close = XirfToken::Close(None, CloseSpan::empty(S3), Depth(1));
    let root_close =
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S4, N), Depth(0));

    let input = vec![root_open, child_open, child_close, root_close];

    let expected = vec![
        Parsed::Incomplete,             // [Sut]     Root Open
        Parsed::Object(Foo::RootAttr),  // [Sut@]    Child Open (>LA)
        Parsed::Incomplete,             // [Child]   Child Open (<LA)
        Parsed::Object(Foo::ChildAttr), // [Child@]  Child Close (>LA)
        Parsed::Incomplete,             // [Child]   Child Close (<LA)
        Parsed::Incomplete,             // [Sut]     Root Close
    ];

    assert_eq!(Ok(expected), Sut::parse(input.into_iter()).collect());
}
/// Expands off of [`single_child_element`],
///   but the former provides a clear indication of whether a single state
///   is properly recognized without having to worry about how nonterminals'
///   states transition to one-another in sequence.
#[test]
fn multiple_child_elements_sequential() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        ChildA,
        ChildB,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::Root,

            // Order matters here.
            ChildA,
            ChildB,
        }

        ChildA := QN_CLASSIFY {
            @ {} => Foo::ChildA,
        }

        ChildB := QN_EXPORT {
            @ {} => Foo::ChildB,
        }
    }

    // Each token is given its own distinct span,
    //   as in the other tests.
    let toks = vec![
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        // ChildA
        XirfToken::Open(QN_CLASSIFY, OpenSpan(S2, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // ChildB
        XirfToken::Open(QN_EXPORT, OpenSpan(S4, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
    ];

    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,          // [Sut]      Root Open
            Parsed::Object(Foo::Root),   // [Sut@]     ChildA Open (>LA)
            Parsed::Incomplete,          // [ChildA]   ChildA Open (<LA)
            Parsed::Object(Foo::ChildA), // [ChildA@]  ChildA Close (>LA)
            Parsed::Incomplete,          // [ChildA]   ChildA Close (<LA)
            Parsed::Incomplete,          // [ChildB]   ChildB Open
            Parsed::Object(Foo::ChildB), // [ChildB@]  ChildB Close (>LA)
            Parsed::Incomplete,          // [ChildB]   ChildB Close (<LA)
            Parsed::Incomplete,          // [Sut]      Root Close
        ]),
        Sut::parse(toks.into_iter()).collect(),
    );
}
// An unexpected child element produces an error from the child's parser,
//   after which recovery allows parsing to continue with the next
//   expected child.
#[test]
fn child_error_and_recovery() {
    #[derive(Debug, PartialEq, Eq)]
    enum Foo {
        Root,
        ChildABad, // Will not yield this one.
        ChildB,
    }

    impl Object for Foo {}

    ele_parse! {
        type Object = Foo;

        Sut := QN_PACKAGE {
            @ {} => Foo::Root,

            // This is what we're expecting,
            //   but not what we will provide.
            ChildA,

            // But we _will_ provide this expected value,
            //   after error recovery ignores the above.
            ChildB,
        }

        ChildA := QN_CLASSIFY {
            @ {} => Foo::ChildABad,
        }

        ChildB := QN_EXPORT {
            @ {} => Foo::ChildB,
        }
    }

    let unexpected = "unexpected".unwrap_into();
    let span = OpenSpan(S2, N);

    let toks = vec![
        // The first token is the expected root.
        XirfToken::Open(QN_PACKAGE, OpenSpan(S1, N), Depth(0)),
        // --> But this one is unexpected (name).
        XirfToken::Open(unexpected, span, Depth(1)),
        // And so we should ignore it up to this point.
        XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
        // At this point,
        //   having encountered the closing tag,
        //   the next token should result in a dead state,
        //   which should then result in a transition away from the state
        //     for `ChildA`,
        //   which means that we expect `ChildB`.
        // Parsing continues normally.
        XirfToken::Open(QN_EXPORT, OpenSpan(S4, N), Depth(1)),
        XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
        // Each token has its own distinct span.
        XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
    ];

    let mut sut = Sut::parse(toks.into_iter());

    // The first token is expected,
    //   and we enter attribute parsing for `Sut`.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [Sut] Open 0

    // The second token _will_ be unexpected,
    //   but we're parsing attributes for `Sut`,
    //   so we don't know that yet.
    // Instead,
    //   the `Open` ends attribute parsing and yields a token of lookahead.
    assert_eq!(
        Some(Ok(Parsed::Object(Foo::Root))), // [Sut@] Open 1 (>LA)
        sut.next()
    );

    // The token of lookahead (`Open`) is unexpected for `ChildA`,
    //   which must throw an error and enter a recovery state.
    // The token should be consumed and returned in the error,
    //   _not_ produced as a token of lookahead,
    //   since we do not want to reprocess bad input.
    assert_eq!(
        // TODO: This references generated identifiers.
        Some(Err(ParseError::StateError(SutError_::ChildA(
            ChildAError_::UnexpectedEle_(unexpected, span.name_span())
        )))),
        sut.next(),
    );

    // The next token is the self-closing `Close` for the unexpected opening
    //   tag.
    // Since we are in recovery,
    //   it should be ignored.
    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next()); // [ChildA!] Close 1

    // Having recovered from the error,
    //   we should happily accept the remaining tokens starting with
    //   `ChildB`.
    // An intelligent system ought to accept `ChildA` if it didn't produce
    //   any output for the erroneous input,
    //   but that's not what we're doing yet.
    assert_eq!(
        Ok(vec![
            Parsed::Incomplete,          // [ChildB]  Open 1
            Parsed::Object(Foo::ChildB), // [ChildB@] Close 1 (>LA)
            Parsed::Incomplete,          // [ChildB]  Close 1 (<LA)
            Parsed::Incomplete,          // [Sut]     Close 0
        ]),
        sut.collect()
    );
}

View File

@ -84,10 +84,12 @@ pub mod qname {
}
qname_const! {
QN_CLASSIFY: :L_CLASSIFY,
QN_DESC: :L_DESC,
QN_DIM: :L_DIM,
QN_DTYPE: :L_DTYPE,
QN_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_EXPORT: :L_EXPORT,
QN_EXTERN: :L_EXTERN,
QN_FRAGMENT: L_PREPROC:L_FRAGMENT,
QN_FRAGMENTS: L_PREPROC:L_FRAGMENTS,
@ -117,6 +119,7 @@ pub mod qname {
QN_TITLE: :L_TITLE,
QN_TYPE: :L_TYPE,
QN_UUROOTPATH: :L_UUROOTPATH,
QN_VALUE: :L_VALUE,
QN_VIRTUAL: :L_VIRTUAL,
QN_XMLNS: :L_XMLNS,
QN_XMLNS_L: L_XMLNS:L_L,