// XIR element parser generator // // Copyright (C) 2014-2022 Ryan Specialty Group, LLC. // // This file is part of TAME. // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . //! Element parser generator for parsing of [XIRF](super::super::flat). use arrayvec::ArrayVec; use std::fmt::Display; use crate::{ diagnostic_panic, fmt::{DisplayWrapper, TtQuote}, parse::{ ClosedParseState, Context, ParseState, Transition, TransitionResult, }, xir::{Prefix, QName}, }; #[cfg(doc)] use crate::{ele_parse, parse::Parser}; /// A parser accepting a single element. pub trait EleParseState: ParseState {} /// Element parser configuration. /// /// This configuration is set on a nonterminal reference using square /// brackets /// (e.g. `Foo[*]`). #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct EleParseCfg { /// Whether to allow zero-or-more repetition for this element. /// /// This is the Kleene star modifier (`*`). pub repeat: bool, } // This is an implementation detail for the internal state of EleParseState. impl From for () { fn from(_: EleParseCfg) -> Self { () } } /// Maximum level of nesting for source XML trees. /// /// Technically this is the maximum level of nesting for _parsing_ those /// trees, /// which may end up being less than this value. /// /// This should be set to something reasonable, /// but is not an alternative to coming up with code conventions that /// disallow ridiculous levels of nesting. /// TAME does have a lot of nesting with primitives, /// but that nesting is easily abstracted with templates. /// Templates may expand into ridiculous levels of nesting---this /// has no impact on the template expansion phase. /// /// Note that this is assuming that this parser is used only for TAME /// sources. /// If that's not the case, /// this can be made to be configurable like XIRF. pub const MAX_DEPTH: usize = 16; /// Parser stack for trampoline. /// /// This can be used as a call stack for parsers while avoiding creating /// otherwise-recursive data structures with composition-based delegation. /// However, /// it is more similar to CPS, /// in that the parser popped off the stack need not be the parser that /// initiated the request and merely represents the next step in /// a delayed computation. /// If such a return context is unneeded, /// a [`ParseState`] may implement tail calls by simply not pushing itself /// onto the stack before requesting transfer to another [`ParseState`]. #[derive(Debug, Default)] pub struct StateStack(ArrayVec); pub type StateStackContext = Context>; // Note that public visibility is needed because `ele_parse` expands outside // of this module. impl StateStack { /// Request a transfer to another [`ParseState`], /// expecting that control be returned to `ret` after it has /// completed. /// /// This can be reasoned about like calling a thunk: /// the return [`ParseState`] is put onto the stack, /// the target [`ParseState`] is used for the state transition to /// cause [`Parser`] to perform the call to it, /// and when it is done /// (e.g. a dead state), /// `ret` will be pop'd from the stack and we'll transition back to /// it. /// Note that this method is not responsible for returning; /// see [`Self::ret_or_dead`] to perform a return. /// /// However, /// the calling [`ParseState`] is not responsible for its return, /// unlike a typical function call. /// Instead, /// this _actually_ more closely resembles CPS /// (continuation passing style), /// and so [`ele_parse!`] must be careful to ensure that stack /// operations are properly paired. /// On the upside, /// if something is erroneously `ret`'d, /// the parser is guaranteed to be in a consistent state since the /// entire state has been reified /// (but the input would then be parsed incorrectly). /// /// Note that tail calls can be implemented by transferring control /// without pushing an entry on the stack to return to, /// but that hasn't been formalized \[yet\] and requires extra care. pub fn transfer_with_ret( &mut self, Transition(ret): Transition, target: TransitionResult, ) -> TransitionResult where SA: ParseState, ST: ParseState, { let Self(stack) = self; // TODO: Global configuration to (hopefully) ensure that XIRF will // actually catch this. if stack.is_full() { // TODO: We need some spans here and ideally convert the // parenthetical error message into a diagnostic footnote. // TODO: Or should we have a special error type that tells the // parent `Parser` to panic with context? diagnostic_panic!( vec![], "maximum parsing depth of {} exceeded while attempting \ to push return state {} \ (expected XIRF configuration to prevent this error)", MAX_DEPTH, TtQuote::wrap(ret), ); } stack.push(ret.into()); target } /// Attempt to return to a previous [`ParseState`] that transferred /// control away from itself, /// otherwise yield a dead state transition to `deadst`. /// /// Conceptually, /// this is like returning from a function call, /// where the function was invoked using [`Self::transfer_with_ret`]. /// However, /// this system is more akin to CPS /// (continuation passing style); /// see [`Self::transfer_with_ret`] for important information. /// /// If there is no state to return to on the stack, /// then it is assumed that we have received more input than expected /// after having completed a full parse. pub fn ret_or_dead( &mut self, lookahead: S::Token, deadst: S, ) -> TransitionResult { let Self(stack) = self; // This should certainly never happen unless there is a bug in the // `ele_parse!` parser-generator, // since it means that we're trying to return to a caller that // does not exist. match stack.pop() { Some(st) => Transition(st).incomplete().with_lookahead(lookahead), None => Transition(deadst).dead(lookahead), } } /// Test every [`ParseState`] on the stack against the predicate `f`. pub fn all(&self, f: impl Fn(&S) -> bool) -> bool { let Self(stack) = self; stack[..].iter().all(f) } } /// Match some type of node. #[derive(Debug, PartialEq, Eq)] pub enum NodeMatcher { /// Static [`QName`] with a simple equality check. QName(QName), /// Any element with a matching [`Prefix`]. Prefix(Prefix), } impl NodeMatcher { /// Match against the provided [`QName`]. pub fn matches(&self, qname: QName) -> bool { match self { Self::QName(qn_match) if qn_match == &qname => true, Self::Prefix(prefix) if Some(*prefix) == qname.prefix() => true, _ => false, } } } impl From for NodeMatcher { fn from(qname: QName) -> Self { Self::QName(qname) } } impl From for NodeMatcher { fn from(prefix: Prefix) -> Self { Self::Prefix(prefix) } } impl Display for NodeMatcher { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use crate::xir::fmt::XmlPrefixAnyLocal; match self { Self::QName(qname) => Display::fmt(qname, f), Self::Prefix(prefix) => XmlPrefixAnyLocal::fmt(prefix, f), } } } #[macro_export] macro_rules! ele_parse { ( $vis:vis enum $super:ident; // Attr has to be first to avoid ambiguity with `$rest`. $(type AttrValueError = $evty:ty;)? type Object = $objty:ty; $($rest:tt)* ) => { ele_parse! {@!next $vis $super $(type AttrValueError = $evty;)? type Object = $objty; $($rest)* } ele_parse!(@!super_sum <$objty> $vis $super $($rest)*); }; (@!next $vis:vis $super:ident // Attr has to be first to avoid ambiguity with `$rest`. $(type AttrValueError = $evty:ty;)? type Object = $objty:ty; $($rest:tt)* ) => { ele_parse!(@!nonterm_decl <$objty, $($evty)?> $vis $super $($rest)*); }; (@!nonterm_decl <$objty:ty, $($evty:ty)?> $vis:vis $super:ident $nt:ident := $($rest:tt)* ) => { ele_parse!(@!nonterm_def <$objty, $($evty)?> $vis $super $nt $($rest)*); }; (@!nonterm_def <$objty:ty, $($evty:ty)?> $vis:vis $super:ident $nt:ident $qname:ident $(($($ntp:tt)*))? { $($matches:tt)* }; $($rest:tt)* ) => { ele_parse!(@!ele_expand_body <$objty, $($evty)?> $vis $super $nt $qname ($($($ntp)*)?) $($matches)* ); ele_parse! {@!next $vis $super $(type AttrValueError = $evty;)? type Object = $objty; $($rest)* } }; (@!nonterm_def <$objty:ty, $($evty:ty)?> $vis:vis $super:ident $nt:ident ($ntref_first:ident $(| $ntref:ident)+); $($rest:tt)* ) => { ele_parse!(@!ele_dfn_sum <$objty> $vis $super $nt [$ntref_first $($ntref)*] ); ele_parse! {@!next $vis $super $(type AttrValueError = $evty;)? type Object = $objty; $($rest)* } }; (@!nonterm_decl <$objty:ty, $($evty:ty)?> $vis:vis $super:ident) => {}; // Expand the provided data to a more verbose form that provides the // context necessary for state transitions. (@!ele_expand_body <$objty:ty, $($evty:ty)?> $vis:vis $super:ident $nt:ident $qname:ident ($($ntp:tt)*) @ { $($attrbody:tt)* } => $attrmap:expr, $(/$(($close_span:ident))? => $closemap:expr,)? // Special forms (`[sp](args) => expr`). $( [$special:ident]$(($($special_arg:ident),*))? => $special_map:expr, )? // Nonterminal references are provided as a list. // A configuration specifier can be provided, // currently intended to support the Kleene star. $( $ntref:ident $([$ntref_cfg:tt])?, )* ) => { ele_parse! { @!ele_dfn_body <$objty, $($evty)?> $vis $super $nt $qname ($($ntp)*) @ { $($attrbody)* } => $attrmap, /$($($close_span)?)? => ele_parse!(@!ele_close $($closemap)?), $([$special]$(($($special_arg),*))? => $special_map,)? <> { $( $ntref, )* } // Generate state transitions of the form `(S) -> (S')`. -> { @ -> $( ($nt::$ntref, $ntref) [$($ntref_cfg)?], ($nt::$ntref) -> )* ($nt::ExpectClose_, ()) [], } } }; // No explicit Close mapping defaults to doing nothing at all // (so yield Incomplete). (@!ele_close) => { crate::parse::ParseStatus::Incomplete }; (@!ele_close $close:expr) => { crate::parse::ParseStatus::Object($close) }; // NT[*] modifier. (@!ntref_cfg *) => { crate::xir::parse::EleParseCfg { repeat: true, } }; // No bracketed modifier following NT. (@!ntref_cfg) => { crate::xir::parse::EleParseCfg { repeat: false, } }; // Delegation when the destination type is `()`, // indicating that the next state is not a child NT // (it is likely the state expecting a closing tag). (@!ntref_delegate $stack:ident, $ret:expr, (), $_target:expr, $done:expr ) => { $done }; // Delegate to a child parser by pushing self onto the stack and // yielding to one of the child's states. // This uses a trampoline, // which avoids recursive data structures // (due to `ParseState` composition/stitching) // and does not grow the call stack. (@!ntref_delegate $stack:ident, $ret:expr, $ntnext_st:ty, $target:expr, $_done:expr ) => { $stack.transfer_with_ret( Transition($ret), $target, ) }; (@!ele_dfn_body <$objty:ty, $($evty:ty)?> $vis:vis $super:ident $nt:ident $qname:ident ($($qname_matched:pat, $open_span:pat)?) // Attribute definition special form. @ { // We must lightly parse attributes here so that we can retrieve // the field identifiers that may be later used as bindings in // `$attrmap`. $( $(#[$fattr:meta])* $field:ident: ($($fmatch:tt)+) => $fty:ty, )* } => $attrmap:expr, // Close expression // (defaulting to Incomplete via @!ele_expand_body). /$($close_span:ident)? => $closemap:expr, // Non-whitespace text nodes can be mapped into elements with the // given QName as a preprocessing step, // allowing them to reuse the existing element NT system. $([text]($text:ident, $text_span:ident) => $text_map:expr,)? // Nonterminal references. <> { $( $ntref:ident, )* } -> { @ -> ($ntfirst:path, $ntfirst_st:ty) [$($ntfirst_cfg:tt)?], $( ($ntprev:path) -> ($ntnext:path, $ntnext_st:ty) [$($ntnext_cfg:tt)?], )* } ) => { paste::paste! { crate::attr_parse! { vis($vis); $(type ValueError = $evty;)? struct [<$nt AttrsState_>] -> [<$nt Attrs_>] { $( $(#[$fattr])* $field: ($($fmatch)+) => $fty, )* } } #[doc=concat!("Parser for element [`", stringify!($qname), "`].")] #[derive(Debug, PartialEq, Eq)] $vis enum $nt { #[doc=concat!( "Expecting opening tag for element [`", stringify!($qname), "`]." )] Expecting_(crate::xir::parse::EleParseCfg), /// Recovery state ignoring all remaining tokens for this /// element. RecoverEleIgnore_( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::xir::OpenSpan, crate::xir::flat::Depth ), // Recovery completed because end tag corresponding to the // invalid element has been found. RecoverEleIgnoreClosed_( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::xir::CloseSpan ), /// Recovery state ignoring all tokens when a `Close` is /// expected. /// /// This is token-agnostic---it /// may be a child element, /// but it may be text, /// for example. CloseRecoverIgnore_( ( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::span::Span, crate::xir::flat::Depth ), crate::span::Span ), /// Parsing element attributes. Attrs_( ( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::span::Span, crate::xir::flat::Depth ), [<$nt AttrsState_>] ), $( $ntref( ( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::span::Span, crate::xir::flat::Depth ), ), )* ExpectClose_( ( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::span::Span, crate::xir::flat::Depth ), ), /// Closing tag found and parsing of the element is /// complete. Closed_( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::span::Span ), } impl From for $nt { fn from(repeat: crate::xir::parse::EleParseCfg) -> Self { Self::Expecting_(repeat) } } impl crate::xir::parse::EleParseState for $nt {} impl $nt { /// Matcher describing the node recognized by this parser. #[allow(dead_code)] // used by sum parser fn matcher() -> crate::xir::parse::NodeMatcher { crate::xir::parse::NodeMatcher::from($qname) } /// Yield the expected depth of child elements, /// if known. #[allow(dead_code)] // used by text special form fn child_depth(&self) -> Option { match self { $ntfirst((_, _, _, depth)) => Some(depth.child_depth()), $( $ntnext((_, _, _, depth)) => Some(depth.child_depth()), )* _ => None, } } } impl std::fmt::Display for $nt { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use crate::{ fmt::{DisplayWrapper, TtQuote}, xir::fmt::{TtOpenXmlEle, TtCloseXmlEle}, }; match self { Self::Expecting_(_) => write!( f, "expecting opening tag {}", TtOpenXmlEle::wrap(Self::matcher()), ), Self::RecoverEleIgnore_(_, name, _, _) | Self::RecoverEleIgnoreClosed_(_, name, _) => write!( f, "attempting to recover by ignoring element \ with unexpected name {given} \ (expected {expected})", given = TtQuote::wrap(name), expected = TtQuote::wrap(Self::matcher()), ), Self::CloseRecoverIgnore_((_, qname, _, depth), _) => write!( f, "attempting to recover by ignoring input \ until the expected end tag {expected} \ at depth {depth}", expected = TtCloseXmlEle::wrap(qname), ), Self::Attrs_(_, sa) => std::fmt::Display::fmt(sa, f), Self::ExpectClose_((_, qname, _, depth)) => write!( f, "expecting closing element {} at depth {depth}", TtCloseXmlEle::wrap(qname) ), Self::Closed_(_, qname, _) => write!( f, "done parsing element {}", TtQuote::wrap(qname), ), $( // TODO: A better description. Self::$ntref(_) => { write!( f, "preparing to transition to \ parser for next child element(s)" ) }, )* } } } #[derive(Debug, PartialEq)] $vis enum [<$nt Error_>] { /// An element was expected, /// but the name of the element was unexpected. UnexpectedEle_(crate::xir::QName, crate::span::Span), /// Unexpected input while expecting an end tag for this /// element. /// /// The span corresponds to the opening tag. CloseExpected_( crate::xir::QName, crate::span::Span, crate::xir::flat::XirfToken, ), Attrs_(crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>), } impl From]>> for [<$nt Error_>] { fn from( e: crate::xir::parse::AttrParseError<[<$nt AttrsState_>]> ) -> Self { [<$nt Error_>]::Attrs_(e) } } impl std::error::Error for [<$nt Error_>] { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { // TODO None } } impl std::fmt::Display for [<$nt Error_>] { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use crate::{ fmt::{DisplayWrapper, TtQuote}, xir::fmt::{TtOpenXmlEle, TtCloseXmlEle}, }; match self { Self::UnexpectedEle_(name, _) => write!( f, "unexpected {unexpected} (expecting {expected})", unexpected = TtOpenXmlEle::wrap(name), expected = TtOpenXmlEle::wrap($nt::matcher()), ), Self::CloseExpected_(qname, _, tok) => write!( f, "expected {}, but found {}", TtCloseXmlEle::wrap(qname), TtQuote::wrap(tok) ), Self::Attrs_(e) => std::fmt::Display::fmt(e, f), } } } impl crate::diagnose::Diagnostic for [<$nt Error_>] { fn describe(&self) -> Vec { use crate::{ diagnose::Annotate, fmt::{DisplayWrapper, TtQuote}, parse::Token, xir::fmt::{TtCloseXmlEle}, }; match self { Self::UnexpectedEle_(_, ospan) => ospan.error( format!( "expected {ele_name} here", ele_name = TtQuote::wrap($nt::matcher()) ) ).into(), Self::CloseExpected_(qname, span, tok) => vec![ span.note("element starts here"), tok.span().error(format!( "expected {}", TtCloseXmlEle::wrap(qname), )), ], Self::Attrs_(e) => e.describe(), } } } impl crate::parse::ParseState for $nt { type Token = crate::xir::flat::XirfToken< crate::xir::flat::RefinedText >; type Object = $objty; type Error = [<$nt Error_>]; type Context = crate::xir::parse::StateStackContext; type Super = $super; fn parse_token( self, tok: Self::Token, #[allow(unused_variables)] // used only if child NTs stack: &mut Self::Context, ) -> crate::parse::TransitionResult { use crate::{ parse::{EmptyContext, Transition, Transitionable}, xir::{ EleSpan, flat::XirfToken, parse::parse_attrs, }, }; // Used only by _some_ expansions. #[allow(unused_imports)] use crate::xir::flat::Text; use $nt::{ Attrs_, Expecting_, RecoverEleIgnore_, CloseRecoverIgnore_, RecoverEleIgnoreClosed_, ExpectClose_, Closed_ }; match (self, tok) { ( Expecting_(cfg), XirfToken::Open(qname, span, depth) ) if $nt::matcher().matches(qname) => { Transition(Attrs_( (cfg, qname, span.tag_span(), depth), parse_attrs(qname, span) )).incomplete() }, ( Closed_(cfg, ..), XirfToken::Open(qname, span, depth) ) if cfg.repeat && Self::matcher().matches(qname) => { Transition(Attrs_( (cfg, qname, span.tag_span(), depth), parse_attrs(qname, span) )).incomplete() }, ( Expecting_(cfg), XirfToken::Open(qname, span, depth) ) => { Transition(RecoverEleIgnore_(cfg, qname, span, depth)).err( [<$nt Error_>]::UnexpectedEle_(qname, span.name_span()) ) }, ( RecoverEleIgnore_(cfg, qname, _, depth_open), XirfToken::Close(_, span, depth_close) ) if depth_open == depth_close => { Transition( RecoverEleIgnoreClosed_(cfg, qname, span) ).incomplete() }, (Attrs_(meta @ (_, qname, _, _), sa), tok) => { sa.delegate_until_obj::( tok, EmptyContext, |sa| Transition(Attrs_(meta, sa)), || unreachable!("see ParseState::delegate_until_obj dead"), |#[allow(unused_variables)] sa, attrs| { let obj = match attrs { // Attribute field bindings for `$attrmap` [<$nt Attrs_>] { $( $field, )* } => { // Optional `OpenSpan` binding let _ = qname; // avoid unused warning $( use crate::xir::parse::attr::AttrParseState; let $qname_matched = qname; let $open_span = sa.element_span(); )? $attrmap }, }; // Lookahead is added by `delegate_until_obj`. ele_parse!(@!ntref_delegate stack, $ntfirst(meta), $ntfirst_st, Transition( Into::<$ntfirst_st>::into( ele_parse!(@!ntref_cfg $($ntfirst_cfg)?) ) ).ok(obj), Transition($ntfirst(meta)).ok(obj) ) } ) }, $( ($ntprev(meta), tok) => { ele_parse!(@!ntref_delegate stack, $ntnext(meta), $ntnext_st, Transition( Into::<$ntnext_st>::into( ele_parse!(@!ntref_cfg $($ntnext_cfg)?) ) ).incomplete().with_lookahead(tok), Transition($ntnext(meta)).incomplete().with_lookahead(tok) ) }, )* // XIRF ensures proper nesting, // so we do not need to check the element name. ( ExpectClose_((cfg, qname, _, depth)) | CloseRecoverIgnore_((cfg, qname, _, depth), _), XirfToken::Close(_, span, tok_depth) ) if tok_depth == depth => { $( let $close_span = span; )? $closemap.transition(Closed_(cfg, qname, span.tag_span())) }, (ExpectClose_(meta @ (_, qname, otspan, _)), unexpected_tok) => { use crate::parse::Token; Transition( CloseRecoverIgnore_(meta, unexpected_tok.span()) ).err([<$nt Error_>]::CloseExpected_(qname, otspan, unexpected_tok)) } // We're still in recovery, // so this token gets thrown out. (st @ (RecoverEleIgnore_(..) | CloseRecoverIgnore_(..)), _) => { Transition(st).incomplete() }, // TODO: Use `is_accepting` guard if we do not utilize // exhaustiveness check. (st @ (Closed_(..) | RecoverEleIgnoreClosed_(..)), tok) => { Transition(st).dead(tok) } todo => todo!("{todo:?}"), } } fn is_accepting(&self, _: &Self::Context) -> bool { matches!(*self, Self::Closed_(..) | Self::RecoverEleIgnoreClosed_(..)) } } } }; (@!ele_dfn_sum <$objty:ty> $vis:vis $super:ident $nt:ident [$($ntref:ident)*]) => { $( // Provide a (hopefully) helpful error that can be corrected // rather than any obscure errors that may follow from trying // to compose parsers that were not generated with this macro. assert_impl_all!($ntref: crate::xir::parse::EleParseState); )* paste::paste! { #[doc=concat!( "Parser expecting one of ", $("[`", stringify!($ntref), "`], ",)* "." )] #[derive(Debug, PartialEq, Eq)] $vis enum $nt { Expecting_(crate::xir::parse::EleParseCfg), /// Recovery state ignoring all remaining tokens for this /// element. RecoverEleIgnore_( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::xir::OpenSpan, crate::xir::flat::Depth, ), RecoverEleIgnoreClosed_( crate::xir::parse::EleParseCfg, crate::xir::QName, crate::xir::CloseSpan ), /// Inner element has been parsed and is dead; /// this indicates that this parser is also dead. Done_, } impl std::fmt::Display for $nt { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use crate::{ fmt::{DisplayWrapper, ListDisplayWrapper, TtQuote}, xir::fmt::EleSumList, }; let ntrefs = [ $( $ntref::matcher(), )* ]; let expected = EleSumList::wrap(&ntrefs); match self { Self::Expecting_(_) => { write!(f, "expecting {expected}") }, Self::RecoverEleIgnore_(_, name, _, _) | Self::RecoverEleIgnoreClosed_(_, name, _) => write!( f, "attempting to recover by ignoring element \ with unexpected name {given} \ (expected {expected})", given = TtQuote::wrap(name), ), Self::Done_ => write!(f, "done parsing {expected}"), } } } impl From for $nt { fn from(repeat: crate::xir::parse::EleParseCfg) -> Self { Self::Expecting_(repeat) } } #[derive(Debug, PartialEq)] $vis enum [<$nt Error_>] { UnexpectedEle_(crate::xir::QName, crate::span::Span), } impl std::error::Error for [<$nt Error_>] { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { // TODO None } } impl std::fmt::Display for [<$nt Error_>] { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use crate::{ fmt::DisplayWrapper, xir::fmt::TtOpenXmlEle, }; match self { Self::UnexpectedEle_(qname, _) => { write!(f, "unexpected {}", TtOpenXmlEle::wrap(qname)) }, } } } impl crate::diagnose::Diagnostic for [<$nt Error_>] { fn describe(&self) -> Vec { use crate::{ diagnose::Annotate, fmt::{DisplayWrapper, ListDisplayWrapper, TtQuote}, xir::fmt::EleSumList, }; let ntrefs = [ $( $ntref::matcher(), )* ]; let expected = EleSumList::wrap(&ntrefs); match self { Self::UnexpectedEle_(qname, span) => { span .error(format!( "element {name} cannot appear here", name = TtQuote::wrap(qname), )) .with_help(format!("expecting {expected}")) .into() }, } } } impl crate::parse::ParseState for $nt { type Token = crate::xir::flat::XirfToken< crate::xir::flat::RefinedText >; type Object = $objty; type Error = [<$nt Error_>]; type Context = crate::xir::parse::StateStackContext; type Super = $super; fn parse_token( self, tok: Self::Token, stack: &mut Self::Context, ) -> crate::parse::TransitionResult { use crate::{ parse::Transition, xir::{ flat::XirfToken, parse::EleParseCfg, }, }; use $nt::{ Expecting_, RecoverEleIgnore_, RecoverEleIgnoreClosed_, Done_ }; match (self, tok) { $( ( Expecting_(cfg), XirfToken::Open(qname, span, depth) ) if $ntref::matcher().matches(qname) => { ele_parse!(@!ntref_delegate stack, match cfg.repeat { true => Expecting_(cfg), false => Done_, }, $ntref, Transition( $ntref::from( EleParseCfg::default() ) ).incomplete().with_lookahead( XirfToken::Open(qname, span, depth) ), unreachable!("TODO: remove me (ntref_delegate done)") ) }, )* // An unexpected token when repeating ends // repetition and should not result in an error. (Expecting_(cfg), tok) if cfg.repeat => { Transition(Done_).dead(tok) } (Expecting_(cfg), XirfToken::Open(qname, span, depth)) => { use crate::xir::EleSpan; Transition(RecoverEleIgnore_(cfg, qname, span, depth)).err( // Use name span rather than full `OpenSpan` // since it's specifically the name that // was unexpected, // not the fact that it's an element. [<$nt Error_>]::UnexpectedEle_(qname, span.name_span()) ) }, // XIRF ensures that the closing tag matches the opening, // so we need only check depth. ( RecoverEleIgnore_(cfg, qname, _, depth_open), XirfToken::Close(_, span, depth_close) ) if depth_open == depth_close => { Transition(RecoverEleIgnoreClosed_(cfg, qname, span)).incomplete() }, (st @ RecoverEleIgnore_(..), _) => { Transition(st).incomplete() }, ( st @ (Done_ | RecoverEleIgnoreClosed_(..)), tok ) => Transition(st).dead(tok), todo => todo!("sum {todo:?}"), } } fn is_accepting(&self, _: &Self::Context) -> bool { match self { Self::RecoverEleIgnoreClosed_(..) | Self::Done_ => true, _ => false, } } } } }; // Generate superstate sum type. // // This is really annoying because we cannot read the output of another // macro, // and so we have to do our best to re-parse the body of the // original `ele_parse!` invocation without duplicating too much // logic, // and we have to do so in a way that we can aggregate all of // those data. (@!super_sum <$objty:ty> $vis:vis $super:ident $( // NT definition is always followed by `:=`. $nt:ident := // Identifier if an element NT. $($_i:ident)? // Parenthesis for a sum NT, // or possibly the span match for an element NT. // So: `:= QN_IDENT(span)` or `:= (A | B | C)`. $( ($($_p:tt)*) )? // Braces for an element NT body. $( {$($_b:tt)*} )? // Element and sum NT both conclude with a semicolon, // which we need to disambiguate the next `$nt`. ; )* ) => { paste::paste! { /// Superstate representing the union of all related parsers. /// /// This [`ParseState`] allows sub-parsers to independently /// the states associated with their own subgraph, /// and then yield a state transition directly to a state of /// another parser. /// This is conceptually like CPS (continuation passing style), /// where this [`ParseState`] acts as a trampoline. /// /// This [`ParseState`] is required for use with [`Parser`]; /// see [`ClosedParseState`] for more information. #[derive(Debug, PartialEq, Eq)] $vis enum $super { $( $nt($nt), )* } // Default parser is the first NT. impl Default for $super { fn default() -> Self { use $super::*; ele_parse!(@!ntfirst $($nt)*)( crate::xir::parse::EleParseCfg::default().into() ) } } $( impl From<$nt> for $super { fn from(st: $nt) -> Self { $super::$nt(st) } } )* impl std::fmt::Display for $super { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { $( Self::$nt(e) => std::fmt::Display::fmt(e, f), )* } } } /// Superstate error object representing the union of all /// related parsers' errors. #[derive(Debug, PartialEq)] $vis enum [<$super Error_>] { $( $nt([<$nt Error_>]), )* } $( impl From<[<$nt Error_>]> for [<$super Error_>] { fn from(e: [<$nt Error_>]) -> Self { [<$super Error_>]::$nt(e) } } )* impl std::error::Error for [<$super Error_>] { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { // TODO None } } impl std::fmt::Display for [<$super Error_>] { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { $( Self::$nt(e) => std::fmt::Display::fmt(e, f), )* } } } impl crate::diagnose::Diagnostic for [<$super Error_>] { fn describe(&self) -> Vec { match self { $( Self::$nt(e) => e.describe(), )* } } } impl crate::parse::ParseState for $super { type Token = crate::xir::flat::XirfToken< crate::xir::flat::RefinedText >; type Object = $objty; type Error = [<$super Error_>]; type Context = crate::xir::parse::StateStackContext; fn parse_token( self, tok: Self::Token, stack: &mut Self::Context, ) -> crate::parse::TransitionResult { use crate::{ parse::Transition, xir::flat::{XirfToken, RefinedText}, }; match (self, tok) { // Depth check is unnecessary since _all_ xir::parse // parsers // (at least at the time of writing) // ignore whitespace and comments, // so may as well return early. // TODO: I'm ignoring _all_ text for now to // proceed with development; fix. ( st, XirfToken::Text(RefinedText::Whitespace(..), _) | XirfToken::Text(RefinedText::Unrefined(..), _) // XXX | XirfToken::Comment(..) ) => { Transition(st).incomplete() } $( // Pass token directly to child until it reports // a dead state, // after which we return to the `ParseState` // atop of the stack. (Self::$nt(st), tok) => st.delegate_child( tok, stack, |deadst, tok, stack| { stack.ret_or_dead(tok, deadst) }, ), )* } } fn is_accepting(&self, stack: &Self::Context) -> bool { // This is short-circuiting, // starting at the _bottom_ of the stack and // moving upward. // The idea is that, // is we're still in the middle of parsing, // then it's almost certain that the [`ParseState`] on // the bottom of the stack will not be in an // accepting state, // and so we can stop checking early. // In most cases, // if we haven't hit EOF early, // the stack should be either empty or consist of only // the root state. // // After having considered the stack, // we can then consider the active `ParseState`. stack.all(|st| st.is_inner_accepting(stack)) && self.is_inner_accepting(stack) } } impl $super { /// Whether the inner (active child) [`ParseState`] is in an /// accepting state. fn is_inner_accepting( &self, ctx: &::Context ) -> bool { use crate::parse::ParseState; match self { $( Self::$nt(st) => st.is_accepting(ctx), )* } } } } }; (@!ntfirst $ntfirst:ident $($nt:ident)*) => { $ntfirst } } #[cfg(test)] mod test;