1646 lines
66 KiB
Rust
1646 lines
66 KiB
Rust
// XIR element parser generator
|
||
//
|
||
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
|
||
//
|
||
// This file is part of TAME.
|
||
//
|
||
// This program is free software: you can redistribute it and/or modify
|
||
// it under the terms of the GNU General Public License as published by
|
||
// the Free Software Foundation, either version 3 of the License, or
|
||
// (at your option) any later version.
|
||
//
|
||
// This program is distributed in the hope that it will be useful,
|
||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
// GNU General Public License for more details.
|
||
//
|
||
// You should have received a copy of the GNU General Public License
|
||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
//! Element parser generator for parsing of [XIRF](super::super::flat).
|
||
|
||
use arrayvec::ArrayVec;
|
||
use std::fmt::Display;
|
||
|
||
use crate::{
|
||
diagnostic_panic,
|
||
fmt::{DisplayWrapper, TtQuote},
|
||
parse::{
|
||
ClosedParseState, Context, ParseState, Transition, TransitionResult,
|
||
},
|
||
xir::{Prefix, QName},
|
||
};
|
||
|
||
#[cfg(doc)]
|
||
use crate::{ele_parse, parse::Parser};
|
||
|
||
/// A parser accepting a single element.
|
||
///
/// This is a marker trait:
///   it declares no items of its own and only refines [`ParseState`].
pub trait EleParseState: ParseState {}
|
||
|
||
/// Maximum level of parser nesting.
|
||
///
|
||
/// Unfortunately,
|
||
/// this limit _does not_ correspond to the level of XML nesting;
|
||
/// parsers composed of Sum NTs,
|
||
/// in particular,
|
||
/// push multiple parsers onto the stack for a single element.
|
||
///
|
||
/// Note that this is assuming that this parser is used only for TAME
|
||
/// sources.
|
||
/// If that's not the case,
|
||
/// this can be made to be configurable like XIRF.
|
||
///
/// This value is also the fixed capacity of the [`ArrayVec`] that backs
///   [`StateStack`].
pub const MAX_DEPTH: usize = 64;
|
||
|
||
/// Parser stack for trampoline.
|
||
///
|
||
/// This can be used as a call stack for parsers while avoiding creating
|
||
/// otherwise-recursive data structures with composition-based delegation.
|
||
/// However,
|
||
/// it is more similar to CPS,
|
||
/// in that the parser popped off the stack need not be the parser that
|
||
/// initiated the request and merely represents the next step in
|
||
/// a delayed computation.
|
||
/// If such a return context is unneeded,
|
||
/// a [`ParseState`] may implement tail calls by simply not pushing itself
|
||
/// onto the stack before requesting transfer to another [`ParseState`].
|
||
#[derive(Debug, Default)]
|
||
// Newtype over a fixed-capacity `ArrayVec`;
//   at most `MAX_DEPTH` states can be held at any one time.
pub struct StateStack<S: ClosedParseState>(ArrayVec<S, MAX_DEPTH>);
|
||
|
||
/// [`Context`] wrapping a [`StateStack`] of `S`.
///
/// The element parsers generated by [`ele_parse!`] declare this type as
///   their [`ParseState::Context`].
pub type StateStackContext<S> = Context<StateStack<S>>;
|
||
|
||
// Note that public visibility is needed because `ele_parse` expands outside
|
||
// of this module.
|
||
impl<S: ClosedParseState> StateStack<S> {
|
||
/// Request a transfer to another [`ParseState`],
|
||
/// expecting that control be returned to `ret` after it has
|
||
/// completed.
|
||
///
|
||
/// This can be reasoned about like calling a thunk:
|
||
/// the return [`ParseState`] is put onto the stack,
|
||
/// the target [`ParseState`] is used for the state transition to
|
||
/// cause [`Parser`] to perform the call to it,
|
||
/// and when it is done
|
||
/// (e.g. a dead state),
|
||
/// `ret` will be pop'd from the stack and we'll transition back to
|
||
/// it.
|
||
/// Note that this method is not responsible for returning;
|
||
/// see [`Self::ret_or_dead`] to perform a return.
|
||
///
|
||
/// However,
|
||
/// the calling [`ParseState`] is not responsible for its return,
|
||
/// unlike a typical function call.
|
||
/// Instead,
|
||
/// this _actually_ more closely resembles CPS
|
||
/// (continuation passing style),
|
||
/// and so [`ele_parse!`] must be careful to ensure that stack
|
||
/// operations are properly paired.
|
||
/// On the upside,
|
||
/// if something is erroneously `ret`'d,
|
||
/// the parser is guaranteed to be in a consistent state since the
|
||
/// entire state has been reified
|
||
/// (but the input would then be parsed incorrectly).
|
||
///
|
||
/// Note that tail calls can be implemented by transferring control
|
||
/// without pushing an entry on the stack to return to,
|
||
/// but that hasn't been formalized \[yet\] and requires extra care.
|
||
pub fn transfer_with_ret<SA, ST>(
|
||
&mut self,
|
||
Transition(ret): Transition<SA>,
|
||
target: TransitionResult<ST>,
|
||
) -> TransitionResult<ST>
|
||
where
|
||
SA: ParseState<Super = S::Super>,
|
||
ST: ParseState,
|
||
{
|
||
let Self(stack) = self;
|
||
|
||
// TODO: Global configuration to (hopefully) ensure that XIRF will
|
||
// actually catch this.
|
||
if stack.is_full() {
|
||
// TODO: We need some spans here and ideally convert the
|
||
// parenthetical error message into a diagnostic footnote.
|
||
// TODO: Or should we have a special error type that tells the
|
||
// parent `Parser` to panic with context?
|
||
diagnostic_panic!(
|
||
vec![],
|
||
"maximum parsing depth of {} exceeded while attempting \
|
||
to push return state {} \
|
||
(try reducing XML nesting as a workaround)",
|
||
MAX_DEPTH,
|
||
TtQuote::wrap(ret),
|
||
);
|
||
}
|
||
|
||
stack.push(ret.into());
|
||
target
|
||
}
|
||
|
||
/// Attempt to return to a previous [`ParseState`] that transferred
|
||
/// control away from itself,
|
||
/// otherwise yield a dead state transition to `deadst`.
|
||
///
|
||
/// Conceptually,
|
||
/// this is like returning from a function call,
|
||
/// where the function was invoked using [`Self::transfer_with_ret`].
|
||
/// However,
|
||
/// this system is more akin to CPS
|
||
/// (continuation passing style);
|
||
/// see [`Self::transfer_with_ret`] for important information.
|
||
///
|
||
/// If there is no state to return to on the stack,
|
||
/// then it is assumed that we have received more input than expected
|
||
/// after having completed a full parse.
|
||
pub fn ret_or_dead(
|
||
&mut self,
|
||
lookahead: S::Token,
|
||
deadst: S,
|
||
) -> TransitionResult<S> {
|
||
let Self(stack) = self;
|
||
|
||
// This should certainly never happen unless there is a bug in the
|
||
// `ele_parse!` parser-generator,
|
||
// since it means that we're trying to return to a caller that
|
||
// does not exist.
|
||
match stack.pop() {
|
||
Some(st) => Transition(st).incomplete().with_lookahead(lookahead),
|
||
None => Transition(deadst).dead(lookahead),
|
||
}
|
||
}
|
||
|
||
/// Test every [`ParseState`] on the stack against the predicate `f`.
|
||
pub fn all(&self, f: impl Fn(&S) -> bool) -> bool {
|
||
let Self(stack) = self;
|
||
stack[..].iter().all(f)
|
||
}
|
||
}
|
||
|
||
/// Match some type of node.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
pub enum NodeMatcher {
|
||
/// Static [`QName`] with a simple equality check.
|
||
QName(QName),
|
||
/// Any element with a matching [`Prefix`].
|
||
Prefix(Prefix),
|
||
}
|
||
|
||
impl NodeMatcher {
|
||
/// Match against the provided [`QName`].
|
||
pub fn matches(&self, qname: QName) -> bool {
|
||
match self {
|
||
Self::QName(qn_match) if qn_match == &qname => true,
|
||
Self::Prefix(prefix) if Some(*prefix) == qname.prefix() => true,
|
||
_ => false,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl From<QName> for NodeMatcher {
|
||
fn from(qname: QName) -> Self {
|
||
Self::QName(qname)
|
||
}
|
||
}
|
||
|
||
impl From<Prefix> for NodeMatcher {
|
||
fn from(prefix: Prefix) -> Self {
|
||
Self::Prefix(prefix)
|
||
}
|
||
}
|
||
|
||
impl Display for NodeMatcher {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||
use crate::xir::fmt::XmlPrefixAnyLocal;
|
||
|
||
match self {
|
||
Self::QName(qname) => Display::fmt(qname, f),
|
||
Self::Prefix(prefix) => XmlPrefixAnyLocal::fmt(prefix, f),
|
||
}
|
||
}
|
||
}
|
||
|
||
#[macro_export]
|
||
macro_rules! ele_parse {
|
||
(
|
||
$vis:vis enum $super:ident;
|
||
|
||
// Attr has to be first to avoid ambiguity with `$rest`.
|
||
$(type AttrValueError = $evty:ty;)?
|
||
type Object = $objty:ty;
|
||
|
||
$(
|
||
[super] {
|
||
$($super_body:tt)*
|
||
};
|
||
)?
|
||
|
||
// Combination of square brackets above and the prefix here are
|
||
// needed for disambiguation.
|
||
$nt_first:ident := $($nt_defs:tt)*
|
||
) => {
|
||
ele_parse! {@!next $vis $super
|
||
$(type AttrValueError = $evty;)?
|
||
type Object = $objty;
|
||
$nt_first := $($nt_defs)*
|
||
}
|
||
|
||
ele_parse!(@!super_sum <$objty> $vis $super
|
||
$([super] { $($super_body)* })?
|
||
$nt_first := $($nt_defs)*
|
||
);
|
||
};
|
||
|
||
(@!next $vis:vis $super:ident
|
||
// Attr has to be first to avoid ambiguity with `$rest`.
|
||
$(type AttrValueError = $evty:ty;)?
|
||
type Object = $objty:ty;
|
||
|
||
$($rest:tt)*
|
||
) => {
|
||
ele_parse!(@!nonterm_decl <$objty, $($evty)?> $vis $super $($rest)*);
|
||
};
|
||
|
||
(@!nonterm_decl <$objty:ty, $($evty:ty)?>
|
||
$vis:vis $super:ident $nt:ident := $($rest:tt)*
|
||
) => {
|
||
ele_parse!(@!nonterm_def <$objty, $($evty)?> $vis $super $nt $($rest)*);
|
||
};
|
||
|
||
(@!nonterm_def <$objty:ty, $($evty:ty)?>
|
||
$vis:vis $super:ident $nt:ident $qname:ident $(($($ntp:tt)*))?
|
||
{ $($matches:tt)* }; $($rest:tt)*
|
||
) => {
|
||
ele_parse!(@!ele_expand_body <$objty, $($evty)?>
|
||
$vis $super $nt $qname ($($($ntp)*)?) $($matches)*
|
||
);
|
||
|
||
ele_parse! {@!next $vis $super
|
||
$(type AttrValueError = $evty;)?
|
||
type Object = $objty;
|
||
$($rest)*
|
||
}
|
||
};
|
||
|
||
(@!nonterm_def <$objty:ty, $($evty:ty)?>
|
||
$vis:vis $super:ident $nt:ident
|
||
($ntref_first:ident $(| $ntref:ident)+); $($rest:tt)*
|
||
) => {
|
||
ele_parse!(@!ele_dfn_sum <$objty>
|
||
$vis $super $nt [$ntref_first $($ntref)*]
|
||
);
|
||
|
||
ele_parse! {@!next $vis $super
|
||
$(type AttrValueError = $evty;)?
|
||
type Object = $objty;
|
||
$($rest)*
|
||
}
|
||
};
|
||
|
||
(@!nonterm_decl <$objty:ty, $($evty:ty)?> $vis:vis $super:ident) => {};
|
||
|
||
// Expand the provided data to a more verbose form that provides the
|
||
// context necessary for state transitions.
|
||
(@!ele_expand_body <$objty:ty, $($evty:ty)?>
|
||
$vis:vis $super:ident $nt:ident $qname:ident ($($ntp:tt)*)
|
||
|
||
@ { $($attrbody:tt)* } => $attrmap:expr,
|
||
$(/$(($close_span:ident))? => $closemap:expr,)?
|
||
|
||
// Special forms (`[sp](args) => expr`).
|
||
$(
|
||
[$special:ident]$(($($special_arg:ident),*))?
|
||
=> $special_map:expr,
|
||
)?
|
||
|
||
// Nonterminal references are provided as a list.
|
||
// A configuration specifier can be provided,
|
||
// currently intended to support the Kleene star.
|
||
$(
|
||
$ntref:ident,
|
||
)*
|
||
) => {
|
||
ele_parse! {
|
||
@!ele_dfn_body <$objty, $($evty)?> $vis $super $nt $qname ($($ntp)*)
|
||
@ { $($attrbody)* } => $attrmap,
|
||
/$($($close_span)?)? => ele_parse!(@!ele_close $($closemap)?),
|
||
|
||
$([$special]$(($($special_arg),*))? => $special_map,)?
|
||
|
||
<> {
|
||
$(
|
||
$ntref,
|
||
)*
|
||
}
|
||
|
||
// Generate state transitions of the form `(S) -> (S')`.
|
||
-> {
|
||
@ ->
|
||
$(
|
||
($nt::$ntref, $ntref),
|
||
($nt::$ntref) ->
|
||
)* ($nt::ExpectClose_, ()),
|
||
}
|
||
}
|
||
};
|
||
|
||
// No explicit Close mapping defaults to doing nothing at all
|
||
// (so yield Incomplete).
|
||
(@!ele_close) => {
|
||
crate::parse::ParseStatus::Incomplete
|
||
};
|
||
|
||
(@!ele_close $close:expr) => {
|
||
crate::parse::ParseStatus::Object($close)
|
||
};
|
||
|
||
// Delegation when the destination type is `()`,
|
||
// indicating that the next state is not a child NT
|
||
// (it is likely the state expecting a closing tag).
|
||
(@!ntref_delegate
|
||
$stack:ident, $ret:expr, (), $_target:expr, $done:expr
|
||
) => {
|
||
$done
|
||
};
|
||
|
||
// Delegate to a child parser by pushing self onto the stack and
|
||
// yielding to one of the child's states.
|
||
// This uses a trampoline,
|
||
// which avoids recursive data structures
|
||
// (due to `ParseState` composition/stitching)
|
||
// and does not grow the call stack.
|
||
(@!ntref_delegate
|
||
$stack:ident, $ret:expr, $ntnext_st:ty, $target:expr, $_done:expr
|
||
) => {
|
||
$stack.transfer_with_ret(
|
||
Transition($ret),
|
||
$target,
|
||
)
|
||
};
|
||
|
||
(@!ele_dfn_body <$objty:ty, $($evty:ty)?>
|
||
$vis:vis $super:ident $nt:ident $qname:ident
|
||
($($qname_matched:pat, $open_span:pat)?)
|
||
|
||
// Attribute definition special form.
|
||
@ {
|
||
// We must lightly parse attributes here so that we can retrieve
|
||
// the field identifiers that may be later used as bindings in
|
||
// `$attrmap`.
|
||
$(
|
||
$(#[$fattr:meta])*
|
||
$field:ident: ($($fmatch:tt)+) => $fty:ty,
|
||
)*
|
||
} => $attrmap:expr,
|
||
|
||
// Close expression
|
||
// (defaulting to Incomplete via @!ele_expand_body).
|
||
/$($close_span:ident)? => $closemap:expr,
|
||
|
||
// Streaming (as opposed to aggregate) attribute parsing.
|
||
$([attr]($attr_stream_binding:ident) => $attr_stream_map:expr,)?
|
||
|
||
// Nonterminal references.
|
||
<> {
|
||
$(
|
||
$ntref:ident,
|
||
)*
|
||
}
|
||
|
||
-> {
|
||
@ -> ($ntfirst:path, $ntfirst_st:ty),
|
||
$(
|
||
($ntprev:path) -> ($ntnext:path, $ntnext_st:ty),
|
||
)*
|
||
}
|
||
) => {
|
||
paste::paste! {
|
||
crate::attr_parse! {
|
||
vis($vis);
|
||
$(type ValueError = $evty;)?
|
||
|
||
struct [<$nt AttrsState_>] -> [<$nt Attrs_>] {
|
||
$(
|
||
$(#[$fattr])*
|
||
$field: ($($fmatch)+) => $fty,
|
||
)*
|
||
}
|
||
}
|
||
|
||
#[doc=concat!("Parser for element [`", stringify!($qname), "`].")]
|
||
#[derive(Debug, PartialEq, Eq, Default)]
|
||
$vis enum $nt {
|
||
#[doc=concat!(
|
||
"Expecting opening tag for element [`",
|
||
stringify!($qname),
|
||
"`]."
|
||
)]
|
||
#[default]
|
||
Expecting_,
|
||
/// Non-preemptable [`Self::Expecting_`].
|
||
#[allow(dead_code)] // used by sum parser
|
||
NonPreemptableExpecting_,
|
||
/// Recovery state ignoring all remaining tokens for this
|
||
/// element.
|
||
RecoverEleIgnore_(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth
|
||
),
|
||
// Recovery completed because end tag corresponding to the
|
||
// invalid element has been found.
|
||
RecoverEleIgnoreClosed_(
|
||
crate::xir::QName,
|
||
crate::xir::CloseSpan
|
||
),
|
||
/// Recovery state ignoring all tokens when a `Close` is
|
||
/// expected.
|
||
///
|
||
/// This is token-agnostic---it
|
||
/// may be a child element,
|
||
/// but it may be text,
|
||
/// for example.
|
||
CloseRecoverIgnore_(
|
||
(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth
|
||
),
|
||
crate::span::Span
|
||
),
|
||
/// Parsing element attributes.
|
||
Attrs_(
|
||
(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth
|
||
),
|
||
[<$nt AttrsState_>]
|
||
),
|
||
$(
|
||
$ntref(
|
||
(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth
|
||
),
|
||
),
|
||
)*
|
||
ExpectClose_(
|
||
(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth
|
||
),
|
||
),
|
||
/// Closing tag found and parsing of the element is
|
||
/// complete.
|
||
Closed_(
|
||
Option<crate::xir::QName>,
|
||
crate::span::Span
|
||
),
|
||
}
|
||
|
||
impl $nt {
|
||
/// Matcher describing the node recognized by this parser.
|
||
#[allow(dead_code)] // used by sum parser
|
||
#[inline]
|
||
fn matcher() -> crate::xir::parse::NodeMatcher {
|
||
crate::xir::parse::NodeMatcher::from($qname)
|
||
}
|
||
|
||
/// Whether the given QName would be matched by any of the
|
||
/// parsers associated with this type.
|
||
#[inline]
|
||
fn matches(qname: crate::xir::QName) -> bool {
|
||
Self::matcher().matches(qname)
|
||
}
|
||
|
||
/// Number of
|
||
/// [`NodeMatcher`](crate::xir::parse::NodeMatcher)s
|
||
/// considered by this parser.
|
||
///
|
||
/// This is always `1` for this parser.
|
||
#[allow(dead_code)] // used by Sum NTs
|
||
const fn matches_n() -> usize {
|
||
1
|
||
}
|
||
|
||
/// Format [`Self::matcher`] for display.
|
||
///
|
||
/// This value may be rendered singularly or as part of a
|
||
/// list of values joined together by Sum NTs.
|
||
/// This function receives the number of values to be
|
||
/// formatted as `n` and the current 0-indexed offset
|
||
/// within that list as `i`.
|
||
/// This allows for zero-copy rendering of composable NTs.
|
||
///
|
||
/// `i` must be incremented after the operation.
|
||
#[allow(dead_code)] // used by Sum NTs
|
||
fn fmt_matches(
|
||
n: usize,
|
||
i: &mut usize,
|
||
f: &mut std::fmt::Formatter
|
||
) -> std::fmt::Result {
|
||
use crate::{
|
||
fmt::ListDisplayWrapper,
|
||
xir::fmt::EleSumList,
|
||
};
|
||
|
||
EleSumList::fmt_nth(n, *i, &Self::matcher(), f)?;
|
||
*i += 1;
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// Whether the parser is in a state that can tolerate
|
||
/// superstate node preemption.
|
||
///
|
||
/// For more information,
|
||
/// see the superstate
|
||
#[doc=concat!(
|
||
" [`", stringify!($super), "::can_preempt_node`]."
|
||
)]
|
||
fn can_preempt_node(&self) -> bool {
|
||
use $nt::*;
|
||
|
||
match self {
|
||
// Preemption before the opening tag is safe,
|
||
// since we haven't started processing yet.
|
||
Expecting_ => true,
|
||
|
||
// The name says it all.
|
||
// Instantiated by the superstate.
|
||
NonPreemptableExpecting_ => false,
|
||
|
||
// Preemption during recovery would cause tokens to
|
||
// be parsed when they ought to be ignored,
|
||
// so we must process all tokens during recovery.
|
||
RecoverEleIgnore_(..)
|
||
| CloseRecoverIgnore_(..) => false,
|
||
|
||
// It is _not_ safe to preempt attribute parsing
|
||
// since attribute parsers aggregate until a
|
||
// non-attribute token is encountered;
|
||
// we must allow attribute parsing to finish its
|
||
// job _before_ any preempted nodes are emitted
|
||
// since the attributes came _before_ that node.
|
||
Attrs_(..) => false,
|
||
|
||
// These states represent jump states where we're
|
||
// about to transition to the next child parser.
|
||
// It's safe to preempt here,
|
||
// since we're not in the middle of parsing.
|
||
//
|
||
// Note that this includes `ExpectClose_` because of
|
||
// the macro preprocessing,
|
||
// and Rust's exhaustiveness check will ensure
|
||
// that it is accounted for if that changes.
|
||
// If we're expecting that the next token is a
|
||
// `Close`,
|
||
// then it must be safe to preempt other nodes
|
||
// that may appear in this context as children.
|
||
$ntfirst(..) => true,
|
||
$(
|
||
$ntnext(..) => true,
|
||
)*
|
||
|
||
// If we're done,
|
||
// we want to be able to yield a dead state so
|
||
// that we can transition away from this parser.
|
||
RecoverEleIgnoreClosed_(..)
|
||
| Closed_(..) => false,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl std::fmt::Display for $nt {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||
use crate::{
|
||
fmt::{DisplayWrapper, TtQuote},
|
||
xir::fmt::{TtOpenXmlEle, TtCloseXmlEle},
|
||
};
|
||
|
||
match self {
|
||
Self::Expecting_
|
||
| Self::NonPreemptableExpecting_ => write!(
|
||
f,
|
||
"expecting opening tag {}",
|
||
TtOpenXmlEle::wrap(Self::matcher()),
|
||
),
|
||
Self::RecoverEleIgnore_(name, _, _)
|
||
| Self::RecoverEleIgnoreClosed_(name, _) => write!(
|
||
f,
|
||
"attempting to recover by ignoring element \
|
||
with unexpected name {given} \
|
||
(expected {expected})",
|
||
given = TtQuote::wrap(name),
|
||
expected = TtQuote::wrap(Self::matcher()),
|
||
),
|
||
Self::CloseRecoverIgnore_((qname, _, depth), _) => write!(
|
||
f,
|
||
"attempting to recover by ignoring input \
|
||
until the expected end tag {expected} \
|
||
at depth {depth}",
|
||
expected = TtCloseXmlEle::wrap(qname),
|
||
),
|
||
|
||
Self::Attrs_(_, sa) => std::fmt::Display::fmt(sa, f),
|
||
Self::ExpectClose_((qname, _, depth)) => write!(
|
||
f,
|
||
"expecting closing element {} at depth {depth}",
|
||
TtCloseXmlEle::wrap(qname)
|
||
),
|
||
Self::Closed_(Some(qname), _) => write!(
|
||
f,
|
||
"done parsing element {}",
|
||
TtQuote::wrap(qname),
|
||
),
|
||
// Should only happen on an unexpected `Close`.
|
||
Self::Closed_(None, _) => write!(
|
||
f,
|
||
"skipped parsing element {}",
|
||
TtQuote::wrap(Self::matcher()),
|
||
),
|
||
$(
|
||
// TODO: A better description.
|
||
Self::$ntref(_) => {
|
||
write!(
|
||
f,
|
||
"preparing to transition to \
|
||
parser for next child element(s)"
|
||
)
|
||
},
|
||
)*
|
||
}
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, PartialEq)]
|
||
$vis enum [<$nt Error_>] {
|
||
/// An element was expected,
|
||
/// but the name of the element was unexpected.
|
||
UnexpectedEle(crate::xir::QName, crate::span::Span),
|
||
|
||
/// Unexpected input while expecting an end tag for this
|
||
/// element.
|
||
///
|
||
/// The span corresponds to the opening tag.
|
||
CloseExpected(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::XirfToken<crate::xir::flat::RefinedText>,
|
||
),
|
||
|
||
Attrs(crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>),
|
||
}
|
||
|
||
impl From<crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>>
|
||
for [<$nt Error_>]
|
||
{
|
||
fn from(
|
||
e: crate::xir::parse::AttrParseError<[<$nt AttrsState_>]>
|
||
) -> Self {
|
||
[<$nt Error_>]::Attrs(e)
|
||
}
|
||
}
|
||
|
||
impl std::error::Error for [<$nt Error_>] {
|
||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||
// TODO
|
||
None
|
||
}
|
||
}
|
||
|
||
impl std::fmt::Display for [<$nt Error_>] {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||
use crate::{
|
||
fmt::{DisplayWrapper, TtQuote},
|
||
xir::fmt::{TtOpenXmlEle, TtCloseXmlEle},
|
||
};
|
||
|
||
match self {
|
||
Self::UnexpectedEle(name, _) => write!(
|
||
f,
|
||
"unexpected {unexpected} (expecting {expected})",
|
||
unexpected = TtOpenXmlEle::wrap(name),
|
||
expected = TtOpenXmlEle::wrap($nt::matcher()),
|
||
),
|
||
|
||
Self::CloseExpected(qname, _, tok) => write!(
|
||
f,
|
||
"expected {}, but found {}",
|
||
TtCloseXmlEle::wrap(qname),
|
||
TtQuote::wrap(tok)
|
||
),
|
||
|
||
Self::Attrs(e) => std::fmt::Display::fmt(e, f),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl crate::diagnose::Diagnostic for [<$nt Error_>] {
|
||
fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
|
||
use crate::{
|
||
diagnose::Annotate,
|
||
fmt::{DisplayWrapper, TtQuote},
|
||
parse::Token,
|
||
xir::{
|
||
EleSpan,
|
||
fmt::TtCloseXmlEle,
|
||
},
|
||
};
|
||
|
||
match self {
|
||
Self::UnexpectedEle(_, ospan) => ospan.error(
|
||
format!(
|
||
"expected {ele_name} here",
|
||
ele_name = TtQuote::wrap($nt::matcher())
|
||
)
|
||
).into(),
|
||
|
||
Self::CloseExpected(qname, ospan, tok) => vec![
|
||
ospan.span().note("element starts here"),
|
||
tok.span().error(format!(
|
||
"expected {}",
|
||
TtCloseXmlEle::wrap(qname),
|
||
)),
|
||
],
|
||
|
||
Self::Attrs(e) => e.describe(),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl crate::parse::ParseState for $nt {
|
||
type Token = crate::xir::flat::XirfToken<
|
||
crate::xir::flat::RefinedText
|
||
>;
|
||
type Object = $objty;
|
||
type Error = [<$nt Error_>];
|
||
type Context = crate::xir::parse::StateStackContext<Self::Super>;
|
||
type Super = $super;
|
||
|
||
fn parse_token(
|
||
self,
|
||
tok: Self::Token,
|
||
#[allow(unused_variables)] // used only if child NTs
|
||
stack: &mut Self::Context,
|
||
) -> crate::parse::TransitionResult<Self::Super> {
|
||
use crate::{
|
||
parse::{EmptyContext, Transition, Transitionable},
|
||
xir::{
|
||
EleSpan,
|
||
flat::XirfToken,
|
||
parse::parse_attrs,
|
||
},
|
||
};
|
||
|
||
use $nt::{
|
||
Attrs_, Expecting_, NonPreemptableExpecting_,
|
||
RecoverEleIgnore_, CloseRecoverIgnore_,
|
||
RecoverEleIgnoreClosed_, ExpectClose_, Closed_
|
||
};
|
||
|
||
match (self, tok) {
|
||
(
|
||
Expecting_ | NonPreemptableExpecting_,
|
||
XirfToken::Open(qname, span, depth)
|
||
) if $nt::matches(qname) => {
|
||
let transition = Transition(Attrs_(
|
||
(qname, span, depth),
|
||
parse_attrs(qname, span)
|
||
));
|
||
|
||
// Streaming attribute parsing will cause the
|
||
// attribute map to be yielded immediately as
|
||
// the opening object,
|
||
// since we will not be aggregating attrs.
|
||
$(
|
||
// Used only to match on `[attr]`.
|
||
let [<_ $attr_stream_binding>] = ();
|
||
return transition.ok($attrmap);
|
||
)?
|
||
|
||
// If the `[attr]` special form was _not_
|
||
// provided,
|
||
// we'll be aggregating attributes.
|
||
#[allow(unreachable_code)]
|
||
transition.incomplete()
|
||
},
|
||
|
||
(
|
||
Closed_(..),
|
||
XirfToken::Open(qname, span, depth)
|
||
) if Self::matches(qname) => {
|
||
Transition(Attrs_(
|
||
(qname, span, depth),
|
||
parse_attrs(qname, span)
|
||
)).incomplete()
|
||
},
|
||
|
||
(
|
||
Expecting_ | NonPreemptableExpecting_,
|
||
XirfToken::Open(qname, span, depth)
|
||
) => {
|
||
Transition(RecoverEleIgnore_(qname, span, depth)).err(
|
||
[<$nt Error_>]::UnexpectedEle(qname, span.name_span())
|
||
)
|
||
},
|
||
|
||
(
|
||
RecoverEleIgnore_(qname, _, depth_open),
|
||
XirfToken::Close(_, span, depth_close)
|
||
) if depth_open == depth_close => {
|
||
Transition(
|
||
RecoverEleIgnoreClosed_(qname, span)
|
||
).incomplete()
|
||
},
|
||
|
||
// Streaming attribute matching takes precedence
|
||
// over aggregate.
|
||
// This is primarily me being lazy,
|
||
// because it's not worth a robust syntax for
|
||
// something that's rarely used
|
||
// (macro-wise, I mean;
|
||
// it's heavily utilized as a percentage of
|
||
// source file parsed since short-hand
|
||
// template applications are heavily used).
|
||
$(
|
||
(
|
||
st @ Attrs_(..),
|
||
XirfToken::Attr($attr_stream_binding),
|
||
) => Transition(st).ok($attr_stream_map),
|
||
|
||
// Override the aggregate attribute parser
|
||
// delegation by forcing the below match to
|
||
// become unreachable
|
||
// (xref anchor <<SATTR>>).
|
||
// Since we have already emitted the `$attrmap`
|
||
// object on `Open`,
|
||
// this yields an incomplete parse.
|
||
(Attrs_(meta, _), tok) => {
|
||
ele_parse!(@!ntref_delegate
|
||
stack,
|
||
$ntfirst(meta),
|
||
$ntfirst_st,
|
||
Transition($ntfirst_st::default())
|
||
.incomplete()
|
||
.with_lookahead(tok),
|
||
Transition($ntfirst(meta))
|
||
.incomplete()
|
||
.with_lookahead(tok)
|
||
)
|
||
}
|
||
)?
|
||
|
||
// This becomes unreachable when the `[attr]` special
|
||
// form is provided,
|
||
// which overrides this match directly above
|
||
// (xref <<SATTR>>).
|
||
#[allow(unreachable_patterns)]
|
||
(Attrs_(meta @ (qname, span, depth), sa), tok) => {
|
||
sa.delegate_until_obj::<Self, _>(
|
||
tok,
|
||
EmptyContext,
|
||
|sa| Transition(Attrs_(meta, sa)),
|
||
// If we enter a dead state then we have
|
||
// failed to produce an attribute object,
|
||
// in which case we'll recover by
|
||
// ignoring the entire element.
|
||
|| Transition(RecoverEleIgnore_(qname, span, depth)),
|
||
|#[allow(unused_variables)] sa, attrs| {
|
||
let obj = match attrs {
|
||
// Attribute field bindings for `$attrmap`
|
||
[<$nt Attrs_>] {
|
||
$(
|
||
$field,
|
||
)*
|
||
} => {
|
||
// Optional `OpenSpan` binding
|
||
let _ = qname; // avoid unused warning
|
||
$(
|
||
use crate::xir::parse::attr::AttrParseState;
|
||
let $qname_matched = qname;
|
||
let $open_span = sa.element_span();
|
||
)?
|
||
|
||
$attrmap
|
||
},
|
||
};
|
||
|
||
// Lookahead is added by `delegate_until_obj`.
|
||
ele_parse!(@!ntref_delegate
|
||
stack,
|
||
$ntfirst(meta),
|
||
$ntfirst_st,
|
||
Transition(<$ntfirst_st>::default()).ok(obj),
|
||
Transition($ntfirst(meta)).ok(obj)
|
||
)
|
||
}
|
||
)
|
||
},
|
||
|
||
$(
|
||
($ntprev(meta), tok) => {
|
||
ele_parse!(@!ntref_delegate
|
||
stack,
|
||
$ntnext(meta),
|
||
$ntnext_st,
|
||
// Since we're just transitioning,
|
||
// this _must_ accept the token of input,
|
||
// otherwise error.
|
||
Transition(<$ntnext_st>::default())
|
||
.incomplete()
|
||
.with_lookahead(tok),
|
||
Transition($ntnext(meta)).incomplete().with_lookahead(tok)
|
||
)
|
||
},
|
||
)*
|
||
|
||
// XIRF ensures proper nesting,
|
||
// so we do not need to check the element name.
|
||
(
|
||
ExpectClose_((qname, _, depth))
|
||
| CloseRecoverIgnore_((qname, _, depth), _),
|
||
XirfToken::Close(_, span, tok_depth)
|
||
) if tok_depth == depth => {
|
||
$(
|
||
let $close_span = span;
|
||
)?
|
||
$closemap.transition(Closed_(Some(qname), span.tag_span()))
|
||
},
|
||
|
||
(ExpectClose_(meta @ (qname, otspan, _)), unexpected_tok) => {
|
||
use crate::parse::Token;
|
||
Transition(
|
||
CloseRecoverIgnore_(meta, unexpected_tok.span())
|
||
).err([<$nt Error_>]::CloseExpected(qname, otspan, unexpected_tok))
|
||
}
|
||
|
||
// We're still in recovery,
|
||
// so this token gets thrown out.
|
||
(st @ (RecoverEleIgnore_(..) | CloseRecoverIgnore_(..)), _) => {
|
||
Transition(st).incomplete()
|
||
},
|
||
|
||
// TODO: Use `is_accepting` guard if we do not utilize
|
||
// exhaustiveness check.
|
||
(
|
||
st @ (
|
||
Expecting_
|
||
| Closed_(..)
|
||
| RecoverEleIgnoreClosed_(..)
|
||
),
|
||
tok
|
||
) => {
|
||
Transition(st).dead(tok)
|
||
}
|
||
|
||
todo => todo!("{todo:?}"),
|
||
}
|
||
}
|
||
|
||
fn is_accepting(&self, _: &Self::Context) -> bool {
|
||
matches!(*self, Self::Closed_(..) | Self::RecoverEleIgnoreClosed_(..))
|
||
}
|
||
}
|
||
}
|
||
};
|
||
|
||
(@!ele_dfn_sum <$objty:ty> $vis:vis $super:ident $nt:ident [$($ntref:ident)*]) => {
|
||
paste::paste! {
|
||
#[doc=concat!(
|
||
"Parser expecting one of ",
|
||
$("[`", stringify!($ntref), "`], ",)*
|
||
"."
|
||
)]
|
||
#[derive(Debug, PartialEq, Eq, Default)]
|
||
$vis enum $nt {
|
||
#[default]
|
||
Expecting_,
|
||
#[allow(dead_code)] // used by superstate node preemption
|
||
/// Non-preemptable [`Self::Expecting_`].
|
||
NonPreemptableExpecting_,
|
||
/// Recovery state ignoring all remaining tokens for this
|
||
/// element.
|
||
RecoverEleIgnore_(
|
||
crate::xir::QName,
|
||
crate::xir::OpenSpan,
|
||
crate::xir::flat::Depth,
|
||
),
|
||
}
|
||
|
||
impl $nt {
    /// Whether `qname` is matched by any of the constituent NTs of
    ///   this Sum NT.
    ///
    /// The expansion is a chain of short-circuiting `||`,
    ///   evaluated in the order that the NTs were listed,
    ///     so in the worst case every constituent is consulted.
    /// Listing the most frequently encountered NT first may therefore
    ///   help,
    ///     though the compiler is free to optimize the chain;
    ///       consult a profiler and disassembly before reordering.
    #[allow(dead_code)] // used by superstate
    fn matches(qname: crate::xir::QName) -> bool {
        // A plain token expansion is used rather than an array or
        //   trait so that each `$ntref` need only provide a
        //   compatible `matches` associated function,
        //     regardless of its own type.
        false $(|| $ntref::matches(qname))*
    }

    /// Number of
    ///   [`NodeMatcher`](crate::xir::parse::NodeMatcher)s
    ///   considered by this parser.
    ///
    /// This is the sum of `matches_n` over each constituent NT.
    const fn matches_n() -> usize {
        0 $(+ $ntref::matches_n())*
    }

    /// Format constituent NTs for display.
    ///
    /// `n` and `i` are forwarded unchanged to the `fmt_matches` of
    ///   each constituent NT,
    ///     in declaration order;
    ///       `i` is the running 0-indexed offset shared between them.
    /// Formatting stops at the first constituent that reports an
    ///   error.
    ///
    /// See also [`Self::fmt_matches_top`] to initialize the
    ///   formatting process with the correct values.
    fn fmt_matches(
        n: usize,
        i: &mut usize,
        f: &mut std::fmt::Formatter
    ) -> std::fmt::Result {
        $(
            $ntref::fmt_matches(n, i, f)?;
        )*

        Ok(())
    }

    /// Begin formatting using [`Self::fmt_matches`].
    ///
    /// The offset starts at zero and `n` is seeded with one less than
    ///   [`Self::matches_n`]
    ///     (saturating at zero).
    fn fmt_matches_top(f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let last = Self::matches_n().saturating_sub(1);
        let mut offset = 0;

        Self::fmt_matches(last, &mut offset, f)
    }

    /// Whether the parser is in a state that can tolerate
    ///   superstate node preemption.
    ///
    /// For more information,
    ///   see the superstate
    #[doc=concat!(
        " [`", stringify!($super), "::can_preempt_node`]."
    )]
    fn can_preempt_node(&self) -> bool {
        match self {
            // Nothing has been processed before the opening tag,
            //   so preemption is safe.
            Self::Expecting_ => true,

            // `NonPreemptableExpecting_` forbids preemption by
            //   definition,
            //     and recovery must see every token so that the
            //     ignored element's children are not parsed.
            Self::NonPreemptableExpecting_
            | Self::RecoverEleIgnore_(..) => false,
        }
    }
}
|
||
|
||
impl std::fmt::Display for $nt {
    /// Describe the current state of the Sum NT parser.
    ///
    /// Both expecting states render as `expecting ` followed by the
    ///   list produced by `fmt_matches_top`.
    /// The recovery state names the unexpected element and then lists
    ///   what was expected in parentheses.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use crate::{
            fmt::{DisplayWrapper, TtQuote},
        };

        match self {
            Self::Expecting_
            | Self::NonPreemptableExpecting_ => {
                write!(f, "expecting ")?;
                Self::fmt_matches_top(f)
            },

            Self::RecoverEleIgnore_(name, _, _) => {
                // The trailing space after "expected" is required:
                //   the match list is written immediately afterward
                //   and does not begin with a separator of its own
                //     (cf. "expecting " above).
                write!(
                    f,
                    "attempting to recover by ignoring element \
                     with unexpected name {given} \
                     (expected ",
                    given = TtQuote::wrap(name),
                )?;

                Self::fmt_matches_top(f)?;
                f.write_str(")")
            }
        }
    }
}
|
||
|
||
/// Errors produced by the Sum NT parser.
#[derive(Debug, PartialEq)]
$vis enum [<$nt Error_>] {
    /// An element was opened whose name is not matched by any
    ///   constituent NT;
    ///     holds the offending name and the span reported for it.
    UnexpectedEle(crate::xir::QName, crate::span::Span),
}
|
||
|
||
impl std::error::Error for [<$nt Error_>] {
    /// No underlying cause is reported at present.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // TODO
        None
    }
}
|
||
|
||
impl std::fmt::Display for [<$nt Error_>] {
    /// Render a short, single-line summary of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use crate::{
            fmt::DisplayWrapper,
            xir::fmt::TtOpenXmlEle,
        };

        // There is only one variant,
        //   so the pattern is irrefutable;
        //     adding a variant will fail to compile here.
        let Self::UnexpectedEle(qname, _) = self;

        write!(f, "unexpected {}", TtOpenXmlEle::wrap(qname))
    }
}
|
||
|
||
impl crate::diagnose::Diagnostic for [<$nt Error_>] {
    /// Annotate the span of the unexpected element and list the
    ///   expected elements as help text.
    fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
        use crate::{
            diagnose::Annotate,
            fmt::{DisplayWrapper, TtQuote, DisplayFn},
        };

        // Single variant,
        //   so this destructuring is irrefutable.
        let Self::UnexpectedEle(qname, span) = self;

        let label = format!(
            "element {name} cannot appear here",
            name = TtQuote::wrap(qname),
        );

        // The list of expected values can get rather long,
        //   so it goes in the help footnote rather than the span
        //   label.
        // Perhaps a future diagnostic renderer could decide that
        //   automatically based on terminal width.
        let help = format!(
            "expecting {}",
            DisplayFn($nt::fmt_matches_top)
        );

        span.error(label).with_help(help).into()
    }
}
|
||
|
||
impl crate::parse::ParseState for $nt {
    type Token = crate::xir::flat::XirfToken<
        crate::xir::flat::RefinedText
    >;
    type Object = $objty;
    type Error = [<$nt Error_>];
    type Context = crate::xir::parse::StateStackContext<Self::Super>;
    type Super = $super;

    /// Dispatch an opening tag to the first constituent NT that
    ///   matches it,
    ///     or recover from (or yield on) anything else.
    ///
    /// Arms are tried in order:
    ///
    ///   1. An `Open` matched by a constituent NT is delegated to
    ///        that NT with the token as lookahead.
    ///   2. An unmatched `Open` while non-preemptable is an error and
    ///        begins recovery.
    ///   3. Any other token in an expecting state is a dead state.
    ///   4. During recovery all tokens are ignored until the `Close`
    ///        at the depth of the ignored element.
    fn parse_token(
        self,
        tok: Self::Token,
        stack: &mut Self::Context,
    ) -> crate::parse::TransitionResult<Self::Super> {
        use crate::{
            parse::Transition,
            xir::{
                flat::XirfToken,
                EleSpan,
            },
        };

        use $nt::{
            Expecting_, NonPreemptableExpecting_, RecoverEleIgnore_,
        };

        match (self, tok) {
            // One arm per constituent NT.
            // This handles _both_ expecting states,
            //   so no separate `NonPreemptableExpecting_` arm with
            //   the same guard is needed
            //     (it could never be reached).
            $(
                (
                    st @ (Expecting_ | NonPreemptableExpecting_),
                    XirfToken::Open(qname, span, depth)
                ) if $ntref::matches(qname) => {
                    ele_parse!(@!ntref_delegate
                        stack,
                        Expecting_,
                        $ntref,
                        Transition(
                            // Propagate non-preemption status,
                            //   otherwise we'll provide a
                            //   lookback of the original token
                            //   and end up recursing until we
                            //   hit the `stack` limit.
                            match st {
                                NonPreemptableExpecting_ => {
                                    $ntref::NonPreemptableExpecting_
                                }
                                _ => {
                                    $ntref::default()
                                }
                            }
                        ).incomplete().with_lookahead(
                            XirfToken::Open(qname, span, depth)
                        ),
                        unreachable!("TODO: remove me (ntref_delegate done)")
                    )
                },
            )*

            // If we're non-preemptable,
            //   then we're expected to be able to process this
            //   token or fail trying.
            (
                NonPreemptableExpecting_,
                XirfToken::Open(qname, span, depth)
            ) => {
                Transition(RecoverEleIgnore_(qname, span, depth)).err(
                    // Use name span rather than full `OpenSpan`
                    //   since it's specifically the name that
                    //   was unexpected,
                    //     not the fact that it's an element.
                    [<$nt Error_>]::UnexpectedEle(qname, span.name_span())
                )
            },

            // An unexpected token when repeating ends
            //   repetition and should not result in an error.
            (
                Expecting_ | NonPreemptableExpecting_,
                tok
            ) => Transition(Expecting_).dead(tok),

            // XIRF ensures that the closing tag matches the opening,
            //   so we need only check depth.
            (
                RecoverEleIgnore_(_, _, depth_open),
                XirfToken::Close(_, _, depth_close)
            ) if depth_open == depth_close => {
                Transition(Expecting_).incomplete()
            },

            // Everything else encountered during recovery is
            //   ignored.
            (st @ RecoverEleIgnore_(..), _) => {
                Transition(st).incomplete()
            },
        }
    }

    /// Only the preemptable expecting state is accepting.
    fn is_accepting(&self, _: &Self::Context) -> bool {
        matches!(self, Self::Expecting_)
    }
}
|
||
}
|
||
};
|
||
|
||
// Generate superstate sum type.
|
||
//
|
||
// This is really annoying because we cannot read the output of another
|
||
// macro,
|
||
// and so we have to do our best to re-parse the body of the
|
||
// original `ele_parse!` invocation without duplicating too much
|
||
// logic,
|
||
// and we have to do so in a way that we can aggregate all of
|
||
// those data.
|
||
(@!super_sum <$objty:ty> $vis:vis $super:ident
|
||
$(
|
||
[super] {
|
||
// Non-whitespace text nodes can be mapped into elements
|
||
// with the given QName as a preprocessing step,
|
||
// allowing them to reuse the existing element NT system.
|
||
$([text]($text:ident, $text_span:ident) => $text_map:expr,)?
|
||
|
||
// Optional _single_ NT to preempt arbitrary elements.
|
||
// Sum NTs can be used to preempt multiple elements.
|
||
$($pre_nt:ident)?
|
||
}
|
||
)?
|
||
$(
|
||
// NT definition is always followed by `:=`.
|
||
$nt:ident :=
|
||
// Identifier if an element NT.
|
||
$($_i:ident)?
|
||
// Parenthesis for a sum NT,
|
||
// or possibly the span match for an element NT.
|
||
// So: `:= QN_IDENT(span)` or `:= (A | B | C)`.
|
||
$( ($($_p:tt)*) )?
|
||
// Braces for an element NT body.
|
||
$( {$($_b:tt)*} )?
|
||
// Element and sum NT both conclude with a semicolon,
|
||
// which we need to disambiguate the next `$nt`.
|
||
;
|
||
)*
|
||
) => {
|
||
paste::paste! {
|
||
/// Superstate representing the union of all related parsers.
|
||
///
|
||
/// This [`ParseState`] allows sub-parsers to independently
|
||
/// the states associated with their own subgraph,
|
||
/// and then yield a state transition directly to a state of
|
||
/// another parser.
|
||
/// This is conceptually like CPS (continuation passing style),
|
||
/// where this [`ParseState`] acts as a trampoline.
|
||
///
|
||
/// This [`ParseState`] is required for use with [`Parser`];
|
||
/// see [`ClosedParseState`] for more information.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
$vis enum $super {
|
||
$(
|
||
$nt($nt),
|
||
)*
|
||
}
|
||
|
||
// Default parser is the first NT,
|
||
// and is non-preemptable to force error handling if the root
|
||
// node is unexpected.
|
||
// Note that this also prevents preemption at the root,
|
||
// which is necessary for now anyway since we need to be able
|
||
// to statically resolve imports without template expansion in
|
||
// NIR
|
||
// (otherwise we have a chicken-and-egg problem).
|
||
impl Default for $super {
|
||
fn default() -> Self {
|
||
ele_parse!(@!ntfirst_init $super, $($nt)*)
|
||
}
|
||
}
|
||
|
||
// Each NT state lifts into the superstate variant of the same name.
$(
    impl From<$nt> for $super {
        fn from(st: $nt) -> Self {
            Self::$nt(st)
        }
    }
)*
|
||
|
||
impl std::fmt::Display for $super {
    /// Defer to the `Display` impl of the active NT state.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            $(
                Self::$nt(inner) => std::fmt::Display::fmt(inner, f),
            )*
        }
    }
}
|
||
|
||
/// Superstate error object representing the union of all
|
||
/// related parsers' errors.
|
||
#[derive(Debug, PartialEq)]
|
||
$vis enum [<$super Error_>] {
|
||
$(
|
||
$nt([<$nt Error_>]),
|
||
)*
|
||
}
|
||
|
||
// Each NT error lifts into the superstate error variant of the same
//   name.
$(
    impl From<[<$nt Error_>]> for [<$super Error_>] {
        fn from(e: [<$nt Error_>]) -> Self {
            Self::$nt(e)
        }
    }
)*
|
||
|
||
impl std::error::Error for [<$super Error_>] {
    /// No underlying cause is reported at present.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // TODO
        None
    }
}
|
||
|
||
impl std::fmt::Display for [<$super Error_>] {
    /// Defer to the `Display` impl of the wrapped NT error.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            $(
                Self::$nt(inner) => std::fmt::Display::fmt(inner, f),
            )*
        }
    }
}
|
||
|
||
impl crate::diagnose::Diagnostic for [<$super Error_>] {
    /// Defer to the diagnostic description of the wrapped NT error.
    fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
        match self {
            $(
                Self::$nt(inner) => inner.describe(),
            )*
        }
    }
}
|
||
|
||
impl crate::parse::ParseState for $super {
    type Token = crate::xir::flat::XirfToken<
        crate::xir::flat::RefinedText
    >;
    type Object = $objty;
    type Error = [<$super Error_>];
    type Context = crate::xir::parse::StateStackContext<Self>;

    /// Handle superstate-level preemption and otherwise delegate the
    ///   token to the active child parser.
    ///
    /// Arms are tried in order:
    ///   optional `[text]` preemption,
    ///   optional preemption NT,
    ///   whitespace and comments
    ///     (which are skipped),
    ///   and finally delegation to the active NT.
    fn parse_token(
        self,
        tok: Self::Token,
        stack: &mut Self::Context,
    ) -> crate::parse::TransitionResult<Self> {
        use crate::{
            parse::Transition,
            xir::flat::{XirfToken, RefinedText},
        };

        // Used only by _some_ expansions.
        #[allow(unused_imports)]
        use crate::xir::flat::Text;

        match (self, tok) {
            // [super] {
            $(
                // [text] preemption;
                //   see `Self::can_preempt_node`.
                $(
                    (
                        st,
                        XirfToken::Text(
                            RefinedText::Unrefined(
                                Text($text, $text_span)
                            ),
                            _,
                        )
                    ) if st.can_preempt_node() => {
                        Transition(st).ok($text_map)
                    },
                )?

                // Preemption NT
                $(
                    (
                        st,
                        XirfToken::Open(qname, ospan, depth),
                    ) if st.can_preempt_node() && $pre_nt::matches(qname) => {
                        stack.transfer_with_ret(
                            Transition(st),
                            Transition(
                                // Prevent recursing on this token.
                                $pre_nt::NonPreemptableExpecting_
                            )
                                .incomplete()
                                .with_lookahead(
                                    XirfToken::Open(qname, ospan, depth)
                                ),
                        )
                    },
                )?
            )?
            // }

            // Depth check is unnecessary since _all_ xir::parse
            //   parsers
            //     (at least at the time of writing)
            //   ignore whitespace and comments,
            //     so may as well return early.
            // TODO: I'm ignoring _all_ text for now to
            //   proceed with development; fix.
            (
                st,
                XirfToken::Text(RefinedText::Whitespace(..), _)
                | XirfToken::Comment(..)
            ) => Transition(st).incomplete(),

            $(
                // Pass token directly to child until it reports
                //   a dead state,
                //     after which we return to the `ParseState`
                //     atop of the stack.
                (Self::$nt(child), tok) => child.delegate_child(
                    tok,
                    stack,
                    |dead_st, dead_tok, ctx| ctx.ret_or_dead(dead_tok, dead_st),
                ),
            )*
        }
    }

    /// Whether every state on the stack and the active state are all
    ///   accepting.
    fn is_accepting(&self, stack: &Self::Context) -> bool {
        // This is short-circuiting,
        //   starting at the _bottom_ of the stack and
        //   moving upward.
        // The idea is that,
        //   if we're still in the middle of parsing,
        //   then it's almost certain that the [`ParseState`] on
        //   the bottom of the stack will not be in an
        //   accepting state,
        //     and so we can stop checking early.
        // In most cases,
        //   if we haven't hit EOF early,
        //   the stack should be either empty or consist of only
        //   the root state.
        let stack_accepting =
            stack.all(|st| st.is_inner_accepting(stack));

        // Only after having considered the stack do we consider the
        //   active `ParseState`.
        stack_accepting && self.is_inner_accepting(stack)
    }
}
|
||
|
||
impl $super {
    /// Whether the inner (active child) [`ParseState`] is in an
    ///   accepting state.
    fn is_inner_accepting(
        &self,
        ctx: &<Self as crate::parse::ParseState>::Context
    ) -> bool {
        use crate::parse::ParseState;

        match self {
            $(
                Self::$nt(inner) => inner.is_accepting(ctx),
            )*
        }
    }

    /// Whether the inner parser is in a state that can tolerate
    ///   superstate node preemption.
    ///
    /// Node preemption lets the superstate parse a token itself
    ///   instead of handing it to the inner parser,
    ///     but only if the inner parser reports that doing so would
    ///     not cause semantic issues.
    ///
    /// For example,
    ///   preempting a text node while the inner parser is still
    ///   parsing attributes would emit the text's object before the
    ///   inner parser had concluded attribute parsing and emitted the
    ///   opening object for its node,
    ///     yielding an incorrect `Text, Open` rather than the correct
    ///     `Open, Text` and effectively unparenting the text.
    /// Similarly,
    ///   preempting while the inner parser is recovering from an
    ///   error by ignoring all child tokens would emit an object in
    ///   an incorrect context.
    #[allow(dead_code)] // TODO: Remove when using for tpl apply
    fn can_preempt_node(&self) -> bool {
        match self {
            $(
                Self::$nt(inner) => inner.can_preempt_node(),
            )*
        }
    }
}
|
||
}
|
||
};
|
||
|
||
// Initial superstate:
//   the first NT in its non-preemptable expecting state.
// The remaining NTs are accepted only so that callers can pass the
//   whole list.
(@!ntfirst_init $super:ident, $ntfirst:ident $($_rest:ident)*) => {
    $super::$ntfirst($ntfirst::NonPreemptableExpecting_)
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod test;
|