tamer: xir::parse::ele: Superstate not to accept early EOF

This was accepting an early EOF when the active child `ParseState` was in an
accepting state, because it was not ensuring that anything on the stack was
also accepting.

Ideally, there should be nothing on the stack, and hopefully in the future
that's what happens.  But with how things are today, it's important that, if
anything is on the stack, it is accepting.

Since `is_accepting` on the superstate is only called during finalization,
and because the check terminates early, and because the stack practically
speaking will only have a couple things on it max (unless we're in tail
position in a deeply nested tree, without TCO [yet]), this shouldn't be an
expensive check.

Implementing this did require that we expose `Context` to `is_accepting`,
which I had hoped to avoid having to do, but here we are.

DEV-7145
main
Mike Gerwitz 2022-08-11 13:49:11 -04:00
parent a4419413fb
commit ed8a2ce28a
13 changed files with 119 additions and 28 deletions

View File

@ -166,7 +166,7 @@ impl ParseState for AirAggregate {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Empty
}
}

View File

@ -234,7 +234,7 @@ impl ParseState for XmloToAir {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
matches!(*self, Self::Done(_))
}
}

View File

@ -224,7 +224,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
(Symtable(_, ss), Xirf::Close(Some(QN_SYMTABLE), ..))
if ss.is_accepting() =>
if ss.is_accepting(ctx) =>
{
Transition(SymDepsExpected).incomplete()
}
@ -243,7 +243,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
(SymDeps(_, sd), Xirf::Close(None | Some(QN_SYM_DEPS), ..))
if sd.is_accepting() =>
if sd.is_accepting(ctx) =>
{
Transition(FragmentsExpected).incomplete()
}
@ -263,7 +263,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
(
Fragments(_, sf),
Xirf::Close(None | Some(QN_FRAGMENTS), span, _),
) if sf.is_accepting() => {
) if sf.is_accepting(ctx) => {
Transition(Eoh).ok(XmloToken::Eoh(span.tag_span()))
}
@ -287,7 +287,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Eoh || *self == Self::Done
}
}
@ -436,7 +436,7 @@ impl ParseState for SymtableState {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}
@ -687,7 +687,7 @@ impl ParseState for SymDepsState {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}
@ -798,7 +798,7 @@ impl ParseState for FragmentsState {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Ready
}
}

View File

@ -198,7 +198,7 @@ pub mod test {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Done
}
}
@ -579,7 +579,7 @@ pub mod test {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}
@ -603,7 +603,7 @@ pub mod test {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}
@ -627,7 +627,7 @@ pub mod test {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}

View File

@ -245,7 +245,7 @@ impl<O: Object, E: Diagnostic + PartialEq> ParseState for ParsedObject<O, E> {
unreachable!("ParsedObject must be used for type information only")
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
unreachable!("ParsedObject must be used for type information only")
}
}
@ -282,7 +282,7 @@ mod test {
Transition(self).ok(tok)
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}

View File

@ -195,7 +195,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
if let Some(Lookahead(lookahead)) = &self.lookahead {
Err(ParseError::Lookahead(lookahead.span(), st.to_string()))
} else if st.is_accepting() {
} else if st.is_accepting(&self.ctx) {
Ok(())
} else {
let endpoints = self.last_span.endpoints();
@ -583,7 +583,7 @@ pub mod test {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
true
}
}

View File

@ -232,7 +232,7 @@ where
/// or the entire list of attributes.
/// It is acceptable to attempt to parse just one of those attributes,
/// or it is acceptable to parse all the way until the end.
fn is_accepting(&self) -> bool;
fn is_accepting(&self, ctx: &Self::Context) -> bool;
/// Delegate parsing from a compatible, stitched [`ParseState`] `SP`.
///

View File

@ -77,7 +77,7 @@ impl ParseState for AttrParseState {
}
#[inline]
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Empty
}
}

View File

@ -421,7 +421,7 @@ where
/// Intuitively,
/// this means that the parser must have encountered the closing tag
/// for the root element.
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
// TODO: It'd be nice if we could also return additional context to
// aid the user in diagnosing the problem,
// e.g. what element(s) still need closing.

View File

@ -487,7 +487,7 @@ macro_rules! attr_parse {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
// We must always be consumed via the dead state.
false
}

View File

@ -192,6 +192,12 @@ impl<S: ClosedParseState> StateStack<S> {
None => Transition(deadst).dead(lookahead),
}
}
/// Test every [`ParseState`] on the stack against the predicate `f`.
pub fn all(&self, f: impl Fn(&S) -> bool) -> bool {
let Self(stack) = self;
stack[..].iter().all(f)
}
}
/// Match some type of node.
@ -868,7 +874,7 @@ macro_rules! ele_parse {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
matches!(*self, Self::Closed_(..) | Self::RecoverEleIgnoreClosed_(..))
}
}
@ -1096,7 +1102,7 @@ macro_rules! ele_parse {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
match self {
Self::RecoverEleIgnoreClosed_(..) | Self::Done_ => true,
_ => false,
@ -1236,7 +1242,10 @@ macro_rules! ele_parse {
tok: Self::Token,
stack: &mut Self::Context,
) -> crate::parse::TransitionResult<Self> {
use crate::parse::Transition;
use crate::{
parse::Transition,
xir::flat::{XirfToken, RefinedText},
};
match (self, tok) {
// Depth check is unnecessary since _all_ xir::parse
@ -1271,10 +1280,40 @@ macro_rules! ele_parse {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, stack: &Self::Context) -> bool {
// This is short-circuiting,
// starting at the _bottom_ of the stack and
// moving upward.
// The idea is that,
// is we're still in the middle of parsing,
// then it's almost certain that the [`ParseState`] on
// the bottom of the stack will not be in an
// accepting state,
// and so we can stop checking early.
// In most cases,
// if we haven't hit EOF early,
// the stack should be either empty or consist of only
// the root state.
//
// After having considered the stack,
// we can then consider the active `ParseState`.
stack.all(|st| st.is_inner_accepting(stack))
&& self.is_inner_accepting(stack)
}
}
impl $super {
/// Whether the inner (active child) [`ParseState`] is in an
/// accepting state.
fn is_inner_accepting(
&self,
ctx: &<Self as crate::parse::ParseState>::Context
) -> bool {
use crate::parse::ParseState;
match self {
$(
Self::$nt(si) => si.is_accepting(),
Self::$nt(st) => st.is_accepting(ctx),
)*
}
}

View File

@ -1757,6 +1757,58 @@ fn mixed_content_text_nodes_with_non_mixed_content_child() {
);
}
// If there are any parsers that still have work to do
// (any on the stack),
// we cannot consider ourselves to be done parsing.
#[test]
fn superstate_not_accepting_until_root_close() {
const QN_ROOT: QName = QN_PACKAGE;
const QN_A: QName = QN_CLASSIFY;
ele_parse! {
enum Sut;
type Object = ();
Root := QN_ROOT {
@ {} => (),
A,
};
A := QN_A {
@ {} => (),
};
}
let toks = vec![
XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)),
XirfToken::Open(QN_A, OpenSpan(S2, N), Depth(1)),
XirfToken::Close(None, CloseSpan::empty(S3), Depth(1)),
// A is in an accepting state here,
// but we haven't yet closed Root and so Sut should not allow us
// to finish parsing at this point.
];
let mut sut = Sut::parse(toks.into_iter());
use Parsed::*;
assert_eq!(sut.next(), Some(Ok(Incomplete))); // [Root] Open Root
assert_eq!(sut.next(), Some(Ok(Object(())))); // [Root@] Open A (>LA)
assert_eq!(sut.next(), Some(Ok(Incomplete))); // [A] Open A (<LA)
assert_eq!(sut.next(), Some(Ok(Object(())))); // [A@] Close A (>LA)
assert_eq!(sut.next(), Some(Ok(Incomplete))); // [A] Close A (<LA)
// Since we haven't yet finished parsing the root,
// this should not be an accepting state even though the active child
// is in an accepting state.
let (mut sut, _) = sut
.finalize()
.expect_err("child accepting must not be accepting for superstate");
let err = sut.next().unwrap().unwrap_err();
assert_matches!(err, ParseError::UnexpectedEof(..),);
}
// Ensure that we can actually export the generated identifiers
// (add visibility to them).
// We don't want to always make them public by default because then Rust

View File

@ -597,7 +597,7 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
Transition(BuddingElement(ele)).incomplete()
}
(st, tok) if st.is_accepting() => Transition(st).dead(tok),
(st, tok) if st.is_accepting(ctx) => Transition(st).dead(tok),
(stack, tok) => {
todo!(
@ -611,7 +611,7 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
}
}
fn is_accepting(&self) -> bool {
fn is_accepting(&self, _: &Self::Context) -> bool {
*self == Self::Empty || *self == Self::Done
}
}