tamer: xir::Token::AttrEnd: Remove

More information can be found in the prior commit message, but I'll
summarize here.

This token was introduced to create an LL(0) parser---no tokens of
lookahead.  This allowed the underlying TokenStream to be freely passed to
the next system that needed it.
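
To make that concrete, here is a rough sketch of the delimiter-driven
approach (the types and names below are illustrative only, not the
actual xir API): because AttrEnd explicitly terminates the attribute
list, the collector never needs to peek at the next token, and the
iterator can be handed off untouched once the delimiter is seen.

    // Illustrative stand-in for xir::Token, stripped of spans and interning.
    #[derive(Debug)]
    enum Tok {
        AttrName(&'static str),
        AttrValue(&'static str),
        AttrEnd,
        Open(&'static str),
    }

    // LL(0): every decision is made from the current token alone.
    fn collect_attrs(
        toks: &mut impl Iterator<Item = Tok>,
    ) -> Vec<(&'static str, &'static str)> {
        let mut attrs = Vec::new();
        while let Some(tok) = toks.next() {
            match tok {
                Tok::AttrName(name) => {
                    if let Some(Tok::AttrValue(value)) = toks.next() {
                        attrs.push((name, value));
                    }
                }
                // Explicit delimiter: we are done, no lookahead required.
                Tok::AttrEnd => break,
                other => panic!("unexpected token in attribute context: {other:?}"),
            }
        }
        attrs
    }

Without that delimiter, the only way to know that the attribute list
has ended is to inspect the token that follows it, which is precisely
the single token of lookahead described next.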

Since then, Parser and ParseState were introduced, along with
ParseStatus::Dead, which adds the concept of a single token of
lookahead---an LL(1) grammar.
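
A minimal sketch of that mechanism (again illustrative, building on the
Tok type above rather than the real Parser/ParseState API): the
attribute parser no longer waits for a delimiter; the moment it sees a
token outside of its grammar it reports a dead state without consuming
that token, and the caller replays it itself (one token of lookahead),
just as the element stack below does with Ok(Dead(lookahead)).

    // Still illustrative: a pared-down analogue of ParseStatus.
    enum Status<O> {
        Incomplete, // token consumed, no object ready yet
        Object(O),  // token consumed, object produced
        Dead(Tok),  // token NOT consumed; the caller must replay it
    }

    // Attribute parser that dies on the first token outside its grammar
    // instead of expecting an AttrEnd delimiter.
    struct AttrState(Option<&'static str>);

    impl AttrState {
        fn parse_token(&mut self, tok: Tok) -> Status<(&'static str, &'static str)> {
            match (self.0.take(), tok) {
                (None, Tok::AttrName(name)) => {
                    self.0 = Some(name);
                    Status::Incomplete
                }
                (Some(name), Tok::AttrValue(value)) => Status::Object((name, value)),
                // e.g. Tok::Open: attribute parsing is over; hand the
                // unconsumed token back as lookahead.
                (_, other) => Status::Dead(other),
            }
        }
    }

    // The parent parser replays the dead token itself, which is what
    // removes the need for AttrEnd entirely.
    fn parse_element(toks: &mut impl Iterator<Item = Tok>) {
        let mut attrs = AttrState(None);
        while let Some(tok) = toks.next() {
            match attrs.parse_token(tok) {
                Status::Incomplete => {}
                Status::Object((name, value)) => println!("attr {name}={value}"),
                Status::Dead(lookahead) => {
                    println!("attrs done; element content starts at {lookahead:?}");
                    break;
                }
            }
        }
    }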

I had always suspected that this would happen, given the awkwardness of
AttrEnd; it was just a matter of time before the right abstraction
manifested itself to handle lookahead.

DEV-11339
main
Mike Gerwitz 2021-12-17 10:14:31 -05:00
parent 61f7a12975
commit 0cc0bc9d5a
9 changed files with 26 additions and 131 deletions

View File

@@ -56,7 +56,6 @@ fn parses_package_attrs() {
Token::AttrValue(raw::L_TRUE, DS),
Token::AttrName(("preproc", "elig-class-yields").unwrap_into(), DS),
Token::AttrValue(elig, DS),
Token::AttrEnd(DS),
]
.into_iter(),
);
@@ -84,7 +83,6 @@ fn parses_package_attrs_with_ns_prefix() {
Token::Open(("lv", "package").unwrap_into(), DS),
Token::AttrName("name".unwrap_into(), DS),
Token::AttrValue(name, DS),
Token::AttrEnd(DS),
]
.into_iter(),
);

View File

@@ -509,28 +509,6 @@ pub enum Token {
/// but it was removed.)
AttrValueFragment(SymbolId, Span),
/// A delimiter indicating that attribute processing has ended and the
/// next token will be either a child node or [`Token::Close`].
///
/// This allows for streaming attribute collection without any
/// lookahead,
/// which would otherwise require an iterator supporting a `peek`
/// operation.
///
/// This is mandatory for _readers_ to produce,
/// but _writers must ignore it and not require it to be present_,
/// allowing for the reduction of token counts for generated XIR in
/// situations where we know that it will not be further parsed.
///
/// The [`Span`] ought to be the final byte of the preceding attribute,
/// and is required only so that we can guarantee an API that can
/// produce a [`Span`] for any given [`Token`].
/// The span position cannot be after the preceding attribute because,
/// if attributes are parsed in isolation,
/// the following byte is outside of the context that we are permitted
/// to parse.
AttrEnd(Span),
/// Comment node.
Comment(SymbolId, Span),
@@ -575,7 +553,6 @@ impl Display for Token {
Self::AttrValueFragment(attr_val, span) => {
write!(f, "attribute value fragment `{}` at {}", attr_val, span)
}
Self::AttrEnd(span) => write!(f, "end of attributes at {}", span),
// TODO: Safe truncated comment.
Self::Comment(_, span) => write!(f, "comment at {}", span),
// TODO: Safe truncated text.
@@ -600,7 +577,6 @@ impl Token {
| AttrName(_, span)
| AttrValue(_, span)
| AttrValueFragment(_, span)
| AttrEnd(span)
| Comment(_, span)
| Text(_, span)
| CData(_, span)

View File

@@ -226,11 +226,6 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
tokbuf.push_front(Token::AttrValue(value, DUMMY_SPAN));
}
// Indicate the end of attributes even if no attributes were output.
// This allows for a reliable delimiter that can be used without
// lookahead for streaming attribute parsing.
tokbuf.push_front(Token::AttrEnd(DUMMY_SPAN));
Ok(())
}
}

View File

@@ -95,7 +95,6 @@ fn empty_node_without_prefix_or_attributes() {
result.expect("parsing failed"),
vec![
Token::Open("empty-node".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@@ -115,7 +114,6 @@ fn does_not_resolve_xmlns() {
// Since we didn't parse @xmlns, it's still an attribute.
Token::AttrName("xmlns".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@@ -135,7 +133,6 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
Token::Open(("x", "empty-node").unwrap_into(), DUMMY_SPAN),
Token::AttrName(("xmlns", "x").unwrap_into(), DUMMY_SPAN),
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@@ -174,7 +171,6 @@ fn multiple_attrs_ordered() {
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
Token::AttrName(("b", "baz").unwrap_into(), DUMMY_SPAN),
Token::AttrValue("c:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@@ -196,7 +192,6 @@ fn permits_duplicate_attrs() {
Token::AttrValue("a:UNESC".intern(), DUMMY_SPAN),
Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@@ -212,9 +207,7 @@ fn child_node_self_closing() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@@ -231,12 +224,9 @@ fn sibling_nodes() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@@ -253,11 +243,9 @@ fn child_node_with_attrs() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("bar:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@@ -274,7 +262,6 @@ fn child_text() {
result.expect("parsing failed"),
vec![
Token::Open("text".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Text("foo bar:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
],
@@ -291,10 +278,8 @@ fn mixed_child_content() {
result.expect("parsing failed"),
vec![
Token::Open("text".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Text("foo:UNESC".into(), DUMMY_SPAN),
Token::Open("em".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Text("bar:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
@@ -322,10 +307,8 @@ fn mixed_child_content_with_newlines() {
vec![
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Text("\n :UNESC".into(), DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
@@ -345,7 +328,6 @@ fn comment() {
vec![
Token::Comment("root".into(), DUMMY_SPAN),
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Comment("<child>".into(), DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@@ -367,7 +349,6 @@ lines-->
result.expect("parsing failed"),
vec![
Token::Open("mult".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Comment("comment\non multiple\nlines".into(), DUMMY_SPAN),
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
@@ -386,9 +367,7 @@ fn permits_mismatched_tags() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("mismatch".unwrap_into()), DUMMY_SPAN),
],

View File

@@ -533,11 +533,6 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
*self = Self::AttrState(estack, attrs, sa);
Ok(Incomplete)
}
// This will likely go away with AttrEnd.
Ok(Done) => {
*self = Self::BuddingElement(estack.consume_attrs(attrs));
Ok(Incomplete)
}
Ok(Dead(lookahead)) => {
*self = Self::BuddingElement(estack.consume_attrs(attrs));
self.parse_token(lookahead)
@@ -596,8 +591,6 @@ impl<SA: StackAttrParseState> Stack<SA> {
Self::BuddingElement(pstack) => Some(pstack.store()),
// Opening a child element in attribute parsing context.
// Automatically close the attributes despite a missing
// AttrEnd to accommodate non-reader XIR.
Self::BuddingAttrList(pstack, attr_list) => {
Some(pstack.consume_attrs(attr_list).store())
}
@@ -626,10 +619,6 @@ impl<SA: StackAttrParseState> Stack<SA> {
.try_close(name, span)
.map(ElementStack::consume_child_or_complete),
// We can implicitly complete the attribute list if there's a
// missing `Token::AttrEnd`,
// which alleviates us from having to unnecessarily generate
// it outside of readers.
Self::BuddingAttrList(stack, attr_list) => stack
.consume_attrs(attr_list)
.try_close(name, span)
@@ -660,10 +649,6 @@ impl<SA: StackAttrParseState> Stack<SA> {
ParseStatus::Object(Tree::Element(ele))
}
// This parser has completed relative to its initial context and
// is not expecting any further input.
Stack::Done => ParseStatus::Done,
_ => {
*self = new_stack;
ParseStatus::Incomplete

View File

@@ -52,8 +52,6 @@ impl ParseState for AttrParserState {
use AttrParserState::*;
match (take(self), tok) {
(Empty, Token::AttrEnd(_)) => return Ok(ParseStatus::Done),
(Empty, Token::AttrName(name, span)) => {
*self = Name(name, span);
Ok(ParseStatus::Incomplete)
@@ -187,11 +185,11 @@ mod test {
// But we provide something else unexpected.
assert_eq!(
sut.parse_token(Token::AttrEnd(*S2)),
sut.parse_token(Token::Close(None, *S2)),
Err(AttrParseError::AttrValueExpected(
attr,
*S,
Token::AttrEnd(*S2)
Token::Close(None, *S2)
))
);
@@ -214,12 +212,4 @@ mod test {
// Finally, we should now be in an accepting state.
assert!(sut.is_accepting());
}
#[test]
fn yields_none_on_attr_end() {
let mut sut = AttrParserState::default();
assert_eq!(sut.parse_token(Token::AttrEnd(*S)), Ok(ParseStatus::Done));
assert!(sut.is_accepting());
}
}

View File

@@ -169,8 +169,6 @@ impl<S: ParseState, I: TokenStream> Iterator for Parser<S, I> {
use ParseStatus::*;
match self.state.parse_token(tok) {
Ok(Done) => None,
// Nothing handled this dead state,
// and we cannot discard a lookahead token,
// so we have no choice but to produce an error.
@@ -303,8 +301,7 @@ pub enum ParseStatus<T> {
/// Parsing of an object is complete.
///
/// This does not indicate that the parser is complete,
/// as more objects may be able to be emitted;
/// see [`ParseStatus::Done`].
/// as more objects may be able to be emitted.
Object(T),
/// Parser encountered a dead state relative to the given token.
@@ -331,13 +328,6 @@ pub enum ParseStatus<T> {
/// If there is no parent context to handle the token,
/// [`Parser`] must yield an error.
Dead(Token),
/// Parsing is complete.
///
/// This should cause an iterator to yield [`None`].
/// If a parser is a combination of multiple [`ParseState`]s,
/// this should transition to the next appropriate state.
Done,
}
/// Result of a parsing operation.
@@ -362,8 +352,8 @@ impl<T> From<ParseStatus<T>> for Parsed<T> {
match status {
ParseStatus::Incomplete => Parsed::Incomplete,
ParseStatus::Object(x) => Parsed::Object(x),
ParseStatus::Dead(_) | ParseStatus::Done => {
unreachable!("Dead/Done status must be filtered by Parser")
ParseStatus::Dead(_) => {
unreachable!("Dead status must be filtered by Parser")
}
}
}
@@ -394,13 +384,13 @@ pub mod test {
fn parse_token(&mut self, tok: Token) -> ParseStateResult<Self> {
match tok {
Token::AttrEnd(..) => {
Token::Comment(..) => {
*self = Self::Done;
}
Token::Close(..) => {
return Err(EchoStateError::InnerError(tok))
}
Token::Comment(..) => return Ok(ParseStatus::Dead(tok)),
Token::Text(..) => return Ok(ParseStatus::Dead(tok)),
_ => {}
}
@@ -433,14 +423,15 @@ pub mod test {
#[test]
fn successful_parse_in_accepting_state_with_spans() {
// EchoState is placed into a Done state given AttrEnd.
let mut toks = [Token::AttrEnd(DS)].into_iter();
// EchoState is placed into a Done state given Comment.
let tok = Token::Comment("foo".into(), DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);
// The first token should be processed normally.
// EchoState proxies the token back.
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
assert_eq!(Some(Ok(Parsed::Object(tok))), sut.next());
// This is now the end of the token stream,
// which should be okay provided that the first token put us into
@@ -494,11 +485,12 @@ pub mod test {
fn fails_when_parser_is_finalized_in_non_accepting_state() {
// Set up so that we have a single token that we can use for
// recovery as part of the same iterator.
let recovery = Token::Comment("recov".into(), DS);
let mut toks = [
// Used purely to populate a Span.
Token::Close(None, DS),
// Recovery token here:
Token::AttrEnd(DS),
recovery.clone(),
]
.into_iter();
@@ -524,7 +516,7 @@ pub mod test {
// `toks` above is set up already for this,
// which allows us to assert that we received back the same `sut`.
let mut sut = result.unwrap_err().0;
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
assert_eq!(Some(Ok(Parsed::Object(recovery))), sut.next());
// And so we should now be in an accepting state,
// able to finalize.
@@ -533,8 +525,8 @@ pub mod test {
#[test]
fn unhandled_dead_state_results_in_error() {
// A comment will cause our parser to return Dead.
let tok = Token::Comment("dead".into(), DS);
// A Text will cause our parser to return Dead.
let tok = Token::Text("dead".into(), DS);
let mut toks = once(tok.clone());
let mut sut = Sut::from(&mut toks);

View File

@@ -201,11 +201,6 @@ fn empty_element_with_attrs_from_toks() {
assert_eq!(sut.next(), None);
}
// We should accommodate missing AttrEnd in an element context so that we
// can parse generated XIR without having to emit AttrEnd if we know it
// will not be necessary.
// I may come to regret that accommodation after we have to go back and add
// AttrEnd to systems that weren't providing it.
#[test]
fn child_element_after_attrs() {
let name = ("ns", "elem").unwrap_into();
@@ -217,7 +212,6 @@ fn child_element_after_attrs() {
Token::Open(name, *S),
Token::AttrName(attr, *S),
Token::AttrValue(val, *S2),
// No AttrEnd
Token::Open(child, *S),
Token::Close(None, *S2),
Token::Close(Some(name), *S3),
@@ -408,7 +402,6 @@ fn parser_attr_multiple() {
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S2),
Token::AttrValue(val2, *S3),
Token::AttrEnd(*S3),
// Token that we should _not_ hit.
Token::Text("nohit".into(), *S),
]
@@ -418,13 +411,16 @@ fn parser_attr_multiple() {
assert_eq!(sut.next(), Some(Ok(Attr::new(attr1, val1, (*S, *S2)))));
assert_eq!(sut.next(), Some(Ok(Attr::new(attr2, val2, (*S2, *S3)))));
assert_eq!(sut.next(), None);
// Parsing must stop after AttrEnd,
// Parsing must stop after the last attribute,
// after which some other parser can continue on the same token
// stream.
// Even if there _were_ more attributes,
// this parser is spent and cannot continue.
drop(sut);
assert_eq!(toks.next(), Some(Token::Text("nohit".into(), *S)));
// stream
// (using this token as a lookahead).
assert_eq!(
sut.next(),
Some(Err(ParseError::UnexpectedToken(Token::Text(
"nohit".into(),
*S
))))
);
}

View File

@@ -255,9 +255,6 @@ impl<S: Escaper> XmlWriter<S> for Token {
Ok(W::AttrFragmentAdjacent)
}
// AttrEnd is ignored by the writer (and is optional).
(Self::AttrEnd(..), x) => Ok(x),
// TODO: We have no way of knowing if text should be formatted
// as CData,
// which may also be beneficial to avoid escaping if we
@@ -497,18 +494,6 @@ mod test {
Ok(())
}
// AttrEnd does not even need to be in a semantically valid position; we
// just ignore it entirely.
#[test]
fn ignores_attr_end() -> TestResult {
let result = Token::AttrEnd(*S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"");
assert_eq!(result.1, WriterState::NodeOpen);
Ok(())
}
#[test]
fn writes_text() -> TestResult {
let text = "test unescaped".intern();
@@ -573,7 +558,6 @@ mod test {
Token::Open(root, *S),
Token::AttrName(("an", "attr").try_into()?, *S),
Token::AttrValue("value".intern(), *S),
Token::AttrEnd(*S),
Token::Text("text".intern(), *S),
Token::Open(("c", "child").try_into()?, *S),
Token::Whitespace(" ".try_into()?, *S),