tamer: xir::xir::tree::parse_attrs: Isolated attribute parsing

This produces an `AttrList` independent from a containing `Element`. Upcoming changes may further permit the parser to yield smaller components that are not part of an aggregate. DEV-10863
2021-11-03 14:37:05 -04:00 · 2021-11-03 14:37:05 -04:00 · c7eb50b636
parent 54e1877d20
commit c7eb50b636
3 changed files with 263 additions and 21 deletions
--- a/tamer/src/ir/xir/tree.rs
+++ b/tamer/src/ir/xir/tree.rs
@ -47,7 +47,7 @@
 //! They have slightly different use cases and tradeoffs:
 //!
 //! [`parse`] yields a [`Result`] containing [`Parsed`],
-//!   which _may_ contain a [`Parsed::Object`],
+//!   which _may_ contain a [`Parsed::Tree`],
 //!   but it's more likely to contain [`Parsed::Incomplete`];
 //!     this is because it typically takes multiple [`Token`]s to complete
 //!     parsing within a given context.
@ -70,7 +70,7 @@
 //!     which does two things:
 //!
 //!   1. It filters out all [`Parsed::Incomplete`]; and
-//!   2. On [`Parsed::Object`],
+//!   2. On [`Parsed::Tree`],
 //!        it yields the inner [`Tree`].
 //!
 //! This is a much more convenient API,
@ -504,6 +504,9 @@ pub enum Stack {
    /// An attribute whose value is being constructed of value fragments,
    ///   after which it will be attached to an element.
    AttrFragments(Option<ElementStack>, AttrList, AttrParts),
+
+    /// A completed [`AttrList`] without any [`Element`] context.
+    IsolatedAttrList(AttrList),
 }

 impl Default for Stack {
@ -530,11 +533,26 @@ impl Stack {
        Ok(Self::BuddingElement(ElementStack {
            element,
            pstack: match self {
-                // Opening a root element (or lack of context)
+                // Opening a root element (or lack of context).
                Self::Empty => Ok(None),
-                // Open a child element
+
+                // Open a child element.
                Self::BuddingElement(pstack) => Ok(Some(pstack.store())),

+                // Opening a child element in attribute parsing context.
+                // Automatically close the attributes despite a missing
+                //   AttrEnd to accommodate non-reader XIR.
+                Self::BuddingAttrList(Some(pstack), attr_list) => {
+                    Ok(Some(pstack.consume_attrs(attr_list).store()))
+                }
+
+                // Attempting to open a child element in an isolated
+                //   attribute parsing context means that `AttrEnd` was not
+                //   provided.
+                Self::BuddingAttrList(None, ..) => {
+                    Err(ParseError::AttrNameExpected(Token::Open(name, span)))
+                }
+
                _ => todo! {},
            }?,
        }))
@ -565,6 +583,11 @@ impl Stack {
                .try_close(name, span)
                .map(ElementStack::consume_child_or_complete),

+            // See the error variant description for more information.
+            Self::BuddingAttrList(None, ..) => {
+                Err(ParseError::MissingIsolatedAttrEnd(span))
+            }
+
            _ => todo! {},
        }
    }
@ -654,7 +677,9 @@ impl Stack {
    ///   [`Element`].
    fn end_attrs(self) -> Result<Self> {
        Ok(match self {
-            Self::BuddingAttrList(None, _attr_list) => todo!("completed attrs"),
+            Self::BuddingAttrList(None, attr_list) => {
+                Self::IsolatedAttrList(attr_list)
+            }

            Self::BuddingAttrList(Some(ele_stack), attr_list) => {
                Self::BuddingElement(ele_stack.consume_attrs(attr_list))
@ -723,6 +748,11 @@ impl ParserState {
        }
    }

+    /// Initialize the state of the parser with the given [`Stack`].
+    fn with(stack: Stack) -> Self {
+        Self { stack }
+    }
+
    /// Consume a single XIR [`Token`] and attempt to parse it within the
    ///   context of the current [`Stack`].
    ///
@ -764,7 +794,8 @@ impl ParserState {
    /// Emit a completed object or store the current stack for further processing.
    fn store_or_emit(&mut self, new_stack: Stack) -> Parsed {
        match new_stack {
-            Stack::ClosedElement(ele) => Parsed::Object(Tree::Element(ele)),
+            Stack::ClosedElement(ele) => Parsed::Tree(Tree::Element(ele)),
+            Stack::IsolatedAttrList(attr_list) => Parsed::AttrList(attr_list),

            _ => {
                self.stack = new_stack;
@ -787,10 +818,32 @@ pub enum ParseError {
        close: (QName, Span),
    },

+    /// [`Token::AttrEnd`] was expected in an isolated attribute context,
+    ///   but [`Token::Close`] was encountered instead.
+    ///
+    /// This means that we encountered an element close while parsing
+    ///   attributes in an isolated context,
+    ///     which may happen if we're parsing only attributes as part
+    ///     of a larger XIR stream.
+    /// This should never happen if our XIR is well-formed _from a reader_,
+    ///     but could happen if we generate XIR that we are not expecting to
+    ///     subsequently parse.
+    ///
+    /// There is nothing the user can do to correct it;
+    ///   this represents a bug in the compiler.
+    MissingIsolatedAttrEnd(Span),
+
+    /// An attribute was expected as the next [`Token`].
+    AttrNameExpected(Token),
+
+    /// Token stream ended before attribute parsing was complete.
+    UnexpectedAttrEof,
+
    /// Not yet implemented.
    Todo(Token, Stack),
 }

+// TODO: Token needs to implement Display!
 impl Display for ParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
@ -806,6 +859,33 @@ impl Display for ParseError {
                )
            }

+            Self::MissingIsolatedAttrEnd(span) => {
+                // Try to be helpful to developers and users alike.
+                #[cfg(test)]
+                let testmsg = "or a problem with your test case";
+                #[cfg(not(test))]
+                let testmsg = "and should be reported";
+
+                write!(
+                    f,
+                    "internal error: expecting AttrEnd, found Close at {}; \
+                       this represents a compiler bug {}",
+                    span, testmsg
+                )
+            }
+
+            Self::AttrNameExpected(tok) => {
+                write!(f, "attribute name expected, found `{:?}`", tok)
+            }
+
+            // TODO: Perhaps we should include the last-encountered Span.
+            Self::UnexpectedAttrEof => {
+                write!(
+                    f,
+                    "unexpected end of input during isolated attribute parsing",
+                )
+            }
+
            Self::Todo(tok, stack) => {
                write!(
                    f,
@ -830,7 +910,10 @@ impl Display for ParseError {
 #[derive(Debug, Eq, PartialEq)]
 pub enum Parsed {
    /// Parsing of an object is complete.
-    Object(Tree),
+    Tree(Tree),
+
+    /// Parsing of an isolated attribute list is complete.
+    AttrList(AttrList),

    /// The parser needs more token data to emit an object
    ///   (the active context is not yet complete).
@ -868,7 +951,7 @@ pub fn parse(state: &mut ParserState, tok: Token) -> Option<Result<Parsed>> {
 /// Unlike [`parse`],
 ///   which is intended for use with [`Iterator::scan`],
 ///   this will yield /only/ when the underlying parser yields
-///   [`Parsed::Object`],
+///   [`Parsed::Tree`],
 ///     unwrapping the inner [`Tree`] value.
 /// This interface is far more convenient,
 ///   but comes at the cost of not knowing how many parsing steps a single
@ -891,11 +974,53 @@ pub fn parser_from(
 ) -> impl Iterator<Item = Result<Tree>> {
    toks.scan(ParserState::new(), parse)
        .filter_map(|parsed| match parsed {
-            Ok(Parsed::Object(tree)) => Some(Ok(tree)),
+            Ok(Parsed::Tree(tree)) => Some(Ok(tree)),
            Ok(Parsed::Incomplete) => None,
            Err(x) => Some(Err(x)),
+
+            // These make no sense in this context and should never occur.
+            Ok(Parsed::AttrList(x)) => unreachable!(
+                "unexpected yield by XIRT (Tree expected): {:?}",
+                x
+            ),
        })
 }

+/// Begin parsing in an isolated attribute context,
+///   producing an [`AttrList`] that is detached from any [`Element`].
+///
+/// This is useful when you wish to consume a XIR stream and collect only
+///   the attributes of an element.
+/// If you wish to process an entire element,
+///   use [`parser_from`] instead.
+///
+/// Parsing must begin at a [`Token::AttrName`] token.
+///
+/// This will consume tokens until reaching [`Token::AttrEnd`],
+///   and so it is important that the XIR stream contain this delimiter;
+///     this should be the case with all readers.
+#[inline]
+pub fn parse_attrs<'a>(
+    toks: &mut impl TokenStream,
+    dest: AttrList,
+) -> Result<AttrList> {
+    let mut state = ParserState::with(Stack::BuddingAttrList(None, dest));
+
+    loop {
+        match toks.next().and_then(|tok| parse(&mut state, tok)) {
+            None => return Err(ParseError::UnexpectedAttrEof),
+            Some(Err(err)) => return Err(err),
+            Some(Ok(Parsed::Incomplete)) => continue,
+            Some(Ok(Parsed::AttrList(attr_list))) => return Ok(attr_list),
+
+            // These make no sense in this context and should never occur.
+            Some(Ok(Parsed::Tree(x))) => unreachable!(
+                "unexpected yield by XIRT (AttrList expected): {:?}",
+                x
+            ),
+        }
+    }
+}
+
 #[cfg(test)]
 mod test;
--- a/tamer/src/ir/xir/tree/attr.rs
+++ b/tamer/src/ir/xir/tree/attr.rs
@ -283,6 +283,11 @@ impl AttrList {
    pub fn find(&self, name: QName) -> Option<&Attr> {
        self.attrs.iter().find(|attr| attr.name() == name)
    }
+
+    /// Returns [`true`] if the list contains no attributes.
+    pub fn is_empty(&self) -> bool {
+        self.attrs.is_empty()
+    }
 }

 impl From<Vec<Attr>> for AttrList {
--- a/tamer/src/ir/xir/tree/test.rs
+++ b/tamer/src/ir/xir/tree/test.rs
@ -102,10 +102,7 @@ fn empty_element_self_close_from_toks() {
    let mut sut = toks.scan(ParserState::new(), parse);

    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
-    assert_eq!(
-        sut.next(),
-        Some(Ok(Parsed::Object(Tree::Element(expected))))
-    );
+    assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
    assert_eq!(sut.next(), None);
 }

@ -128,10 +125,7 @@ fn empty_element_balanced_close_from_toks() {
    let mut sut = toks.scan(ParserState::new(), parse);

    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
-    assert_eq!(
-        sut.next(),
-        Some(Ok(Parsed::Object(Tree::Element(expected))))
-    );
+    assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
    assert_eq!(sut.next(), None);
 }

@ -209,10 +203,53 @@ fn empty_element_with_attrs_from_toks() {
    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
-    assert_eq!(
-        sut.next(),
-        Some(Ok(Parsed::Object(Tree::Element(expected))))
-    );
+    assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
+    assert_eq!(sut.next(), None);
+}
+
+// We should accommodate missing AttrEnd in an element context so that we
+//   can parse generated XIR without having to emit AttrEnd if we know it
+//   will not be necessary.
+// I may come to regret that accommodation after we have to go back and add
+//   AttrEnd to systems that weren't providing it.
+#[test]
+fn child_element_after_attrs() {
+    let name = ("ns", "elem").unwrap_into();
+    let child = "child".unwrap_into();
+    let attr = "a".unwrap_into();
+    let val = AttrValue::Escaped("val".intern());
+
+    let toks = [
+        Token::Open(name, *S),
+        Token::AttrName(attr, *S),
+        Token::AttrValue(val, *S2),
+        // No AttrEnd
+        Token::Open(child, *S),
+        Token::Close(None, *S2),
+        Token::Close(Some(name), *S3),
+    ]
+    .into_iter();
+
+    let expected = Element {
+        name,
+        attrs: Some(AttrList::from(vec![Attr::new(attr, val, (*S, *S2))])),
+        children: vec![Tree::Element(Element {
+            name: child,
+            attrs: None,
+            children: vec![],
+            span: (*S, *S2),
+        })],
+        span: (*S, *S3),
+    };
+
+    let mut sut = toks.scan(ParserState::new(), parse);
+
+    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
+    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
+    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
+    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
+    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Close
+    assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
    assert_eq!(sut.next(), None);
 }

@ -348,3 +385,78 @@ fn parser_from_filters_incomplete() {
    assert_eq!(sut.next(), Some(Ok(Tree::Element(expected))));
    assert_eq!(sut.next(), None);
 }
+
+#[test]
+fn parse_attrs_fails_if_first_token_is_non_attr() {
+    let tok = Token::Open("foo".unwrap_into(), *S);
+    let mut toks = [tok.clone()].into_iter();
+
+    assert_eq!(
+        Err(ParseError::AttrNameExpected(tok)),
+        parse_attrs(&mut toks, AttrList::new()),
+    );
+
+    // The token should have been consumed, not copied.
+    assert_eq!(0, toks.len());
+}
+
+// Since the purpose of this function is to parse the complete attribute
+// list, it must fail if it does not encounter `AttrEnd`.
+#[test]
+fn parse_attrs_fails_if_end_before_attr_end() {
+    let mut toks = [
+        Token::AttrName("foo".unwrap_into(), *S),
+        Token::AttrValue(AttrValue::Escaped("bar".into()), *S),
+        // No Token::AttrEnd
+    ]
+    .into_iter();
+
+    assert_eq!(
+        Err(ParseError::UnexpectedAttrEof),
+        parse_attrs(&mut toks, AttrList::new()),
+    );
+}
+
+#[test]
+fn parse_attrs_fails_if_missing_attr_end() {
+    // Let's also ensure we fail if some other token is available in place
+    // of Token::AttrEnd.
+    let mut toks = [
+        Token::AttrName("foo".unwrap_into(), *S),
+        Token::AttrValue(AttrValue::Escaped("bar".into()), *S2),
+        // No Token::AttrEnd
+        Token::Close(None, *S3),
+    ]
+    .into_iter();
+
+    assert_eq!(
+        Err(ParseError::MissingIsolatedAttrEnd(*S3)),
+        parse_attrs(&mut toks, AttrList::new()),
+    );
+}
+
+#[test]
+fn parse_attrs_isolated() {
+    let attr1 = "one".unwrap_into();
+    let attr2 = "two".unwrap_into();
+    let val1 = AttrValue::Escaped("val1".into());
+    let val2 = AttrValue::Escaped("val2".into());
+
+    // Let's also ensure we fail if some other token is available in place
+    // of Token::AttrEnd.
+    let mut toks = [
+        Token::AttrName(attr1, *S),
+        Token::AttrValue(val1, *S2),
+        Token::AttrName(attr2, *S2),
+        Token::AttrValue(val2, *S3),
+        Token::AttrEnd,
+    ]
+    .into_iter();
+
+    let expected = AttrList::from([
+        Attr::new(attr1, val1, (*S, *S2)),
+        Attr::new(attr2, val2, (*S2, *S3)),
+    ]);
+
+    assert_eq!(expected, parse_attrs(&mut toks, AttrList::new()).unwrap());
+}