tamer: xir::xir::tree::parse_attrs: Isolated attribute parsing
This produces an `AttrList` independent from a containing `Element`. Upcoming changes may further permit the parser to yield smaller components that are not part of an aggregate. DEV-10863 main
parent
54e1877d20
commit
c7eb50b636
|
@ -47,7 +47,7 @@
|
|||
//! They have slightly different use cases and tradeoffs:
|
||||
//!
|
||||
//! [`parse`] yields a [`Result`] containing [`Parsed`],
|
||||
//! which _may_ contain a [`Parsed::Object`],
|
||||
//! which _may_ contain a [`Parsed::Tree`],
|
||||
//! but it's more likely to contain [`Parsed::Incomplete`];
|
||||
//! this is because it typically takes multiple [`Token`]s to complete
|
||||
//! parsing within a given context.
|
||||
|
@ -70,7 +70,7 @@
|
|||
//! which does two things:
|
||||
//!
|
||||
//! 1. It filters out all [`Parsed::Incomplete`]; and
|
||||
//! 2. On [`Parsed::Object`],
|
||||
//! 2. On [`Parsed::Tree`],
|
||||
//! it yields the inner [`Tree`].
|
||||
//!
|
||||
//! This is a much more convenient API,
|
||||
|
@ -504,6 +504,9 @@ pub enum Stack {
|
|||
/// An attribute whose value is being constructed of value fragments,
|
||||
/// after which it will be attached to an element.
|
||||
AttrFragments(Option<ElementStack>, AttrList, AttrParts),
|
||||
|
||||
/// A completed [`AttrList`] without any [`Element`] context.
|
||||
IsolatedAttrList(AttrList),
|
||||
}
|
||||
|
||||
impl Default for Stack {
|
||||
|
@ -530,11 +533,26 @@ impl Stack {
|
|||
Ok(Self::BuddingElement(ElementStack {
|
||||
element,
|
||||
pstack: match self {
|
||||
// Opening a root element (or lack of context)
|
||||
// Opening a root element (or lack of context).
|
||||
Self::Empty => Ok(None),
|
||||
// Open a child element
|
||||
|
||||
// Open a child element.
|
||||
Self::BuddingElement(pstack) => Ok(Some(pstack.store())),
|
||||
|
||||
// Opening a child element in attribute parsing context.
|
||||
// Automatically close the attributes despite a missing
|
||||
// AttrEnd to accommodate non-reader XIR.
|
||||
Self::BuddingAttrList(Some(pstack), attr_list) => {
|
||||
Ok(Some(pstack.consume_attrs(attr_list).store()))
|
||||
}
|
||||
|
||||
// Attempting to open a child element in an isolated
|
||||
// attribute parsing context means that `AttrEnd` was not
|
||||
// provided.
|
||||
Self::BuddingAttrList(None, ..) => {
|
||||
Err(ParseError::AttrNameExpected(Token::Open(name, span)))
|
||||
}
|
||||
|
||||
_ => todo! {},
|
||||
}?,
|
||||
}))
|
||||
|
@ -565,6 +583,11 @@ impl Stack {
|
|||
.try_close(name, span)
|
||||
.map(ElementStack::consume_child_or_complete),
|
||||
|
||||
// See the error variant description for more information.
|
||||
Self::BuddingAttrList(None, ..) => {
|
||||
Err(ParseError::MissingIsolatedAttrEnd(span))
|
||||
}
|
||||
|
||||
_ => todo! {},
|
||||
}
|
||||
}
|
||||
|
@ -654,7 +677,9 @@ impl Stack {
|
|||
/// [`Element`].
|
||||
fn end_attrs(self) -> Result<Self> {
|
||||
Ok(match self {
|
||||
Self::BuddingAttrList(None, _attr_list) => todo!("completed attrs"),
|
||||
Self::BuddingAttrList(None, attr_list) => {
|
||||
Self::IsolatedAttrList(attr_list)
|
||||
}
|
||||
|
||||
Self::BuddingAttrList(Some(ele_stack), attr_list) => {
|
||||
Self::BuddingElement(ele_stack.consume_attrs(attr_list))
|
||||
|
@ -723,6 +748,11 @@ impl ParserState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Initialize the state of the parser with the given [`Stack`].
///
/// This allows parsing to begin from a caller-chosen context;
/// for example,
/// `parse_attrs` uses it to start directly in
/// `Stack::BuddingAttrList` with no parent element.
|
||||
fn with(stack: Stack) -> Self {
|
||||
Self { stack }
|
||||
}
|
||||
|
||||
/// Consume a single XIR [`Token`] and attempt to parse it within the
|
||||
/// context of the current [`Stack`].
|
||||
///
|
||||
|
@ -764,7 +794,8 @@ impl ParserState {
|
|||
/// Emit a completed object or store the current stack for further processing.
|
||||
fn store_or_emit(&mut self, new_stack: Stack) -> Parsed {
|
||||
match new_stack {
|
||||
Stack::ClosedElement(ele) => Parsed::Object(Tree::Element(ele)),
|
||||
Stack::ClosedElement(ele) => Parsed::Tree(Tree::Element(ele)),
|
||||
Stack::IsolatedAttrList(attr_list) => Parsed::AttrList(attr_list),
|
||||
|
||||
_ => {
|
||||
self.stack = new_stack;
|
||||
|
@ -787,10 +818,32 @@ pub enum ParseError {
|
|||
close: (QName, Span),
|
||||
},
|
||||
|
||||
/// [`Token::AttrEnd`] was expected in an isolated attribute context,
|
||||
/// but [`Token::Close`] was encountered instead.
|
||||
///
|
||||
/// This means that we encountered an element close while parsing
|
||||
/// attributes in an isolated context,
|
||||
/// which may happen if we're parsing only attributes as part
|
||||
/// of a larger XIR stream.
|
||||
/// This should never happen if our XIR is well-formed _from a reader_,
|
||||
/// but could happen if we generate XIR that we are not expecting to
|
||||
/// subsequently parse.
|
||||
///
|
||||
/// There is nothing the user can do to correct it;
|
||||
/// this represents a bug in the compiler.
|
||||
MissingIsolatedAttrEnd(Span),
|
||||
|
||||
/// An attribute was expected as the next [`Token`].
|
||||
AttrNameExpected(Token),
|
||||
|
||||
/// Token stream ended before attribute parsing was complete.
|
||||
UnexpectedAttrEof,
|
||||
|
||||
/// Not yet implemented.
|
||||
Todo(Token, Stack),
|
||||
}
|
||||
|
||||
// TODO: Token needs to implement Display!
|
||||
impl Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
|
@ -806,6 +859,33 @@ impl Display for ParseError {
|
|||
)
|
||||
}
|
||||
|
||||
Self::MissingIsolatedAttrEnd(span) => {
|
||||
// Try to be helpful to developers and users alike.
|
||||
#[cfg(test)]
|
||||
let testmsg = "or a problem with your test case";
|
||||
#[cfg(not(test))]
|
||||
let testmsg = "and should be reported";
|
||||
|
||||
write!(
|
||||
f,
|
||||
"internal error: expecting AttrEnd, found Close at {}; \
|
||||
this represents a compiler bug {}",
|
||||
span, testmsg
|
||||
)
|
||||
}
|
||||
|
||||
Self::AttrNameExpected(tok) => {
|
||||
write!(f, "attribute name expected, found `{:?}`", tok)
|
||||
}
|
||||
|
||||
// TODO: Perhaps we should include the last-encountered Span.
|
||||
Self::UnexpectedAttrEof => {
|
||||
write!(
|
||||
f,
|
||||
"unexpected end of input during isolated attribute parsing",
|
||||
)
|
||||
}
|
||||
|
||||
Self::Todo(tok, stack) => {
|
||||
write!(
|
||||
f,
|
||||
|
@ -830,7 +910,10 @@ impl Display for ParseError {
|
|||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub enum Parsed {
|
||||
/// Parsing of an object is complete.
|
||||
Object(Tree),
|
||||
Tree(Tree),
|
||||
|
||||
/// Parsing of an isolated attribute list is complete.
|
||||
AttrList(AttrList),
|
||||
|
||||
/// The parser needs more token data to emit an object
|
||||
/// (the active context is not yet complete).
|
||||
|
@ -868,7 +951,7 @@ pub fn parse(state: &mut ParserState, tok: Token) -> Option<Result<Parsed>> {
|
|||
/// Unlike [`parse`],
|
||||
/// which is intended for use with [`Iterator::scan`],
|
||||
/// this will yield _only_ when the underlying parser yields
|
||||
/// [`Parsed::Object`],
|
||||
/// [`Parsed::Tree`],
|
||||
/// unwrapping the inner [`Tree`] value.
|
||||
/// This interface is far more convenient,
|
||||
/// but comes at the cost of not knowing how many parsing steps a single
|
||||
|
@ -891,11 +974,53 @@ pub fn parser_from(
|
|||
) -> impl Iterator<Item = Result<Tree>> {
|
||||
toks.scan(ParserState::new(), parse)
|
||||
.filter_map(|parsed| match parsed {
|
||||
Ok(Parsed::Object(tree)) => Some(Ok(tree)),
|
||||
Ok(Parsed::Tree(tree)) => Some(Ok(tree)),
|
||||
Ok(Parsed::Incomplete) => None,
|
||||
Err(x) => Some(Err(x)),
|
||||
|
||||
// These make no sense in this context and should never occur.
|
||||
Ok(Parsed::AttrList(x)) => unreachable!(
|
||||
"unexpected yield by XIRT (Tree expected): {:?}",
|
||||
x
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
/// Begin parsing in an isolated attribute context,
|
||||
/// producing an [`AttrList`] that is detached from any [`Element`].
|
||||
///
|
||||
/// This is useful when you wish to consume a XIR stream and collect only
|
||||
/// the attributes of an element.
|
||||
/// If you wish to process an entire element,
|
||||
/// use [`parser_from`] instead.
|
||||
///
|
||||
/// Parsing must begin at a [`Token::AttrName`] token.
|
||||
///
|
||||
/// This will consume tokens until reaching [`Token::AttrEnd`],
|
||||
/// and so it is important that the XIR stream contain this delimiter;
|
||||
/// this should be the case with all readers.
|
||||
#[inline]
|
||||
pub fn parse_attrs<'a>(
|
||||
toks: &mut impl TokenStream,
|
||||
dest: AttrList,
|
||||
) -> Result<AttrList> {
|
||||
let mut state = ParserState::with(Stack::BuddingAttrList(None, dest));
|
||||
|
||||
loop {
|
||||
match toks.next().and_then(|tok| parse(&mut state, tok)) {
|
||||
None => return Err(ParseError::UnexpectedAttrEof),
|
||||
Some(Err(err)) => return Err(err),
|
||||
Some(Ok(Parsed::Incomplete)) => continue,
|
||||
Some(Ok(Parsed::AttrList(attr_list))) => return Ok(attr_list),
|
||||
|
||||
// These make no sense in this context and should never occur.
|
||||
Some(Ok(Parsed::Tree(x))) => unreachable!(
|
||||
"unexpected yield by XIRT (AttrList expected): {:?}",
|
||||
x
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
|
|
@ -283,6 +283,11 @@ impl AttrList {
|
|||
pub fn find(&self, name: QName) -> Option<&Attr> {
|
||||
self.attrs.iter().find(|attr| attr.name() == name)
|
||||
}
|
||||
|
||||
/// Returns [`true`] if the list contains no attributes.
///
/// This delegates to `is_empty` on the inner `attrs` collection.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.attrs.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Attr>> for AttrList {
|
||||
|
|
|
@ -102,10 +102,7 @@ fn empty_element_self_close_from_toks() {
|
|||
let mut sut = toks.scan(ParserState::new(), parse);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Parsed::Object(Tree::Element(expected))))
|
||||
);
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
|
||||
assert_eq!(sut.next(), None);
|
||||
}
|
||||
|
||||
|
@ -128,10 +125,7 @@ fn empty_element_balanced_close_from_toks() {
|
|||
let mut sut = toks.scan(ParserState::new(), parse);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Parsed::Object(Tree::Element(expected))))
|
||||
);
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
|
||||
assert_eq!(sut.next(), None);
|
||||
}
|
||||
|
||||
|
@ -209,10 +203,53 @@ fn empty_element_with_attrs_from_toks() {
|
|||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Parsed::Object(Tree::Element(expected))))
|
||||
);
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
|
||||
assert_eq!(sut.next(), None);
|
||||
}
|
||||
|
||||
// We should accommodate missing AttrEnd in an element context so that we
|
||||
// can parse generated XIR without having to emit AttrEnd if we know it
|
||||
// will not be necessary.
|
||||
// I may come to regret that accommodation after we have to go back and add
|
||||
// AttrEnd to systems that weren't providing it.
|
||||
#[test]
|
||||
fn child_element_after_attrs() {
|
||||
let name = ("ns", "elem").unwrap_into();
|
||||
let child = "child".unwrap_into();
|
||||
let attr = "a".unwrap_into();
|
||||
let val = AttrValue::Escaped("val".intern());
|
||||
|
||||
let toks = [
|
||||
Token::Open(name, *S),
|
||||
Token::AttrName(attr, *S),
|
||||
Token::AttrValue(val, *S2),
|
||||
// No AttrEnd
|
||||
Token::Open(child, *S),
|
||||
Token::Close(None, *S2),
|
||||
Token::Close(Some(name), *S3),
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
let expected = Element {
|
||||
name,
|
||||
attrs: Some(AttrList::from(vec![Attr::new(attr, val, (*S, *S2))])),
|
||||
children: vec![Tree::Element(Element {
|
||||
name: child,
|
||||
attrs: None,
|
||||
children: vec![],
|
||||
span: (*S, *S2),
|
||||
})],
|
||||
span: (*S, *S3),
|
||||
};
|
||||
|
||||
let mut sut = toks.scan(ParserState::new(), parse);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Open
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // Close
|
||||
assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
|
||||
assert_eq!(sut.next(), None);
|
||||
}
|
||||
|
||||
|
@ -348,3 +385,78 @@ fn parser_from_filters_incomplete() {
|
|||
assert_eq!(sut.next(), Some(Ok(Tree::Element(expected))));
|
||||
assert_eq!(sut.next(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_attrs_fails_if_first_token_is_non_attr() {
|
||||
let tok = Token::Open("foo".unwrap_into(), *S);
|
||||
let mut toks = [tok.clone()].into_iter();
|
||||
|
||||
assert_eq!(
|
||||
Err(ParseError::AttrNameExpected(tok)),
|
||||
parse_attrs(&mut toks, AttrList::new()),
|
||||
);
|
||||
|
||||
// The token should have been consumed, not copied.
|
||||
assert_eq!(0, toks.len());
|
||||
}
|
||||
|
||||
// Since the purpose of this function is to parse the complete attribute
|
||||
// list, it must fail if it does not encounter `AttrEnd`.
|
||||
#[test]
|
||||
fn parse_attrs_fails_if_end_before_attr_end() {
|
||||
let mut toks = [
|
||||
Token::AttrName("foo".unwrap_into(), *S),
|
||||
Token::AttrValue(AttrValue::Escaped("bar".into()), *S),
|
||||
// No Token::AttrEnd
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
assert_eq!(
|
||||
Err(ParseError::UnexpectedAttrEof),
|
||||
parse_attrs(&mut toks, AttrList::new()),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_attrs_fails_if_missing_attr_end() {
|
||||
// Let's also ensure we fail if some other token is available in place
|
||||
// of Token::AttrEnd.
|
||||
let mut toks = [
|
||||
Token::AttrName("foo".unwrap_into(), *S),
|
||||
Token::AttrValue(AttrValue::Escaped("bar".into()), *S2),
|
||||
// No Token::AttrEnd
|
||||
Token::Close(None, *S3),
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
assert_eq!(
|
||||
Err(ParseError::MissingIsolatedAttrEnd(*S3)),
|
||||
parse_attrs(&mut toks, AttrList::new()),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_attrs_isolated() {
|
||||
let attr1 = "one".unwrap_into();
|
||||
let attr2 = "two".unwrap_into();
|
||||
let val1 = AttrValue::Escaped("val1".into());
|
||||
let val2 = AttrValue::Escaped("val2".into());
|
||||
|
||||
// Success case: a well-formed stream of attributes terminated by
|
||||
// Token::AttrEnd should yield the complete isolated AttrList.
|
||||
let mut toks = [
|
||||
Token::AttrName(attr1, *S),
|
||||
Token::AttrValue(val1, *S2),
|
||||
Token::AttrName(attr2, *S2),
|
||||
Token::AttrValue(val2, *S3),
|
||||
Token::AttrEnd,
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
let expected = AttrList::from([
|
||||
Attr::new(attr1, val1, (*S, *S2)),
|
||||
Attr::new(attr2, val2, (*S2, *S3)),
|
||||
]);
|
||||
|
||||
assert_eq!(expected, parse_attrs(&mut toks, AttrList::new()).unwrap());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue