tamer: ir::xir::Token::AttrEnd: New token type
The purpose of this token is to implement a lazy streaming attribute collection operation without a token of lookup, which would complicate parsing or require that a TokenStream provide a `peek` method. This is only required for readers to produce, since readers will be feeding data to parsers. I have the writer ignoring it. If you're looking back at this commit, the question is whether this was a bad idea: it introduces inconsistencies into the token stream depending on the context, which can be confusing and error-prone. The intent is to have the parser throw an explicit error if the new token is missing in the context in which it is required, which will safely handle the issue, but does defer it to runtime. But only readers need auditing, and there's only one XIR reader at the moment. DEV-10863main
parent
18ab032ba0
commit
7e6cb2c948
|
@ -549,6 +549,20 @@ pub enum Token {
|
|||
/// components of generated attribute values.
|
||||
AttrValueFragment(AttrValue, Span),
|
||||
|
||||
/// A delimiter indicating that attribute processing has ended and the
|
||||
/// next token will be either a child node or [`Token::Close`].
|
||||
///
|
||||
/// This allows for streaming attribute collection without any
|
||||
/// lookahead,
|
||||
/// which would otherwise require an iterator supporting a `peek`
|
||||
/// operation.
|
||||
///
|
||||
/// This is mandatory for _readers_ to produce,
|
||||
/// but _writers must ignore it and not require it to be present_,
|
||||
/// allowing for the reduction of token counts for generated XIR in
|
||||
/// situations where we know that it will not be further parsed.
|
||||
AttrEnd,
|
||||
|
||||
/// Comment node.
|
||||
Comment(Text, Span),
|
||||
|
||||
|
|
|
@ -205,6 +205,11 @@ impl<B: BufRead> XmlXirReader<B> {
|
|||
tokbuf.push_front(Token::AttrValue(value, DUMMY_SPAN));
|
||||
}
|
||||
|
||||
// Indicate the end of attributes even if no attributes were output.
|
||||
// This allows for a reliable delimiter that can be used without
|
||||
// lookahead for streaming attribute parsing.
|
||||
tokbuf.push_front(Token::AttrEnd);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ fn empty_node_without_prefix_or_attributes() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("empty-node".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -86,6 +87,7 @@ fn does_not_resolve_xmlns() {
|
|||
AttrValue::Escaped("noresolve".into()),
|
||||
DUMMY_SPAN
|
||||
),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -108,6 +110,7 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
|
|||
AttrValue::Escaped("noresolve".into()),
|
||||
DUMMY_SPAN
|
||||
),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -146,6 +149,7 @@ fn multiple_attrs_ordered() {
|
|||
Token::AttrValue(AttrValue::Escaped("b".into()), DUMMY_SPAN),
|
||||
Token::AttrName(("b", "baz").unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue(AttrValue::Escaped("c".into()), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -167,6 +171,7 @@ fn permits_duplicate_attrs() {
|
|||
Token::AttrValue(AttrValue::Escaped("a".into()), DUMMY_SPAN),
|
||||
Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue(AttrValue::Escaped("b".into()), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -182,7 +187,9 @@ fn child_node_self_closing() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -199,9 +206,12 @@ fn sibling_nodes() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -218,9 +228,11 @@ fn child_node_with_attrs() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue(AttrValue::Escaped("bar".into()), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -237,6 +249,7 @@ fn child_text() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo bar".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -253,8 +266,10 @@ fn mixed_child_content() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
|
||||
Token::Open("em".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("bar".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -283,8 +298,10 @@ fn mixed_child_content_with_newlines() {
|
|||
vec![
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("\n ".into()), DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -303,6 +320,7 @@ fn child_cdata() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("cd".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
// Escaped by quick_xml.
|
||||
Token::Text(Text::Escaped("<foo />".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("cd".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -320,8 +338,10 @@ fn mixed_child_text_and_cdata() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("cd".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
|
||||
Token::Open("bar".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
// Escaped by quick_xml.
|
||||
Token::Text(Text::Escaped("<baz/>".into()), DUMMY_SPAN),
|
||||
|
@ -341,6 +361,7 @@ fn comment() {
|
|||
vec![
|
||||
Token::Comment(Text::Unescaped("root".into()), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Comment(Text::Unescaped("<child>".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -363,6 +384,7 @@ lines-->
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("mult".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Comment(
|
||||
Text::Unescaped("comment\non multiple\nlines".into()),
|
||||
DUMMY_SPAN
|
||||
|
@ -384,7 +406,9 @@ fn permits_mismatched_tags() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("mismatch".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
|
|
@ -247,6 +247,9 @@ impl XmlWriter for Token {
|
|||
Ok(S::AttrFragmentAdjacent)
|
||||
}
|
||||
|
||||
// AttrEnd is ignored by the writer (and is optional).
|
||||
(Self::AttrEnd, x) => Ok(x),
|
||||
|
||||
// Unescaped not yet supported, but you could use CData.
|
||||
(
|
||||
Self::Text(Text::Escaped(text), _),
|
||||
|
@ -481,6 +484,17 @@ mod test {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
// AttrEnd does not even need to be in a semantically valid position; we
|
||||
// just ignore it entirely.
|
||||
#[test]
|
||||
fn ignores_attr_end() -> TestResult {
|
||||
let result = Token::AttrEnd.write_new(WriterState::NodeOpen)?;
|
||||
assert_eq!(result.0, b"");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn writes_escaped_text() -> TestResult {
|
||||
// Just to be sure it's not trying to escape when we say it
|
||||
|
@ -563,6 +577,7 @@ mod test {
|
|||
Token::Open(root, *S),
|
||||
Token::AttrName(("an", "attr").try_into()?, *S),
|
||||
Token::AttrValue(AttrValue::Escaped("value".intern()), *S),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("text".intern()), *S),
|
||||
Token::Open(("c", "child").try_into()?, *S),
|
||||
Token::Whitespace(" ".try_into()?, *S),
|
||||
|
|
Loading…
Reference in New Issue