tamer: ir::xir::reader: Comment parsing

Comments re-use Text, but they are _not_ escaped, so we need to take care
with the type to ensure that, if the value were ever used with a
Token::Text, we don't end up injecting XML.
main
Mike Gerwitz 2021-10-21 22:04:45 -04:00
parent fdb8e5998c
commit d72ab3675c
2 changed files with 50 additions and 0 deletions

View File

@ -130,6 +130,13 @@ impl<B: BufRead> XmlXirReader<B> {
))
}
// Comments are _not_ returned escaped.
QuickXmlEvent::Comment(bytes) => Some(
bytes.intern_utf8().map_err(Error::from).and_then(|text| {
Ok(Token::Comment(Text::Unescaped(text), DUMMY_SPAN))
}),
),
x => todo!("event: {:?}", x),
},
}

View File

@ -330,6 +330,49 @@ fn mixed_child_text_and_cdata() {
);
}
// Comments are yielded as `Token::Comment` carrying `Text::Unescaped`,
// both before the root element and nested within it.
// The nested comment contains markup-like text (`<child>`),
// which is expected to come through verbatim.
#[test]
fn comment() {
    let src = r#"<!--root--><root><!--<child>--></root>"#;

    // Parse first, then build the expectation.
    let parsed: Result<Vec<_>> = Sut::new(src.as_bytes()).collect();
    let given = parsed.expect("parsing failed");

    let expected = vec![
        Token::Comment(Text::Unescaped("root".into()), DUMMY_SPAN),
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::Comment(Text::Unescaped("<child>".into()), DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(given, expected);
}
// A comment spanning several lines is yielded as a single
// `Token::Comment` whose text retains its embedded newlines.
// The newline between the end of the comment and the closing tag is
// expected as a separate `Token::Text`.
#[test]
fn comment_multiline() {
    // The continuation lines of this literal are intentionally flush-left:
    // any leading whitespace would become part of the parsed text.
    let src = r#"<mult><!--comment
on multiple
lines-->
</mult>"#;

    // Parse first, then build the expectation.
    let parsed: Result<Vec<_>> = Sut::new(src.as_bytes()).collect();
    let given = parsed.expect("parsing failed");

    let expected = vec![
        Token::Open("mult".unwrap_into(), DUMMY_SPAN),
        Token::Comment(
            Text::Unescaped("comment\non multiple\nlines".into()),
            DUMMY_SPAN,
        ),
        Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
        Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(given, expected);
}
// TODO: Enough information for error recovery and reporting.
#[test]
fn node_name_invalid_utf8() {