tamer: ir::xir::reader: CData parsing
quick_xml provides the CData value to us already escaped, so we can handle it the same way as Text for now. In the future, we may want to distinguish between the two so that we can reconstruct an identical XML document, but at the moment CData isn't used at all in TAME sources or outputs, so I'm not going to worry about it for now. DEV-10863
main
parent
8b212959c8
commit
fdb8e5998c
|
@ -120,7 +120,11 @@ impl<B: BufRead> XmlXirReader<B> {
|
|||
self.refill_buf()
|
||||
}
|
||||
|
||||
QuickXmlEvent::Text(bytes) => {
|
||||
// quick_xml gives us escaped bytes for CData,
|
||||
// so handle them identically.
|
||||
// The question is whether we'll want to distinguish the two
|
||||
// in the future to reproduce the source document on write.
|
||||
QuickXmlEvent::Text(bytes) | QuickXmlEvent::CData(bytes) => {
|
||||
Some(bytes.intern_utf8().map_err(Error::from).and_then(
|
||||
|text| Ok(Token::Text(Text::Escaped(text), DUMMY_SPAN)),
|
||||
))
|
||||
|
|
|
@ -293,6 +293,43 @@ fn mixed_child_content_with_newlines() {
|
|||
);
|
||||
}
|
||||
|
||||
// A CDATA section that is the sole child of an element must be surfaced
// as a single text token between the element's open and close tokens.
//
// The reader handles `CData` events identically to `Text` events,
// relying on quick_xml having already escaped the CDATA content;
// the expected text is therefore the _escaped_ form of `<foo />`,
// not the raw markup that appears inside the CDATA section.
#[test]
fn child_cdata() {
    let sut = Sut::new(r#"<cd><![CDATA[<foo />]]></cd>"#.as_bytes());

    let result = sut.collect::<Result<Vec<_>>>();

    assert_eq!(
        result.expect("parsing failed"),
        vec![
            Token::Open("cd".unwrap_into(), DUMMY_SPAN),
            // Escaped by quick_xml.
            Token::Text(Text::Escaped("&lt;foo /&gt;".into()), DUMMY_SPAN),
            Token::Close(Some("cd".unwrap_into()), DUMMY_SPAN),
        ],
    );
}
|
||||
|
||||
// Text, a self-closing child element, and a CDATA section may all appear
// as siblings within the same parent;
//   each must yield its own token, in document order.
//
// The plain text `foo` contains nothing that requires escaping and so
// is expected verbatim.
// The CDATA content is handled like text by the reader,
// which relies on quick_xml having already escaped it,
// so the expected value is the _escaped_ form of `<baz/>`.
#[test]
fn mixed_child_text_and_cdata() {
    let sut = Sut::new(r#"<cd>foo<bar/><![CDATA[<baz/>]]></cd>"#.as_bytes());

    let result = sut.collect::<Result<Vec<_>>>();

    assert_eq!(
        result.expect("parsing failed"),
        vec![
            Token::Open("cd".unwrap_into(), DUMMY_SPAN),
            Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
            Token::Open("bar".unwrap_into(), DUMMY_SPAN),
            // Self-closing element: no name on the close token.
            Token::Close(None, DUMMY_SPAN),
            // Escaped by quick_xml.
            Token::Text(Text::Escaped("&lt;baz/&gt;".into()), DUMMY_SPAN),
            Token::Close(Some("cd".unwrap_into()), DUMMY_SPAN),
        ],
    );
}
|
||||
|
||||
// TODO: Enough information for error recovery and reporting.
|
||||
#[test]
|
||||
fn node_name_invalid_utf8() {
|
||||
|
|
Loading…
Reference in New Issue