tamer: ir::xir::reader: Text and mixed content
It's nice being able to breeze through changes, since that's been a pretty rare thing so far, given all the foundational work that has been needed. This should get us pretty damn close to being able to parse the `xmlo` files for the reader linker, if we're not there already.
DEV-10863
main
parent
13a779ec9c
commit
8b212959c8
|
@ -22,7 +22,7 @@
|
|||
//! This uses [`quick_xml`] as the parser.
|
||||
|
||||
use super::{AttrValue, Error, Token};
|
||||
use crate::{span::DUMMY_SPAN, sym::GlobalSymbolInternBytes};
|
||||
use crate::{ir::xir::Text, span::DUMMY_SPAN, sym::GlobalSymbolInternBytes};
|
||||
use quick_xml::{
|
||||
self,
|
||||
events::{attributes::Attributes, BytesStart, Event as QuickXmlEvent},
|
||||
|
@ -120,6 +120,12 @@ impl<B: BufRead> XmlXirReader<B> {
|
|||
self.refill_buf()
|
||||
}
|
||||
|
||||
QuickXmlEvent::Text(bytes) => {
|
||||
Some(bytes.intern_utf8().map_err(Error::from).and_then(
|
||||
|text| Ok(Token::Text(Text::Escaped(text), DUMMY_SPAN)),
|
||||
))
|
||||
}
|
||||
|
||||
x => todo!("event: {:?}", x),
|
||||
},
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
use super::*;
|
||||
use crate::{
|
||||
convert::ExpectInto,
|
||||
ir::xir::{AttrValue, Token},
|
||||
ir::xir::{AttrValue, Text, Token},
|
||||
span::DUMMY_SPAN,
|
||||
};
|
||||
|
||||
|
@ -227,6 +227,72 @@ fn child_node_with_attrs() {
|
|||
);
|
||||
}
|
||||
|
||||
// Text directly inside an element is expected to come back as a single
// escaped `Text` token sitting between the element's open and close tokens.
#[test]
fn child_text() {
    let tokens = Sut::new(r#"<text>foo bar</text>"#.as_bytes())
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("text".unwrap_into(), DUMMY_SPAN),
        Token::Text(Text::Escaped("foo bar".into()), DUMMY_SPAN),
        Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
||||
|
||||
// An element holding both text and a child element: the expected stream
// interleaves `Text` tokens with the nested element's open/close tokens in
// document order.
#[test]
fn mixed_child_content() {
    let tokens = Sut::new(r#"<text>foo<em>bar</em></text>"#.as_bytes())
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("text".unwrap_into(), DUMMY_SPAN),
        Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
        Token::Open("em".unwrap_into(), DUMMY_SPAN),
        Token::Text(Text::Escaped("bar".into()), DUMMY_SPAN),
        Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
        Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
||||
|
||||
// This is how XML is typically written; people don't perceive it as mixed,
|
||||
// even though it is. This intentionally adds newlines before and after the
|
||||
// opening and closing tags of the root node.
|
||||
// Expects whitespace-only text around and between elements to be emitted as
// `Text` tokens: one before the root's open tag, one before the child, one
// after the child, and one after the root's close tag.
//
// NOTE(review): the input below is a multi-line raw string, so its exact
// whitespace is significant; confirm that the child's indentation matches
// the "\n " literal in the expected tokens.
#[test]
|
||||
fn mixed_child_content_with_newlines() {
|
||||
let sut = Sut::new(
|
||||
r#"
|
||||
<root>
|
||||
<child />
|
||||
</root>
|
||||
"#
|
||||
.as_bytes(),
|
||||
);
|
||||
|
||||
let result = sut.collect::<Result<Vec<_>>>();
|
||||
|
||||
assert_eq!(
|
||||
result.expect("parsing failed"),
|
||||
vec![
|
||||
// Newline preceding the root element's open tag.
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
// Newline plus indentation preceding the child element.
Token::Text(Text::Escaped("\n ".into()), DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
// The self-closing `<child />` is expected to close without a name.
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
// Newline following the root element's close tag.
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: Enough information for error recovery and reporting.
|
||||
#[test]
|
||||
fn node_name_invalid_utf8() {
|
||||
|
|
Loading…
Reference in New Issue