tamer: xir::reader: Divorce from `parse`

The reader previously yielded a `ParsedResult`, presumably to simplify
lowering operations.  But the reader is not a `ParseState`, and does not
otherwise use the parsing API, so this was an inappropriate and confusing
coupling.

This resolves that by introducing a new `lowerable` function, which translates
an iterator into one that can be placed in a lowering pipeline.

See the previous commit for more information.

DEV-13708
main
Mike Gerwitz 2023-02-21 15:10:13 -05:00
parent 963688f889
commit 29178f2360
6 changed files with 151 additions and 154 deletions

View File

@ -49,7 +49,9 @@ use tamer::{
InterpError, InterpolateNir, Nir, NirToAir, NirToAirError, XirfToNir, InterpError, InterpolateNir, Nir, NirToAir, NirToAirError, XirfToNir,
XirfToNirError, XirfToNirError,
}, },
parse::{FinalizeError, Lower, ParseError, ParsedObject, UnknownToken}, parse::{
lowerable, FinalizeError, Lower, ParseError, ParsedObject, UnknownToken,
},
xir::{ xir::{
self, self,
flat::{RefinedText, XirToXirf, XirToXirfError, XirfToken}, flat::{RefinedText, XirToXirf, XirToXirfError, XirfToken},
@ -92,15 +94,13 @@ fn src_reader<'a>(
fn copy_xml_to<'e, W: io::Write + 'e>( fn copy_xml_to<'e, W: io::Write + 'e>(
mut fout: W, mut fout: W,
escaper: &'e DefaultEscaper, escaper: &'e DefaultEscaper,
) -> impl FnMut( ) -> impl FnMut(&Result<XirToken, XirError>) + 'e {
&tamer::parse::ParsedResult<ParsedObject<UnknownToken, XirToken, XirError>>, use tamer::xir::writer::XmlWriter;
) + 'e {
use tamer::{parse::Parsed, xir::writer::XmlWriter};
let mut xmlwriter = Default::default(); let mut xmlwriter = Default::default();
move |tok_result| match tok_result { move |tok_result| match tok_result {
Ok(Parsed::Object(tok)) => { Ok(tok) => {
xmlwriter = tok.write(&mut fout, xmlwriter, escaper).unwrap(); xmlwriter = tok.write(&mut fout, xmlwriter, escaper).unwrap();
} }
_ => (), _ => (),
@ -140,18 +140,17 @@ fn compile<R: Reporter>(
// TODO: We're just echoing back out XIR, // TODO: We're just echoing back out XIR,
// which will be the same sans some formatting. // which will be the same sans some formatting.
let src = &mut src_reader(src_path, &escaper)? let src = &mut lowerable(src_reader(src_path, &escaper)?.inspect({
.inspect({ #[cfg(not(feature = "wip-asg-derived-xmli"))]
#[cfg(not(feature = "wip-asg-derived-xmli"))] {
{ copy_xml_to(fout, &escaper)
copy_xml_to(fout, &escaper) }
} #[cfg(feature = "wip-asg-derived-xmli")]
#[cfg(feature = "wip-asg-derived-xmli")] {
{ |_| ()
|_| () }
} }))
}) .map(|result| result.map_err(RecoverableError::from));
.map(|result| result.map_err(RecoverableError::from));
// TODO: Determine a good default capacity once we have this populated // TODO: Determine a good default capacity once we have this populated
// and can come up with some heuristics. // and can come up with some heuristics.

View File

@ -41,7 +41,8 @@ use crate::{
XmloToken, XmloToken,
}, },
parse::{ parse::{
FinalizeError, Lower, ParseError, Parsed, ParsedObject, UnknownToken, lowerable, FinalizeError, Lower, ParseError, Parsed, ParsedObject,
UnknownToken,
}, },
sym::{GlobalSymbolResolve, SymbolId}, sym::{GlobalSymbolResolve, SymbolId},
xir::{ xir::{
@ -106,7 +107,7 @@ fn load_xmlo<P: AsRef<Path>, S: Escaper>(
VisitOnceFile::Visited => return Ok((asg, state)), VisitOnceFile::Visited => return Ok((asg, state)),
}; };
let src = &mut XmlXirReader::new(file, escaper, ctx) let src = &mut lowerable(XmlXirReader::new(file, escaper, ctx))
.map(|result| result.map_err(TameldError::from)); .map(|result| result.map_err(TameldError::from));
// TODO: This entire block is a WIP and will be incrementally // TODO: This entire block is a WIP and will be incrementally

View File

@ -30,7 +30,7 @@ mod trace;
pub mod util; pub mod util;
pub use error::{FinalizeError, ParseError}; pub use error::{FinalizeError, ParseError};
pub use lower::{Lower, LowerIter, ParsedObject}; pub use lower::{lowerable, Lower, LowerIter, ParsedObject};
pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser}; pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};
pub use state::{ pub use state::{
context::{Context, Empty as EmptyContext, NoContext}, context::{Context, Empty as EmptyContext, NoContext},

View File

@ -21,7 +21,8 @@
use super::{ use super::{
state::ClosedParseState, FinalizeError, FinalizedParser, NoContext, Object, state::ClosedParseState, FinalizeError, FinalizedParser, NoContext, Object,
ParseError, ParseState, Parsed, Parser, Token, TransitionResult, ParseError, ParseState, Parsed, ParsedResult, Parser, Token,
TransitionResult,
}; };
use crate::diagnose::Diagnostic; use crate::diagnose::Diagnostic;
use std::{fmt::Display, iter, marker::PhantomData}; use std::{fmt::Display, iter, marker::PhantomData};
@ -248,6 +249,22 @@ pub trait WidenedError<S: ParseState, LS: ParseState> = Diagnostic
pub type WidenedParsedResult<S, E> = pub type WidenedParsedResult<S, E> =
Result<Parsed<<S as ParseState>::Object>, E>; Result<Parsed<<S as ParseState>::Object>, E>;
/// Adapt an arbitrary [`Iterator`] of `Result`s for use in a `Lower`
///   pipeline.
///
/// Each item yielded by `iter` is rewrapped into the same output shape
///   that a [`Parser`] yields:
///   an `Ok(obj)` becomes `Ok(Parsed::Object(obj))`,
///   and an `Err(e)` becomes `Err(ParseError::StateError(e))`.
/// The conversion is applied through [`Iterator::map`],
///   so items are converted one at a time as the returned iterator is
///   driven.
///
/// This allows a source that is not itself a [`ParseState`] to
///   participate in a lowering pipeline.
/// The token type `T` does not appear in the input and is chosen by the
///   caller.
///
/// See [`ParsedObject`] for more information.
pub fn lowerable<T: Token, O: Object, E: Diagnostic + PartialEq>(
    iter: impl Iterator<Item = Result<O, E>>,
) -> impl Iterator<Item = ParsedResult<ParsedObject<T, O, E>>> {
    iter.map(|item| match item {
        // A successfully produced object is presented as a parsed object.
        Ok(obj) => Ok(Parsed::Object(obj)),
        // The source's own error is carried as a state error.
        Err(err) => Err(ParseError::StateError(err)),
    })
}
/// Representation of a [`ParseState`] producing some type of [`Object`]. /// Representation of a [`ParseState`] producing some type of [`Object`].
/// ///
/// This is intended to be used not as a value, /// This is intended to be used not as a value,

View File

@ -26,7 +26,6 @@ use super::{
Token, Token,
}; };
use crate::{ use crate::{
parse::{ParseError, Parsed, ParsedObject, ParsedResult, UnknownToken},
span::Context, span::Context,
sym::{st::raw::WS_EMPTY, GlobalSymbolInternBytes}, sym::{st::raw::WS_EMPTY, GlobalSymbolInternBytes},
}; };
@ -37,9 +36,7 @@ use quick_xml::{
}, },
Error as QuickXmlError, Error as QuickXmlError,
}; };
use std::{borrow::Cow, collections::VecDeque, io::BufRead, result}; use std::{borrow::Cow, collections::VecDeque, io::BufRead};
pub type Result<T> = result::Result<T, Error>;
/// Parse XML into a XIR [`Token`] stream. /// Parse XML into a XIR [`Token`] stream.
/// ///
@ -110,7 +107,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
/// ///
/// This is intended to be invoked once the buffer has been depleted by /// This is intended to be invoked once the buffer has been depleted by
/// [`XmlXirReader::next`]. /// [`XmlXirReader::next`].
pub fn refill_buf(&mut self) -> Option<Result<Token>> { pub fn refill_buf(&mut self) -> Option<Result<Token, Error>> {
// Clear any previous buffer to free unneeded data. // Clear any previous buffer to free unneeded data.
self.tokbuf.clear(); self.tokbuf.clear();
self.readbuf.clear(); self.readbuf.clear();
@ -263,7 +260,11 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
/// people unfamiliar with the system do not have expectations that /// people unfamiliar with the system do not have expectations that
/// are going to be unmet, /// are going to be unmet,
/// which may result in subtle (or even serious) problems. /// which may result in subtle (or even serious) problems.
fn validate_decl(decl: &BytesDecl, pos: usize, ctx: Context) -> Result<()> { fn validate_decl(
decl: &BytesDecl,
pos: usize,
ctx: Context,
) -> Result<(), Error> {
// Starts after `<?`, which we want to include. // Starts after `<?`, which we want to include.
let decl_ptr = decl.as_ptr() as usize - 2 + pos; let decl_ptr = decl.as_ptr() as usize - 2 + pos;
@ -320,7 +321,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
pos: usize, pos: usize,
ctx: Context, ctx: Context,
empty_tag: bool, empty_tag: bool,
) -> Result<Token> { ) -> Result<Token, Error> {
// Starts after the opening tag `<`, so adjust. // Starts after the opening tag `<`, so adjust.
let addr = ele.as_ptr() as usize - 1; let addr = ele.as_ptr() as usize - 1;
let len = ele.name().len(); let len = ele.name().len();
@ -453,7 +454,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
ele_ptr: usize, ele_ptr: usize,
ele_pos: usize, ele_pos: usize,
ctx: Context, ctx: Context,
) -> Result<bool> { ) -> Result<bool, Error> {
let mut found = false; let mut found = false;
// Disable checks to allow duplicate attributes; // Disable checks to allow duplicate attributes;
@ -559,21 +560,14 @@ where
B: BufRead, B: BufRead,
S: Escaper, S: Escaper,
{ {
type Item = ParsedResult<ParsedObject<UnknownToken, Token, Error>>; type Item = Result<Token, Error>;
/// Produce the next XIR [`Token`] from the input. /// Produce the next XIR [`Token`] from the input.
/// ///
/// For more information on how this reader operates, /// For more information on how this reader operates,
/// see [`XmlXirReader`]. /// see [`XmlXirReader`].
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.tokbuf self.tokbuf.pop_back().map(Ok).or_else(|| self.refill_buf())
.pop_back()
.map(|tok| Ok(Parsed::Object(tok)))
.or_else(|| {
self.refill_buf().map(|result| {
result.map(Parsed::Object).map_err(ParseError::StateError)
})
})
} }
} }

View File

@ -18,15 +18,10 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>. // along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::*; use super::*;
use crate::parse::UnknownToken;
use crate::sym::GlobalSymbolIntern; use crate::sym::GlobalSymbolIntern;
use crate::xir::{CloseSpan, OpenSpan}; use crate::xir::{CloseSpan, OpenSpan};
use crate::{ use crate::{
convert::ExpectInto, convert::ExpectInto,
parse::{
ParseError::StateError as PE,
Parsed::{self, Object as O},
},
span::dummy::DUMMY_CONTEXT as DC, span::dummy::DUMMY_CONTEXT as DC,
xir::{Error, Token}, xir::{Error, Token},
}; };
@ -49,8 +44,7 @@ use std::borrow::Cow;
/// redundant checks. /// redundant checks.
type Sut<'a, B, S> = XmlXirReader<'a, B, S>; type Sut<'a, B, S> = XmlXirReader<'a, B, S>;
type SutResultCollect = type SutResultCollect = Result<Vec<Token>, Error>;
result::Result<Vec<Parsed<Token>>, ParseError<UnknownToken, Error>>;
#[derive(Debug, Default)] #[derive(Debug, Default)]
struct MockEscaper {} struct MockEscaper {}
@ -61,9 +55,7 @@ impl Escaper for MockEscaper {
unreachable!("Reader should not be escaping!") unreachable!("Reader should not be escaping!")
} }
fn unescape_bytes( fn unescape_bytes(value: &[u8]) -> Result<Cow<[u8]>, SpanlessError> {
value: &[u8],
) -> result::Result<Cow<[u8]>, SpanlessError> {
let mut unesc = value.to_owned(); let mut unesc = value.to_owned();
unesc.extend_from_slice(b":UNESC"); unesc.extend_from_slice(b":UNESC");
@ -106,8 +98,8 @@ fn empty_node_without_prefix_or_attributes_or_whitespace() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10))), Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10)),
O(Token::Close(None, CloseSpan(b, 0))), Token::Close(None, CloseSpan(b, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -128,8 +120,8 @@ fn empty_node_without_prefix_or_attributes() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10))), Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10)),
O(Token::Close(None, CloseSpan(b, 0))), Token::Close(None, CloseSpan(b, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -150,11 +142,11 @@ fn does_not_resolve_xmlns() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("no-ns".unwrap_into(), OpenSpan(a, 5))), Token::Open("no-ns".unwrap_into(), OpenSpan(a, 5)),
// Since we didn't parse @xmlns, it's still an attribute. // Since we didn't parse @xmlns, it's still an attribute.
O(Token::AttrName("xmlns".unwrap_into(), b)), Token::AttrName("xmlns".unwrap_into(), b),
O(Token::AttrValue("noresolve:UNESC".intern(), c)), Token::AttrValue("noresolve:UNESC".intern(), c),
O(Token::Close(None, CloseSpan(d, 0))), Token::Close(None, CloseSpan(d, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -176,13 +168,10 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
// Should be the QName, _unresolved_. // Should be the QName, _unresolved_.
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open( Token::Open(("x", "empty-node").unwrap_into(), OpenSpan(a, 12)),
("x", "empty-node").unwrap_into(), Token::AttrName(("xmlns", "x").unwrap_into(), b),
OpenSpan(a, 12) Token::AttrValue("noresolve:UNESC".intern(), c),
)), Token::Close(None, CloseSpan(d, 0)),
O(Token::AttrName(("xmlns", "x").unwrap_into(), b)),
O(Token::AttrValue("noresolve:UNESC".intern(), c)),
O(Token::Close(None, CloseSpan(d, 0))),
]), ]),
sut.collect(), sut.collect(),
); );
@ -204,7 +193,7 @@ fn prefix_with_empty_local_name_invalid_qname() {
match result { match result {
Ok(_) => panic!("expected failure"), Ok(_) => panic!("expected failure"),
Err(given) => { Err(given) => {
assert_eq!(PE(Error::InvalidQName("x:".into(), a)), given); assert_eq!(Error::InvalidQName("x:".into(), a), given);
} }
} }
} }
@ -228,14 +217,14 @@ fn multiple_attrs_ordered() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("ele".unwrap_into(), OpenSpan(a, 3))), Token::Open("ele".unwrap_into(), OpenSpan(a, 3)),
O(Token::AttrName("foo".unwrap_into(), b)), Token::AttrName("foo".unwrap_into(), b),
O(Token::AttrValue("a:UNESC".intern(), c)), Token::AttrValue("a:UNESC".intern(), c),
O(Token::AttrName("bar".unwrap_into(), d)), Token::AttrName("bar".unwrap_into(), d),
O(Token::AttrValue("b:UNESC".intern(), e)), Token::AttrValue("b:UNESC".intern(), e),
O(Token::AttrName(("b", "baz").unwrap_into(), f)), Token::AttrName(("b", "baz").unwrap_into(), f),
O(Token::AttrValue("c:UNESC".intern(), g)), Token::AttrValue("c:UNESC".intern(), g),
O(Token::Close(None, CloseSpan(h, 0))), Token::Close(None, CloseSpan(h, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -258,10 +247,10 @@ fn empty_attr_value() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("ele".unwrap_into(), OpenSpan(a, 3))), Token::Open("ele".unwrap_into(), OpenSpan(a, 3)),
O(Token::AttrName("empty".unwrap_into(), b)), Token::AttrName("empty".unwrap_into(), b),
O(Token::AttrValue(":UNESC".intern(), c)), Token::AttrValue(":UNESC".intern(), c),
O(Token::Close(None, CloseSpan(d, 0))), Token::Close(None, CloseSpan(d, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -286,12 +275,12 @@ fn permits_duplicate_attrs() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("dup".unwrap_into(), OpenSpan(a, 3))), Token::Open("dup".unwrap_into(), OpenSpan(a, 3)),
O(Token::AttrName("attr".unwrap_into(), b)), Token::AttrName("attr".unwrap_into(), b),
O(Token::AttrValue("a:UNESC".intern(), c)), Token::AttrValue("a:UNESC".intern(), c),
O(Token::AttrName("attr".unwrap_into(), d)), Token::AttrName("attr".unwrap_into(), d),
O(Token::AttrValue("b:UNESC".intern(), e)), Token::AttrValue("b:UNESC".intern(), e),
O(Token::Close(None, CloseSpan(f, 0))), Token::Close(None, CloseSpan(f, 0)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -312,8 +301,8 @@ fn open_close_no_child() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))), Token::Open("nochild".unwrap_into(), OpenSpan(a, 7)),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))), Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -333,8 +322,8 @@ fn open_close_no_child_open_tag_whitespace() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))), Token::Open("nochild".unwrap_into(), OpenSpan(a, 7)),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))), Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -354,8 +343,8 @@ fn open_close_no_child_close_tag_whitespace() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))), Token::Open("nochild".unwrap_into(), OpenSpan(a, 7)),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))), Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -378,10 +367,10 @@ fn child_node_self_closing() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))), Token::Open("root".unwrap_into(), OpenSpan(a, 4)),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))), Token::Open("child".unwrap_into(), OpenSpan(b, 5)),
O(Token::Close(None, CloseSpan(c, 0))), Token::Close(None, CloseSpan(c, 0)),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4))), Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -403,12 +392,12 @@ fn sibling_nodes() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))), Token::Open("root".unwrap_into(), OpenSpan(a, 4)),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))), Token::Open("child".unwrap_into(), OpenSpan(b, 5)),
O(Token::Close(None, CloseSpan(c, 0))), Token::Close(None, CloseSpan(c, 0)),
O(Token::Open("child".unwrap_into(), OpenSpan(d, 5))), Token::Open("child".unwrap_into(), OpenSpan(d, 5)),
O(Token::Close(None, CloseSpan(e, 0))), Token::Close(None, CloseSpan(e, 0)),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4))), Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -430,12 +419,12 @@ fn child_node_with_attrs() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))), Token::Open("root".unwrap_into(), OpenSpan(a, 4)),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))), Token::Open("child".unwrap_into(), OpenSpan(b, 5)),
O(Token::AttrName("foo".unwrap_into(), c)), Token::AttrName("foo".unwrap_into(), c),
O(Token::AttrValue("bar:UNESC".intern(), d)), Token::AttrValue("bar:UNESC".intern(), d),
O(Token::Close(None, CloseSpan(e, 0))), Token::Close(None, CloseSpan(e, 0)),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4))), Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -454,9 +443,9 @@ fn child_text() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("text".unwrap_into(), OpenSpan(a, 4))), Token::Open("text".unwrap_into(), OpenSpan(a, 4)),
O(Token::Text("foo bar:UNESC".into(), b)), Token::Text("foo bar:UNESC".into(), b),
O(Token::Close(Some("text".unwrap_into()), CloseSpan(c, 4))), Token::Close(Some("text".unwrap_into()), CloseSpan(c, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -478,12 +467,12 @@ fn mixed_child_content() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("text".unwrap_into(), OpenSpan(a, 4))), Token::Open("text".unwrap_into(), OpenSpan(a, 4)),
O(Token::Text("foo:UNESC".into(), b)), Token::Text("foo:UNESC".into(), b),
O(Token::Open("em".unwrap_into(), OpenSpan(c, 2))), Token::Open("em".unwrap_into(), OpenSpan(c, 2)),
O(Token::Text("bar:UNESC".into(), d)), Token::Text("bar:UNESC".into(), d),
O(Token::Close(Some("em".unwrap_into()), CloseSpan(e, 2))), Token::Close(Some("em".unwrap_into()), CloseSpan(e, 2)),
O(Token::Close(Some("text".unwrap_into()), CloseSpan(f, 4))), Token::Close(Some("text".unwrap_into()), CloseSpan(f, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -518,14 +507,14 @@ fn mixed_child_content_with_newlines() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Text("\n:UNESC".into(), a)), Token::Text("\n:UNESC".into(), a),
O(Token::Open("root".unwrap_into(), OpenSpan(b, 4))), Token::Open("root".unwrap_into(), OpenSpan(b, 4)),
O(Token::Text("\n :UNESC".into(), c)), Token::Text("\n :UNESC".into(), c),
O(Token::Open("child".unwrap_into(), OpenSpan(d, 5))), Token::Open("child".unwrap_into(), OpenSpan(d, 5)),
O(Token::Close(None, CloseSpan(e, 0))), Token::Close(None, CloseSpan(e, 0)),
O(Token::Text("\n:UNESC".into(), f)), Token::Text("\n:UNESC".into(), f),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(g, 4))), Token::Close(Some("root".unwrap_into()), CloseSpan(g, 4)),
O(Token::Text("\n:UNESC".into(), h)), Token::Text("\n:UNESC".into(), h),
]), ]),
sut.collect(), sut.collect(),
); );
@ -545,10 +534,10 @@ fn comment() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Comment("root".into(), a)), Token::Comment("root".into(), a),
O(Token::Open("root".unwrap_into(), OpenSpan(b, 4))), Token::Open("root".unwrap_into(), OpenSpan(b, 4)),
O(Token::Comment("<child>".into(), c)), Token::Comment("<child>".into(), c),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4))), Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -574,10 +563,10 @@ lines-->
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("mult".unwrap_into(), OpenSpan(a, 4))), Token::Open("mult".unwrap_into(), OpenSpan(a, 4)),
O(Token::Comment("comment\non multiple\nlines".into(), b)), Token::Comment("comment\non multiple\nlines".into(), b),
O(Token::Text("\n:UNESC".into(), c)), Token::Text("\n:UNESC".into(), c),
O(Token::Close(Some("mult".unwrap_into()), CloseSpan(d, 4))), Token::Close(Some("mult".unwrap_into()), CloseSpan(d, 4)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -598,13 +587,10 @@ fn permits_mismatched_tags() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))), Token::Open("root".unwrap_into(), OpenSpan(a, 4)),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))), Token::Open("child".unwrap_into(), OpenSpan(b, 5)),
O(Token::Close(None, CloseSpan(c, 0))), Token::Close(None, CloseSpan(c, 0)),
O(Token::Close( Token::Close(Some("mismatch".unwrap_into()), CloseSpan(d, 8)),
Some("mismatch".unwrap_into()),
CloseSpan(d, 8)
)),
]), ]),
sut.collect(), sut.collect(),
); );
@ -622,7 +608,7 @@ fn node_name_invalid_utf8() {
match result { match result {
Ok(_) => panic!("expected failure"), Ok(_) => panic!("expected failure"),
Err(PE(Error::InvalidUtf8(_, bytes, given_span))) => { Err(Error::InvalidUtf8(_, bytes, given_span)) => {
assert_eq!(bytes, &[INVALID_UTF8_BYTE]); assert_eq!(bytes, &[INVALID_UTF8_BYTE]);
assert_eq!(span, given_span); assert_eq!(span, given_span);
} }
@ -644,7 +630,7 @@ fn attr_name_invalid_utf8() {
match result { match result {
Ok(_) => panic!("expected failure"), Ok(_) => panic!("expected failure"),
Err(PE(Error::InvalidUtf8(_, bytes, given_span))) => { Err(Error::InvalidUtf8(_, bytes, given_span)) => {
assert_eq!(bytes, &[INVALID_UTF8_BYTE]); assert_eq!(bytes, &[INVALID_UTF8_BYTE]);
assert_eq!(span, given_span); assert_eq!(span, given_span);
} }
@ -666,7 +652,7 @@ fn attr_value_invalid_utf8() {
match result { match result {
Ok(_) => panic!("expected failure"), Ok(_) => panic!("expected failure"),
Err(PE(Error::InvalidUtf8(_, bytes, given_span))) => { Err(Error::InvalidUtf8(_, bytes, given_span)) => {
// Doesn't make it to the Escaper. // Doesn't make it to the Escaper.
assert_eq!(bytes, &[b'b', b'a', b'd', INVALID_UTF8_BYTE]); assert_eq!(bytes, &[b'b', b'a', b'd', INVALID_UTF8_BYTE]);
assert_eq!(span, given_span); assert_eq!(span, given_span);
@ -689,8 +675,8 @@ fn valid_xml_decl_no_encoding() {
assert_eq!( assert_eq!(
Ok(vec![ Ok(vec![
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))), Token::Open("root".unwrap_into(), OpenSpan(a, 4)),
O(Token::Close(None, CloseSpan(b, 0))), Token::Close(None, CloseSpan(b, 0)),
]), ]),
sut.collect() sut.collect()
); );
@ -721,7 +707,7 @@ fn invalid_xml_decl_version() {
let span = DC.span(15, 3); let span = DC.span(15, 3);
assert_eq!( assert_eq!(
Err(PE(Error::UnsupportedXmlVersion("1.1".intern(), span))), Err(Error::UnsupportedXmlVersion("1.1".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -736,7 +722,7 @@ fn invalid_xml_encoding() {
let span = DC.span(30, 7); let span = DC.span(30, 7);
assert_eq!( assert_eq!(
Err(PE(Error::UnsupportedEncoding("latin-1".intern(), span))), Err(Error::UnsupportedEncoding("latin-1".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -774,7 +760,7 @@ fn attr_single_no_value_no_eq() {
let span = DC.span(10, 0); let span = DC.span(10, 0);
assert_eq!( assert_eq!(
Err(PE(Error::AttrValueExpected(None, span))), Err(Error::AttrValueExpected(None, span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -789,7 +775,7 @@ fn attr_single_no_value_with_eq() {
let span = DC.span(11, 0); let span = DC.span(11, 0);
assert_eq!( assert_eq!(
Err(PE(Error::AttrValueExpected(None, span))), Err(Error::AttrValueExpected(None, span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -805,7 +791,7 @@ fn attr_multi_no_value_no_eq() {
assert_eq!( assert_eq!(
// quick-xml doesn't provide the name // quick-xml doesn't provide the name
Err(PE(Error::AttrValueExpected(None, span))), Err(Error::AttrValueExpected(None, span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -822,7 +808,7 @@ fn attr_multi_no_value_with_eq() {
let span = DC.span(11, 0); let span = DC.span(11, 0);
assert_eq!( assert_eq!(
Err(PE(Error::AttrValueUnquoted(None, span))), Err(Error::AttrValueUnquoted(None, span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -838,7 +824,7 @@ fn attr_multiple_no_value_no_eq_then_good() {
assert_eq!( assert_eq!(
// quick-xml doesn't provide the name // quick-xml doesn't provide the name
Err(PE(Error::AttrValueExpected(None, span))), Err(Error::AttrValueExpected(None, span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -853,7 +839,7 @@ fn empty_element_qname_no_attrs() {
let span = DC.span(1, 0); let span = DC.span(1, 0);
assert_eq!( assert_eq!(
Err(PE(Error::InvalidQName("".intern(), span))), Err(Error::InvalidQName("".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -868,7 +854,7 @@ fn empty_element_qname_with_space_no_attrs() {
let span = DC.span(1, 0); let span = DC.span(1, 0);
assert_eq!( assert_eq!(
Err(PE(Error::InvalidQName("".intern(), span))), Err(Error::InvalidQName("".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -882,7 +868,7 @@ fn empty_element_qname_with_attr() {
let span = DC.span(1, 9); let span = DC.span(1, 9);
assert_eq!( assert_eq!(
Err(PE(Error::InvalidQName("foo=\"bar\"".intern(), span))), Err(Error::InvalidQName("foo=\"bar\"".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -897,7 +883,7 @@ fn empty_element_qname_with_space_with_attr() {
let span = DC.span(1, 0); let span = DC.span(1, 0);
assert_eq!( assert_eq!(
Err(PE(Error::InvalidQName("".intern(), span))), Err(Error::InvalidQName("".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }
@ -915,7 +901,7 @@ fn space_before_element_name() {
let span = DC.span(1, 0); let span = DC.span(1, 0);
assert_eq!( assert_eq!(
Err(PE(Error::InvalidQName("".intern(), span))), Err(Error::InvalidQName("".intern(), span)),
sut.collect::<SutResultCollect>() sut.collect::<SutResultCollect>()
); );
} }