tamer: xir: Remove Text enum
Like previous commits, this replaces the explicit escaping context with the convention that all values retrieved from `xir` are unescaped on read and escaped on write. Comments are a notable TODO, since we must escape only `--`. CData is also an issue. I had _expected_ to use it as a means to avoid unescaping fragments, but I had forgotten that quick_xml hard-codes escaping on read so that it can re-use BytesStart! That is terribly unfortunate, and may result in us having to re-implement our own read method in the future to avoid this nonsense. So I'm just leaving it as a TODO for now.
DEV-11081
main
parent
8723ca154d
commit
5233822322
|
@ -138,7 +138,7 @@ mod writer {
|
|||
Writer as QuickXmlWriter,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
use tamer::xir::{writer::XmlWriter, Text};
|
||||
use tamer::xir::{writer::XmlWriter, Escaper};
|
||||
use tamer::{span::Span, xir::DefaultEscaper};
|
||||
|
||||
const FRAGMENT: &str = r#"<fragment>
|
||||
|
@ -205,6 +205,12 @@ This is pretend fragment text. We need a lot of it.</fragment>
|
|||
let val1 = "value".intern();
|
||||
let val2 = "value2".intern();
|
||||
|
||||
// Prime the cache, since BytesStart is already assumed to be
|
||||
// escaped. We will have cached on read in a real-world scenario.
|
||||
let escaper = DefaultEscaper::default();
|
||||
escaper.escape(val1);
|
||||
escaper.escape(val2);
|
||||
|
||||
bench.iter(|| {
|
||||
(0..1000).for_each(|_| {
|
||||
vec![
|
||||
|
@ -216,7 +222,7 @@ This is pretend fragment text. We need a lot of it.</fragment>
|
|||
Token::Close(None, span),
|
||||
]
|
||||
.into_iter()
|
||||
.write(&mut buf, Default::default(), &DefaultEscaper::default())
|
||||
.write(&mut buf, Default::default(), &escaper)
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
|
@ -250,14 +256,15 @@ This is pretend fragment text. We need a lot of it.</fragment>
|
|||
let frag: SymbolId = FRAGMENT.intern();
|
||||
let span = Span::from_byte_interval((0, 0), "path".intern());
|
||||
|
||||
// Prime the cache, since BytesStart is already assumed to be
|
||||
// escaped.
|
||||
let escaper = DefaultEscaper::default();
|
||||
escaper.escape(frag);
|
||||
|
||||
bench.iter(|| {
|
||||
(0..50).for_each(|_| {
|
||||
Token::Text(Text::Escaped(frag), span)
|
||||
.write(
|
||||
&mut buf,
|
||||
Default::default(),
|
||||
&DefaultEscaper::default(),
|
||||
)
|
||||
Token::Text(frag, span)
|
||||
.write(&mut buf, Default::default(), &escaper)
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
|
|
|
@ -35,7 +35,7 @@ use crate::{
|
|||
sym::{st::*, SymbolId},
|
||||
xir::{
|
||||
iter::{elem_wrap, ElemWrapIter},
|
||||
QName, Text, Token,
|
||||
QName, Token,
|
||||
},
|
||||
};
|
||||
use arrayvec::ArrayVec;
|
||||
|
@ -306,7 +306,7 @@ impl Iterator for FragmentIter {
|
|||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter
|
||||
.by_ref()
|
||||
.map(|frag| Token::Text(Text::Escaped(frag), LSPAN))
|
||||
.map(|frag| Token::Text(frag, LSPAN))
|
||||
.next()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -490,8 +490,8 @@ macro_rules! test_exec_sec {
|
|||
// Order _absolutely_ matters,
|
||||
// since the purpose of the linker is to put things into the correct
|
||||
// order for execution.
|
||||
assert_eq!(nodes[0].as_text(), Some(&Text::Escaped(frag_a)));
|
||||
assert_eq!(nodes[1].as_text(), Some(&Text::Escaped(frag_b)));
|
||||
assert_eq!(Some(frag_a), nodes[0].as_sym());
|
||||
assert_eq!(Some(frag_b), nodes[1].as_sym());
|
||||
|
||||
assert_eq!(nodes.len(), 2);
|
||||
|
||||
|
|
|
@ -297,10 +297,9 @@ impl TryFrom<&str> for Whitespace {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<Whitespace> for Text {
|
||||
impl From<Whitespace> for SymbolId {
|
||||
fn from(ws: Whitespace) -> Self {
|
||||
// Whitespace needs no escaping
|
||||
Self::Escaped(ws.0)
|
||||
ws.0
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -450,37 +449,6 @@ impl Display for QName {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents text and its escaped state.
|
||||
///
|
||||
/// Being explicit about the state of escaping allows us to skip checks when
|
||||
/// we know that the generated text could not possibly require escaping.
|
||||
/// This does, however, put the onus on the caller to ensure that they got
|
||||
/// the escaping status correct.
|
||||
/// (TODO: More information on why this burden isn't all that bad,
|
||||
/// despite the risk.)
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Text {
|
||||
/// Text node that requires escaping.
|
||||
///
|
||||
/// Unescaped text requires further processing before writing.
|
||||
///
|
||||
/// Note that,
|
||||
/// since the unescaped text is interned,
|
||||
/// it may be wasteful to intern a large text node with the intent of
|
||||
/// escaping and re-interning later.
|
||||
/// Instead,
|
||||
/// if escaping is only needed for writing,
|
||||
/// it is likely better to leave it to the writer to escape,
|
||||
/// which does _not_ require interning of the resulting string.
|
||||
Unescaped(SymbolId),
|
||||
|
||||
/// Text node that either has already been escaped or is known not to
|
||||
/// require escaping.
|
||||
///
|
||||
/// Escaped text can be written as-is without any further processing.
|
||||
Escaped(SymbolId),
|
||||
}
|
||||
|
||||
/// Lightly-structured XML tokens with associated [`Span`]s.
|
||||
///
|
||||
/// This is a streamable IR for XML.
|
||||
|
@ -549,23 +517,21 @@ pub enum Token {
|
|||
AttrEnd,
|
||||
|
||||
/// Comment node.
|
||||
Comment(Text, Span),
|
||||
Comment(SymbolId, Span),
|
||||
|
||||
/// Character data as part of an element.
|
||||
///
|
||||
/// See also [`CData`](Token::CData) variant.
|
||||
Text(Text, Span),
|
||||
Text(SymbolId, Span),
|
||||
|
||||
/// CData node (`<![CDATA[...]]>`).
|
||||
///
|
||||
/// See also [`Text`](Token::Text) variant.
|
||||
///
|
||||
/// _Warning: It is up to the caller to ensure that the string `]]>` is
|
||||
/// not present in the text!_
|
||||
/// This is intended for reading existing XML data where CData is
|
||||
/// already present,
|
||||
/// not for producing new CData safely!
|
||||
CData(Text, Span),
|
||||
CData(SymbolId, Span),
|
||||
|
||||
/// Similar to `Text`,
|
||||
/// but intended for use where only whitespace is allowed,
|
||||
|
@ -704,10 +670,7 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn whitespace_as_text() -> TestResult {
|
||||
assert_eq!(
|
||||
Text::Escaped(" ".intern()),
|
||||
Whitespace::try_from(" ")?.into(),
|
||||
);
|
||||
assert_eq!(" ".intern(), Whitespace::try_from(" ")?.into(),);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
//! This uses [`quick_xml`] as the parser.
|
||||
|
||||
use super::{DefaultEscaper, Error, Escaper, Token};
|
||||
use crate::{span::DUMMY_SPAN, sym::GlobalSymbolInternBytes, xir::Text};
|
||||
use crate::{span::DUMMY_SPAN, sym::GlobalSymbolInternBytes};
|
||||
use quick_xml::{
|
||||
self,
|
||||
events::{attributes::Attributes, BytesStart, Event as QuickXmlEvent},
|
||||
|
@ -140,21 +140,26 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
|
|||
self.refill_buf()
|
||||
}
|
||||
|
||||
// quick_xml gives us escaped bytes for CData,
|
||||
// so handle them identically.
|
||||
// The question is whether we'll want to distinguish the two
|
||||
// in the future to reproduce the source document on write.
|
||||
QuickXmlEvent::Text(bytes) | QuickXmlEvent::CData(bytes) => {
|
||||
Some(bytes.intern_utf8().map_err(Error::from).and_then(
|
||||
|text| Ok(Token::Text(Text::Escaped(text), DUMMY_SPAN)),
|
||||
))
|
||||
}
|
||||
// quick_xml _escapes_ the unescaped CData before handing it
|
||||
// off to us,
|
||||
// which is a complete waste since we'd just have to
|
||||
// unescape it again.
|
||||
QuickXmlEvent::CData(bytes) => todo!("CData: {:?}", bytes),
|
||||
|
||||
QuickXmlEvent::Text(bytes) => Some(
|
||||
bytes
|
||||
.intern_utf8()
|
||||
.map_err(Error::from)
|
||||
.and_then(|sym| self.escaper.unescape(sym))
|
||||
.map(|unesc| Token::Text(unesc, DUMMY_SPAN)),
|
||||
),
|
||||
|
||||
// Comments are _not_ returned escaped.
|
||||
QuickXmlEvent::Comment(bytes) => Some(
|
||||
bytes.intern_utf8().map_err(Error::from).and_then(|text| {
|
||||
Ok(Token::Comment(Text::Unescaped(text), DUMMY_SPAN))
|
||||
}),
|
||||
bytes
|
||||
.intern_utf8()
|
||||
.map_err(Error::from)
|
||||
.map(|text| Token::Comment(text, DUMMY_SPAN)),
|
||||
),
|
||||
|
||||
x => todo!("event: {:?}", x),
|
||||
|
|
|
@ -24,7 +24,7 @@ use crate::sym::GlobalSymbolIntern;
|
|||
use crate::{
|
||||
convert::ExpectInto,
|
||||
span::DUMMY_SPAN,
|
||||
xir::{Error, Text, Token},
|
||||
xir::{Error, Token},
|
||||
};
|
||||
|
||||
/// These tests use [`quick_xml`] directly,
|
||||
|
@ -275,7 +275,7 @@ fn child_text() {
|
|||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo bar".into()), DUMMY_SPAN),
|
||||
Token::Text("foo bar:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -292,10 +292,10 @@ fn mixed_child_content() {
|
|||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
|
||||
Token::Text("foo:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("em".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("bar".into()), DUMMY_SPAN),
|
||||
Token::Text("bar:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -320,56 +320,16 @@ fn mixed_child_content_with_newlines() {
|
|||
assert_eq!(
|
||||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("\n ".into()), DUMMY_SPAN),
|
||||
Token::Text("\n :UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn child_cdata() {
|
||||
new_sut!(sut = r#"<cd><![CDATA[<foo />]]></cd>"#);
|
||||
|
||||
let result = sut.collect::<Result<Vec<_>>>();
|
||||
|
||||
assert_eq!(
|
||||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("cd".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
// Escaped by quick_xml.
|
||||
Token::Text(Text::Escaped("<foo />".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("cd".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mixed_child_text_and_cdata() {
|
||||
new_sut!(sut = r#"<cd>foo<bar/><![CDATA[<baz/>]]></cd>"#);
|
||||
|
||||
let result = sut.collect::<Result<Vec<_>>>();
|
||||
|
||||
assert_eq!(
|
||||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("cd".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("foo".into()), DUMMY_SPAN),
|
||||
Token::Open("bar".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
// Escaped by quick_xml.
|
||||
Token::Text(Text::Escaped("<baz/>".into()), DUMMY_SPAN),
|
||||
Token::Close(Some("cd".unwrap_into()), DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
@ -383,10 +343,10 @@ fn comment() {
|
|||
assert_eq!(
|
||||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Comment(Text::Unescaped("root".into()), DUMMY_SPAN),
|
||||
Token::Comment("root".into(), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Comment(Text::Unescaped("<child>".into()), DUMMY_SPAN),
|
||||
Token::Comment("<child>".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -408,11 +368,8 @@ lines-->
|
|||
vec![
|
||||
Token::Open("mult".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::Comment(
|
||||
Text::Unescaped("comment\non multiple\nlines".into()),
|
||||
DUMMY_SPAN
|
||||
),
|
||||
Token::Text(Text::Escaped("\n".into()), DUMMY_SPAN),
|
||||
Token::Comment("comment\non multiple\nlines".into(), DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
|
|
@ -194,7 +194,7 @@
|
|||
//! For more information,
|
||||
//! see [`AttrParts`].
|
||||
|
||||
use super::{QName, Text, Token, TokenResultStream, TokenStream};
|
||||
use super::{QName, Token, TokenResultStream, TokenStream};
|
||||
use crate::{span::Span, sym::SymbolId};
|
||||
use std::{fmt::Display, iter, mem::take};
|
||||
|
||||
|
@ -224,7 +224,7 @@ pub enum Tree {
|
|||
///
|
||||
/// A text node cannot contain other [`Tree`] elements;
|
||||
/// sibling text nodes must exist within an [`Element`].
|
||||
Text(Text, Span),
|
||||
Text(SymbolId, Span),
|
||||
|
||||
/// This variant exists purely because `#[non_exhaustive]` has no effect
|
||||
/// within the crate.
|
||||
|
@ -246,9 +246,9 @@ impl Into<Option<Element>> for Tree {
|
|||
}
|
||||
}
|
||||
|
||||
impl Into<Option<Text>> for Tree {
|
||||
impl Into<Option<SymbolId>> for Tree {
|
||||
#[inline]
|
||||
fn into(self) -> Option<Text> {
|
||||
fn into(self) -> Option<SymbolId> {
|
||||
match self {
|
||||
Self::Text(text, _) => Some(text),
|
||||
_ => None,
|
||||
|
@ -280,22 +280,17 @@ impl Tree {
|
|||
matches!(self, Self::Element(_))
|
||||
}
|
||||
|
||||
/// Yield a reference to the inner value if it is a [`Text`],
|
||||
/// otherwise [`None`].
|
||||
/// Yield a string representation of the element,
|
||||
/// if applicable.
|
||||
///
|
||||
/// This is incomplete.
|
||||
#[inline]
|
||||
pub fn as_text<'a>(&'a self) -> Option<&'a Text> {
|
||||
pub fn as_sym(&self) -> Option<SymbolId> {
|
||||
match self {
|
||||
Self::Text(text, _) => Some(text),
|
||||
Self::Text(sym, ..) => Some(*sym),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Yield the inner value if it is a [`Text`],
|
||||
/// otherwise [`None`].
|
||||
#[inline]
|
||||
pub fn into_text(self) -> Option<Text> {
|
||||
self.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// Element node.
|
||||
|
@ -729,7 +724,7 @@ impl Stack {
|
|||
/// Appends a text node as a child of an element.
|
||||
///
|
||||
/// This is valid only for a [`Stack::BuddingElement`].
|
||||
fn text(self, value: Text, span: Span) -> Result<Self> {
|
||||
fn text(self, value: SymbolId, span: Span) -> Result<Self> {
|
||||
Ok(match self {
|
||||
Self::BuddingElement(mut ele) => {
|
||||
ele.element.children.push(Tree::Text(value, span));
|
||||
|
|
|
@ -31,8 +31,6 @@ lazy_static! {
|
|||
}
|
||||
|
||||
mod tree {
|
||||
use crate::xir::Text;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
|
@ -47,20 +45,19 @@ mod tree {
|
|||
let tree = Tree::Element(ele.clone());
|
||||
|
||||
assert_eq!(Some(&ele), tree.as_element());
|
||||
assert_eq!(None, tree.as_text());
|
||||
assert_eq!(None, Into::<Option<SymbolId>>::into(tree));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn text_from_tree() {
|
||||
let text = Text::Escaped("foo".intern());
|
||||
let text = "foo".intern();
|
||||
let tree = Tree::Text(text, *S);
|
||||
|
||||
assert!(!tree.is_element());
|
||||
assert_eq!(None, tree.as_element());
|
||||
assert_eq!(None, tree.clone().into_element());
|
||||
|
||||
assert_eq!(Some(&text), tree.as_text());
|
||||
assert_eq!(Some(text), tree.into_text());
|
||||
assert_eq!(Some(text), tree.into());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,7 +329,7 @@ fn element_with_child_with_attributes() {
|
|||
#[test]
|
||||
fn element_with_text() {
|
||||
let parent = "parent".unwrap_into();
|
||||
let text = Text::Escaped("inner text".into());
|
||||
let text = "inner text".into();
|
||||
|
||||
let toks = [
|
||||
Token::Open(parent, *S),
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
use super::{DefaultEscaper, Escaper};
|
||||
use super::{Error as XirError, QName, Token, TokenStream};
|
||||
use crate::sym::GlobalSymbolResolve;
|
||||
use crate::xir::Text;
|
||||
use std::io::{Error as IoError, Write};
|
||||
use std::result;
|
||||
|
||||
|
@ -259,35 +258,21 @@ impl<S: Escaper> XmlWriter<S> for Token {
|
|||
// AttrEnd is ignored by the writer (and is optional).
|
||||
(Self::AttrEnd, x) => Ok(x),
|
||||
|
||||
// Unescaped not yet supported, but you could use CData.
|
||||
(
|
||||
Self::Text(Text::Escaped(text), _),
|
||||
W::NodeExpected | W::NodeOpen,
|
||||
) => {
|
||||
// TODO: We have no way of knowing if text should be formatted
|
||||
// as CData,
|
||||
// which may also be beneficial to avoid escaping if we
|
||||
// haven't yet encountered the unescaped representation.
|
||||
(Self::Text(text, _), W::NodeExpected | W::NodeOpen) => {
|
||||
prev_state.close_tag_if_open(sink)?;
|
||||
sink.write(text.lookup_str().as_bytes())?;
|
||||
sink.write(escaper.escape(text).lookup_str().as_bytes())?;
|
||||
|
||||
Ok(W::NodeExpected)
|
||||
}
|
||||
|
||||
// Escaped not yet supported, but you could use Text.
|
||||
(
|
||||
Self::CData(Text::Unescaped(text), _),
|
||||
W::NodeExpected | W::NodeOpen,
|
||||
) => {
|
||||
prev_state.close_tag_if_open(sink)?;
|
||||
sink.write(b"<![CDATA[")?;
|
||||
sink.write(text.lookup_str().as_bytes())?;
|
||||
sink.write(b"]]>")?;
|
||||
|
||||
Ok(W::NodeExpected)
|
||||
}
|
||||
|
||||
// Unescaped not yet supported, since we do not have a use case.
|
||||
(
|
||||
Self::Comment(Text::Escaped(comment), _),
|
||||
W::NodeExpected | W::NodeOpen,
|
||||
) => {
|
||||
// XXX: While we currently only output comments that have been
|
||||
// _read_ as comments,
|
||||
// that will not always be the case and we must escape `--`!
|
||||
(Self::Comment(comment, _), W::NodeExpected | W::NodeOpen) => {
|
||||
prev_state.close_tag_if_open(sink)?;
|
||||
sink.write(b"<!--")?;
|
||||
sink.write(comment.lookup_str().as_bytes())?;
|
||||
|
@ -304,9 +289,7 @@ impl<S: Escaper> XmlWriter<S> for Token {
|
|||
|
||||
// As-of-yet unsupported operations that weren't needed at the
|
||||
// time of writing, but were planned for in the design of Xir.
|
||||
(invalid @ Self::AttrName(_, _), W::AttrNameAdjacent)
|
||||
| (invalid @ Self::Text(Text::Unescaped(_), _), W::NodeExpected)
|
||||
| (invalid @ Self::CData(Text::Escaped(_), _), W::NodeExpected) => {
|
||||
(invalid @ Self::AttrName(_, _), W::AttrNameAdjacent) => {
|
||||
Err(Error::Todo(format!("{:?}", invalid), prev_state))
|
||||
}
|
||||
|
||||
|
@ -345,11 +328,10 @@ mod test {
|
|||
use crate::{
|
||||
span::Span,
|
||||
sym::GlobalSymbolIntern,
|
||||
xir::{QName, Text, Whitespace},
|
||||
xir::{QName, Whitespace},
|
||||
};
|
||||
|
||||
type TestResult = std::result::Result<(), Error>;
|
||||
type Esc = DefaultEscaper;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct MockEscaper {}
|
||||
|
@ -377,7 +359,7 @@ mod test {
|
|||
fn writes_beginning_node_tag_without_prefix() -> TestResult {
|
||||
let name = QName::new_local("no-prefix".try_into()?);
|
||||
let result = Token::Open(name, *S)
|
||||
.write_new(Default::default(), &Esc::default())?;
|
||||
.write_new(Default::default(), &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"<no-prefix");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
@ -389,7 +371,7 @@ mod test {
|
|||
fn writes_beginning_node_tag_with_prefix() -> TestResult {
|
||||
let name = QName::try_from(("prefix", "element-name"))?;
|
||||
let result = Token::Open(name, *S)
|
||||
.write_new(Default::default(), &Esc::default())?;
|
||||
.write_new(Default::default(), &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"<prefix:element-name");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
@ -401,7 +383,7 @@ mod test {
|
|||
fn closes_open_node_when_opening_another() -> TestResult {
|
||||
let name = QName::try_from(("p", "another-element"))?;
|
||||
let result = Token::Open(name, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"><p:another-element");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
@ -412,7 +394,7 @@ mod test {
|
|||
#[test]
|
||||
fn closes_open_node_as_empty_element() -> TestResult {
|
||||
let result = Token::Close(None, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"/>");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
@ -425,7 +407,7 @@ mod test {
|
|||
let name = QName::try_from(("a", "closed-element"))?;
|
||||
|
||||
let result = Token::Close(Some(name), *S)
|
||||
.write_new(WriterState::NodeExpected, &Esc::default())?;
|
||||
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"</a:closed-element>");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
@ -440,7 +422,7 @@ mod test {
|
|||
let name = QName::try_from(("b", "closed-element"))?;
|
||||
|
||||
let result = Token::Close(Some(name), *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b"></b:closed-element>");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
@ -452,7 +434,7 @@ mod test {
|
|||
#[test]
|
||||
fn whitespace_within_open_node() -> TestResult {
|
||||
let result = Token::Whitespace(Whitespace::try_from(" \t ")?, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(result.0, b" \t ");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
@ -467,13 +449,13 @@ mod test {
|
|||
|
||||
// Namespace prefix
|
||||
let result = Token::AttrName(name_ns, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b" some:attr");
|
||||
assert_eq!(result.1, WriterState::AttrNameAdjacent);
|
||||
|
||||
// No namespace prefix
|
||||
let result = Token::AttrName(name_local, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b" nons");
|
||||
assert_eq!(result.1, WriterState::AttrNameAdjacent);
|
||||
|
||||
|
@ -519,8 +501,8 @@ mod test {
|
|||
// just ignore it entirely.
|
||||
#[test]
|
||||
fn ignores_attr_end() -> TestResult {
|
||||
let result =
|
||||
Token::AttrEnd.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
let result = Token::AttrEnd
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b"");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
||||
|
@ -528,62 +510,39 @@ mod test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn writes_escaped_text() -> TestResult {
|
||||
// Just to be sure it's not trying to escape when we say it
|
||||
// shouldn't, we include a character that must otherwise be escaped.
|
||||
let text = Text::Escaped("test > escaped".intern());
|
||||
fn writes_text() -> TestResult {
|
||||
let text = "test unescaped".intern();
|
||||
|
||||
// When a node is expected.
|
||||
let result = Token::Text(text, *S)
|
||||
.write_new(WriterState::NodeExpected, &Esc::default())?;
|
||||
assert_eq!(result.0, b"test > escaped");
|
||||
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b"test unescaped:ESC");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
// When a node is still open.
|
||||
let result = Token::Text(text, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
assert_eq!(result.0, b">test > escaped");
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b">test unescaped:ESC");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn writes_unescaped_data() -> TestResult {
|
||||
fn writes_comment() -> TestResult {
|
||||
// Just to be sure it's not trying to escape when we say it
|
||||
// shouldn't, we include a character that must otherwise be escaped.
|
||||
let text = Text::Unescaped("test > unescaped".intern());
|
||||
|
||||
// When a node is expected.
|
||||
let result = Token::CData(text, *S)
|
||||
.write_new(WriterState::NodeExpected, &Esc::default())?;
|
||||
assert_eq!(result.0, b"<![CDATA[test > unescaped]]>");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
// When a node is still open.
|
||||
let result = Token::CData(text, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
assert_eq!(result.0, b"><![CDATA[test > unescaped]]>");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn writes_escaped_comment() -> TestResult {
|
||||
// Just to be sure it's not trying to escape when we say it
|
||||
// shouldn't, we include a character that must otherwise be escaped.
|
||||
let comment = Text::Escaped("comment > escaped".intern());
|
||||
let comment = "comment > escaped".intern();
|
||||
|
||||
// When a node is expected.
|
||||
let result = Token::Comment(comment, *S)
|
||||
.write_new(WriterState::NodeExpected, &Esc::default())?;
|
||||
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b"<!--comment > escaped-->");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
// When a node is still open.
|
||||
let result = Token::Comment(comment, *S)
|
||||
.write_new(WriterState::NodeOpen, &Esc::default())?;
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b"><!--comment > escaped-->");
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
|
@ -596,7 +555,7 @@ mod test {
|
|||
Token::AttrValue("".intern(), *S).write(
|
||||
&mut vec![],
|
||||
WriterState::NodeExpected,
|
||||
&Esc::default()
|
||||
&MockEscaper::default()
|
||||
),
|
||||
Err(Error::UnexpectedToken(_, WriterState::NodeExpected)),
|
||||
));
|
||||
|
@ -615,18 +574,18 @@ mod test {
|
|||
Token::AttrName(("an", "attr").try_into()?, *S),
|
||||
Token::AttrValue("value".intern(), *S),
|
||||
Token::AttrEnd,
|
||||
Token::Text(Text::Escaped("text".intern()), *S),
|
||||
Token::Text("text".intern(), *S),
|
||||
Token::Open(("c", "child").try_into()?, *S),
|
||||
Token::Whitespace(" ".try_into()?, *S),
|
||||
Token::Close(None, *S),
|
||||
Token::Close(Some(root), *S),
|
||||
]
|
||||
.into_iter()
|
||||
.write_new(Default::default(), &Esc::default())?;
|
||||
.write_new(Default::default(), &MockEscaper::default())?;
|
||||
|
||||
assert_eq!(
|
||||
result.0,
|
||||
br#"<r:root an:attr="value">text<c:child /></r:root>"#
|
||||
br#"<r:root an:attr="value:ESC">text:ESC<c:child /></r:root>"#
|
||||
);
|
||||
assert_eq!(result.1, WriterState::NodeExpected);
|
||||
|
||||
|
|
Loading…
Reference in New Issue