tamer: xir: {NodeStream=>Token}
I decided not to do this in a previous commit because I had documented "NodeStream" elsewhere, so I'd like it to be in the Git history to understand its evolution. This never was a "Node" stream beyond the initial concept phase, because it represents tokens that aren't themselves nodes. It is intended to generate XML nodes, but may need to accommodate non-nodes (e.g. XML declarations) in the future. The name originated from `Node`, which was a tree-based IR that was initially conceived, but removed because it's not yet needed. What we need is a streaming IR for xmle writing, and then for reading and echoing back out XML for the new frontend.
main
parent
a23bae5e4d
commit
cd1eae95ca
|
@ -40,7 +40,7 @@ extern crate tamer;
|
|||
extern crate test;
|
||||
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use tamer::ir::xir::{NCName, NodeStream, QName};
|
||||
use tamer::ir::xir::{NCName, QName, Token};
|
||||
use tamer::sym::{GlobalSymbolIntern, GlobalSymbolResolve, SymbolId};
|
||||
use test::Bencher;
|
||||
|
||||
|
@ -212,12 +212,12 @@ This is pretend fragment text. We need a lot of it.</fragment>
|
|||
bench.iter(|| {
|
||||
(0..1000).for_each(|_| {
|
||||
vec![
|
||||
NodeStream::Open(name, span),
|
||||
NodeStream::AttrName(attr1, span),
|
||||
NodeStream::AttrValue(AttrValue::Escaped(val1), span),
|
||||
NodeStream::AttrName(attr2, span),
|
||||
NodeStream::AttrValue(AttrValue::Escaped(val2), span),
|
||||
NodeStream::SelfClose(span),
|
||||
Token::Open(name, span),
|
||||
Token::AttrName(attr1, span),
|
||||
Token::AttrValue(AttrValue::Escaped(val1), span),
|
||||
Token::AttrName(attr2, span),
|
||||
Token::AttrValue(AttrValue::Escaped(val2), span),
|
||||
Token::SelfClose(span),
|
||||
]
|
||||
.into_iter()
|
||||
.write(&mut buf, Default::default())
|
||||
|
@ -256,7 +256,7 @@ This is pretend fragment text. We need a lot of it.</fragment>
|
|||
|
||||
bench.iter(|| {
|
||||
(0..500).for_each(|_| {
|
||||
NodeStream::Text(Text::Escaped(frag), span)
|
||||
Token::Text(Text::Escaped(frag), span)
|
||||
.write(&mut buf, Default::default())
|
||||
.unwrap();
|
||||
});
|
||||
|
|
|
@ -319,8 +319,15 @@ pub enum AttrValue<Ix: SymbolIndexSize> {
|
|||
Escaped(SymbolId<Ix>),
|
||||
}
|
||||
|
||||
/// Lightly-structured XML tokens with associated [`Span`]s.
|
||||
///
|
||||
/// This is a streamable IR for XML.
|
||||
/// A writer requires knowledge only of a previous state,
|
||||
/// such as whether a node is open,
|
||||
/// and so this IR can be processed by a simple state machine
|
||||
/// (see [`writer::WriterState`]).
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum NodeStream<Ix: SymbolIndexSize> {
|
||||
pub enum Token<Ix: SymbolIndexSize> {
|
||||
/// Opening tag of an element.
|
||||
Open(QName<Ix>, Span),
|
||||
|
||||
|
@ -359,12 +366,12 @@ pub enum NodeStream<Ix: SymbolIndexSize> {
|
|||
|
||||
/// Character data as part of an element.
|
||||
///
|
||||
/// See also [`CData`](NodeStream::CData) variant.
|
||||
/// See also [`CData`](Token::CData) variant.
|
||||
Text(Text<Ix>, Span),
|
||||
|
||||
/// CData node (`<![CDATA[...]]>`).
|
||||
///
|
||||
/// See also [`Text`](NodeStream::Text) variant.
|
||||
/// See also [`Text`](Token::Text) variant.
|
||||
///
|
||||
/// _Warning: It is up to the caller to ensure that the string `]]>` is
|
||||
/// not present in the text!_
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
//! Lower XIR stream into an XML byte stream via [`Write`].
|
||||
|
||||
use super::{Error as XirError, NodeStream, QName};
|
||||
use super::{Error as XirError, QName, Token};
|
||||
use crate::ir::xir::{AttrValue, Text};
|
||||
use crate::sym::GlobalSymbolResolve;
|
||||
use crate::sym::SymbolIndexSize;
|
||||
|
@ -129,7 +129,7 @@ impl<Ix: SymbolIndexSize> XmlWriter for QName<Ix> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<Ix: SymbolIndexSize> XmlWriter for NodeStream<Ix> {
|
||||
impl<Ix: SymbolIndexSize> XmlWriter for Token<Ix> {
|
||||
fn write<W: Write>(self, sink: &mut W, prev_state: WriterState) -> Result {
|
||||
type S = WriterState; // More concise
|
||||
|
||||
|
@ -249,7 +249,7 @@ impl<Ix: SymbolIndexSize> XmlWriter for NodeStream<Ix> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<Ix: SymbolIndexSize, I: Iterator<Item = NodeStream<Ix>>> XmlWriter for I {
|
||||
impl<Ix: SymbolIndexSize, I: Iterator<Item = Token<Ix>>> XmlWriter for I {
|
||||
fn write<W: Write>(
|
||||
mut self,
|
||||
sink: &mut W,
|
||||
|
@ -287,7 +287,7 @@ mod test {
|
|||
let name = QName::<Ix>::new_local("no-prefix".try_into()?);
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::Open(name, *S).write(&mut buf, Default::default())?,
|
||||
Token::Open(name, *S).write(&mut buf, Default::default())?,
|
||||
WriterState::NodeOpen
|
||||
);
|
||||
|
||||
|
@ -302,7 +302,7 @@ mod test {
|
|||
let name = QName::<Ix>::try_from(("prefix", "element-name"))?;
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::Open(name, *S).write(&mut buf, Default::default())?,
|
||||
Token::Open(name, *S).write(&mut buf, Default::default())?,
|
||||
WriterState::NodeOpen
|
||||
);
|
||||
|
||||
|
@ -317,8 +317,7 @@ mod test {
|
|||
let name = QName::<Ix>::try_from(("p", "another-element"))?;
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::Open(name, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
Token::Open(name, *S).write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeOpen
|
||||
);
|
||||
|
||||
|
@ -332,7 +331,7 @@ mod test {
|
|||
let mut buf = vec![];
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::<Ix>::SelfClose(*S)
|
||||
Token::<Ix>::SelfClose(*S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
@ -347,7 +346,7 @@ mod test {
|
|||
let name = QName::<Ix>::try_from(("a", "closed-element"))?;
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::Close(name, *S)
|
||||
Token::Close(name, *S)
|
||||
.write(&mut buf, WriterState::NodeExpected)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
@ -365,8 +364,7 @@ mod test {
|
|||
let name = QName::<Ix>::try_from(("b", "closed-element"))?;
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::Close(name, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
Token::Close(name, *S).write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
||||
|
@ -381,7 +379,7 @@ mod test {
|
|||
let mut buf = vec![];
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::<Ix>::Whitespace(Whitespace::try_from(" \t ")?, *S)
|
||||
Token::<Ix>::Whitespace(Whitespace::try_from(" \t ")?, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeOpen
|
||||
);
|
||||
|
@ -399,7 +397,7 @@ mod test {
|
|||
|
||||
// Namespace prefix
|
||||
assert_eq!(
|
||||
NodeStream::AttrName(name_ns, *S)
|
||||
Token::AttrName(name_ns, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::AttrNameAdjacent
|
||||
);
|
||||
|
@ -409,7 +407,7 @@ mod test {
|
|||
|
||||
// No namespace prefix
|
||||
assert_eq!(
|
||||
NodeStream::AttrName(name_local, *S)
|
||||
Token::AttrName(name_local, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::AttrNameAdjacent
|
||||
);
|
||||
|
@ -427,7 +425,7 @@ mod test {
|
|||
let value = AttrValue::<Ix>::Escaped("test \" escaped".intern());
|
||||
|
||||
assert_eq!(
|
||||
NodeStream::AttrValue(value, *S)
|
||||
Token::AttrValue(value, *S)
|
||||
.write(&mut buf, WriterState::AttrNameAdjacent)?,
|
||||
WriterState::NodeOpen
|
||||
);
|
||||
|
@ -447,8 +445,7 @@ mod test {
|
|||
|
||||
// When a node is expected.
|
||||
assert_eq!(
|
||||
NodeStream::Text(text, *S)
|
||||
.write(&mut buf, WriterState::NodeExpected)?,
|
||||
Token::Text(text, *S).write(&mut buf, WriterState::NodeExpected)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
assert_eq!(buf, b"test > escaped");
|
||||
|
@ -457,8 +454,7 @@ mod test {
|
|||
|
||||
// When a node is still open.
|
||||
assert_eq!(
|
||||
NodeStream::Text(text, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
Token::Text(text, *S).write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
assert_eq!(buf, b">test > escaped");
|
||||
|
@ -476,7 +472,7 @@ mod test {
|
|||
|
||||
// When a node is expected.
|
||||
assert_eq!(
|
||||
NodeStream::CData(text, *S)
|
||||
Token::CData(text, *S)
|
||||
.write(&mut buf, WriterState::NodeExpected)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
@ -486,8 +482,7 @@ mod test {
|
|||
|
||||
// When a node is still open.
|
||||
assert_eq!(
|
||||
NodeStream::CData(text, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
Token::CData(text, *S).write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
assert_eq!(buf, b"><![CDATA[test > unescaped]]>");
|
||||
|
@ -505,7 +500,7 @@ mod test {
|
|||
|
||||
// When a node is expected.
|
||||
assert_eq!(
|
||||
NodeStream::Comment(comment, *S)
|
||||
Token::Comment(comment, *S)
|
||||
.write(&mut buf, WriterState::NodeExpected)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
@ -515,7 +510,7 @@ mod test {
|
|||
|
||||
// When a node is still open.
|
||||
assert_eq!(
|
||||
NodeStream::Comment(comment, *S)
|
||||
Token::Comment(comment, *S)
|
||||
.write(&mut buf, WriterState::NodeOpen)?,
|
||||
WriterState::NodeExpected
|
||||
);
|
||||
|
@ -527,7 +522,7 @@ mod test {
|
|||
#[test]
|
||||
fn unsupported_transition_results_in_error() -> TestResult {
|
||||
assert!(matches!(
|
||||
NodeStream::AttrValue(AttrValue::<Ix>::Escaped("".into()), *S)
|
||||
Token::AttrValue(AttrValue::<Ix>::Escaped("".into()), *S)
|
||||
.write(&mut vec![], WriterState::NodeExpected),
|
||||
Err(Error::UnexpectedToken(_, WriterState::NodeExpected)),
|
||||
));
|
||||
|
@ -543,14 +538,14 @@ mod test {
|
|||
let root: QName<Ix> = ("r", "root").try_into()?;
|
||||
|
||||
vec![
|
||||
NodeStream::Open(root, *S),
|
||||
NodeStream::AttrName(("an", "attr").try_into()?, *S),
|
||||
NodeStream::AttrValue(AttrValue::Escaped("value".intern()), *S),
|
||||
NodeStream::Text(Text::Escaped("text".intern()), *S),
|
||||
NodeStream::Open(("c", "child").try_into()?, *S),
|
||||
NodeStream::Whitespace(" ".try_into()?, *S),
|
||||
NodeStream::SelfClose(*S),
|
||||
NodeStream::Close(root, *S),
|
||||
Token::Open(root, *S),
|
||||
Token::AttrName(("an", "attr").try_into()?, *S),
|
||||
Token::AttrValue(AttrValue::Escaped("value".intern()), *S),
|
||||
Token::Text(Text::Escaped("text".intern()), *S),
|
||||
Token::Open(("c", "child").try_into()?, *S),
|
||||
Token::Whitespace(" ".try_into()?, *S),
|
||||
Token::SelfClose(*S),
|
||||
Token::Close(root, *S),
|
||||
]
|
||||
.into_iter()
|
||||
.write(&mut buf, Default::default())?;
|
||||
|
|
Loading…
Reference in New Issue