tamer: xir: {NodeStream=>Token}

I decided not to make this rename as part of a previous commit because I
had documented "NodeStream" elsewhere, and I'd like that name to remain in
the Git history so that its evolution can be understood.

This never was a "Node" stream beyond the initial concept phase, because it
represents tokens that aren't themselves nodes.  It is intended to generate
XML nodes, but may need to accommodate non-nodes (e.g. XML declarations) in
the future.

The name originated from `Node`, a tree-based IR that was initially
conceived but has since been removed because it's not yet needed.  What we
need is a streaming IR for xmle writing, and then for reading XML and
echoing it back out for the new frontend.
main
Mike Gerwitz 2021-08-20 10:30:27 -04:00
parent a23bae5e4d
commit cd1eae95ca
3 changed files with 46 additions and 44 deletions

View File

@ -40,7 +40,7 @@ extern crate tamer;
extern crate test;
use std::convert::{TryFrom, TryInto};
use tamer::ir::xir::{NCName, NodeStream, QName};
use tamer::ir::xir::{NCName, QName, Token};
use tamer::sym::{GlobalSymbolIntern, GlobalSymbolResolve, SymbolId};
use test::Bencher;
@ -212,12 +212,12 @@ This is pretend fragment text. We need a lot of it.</fragment>
bench.iter(|| {
(0..1000).for_each(|_| {
vec![
NodeStream::Open(name, span),
NodeStream::AttrName(attr1, span),
NodeStream::AttrValue(AttrValue::Escaped(val1), span),
NodeStream::AttrName(attr2, span),
NodeStream::AttrValue(AttrValue::Escaped(val2), span),
NodeStream::SelfClose(span),
Token::Open(name, span),
Token::AttrName(attr1, span),
Token::AttrValue(AttrValue::Escaped(val1), span),
Token::AttrName(attr2, span),
Token::AttrValue(AttrValue::Escaped(val2), span),
Token::SelfClose(span),
]
.into_iter()
.write(&mut buf, Default::default())
@ -256,7 +256,7 @@ This is pretend fragment text. We need a lot of it.</fragment>
bench.iter(|| {
(0..500).for_each(|_| {
NodeStream::Text(Text::Escaped(frag), span)
Token::Text(Text::Escaped(frag), span)
.write(&mut buf, Default::default())
.unwrap();
});

View File

@ -319,8 +319,15 @@ pub enum AttrValue<Ix: SymbolIndexSize> {
Escaped(SymbolId<Ix>),
}
/// Lightly-structured XML tokens with associated [`Span`]s.
///
/// This is a streamable IR for XML.
/// A writer requires knowledge only of a previous state,
/// such as whether a node is open,
/// and so this IR can be processed by a simple state machine
/// (see [`writer::WriterState`]).
#[derive(Debug, PartialEq, Eq)]
pub enum NodeStream<Ix: SymbolIndexSize> {
pub enum Token<Ix: SymbolIndexSize> {
/// Opening tag of an element.
Open(QName<Ix>, Span),
@ -359,12 +366,12 @@ pub enum NodeStream<Ix: SymbolIndexSize> {
/// Character data as part of an element.
///
/// See also [`CData`](NodeStream::CData) variant.
/// See also [`CData`](Token::CData) variant.
Text(Text<Ix>, Span),
/// CData node (`<![CDATA[...]]>`).
///
/// See also [`Text`](NodeStream::Text) variant.
/// See also [`Text`](Token::Text) variant.
///
/// _Warning: It is up to the caller to ensure that the string `]]>` is
/// not present in the text!_

View File

@ -19,7 +19,7 @@
//! Lower XIR stream into an XML byte stream via [`Write`].
use super::{Error as XirError, NodeStream, QName};
use super::{Error as XirError, QName, Token};
use crate::ir::xir::{AttrValue, Text};
use crate::sym::GlobalSymbolResolve;
use crate::sym::SymbolIndexSize;
@ -129,7 +129,7 @@ impl<Ix: SymbolIndexSize> XmlWriter for QName<Ix> {
}
}
impl<Ix: SymbolIndexSize> XmlWriter for NodeStream<Ix> {
impl<Ix: SymbolIndexSize> XmlWriter for Token<Ix> {
fn write<W: Write>(self, sink: &mut W, prev_state: WriterState) -> Result {
type S = WriterState; // More concise
@ -249,7 +249,7 @@ impl<Ix: SymbolIndexSize> XmlWriter for NodeStream<Ix> {
}
}
impl<Ix: SymbolIndexSize, I: Iterator<Item = NodeStream<Ix>>> XmlWriter for I {
impl<Ix: SymbolIndexSize, I: Iterator<Item = Token<Ix>>> XmlWriter for I {
fn write<W: Write>(
mut self,
sink: &mut W,
@ -287,7 +287,7 @@ mod test {
let name = QName::<Ix>::new_local("no-prefix".try_into()?);
assert_eq!(
NodeStream::Open(name, *S).write(&mut buf, Default::default())?,
Token::Open(name, *S).write(&mut buf, Default::default())?,
WriterState::NodeOpen
);
@ -302,7 +302,7 @@ mod test {
let name = QName::<Ix>::try_from(("prefix", "element-name"))?;
assert_eq!(
NodeStream::Open(name, *S).write(&mut buf, Default::default())?,
Token::Open(name, *S).write(&mut buf, Default::default())?,
WriterState::NodeOpen
);
@ -317,8 +317,7 @@ mod test {
let name = QName::<Ix>::try_from(("p", "another-element"))?;
assert_eq!(
NodeStream::Open(name, *S)
.write(&mut buf, WriterState::NodeOpen)?,
Token::Open(name, *S).write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeOpen
);
@ -332,7 +331,7 @@ mod test {
let mut buf = vec![];
assert_eq!(
NodeStream::<Ix>::SelfClose(*S)
Token::<Ix>::SelfClose(*S)
.write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeExpected
);
@ -347,7 +346,7 @@ mod test {
let name = QName::<Ix>::try_from(("a", "closed-element"))?;
assert_eq!(
NodeStream::Close(name, *S)
Token::Close(name, *S)
.write(&mut buf, WriterState::NodeExpected)?,
WriterState::NodeExpected
);
@ -365,8 +364,7 @@ mod test {
let name = QName::<Ix>::try_from(("b", "closed-element"))?;
assert_eq!(
NodeStream::Close(name, *S)
.write(&mut buf, WriterState::NodeOpen)?,
Token::Close(name, *S).write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeExpected
);
@ -381,7 +379,7 @@ mod test {
let mut buf = vec![];
assert_eq!(
NodeStream::<Ix>::Whitespace(Whitespace::try_from(" \t ")?, *S)
Token::<Ix>::Whitespace(Whitespace::try_from(" \t ")?, *S)
.write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeOpen
);
@ -399,7 +397,7 @@ mod test {
// Namespace prefix
assert_eq!(
NodeStream::AttrName(name_ns, *S)
Token::AttrName(name_ns, *S)
.write(&mut buf, WriterState::NodeOpen)?,
WriterState::AttrNameAdjacent
);
@ -409,7 +407,7 @@ mod test {
// No namespace prefix
assert_eq!(
NodeStream::AttrName(name_local, *S)
Token::AttrName(name_local, *S)
.write(&mut buf, WriterState::NodeOpen)?,
WriterState::AttrNameAdjacent
);
@ -427,7 +425,7 @@ mod test {
let value = AttrValue::<Ix>::Escaped("test \" escaped".intern());
assert_eq!(
NodeStream::AttrValue(value, *S)
Token::AttrValue(value, *S)
.write(&mut buf, WriterState::AttrNameAdjacent)?,
WriterState::NodeOpen
);
@ -447,8 +445,7 @@ mod test {
// When a node is expected.
assert_eq!(
NodeStream::Text(text, *S)
.write(&mut buf, WriterState::NodeExpected)?,
Token::Text(text, *S).write(&mut buf, WriterState::NodeExpected)?,
WriterState::NodeExpected
);
assert_eq!(buf, b"test > escaped");
@ -457,8 +454,7 @@ mod test {
// When a node is still open.
assert_eq!(
NodeStream::Text(text, *S)
.write(&mut buf, WriterState::NodeOpen)?,
Token::Text(text, *S).write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeExpected
);
assert_eq!(buf, b">test > escaped");
@ -476,7 +472,7 @@ mod test {
// When a node is expected.
assert_eq!(
NodeStream::CData(text, *S)
Token::CData(text, *S)
.write(&mut buf, WriterState::NodeExpected)?,
WriterState::NodeExpected
);
@ -486,8 +482,7 @@ mod test {
// When a node is still open.
assert_eq!(
NodeStream::CData(text, *S)
.write(&mut buf, WriterState::NodeOpen)?,
Token::CData(text, *S).write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeExpected
);
assert_eq!(buf, b"><![CDATA[test > unescaped]]>");
@ -505,7 +500,7 @@ mod test {
// When a node is expected.
assert_eq!(
NodeStream::Comment(comment, *S)
Token::Comment(comment, *S)
.write(&mut buf, WriterState::NodeExpected)?,
WriterState::NodeExpected
);
@ -515,7 +510,7 @@ mod test {
// When a node is still open.
assert_eq!(
NodeStream::Comment(comment, *S)
Token::Comment(comment, *S)
.write(&mut buf, WriterState::NodeOpen)?,
WriterState::NodeExpected
);
@ -527,7 +522,7 @@ mod test {
#[test]
fn unsupported_transition_results_in_error() -> TestResult {
assert!(matches!(
NodeStream::AttrValue(AttrValue::<Ix>::Escaped("".into()), *S)
Token::AttrValue(AttrValue::<Ix>::Escaped("".into()), *S)
.write(&mut vec![], WriterState::NodeExpected),
Err(Error::UnexpectedToken(_, WriterState::NodeExpected)),
));
@ -543,14 +538,14 @@ mod test {
let root: QName<Ix> = ("r", "root").try_into()?;
vec![
NodeStream::Open(root, *S),
NodeStream::AttrName(("an", "attr").try_into()?, *S),
NodeStream::AttrValue(AttrValue::Escaped("value".intern()), *S),
NodeStream::Text(Text::Escaped("text".intern()), *S),
NodeStream::Open(("c", "child").try_into()?, *S),
NodeStream::Whitespace(" ".try_into()?, *S),
NodeStream::SelfClose(*S),
NodeStream::Close(root, *S),
Token::Open(root, *S),
Token::AttrName(("an", "attr").try_into()?, *S),
Token::AttrValue(AttrValue::Escaped("value".intern()), *S),
Token::Text(Text::Escaped("text".intern()), *S),
Token::Open(("c", "child").try_into()?, *S),
Token::Whitespace(" ".try_into()?, *S),
Token::SelfClose(*S),
Token::Close(root, *S),
]
.into_iter()
.write(&mut buf, Default::default())?;