tamer: ir::xir: Add Token::AttrValueFragment with writer support

This is implemented only for the writer, since its use case is to be able to
concatenate strings without copying during writing.

It doesn't really make sense to support this in XIR Tree, since a reader
should never produce this.  But if we ever run into this (e.g. due to some
internal processing pipeline), we'll address it then; XIR Tree might have to
do copying, then, but should probably wait until encountering all fragments
before interning.  That'd be a distraction right now.
main
Mike Gerwitz 2021-09-21 00:13:03 -04:00
parent e95afe2658
commit c6a7988bc8
2 changed files with 62 additions and 3 deletions

View File

@ -385,12 +385,22 @@ pub enum Token<Ix: SymbolIndexSize> {
/// close!).
Close(Option<QName<Ix>>, Span),
/// Element attribute name
/// Element attribute name.
AttrName(QName<Ix>, Span),
/// Element attribute value
/// Element attribute value.
AttrValue(AttrValue<Ix>, Span),
/// A portion of an element attribute value.
///
/// This allows for concatenating values into an attribute value without
/// having to copy values.
/// The last fragment must be a [`Token::AttrValue`].
///
/// This is intended for writing to a token stream and may not be
/// emitted by readers or supported by [XIR Tree](self::tree).
AttrValueFragment(AttrValue<Ix>, Span),
/// Comment node.
Comment(Text<Ix>, Span),

View File

@ -89,8 +89,10 @@ pub enum WriterState {
NodeExpected,
/// A node is currently being output and has not yet been closed.
NodeOpen,
/// Cursor is position adjacent to an attribute name within an element.
/// Cursor is adjacent to an attribute name within an element.
AttrNameAdjacent,
/// Cursor is adjacent to an attribute fragment within an element.
AttrFragmentAdjacent,
}
impl Default for WriterState {
@ -217,6 +219,35 @@ impl<Ix: SymbolIndexSize> XmlWriter for Token<Ix> {
Ok(S::NodeOpen)
}
// Closing piece of a fragmented attribute value: one or more
// `AttrValueFragment` tokens have already been written (the arm below
// for `AttrNameAdjacent` emitted the opening `="`), so only the final
// text and the closing quote remain.  The attribute is then complete
// and the writer returns to `NodeOpen`.
//
// NOTE(review): if `sink` is a `std::io::Write`, `write` is permitted
// to perform a short write; confirm whether `write_all` is intended.
(
Self::AttrValue(AttrValue::Escaped(value), _),
S::AttrFragmentAdjacent,
) => {
sink.write(value.lookup_str().as_bytes())?;
sink.write(b"\"")?;
Ok(S::NodeOpen)
}
// First fragment of an attribute value, immediately following the
// attribute name: emit `="` to open the value, write the fragment
// text as-is, and leave the value unterminated by moving to
// `AttrFragmentAdjacent`.
(
Self::AttrValueFragment(AttrValue::Escaped(value), _),
S::AttrNameAdjacent,
) => {
sink.write(b"=\"")?;
sink.write(value.lookup_str().as_bytes())?;
Ok(S::AttrFragmentAdjacent)
}
// Subsequent fragment: the value is already open, so just append the
// fragment text and remain in `AttrFragmentAdjacent` until a
// terminating `AttrValue` arrives.
(
Self::AttrValueFragment(AttrValue::Escaped(value), _),
S::AttrFragmentAdjacent,
) => {
sink.write(value.lookup_str().as_bytes())?;
Ok(S::AttrFragmentAdjacent)
}
// Unescaped not yet supported, but you could use CData.
(
Self::Text(Text::Escaped(text), _),
@ -436,6 +467,24 @@ mod test {
Ok(())
}
#[test]
fn writes_escaped_attr_value_consisting_of_fragments() -> TestResult {
    // One leading fragment followed by the terminating value; together
    // they should be written as a single quoted attribute value.
    let tokens = vec![
        Token::AttrValueFragment(
            AttrValue::<Ix>::Escaped("left ".intern()),
            *S,
        ),
        Token::AttrValue(AttrValue::<Ix>::Escaped("right".intern()), *S),
    ];

    // Begin as though the attribute name had just been written.
    let outcome = tokens
        .into_iter()
        .write_new(WriterState::AttrNameAdjacent)?;

    // The fragments are concatenated inside one pair of quotes...
    assert_eq!(outcome.0, br#"="left right""#);
    // ...and the writer is back within the open node afterward.
    assert_eq!(outcome.1, WriterState::NodeOpen);

    Ok(())
}
#[test]
fn writes_escaped_text() -> TestResult {
// Just to be sure it's not trying to escape when we say it