575 lines
18 KiB
Rust
575 lines
18 KiB
Rust
// XIR writer
|
|
//
|
|
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
|
|
//
|
|
// This file is part of TAME.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//! Lower XIR stream into an XML byte stream via [`Write`].
|
|
|
|
use super::{DefaultEscaper, Escaper};
|
|
use super::{Error as XirError, QName, Token, TokenStream};
|
|
use crate::sym::GlobalSymbolResolve;
|
|
use std::io::{Error as IoError, Write};
|
|
use std::result;
|
|
|
|
/// Result of a writer operation.
///
/// Unless otherwise specified,
///   a successful operation yields a [`WriterState`];
///     failures are always represented by [`Error`].
pub type Result<T = WriterState> = result::Result<T, Error>;
|
|
|
|
#[derive(Debug)]
|
|
pub enum Error {
|
|
Io(IoError),
|
|
Xir(XirError),
|
|
// TODO: No String here, since then we cannot resolve inner symbols for
|
|
// display to the user
|
|
UnexpectedToken(String, WriterState),
|
|
Todo(String, WriterState),
|
|
}
|
|
|
|
impl std::fmt::Display for Error {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self {
|
|
Self::Io(e) => e.fmt(f),
|
|
Self::Xir(e) => e.fmt(f),
|
|
Self::UnexpectedToken(tok, state) => write!(
|
|
f,
|
|
"Invalid token {} at XML writer state {:?}",
|
|
tok, state
|
|
),
|
|
Self::Todo(tok, state) => write!(
|
|
f,
|
|
"Unexpected {} at XML writer state {:?}; TAMER intends to \
|
|
support this operation, but it was not yet thought to be \
|
|
needed. More development is needed to support it, but \
|
|
hopefully very little. However, if you hit this during \
|
|
normal use of TAME, then this represents a bug.",
|
|
tok, state
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl PartialEq for Error {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
match (self, other) {
|
|
(Error::Io(_), _) => false,
|
|
(Error::Xir(a), Error::Xir(b)) => a == b,
|
|
(
|
|
Error::UnexpectedToken(aa, ab),
|
|
Error::UnexpectedToken(ba, bb),
|
|
) => aa == ba && ab == bb,
|
|
(Error::Todo(aa, ab), Error::Todo(ba, bb)) => aa == ba && ab == bb,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::error::Error for Error {
|
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
|
match self {
|
|
Self::Io(e) => Some(e),
|
|
Self::Xir(e) => Some(e),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<IoError> for Error {
|
|
fn from(e: IoError) -> Self {
|
|
Self::Io(e)
|
|
}
|
|
}
|
|
|
|
impl From<XirError> for Error {
|
|
fn from(e: XirError) -> Self {
|
|
Self::Xir(e)
|
|
}
|
|
}
|
|
|
|
/// State of the XML writer between write operations.
///
/// Each write consumes the state produced by the write before it and
///   yields the state to hand to the next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriterState {
    /// No tag is awaiting closure;
    ///   a node is expected to be output next.
    NodeExpected,

    /// An opening tag has been started but has not yet been terminated,
    ///   so the node is still being output.
    NodeOpen,

    /// An attribute name was just output within an element and is
    ///   awaiting its value.
    AttrNameAdjacent,

    /// A fragment of an attribute value was just output within an element
    ///   and the value has not yet been completed.
    AttrFragmentAdjacent,
}
|
|
|
|
impl Default for WriterState {
|
|
fn default() -> Self {
|
|
Self::NodeExpected
|
|
}
|
|
}
|
|
|
|
impl WriterState {
|
|
#[inline]
|
|
fn close_tag_if_open<W: Write>(&self, sink: &mut W) -> Result<()> {
|
|
match *self {
|
|
Self::NodeOpen => Ok(sink.write_all(b">")?),
|
|
_ => Ok(()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write an XML representation.
|
|
///
|
|
/// This trait is intended for use with [XIR](super) [`Token`] stream.
|
|
/// It uses a finate state machine (FSM),
|
|
/// where states are represented by [`WriterState`],
|
|
/// to avoid lookahead requirements.
|
|
pub trait XmlWriter<S = DefaultEscaper>
|
|
where
|
|
S: Escaper,
|
|
Self: Sized,
|
|
{
|
|
/// Write XML representation into the provided buffer.
|
|
///
|
|
/// The writer acts as a state machine to determine whether previous
|
|
/// output requires closing.
|
|
/// Each write operation takes a previous [`WriterState`],
|
|
/// and transitions to a new [`WriterState`] after performing the
|
|
/// write operation
|
|
/// (which may be the same as the previous state).
|
|
/// This returned state must be provided to the next `write` operation
|
|
/// to produce valid output.
|
|
///
|
|
/// If you have a series of writes to perform,
|
|
/// consider using an [`Iterator`] implementing [`XmlWriter`].
|
|
#[must_use = "Write operation may fail"]
|
|
fn write<W: Write>(
|
|
self,
|
|
sink: &mut W,
|
|
prev_state: WriterState,
|
|
escaper: &S,
|
|
) -> Result;
|
|
|
|
/// Allocate a new buffer and write into it,
|
|
/// returning both the new buffer and the writer state.
|
|
///
|
|
/// See [`write`](XmlWriter::write) for more information.
|
|
///
|
|
/// This is intended primarily for testing;
|
|
/// it is recommended that you use [`write`](XmlWriter::write) instead,
|
|
/// unless you _really_ need a new owned `Vec<u8>`.
|
|
#[cfg(test)]
|
|
fn write_new(
|
|
self,
|
|
prev_state: WriterState,
|
|
escaper: &S,
|
|
) -> Result<(Vec<u8>, WriterState)> {
|
|
let mut buf = Vec::<u8>::new();
|
|
let state = self.write(&mut buf, prev_state, escaper)?;
|
|
|
|
Ok((buf, state))
|
|
}
|
|
}
|
|
|
|
impl<S: Escaper> XmlWriter<S> for QName {
|
|
#[inline]
|
|
fn write<W: Write>(
|
|
self,
|
|
sink: &mut W,
|
|
prev_state: WriterState,
|
|
_escaper: &S,
|
|
) -> Result {
|
|
if let Some(prefix) = self.prefix() {
|
|
sink.write_all(prefix.lookup_str().as_bytes())?;
|
|
sink.write_all(b":")?;
|
|
}
|
|
|
|
sink.write_all(self.local_name().lookup_str().as_bytes())?;
|
|
Ok(prev_state)
|
|
}
|
|
}
|
|
|
|
impl<S: Escaper> XmlWriter<S> for &Token {
|
|
fn write<W: Write>(
|
|
self,
|
|
sink: &mut W,
|
|
prev_state: WriterState,
|
|
escaper: &S,
|
|
) -> Result {
|
|
type W = WriterState; // More concise
|
|
|
|
use Token::*;
|
|
match (self, prev_state) {
|
|
(Open(name, _), W::NodeExpected | W::NodeOpen) => {
|
|
// If a node is still open, then we are a child.
|
|
prev_state.close_tag_if_open(sink)?;
|
|
sink.write_all(b"<")?;
|
|
name.write(sink, prev_state, escaper)?;
|
|
|
|
Ok(W::NodeOpen)
|
|
}
|
|
|
|
(Close(None, _), W::NodeOpen) => {
|
|
sink.write_all(b"/>")?;
|
|
|
|
Ok(W::NodeExpected)
|
|
}
|
|
|
|
(Close(Some(name), _), W::NodeExpected | W::NodeOpen) => {
|
|
// If open, we're going to produce an element of the form
|
|
// `<foo></foo>`.
|
|
prev_state.close_tag_if_open(sink)?;
|
|
|
|
sink.write_all(b"</")?;
|
|
name.write(sink, prev_state, escaper)?;
|
|
sink.write_all(b">")?;
|
|
|
|
Ok(W::NodeExpected)
|
|
}
|
|
|
|
(AttrName(name, _), W::NodeOpen) => {
|
|
sink.write_all(b" ")?;
|
|
name.write(sink, prev_state, escaper)?;
|
|
|
|
Ok(W::AttrNameAdjacent)
|
|
}
|
|
|
|
(AttrValue(value, _), W::AttrNameAdjacent) => {
|
|
sink.write_all(b"=\"")?;
|
|
sink.write_all(escaper.escape(*value).lookup_str().as_bytes())?;
|
|
sink.write_all(b"\"")?;
|
|
|
|
Ok(W::NodeOpen)
|
|
}
|
|
|
|
(AttrValue(value, _), W::AttrFragmentAdjacent) => {
|
|
sink.write_all(escaper.escape(*value).lookup_str().as_bytes())?;
|
|
sink.write_all(b"\"")?;
|
|
|
|
Ok(W::NodeOpen)
|
|
}
|
|
|
|
(AttrValueFragment(value, _), W::AttrNameAdjacent) => {
|
|
sink.write_all(b"=\"")?;
|
|
sink.write_all(escaper.escape(*value).lookup_str().as_bytes())?;
|
|
|
|
Ok(W::AttrFragmentAdjacent)
|
|
}
|
|
|
|
(AttrValueFragment(value, _), W::AttrFragmentAdjacent) => {
|
|
sink.write_all(escaper.escape(*value).lookup_str().as_bytes())?;
|
|
|
|
Ok(W::AttrFragmentAdjacent)
|
|
}
|
|
|
|
// TODO: We have no way of knowing if text should be formatted
|
|
// as CData,
|
|
// which may also be beneficial to avoid escaping if we
|
|
// haven't yet encountered the unescaped representation.
|
|
(Text(text, _), W::NodeExpected | W::NodeOpen) => {
|
|
prev_state.close_tag_if_open(sink)?;
|
|
sink.write_all(escaper.escape(*text).lookup_str().as_bytes())?;
|
|
|
|
Ok(W::NodeExpected)
|
|
}
|
|
|
|
// XXX: While we currently only output comments that have been
|
|
// _read_ as comments,
|
|
// that will not always be the case and we must escape `--`!
|
|
(Comment(comment, _), W::NodeExpected | W::NodeOpen) => {
|
|
prev_state.close_tag_if_open(sink)?;
|
|
sink.write_all(b"<!--")?;
|
|
sink.write_all(comment.lookup_str().as_bytes())?;
|
|
sink.write_all(b"-->")?;
|
|
|
|
Ok(W::NodeExpected)
|
|
}
|
|
|
|
// As-of-yet unsupported operations that weren't needed at the
|
|
// time of writing, but were planned for in the design of Xir.
|
|
(invalid @ AttrName(_, _), W::AttrNameAdjacent) => {
|
|
Err(Error::Todo(format!("{:?}", invalid), prev_state))
|
|
}
|
|
|
|
// Everything else represents either an invalid state transition
|
|
// that would produce invalid XML, or something that we forgot
|
|
// to account for in the above error.
|
|
(invalid, _) => Err(Error::UnexpectedToken(
|
|
format!("{:?}", invalid),
|
|
prev_state,
|
|
)),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<I: TokenStream, S: Escaper> XmlWriter<S> for I {
|
|
fn write<W: Write>(
|
|
mut self,
|
|
sink: &mut W,
|
|
initial_state: WriterState,
|
|
escaper: &S,
|
|
) -> Result {
|
|
self.try_fold(initial_state, |prev_state, tok| {
|
|
tok.write(sink, prev_state, escaper)
|
|
})
|
|
}
|
|
}
|
|
|
|
// Unit tests for the XML writer.
//
// Most tests write a single token from a chosen `WriterState` and assert
// on both the bytes produced and the state that results.
#[cfg(test)]
mod test {
    use std::borrow::Cow;

    use super::*;
    use crate::{
        convert::ExpectInto,
        span::{dummy::DUMMY_SPAN, Span},
        sym::GlobalSymbolIntern,
        xir::{
            error::SpanlessError,
            test::{close, close_empty, open},
            QName,
        },
    };

    type TestResult = std::result::Result<(), Error>;

    #[derive(Debug, Default)]
    struct MockEscaper {}

    // Simply adds ":ESC" as a suffix to the provided byte slice.
    impl Escaper for MockEscaper {
        fn escape_bytes(value: &[u8]) -> Cow<[u8]> {
            let mut esc = value.to_owned();
            esc.extend_from_slice(b":ESC");

            Cow::Owned(esc)
        }

        fn unescape_bytes(
            _: &[u8],
        ) -> result::Result<Cow<[u8]>, SpanlessError> {
            unreachable!("Writer should not be unescaping!")
        }
    }

    const S: Span = DUMMY_SPAN;

    #[test]
    fn writes_beginning_node_tag_without_prefix() -> TestResult {
        let name = QName::new_local("no-prefix".unwrap_into());
        let result = open(name, S)
            .write_new(Default::default(), &MockEscaper::default())?;

        assert_eq!(result.0, b"<no-prefix");
        assert_eq!(result.1, WriterState::NodeOpen);

        Ok(())
    }

    #[test]
    fn writes_beginning_node_tag_with_prefix() -> TestResult {
        let name = ("prefix", "element-name");
        let result = open(name, S)
            .write_new(Default::default(), &MockEscaper::default())?;

        assert_eq!(result.0, b"<prefix:element-name");
        assert_eq!(result.1, WriterState::NodeOpen);

        Ok(())
    }

    #[test]
    fn closes_open_node_when_opening_another() -> TestResult {
        let name = ("p", "another-element");
        let result = open(name, S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;

        assert_eq!(result.0, b"><p:another-element");
        assert_eq!(result.1, WriterState::NodeOpen);

        Ok(())
    }

    #[test]
    fn closes_open_node_as_empty_element() -> TestResult {
        let result = close_empty(S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;

        assert_eq!(result.0, b"/>");
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }

    // A self-closing `/>` only makes sense while a tag is still open;
    // without one it would produce invalid XML and so must be rejected.
    #[test]
    fn empty_close_without_open_node_results_in_error() {
        assert!(matches!(
            close_empty(S).write(
                &mut vec![],
                WriterState::NodeExpected,
                &MockEscaper::default()
            ),
            Err(Error::UnexpectedToken(_, WriterState::NodeExpected)),
        ));
    }

    #[test]
    fn closing_tag_when_node_expected() -> TestResult {
        let name = ("a", "closed-element");

        let result = close(Some(name), S)
            .write_new(WriterState::NodeExpected, &MockEscaper::default())?;

        assert_eq!(result.0, b"</a:closed-element>");
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }

    // Does _not_ check that it's balanced. Not our job. In fact, we want
    // to explicitly support outputting malformed XML.
    #[test]
    fn closes_open_node_with_closing_tag() -> TestResult {
        let name = ("b", "closed-element");

        let result = close(Some(name), S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;

        assert_eq!(result.0, b"></b:closed-element>");
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }

    #[test]
    fn writes_attr_name_to_open_node() -> TestResult {
        let name_ns = ("some", "attr").unwrap_into();
        let name_local = "nons".unwrap_into();

        // Namespace prefix
        let result = Token::AttrName(name_ns, S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;
        assert_eq!(result.0, b" some:attr");
        assert_eq!(result.1, WriterState::AttrNameAdjacent);

        // No namespace prefix
        let result = Token::AttrName(name_local, S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;
        assert_eq!(result.0, b" nons");
        assert_eq!(result.1, WriterState::AttrNameAdjacent);

        Ok(())
    }

    // An attribute name directly following another attribute name is
    // planned for but not yet supported,
    //   and so must be distinguished from a token that is simply invalid.
    #[test]
    fn attr_name_adjacent_to_attr_name_is_todo() {
        let name: QName = ("some", "attr").unwrap_into();

        assert!(matches!(
            Token::AttrName(name, S).write(
                &mut vec![],
                WriterState::AttrNameAdjacent,
                &MockEscaper::default()
            ),
            Err(Error::Todo(_, WriterState::AttrNameAdjacent)),
        ));
    }

    #[test]
    fn writes_attr_value_when_adjacent_to_attr() -> TestResult {
        let value = "test str".intern();

        let result = Token::AttrValue(value, S).write_new(
            WriterState::AttrNameAdjacent,
            &MockEscaper::default(),
        )?;

        assert_eq!(result.0, br#"="test str:ESC""#);
        assert_eq!(result.1, WriterState::NodeOpen);

        Ok(())
    }

    #[test]
    fn writes_attr_value_consisting_of_fragments() -> TestResult {
        let value_left = "left".intern();
        let value_mid = " mid".intern();
        let value_right = " right".intern();

        let result = vec![
            Token::AttrValueFragment(value_left, S),
            Token::AttrValueFragment(value_mid, S),
            Token::AttrValue(value_right, S),
        ]
        .into_iter()
        .write_new(WriterState::AttrNameAdjacent, &MockEscaper::default())?;

        assert_eq!(result.0, br#"="left:ESC mid:ESC right:ESC""#);
        assert_eq!(result.1, WriterState::NodeOpen);

        Ok(())
    }

    #[test]
    fn writes_text() -> TestResult {
        let text = "test unescaped".intern();

        // When a node is expected.
        let result = Token::Text(text, S)
            .write_new(WriterState::NodeExpected, &MockEscaper::default())?;
        assert_eq!(result.0, b"test unescaped:ESC");
        assert_eq!(result.1, WriterState::NodeExpected);

        // When a node is still open.
        let result = Token::Text(text, S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;
        assert_eq!(result.0, b">test unescaped:ESC");
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }

    #[test]
    fn writes_comment() -> TestResult {
        // Just to be sure it's not trying to escape when we say it
        // shouldn't, we include a character that must otherwise be escaped.
        let comment = "comment > escaped".intern();

        // When a node is expected.
        let result = Token::Comment(comment, S)
            .write_new(WriterState::NodeExpected, &MockEscaper::default())?;
        assert_eq!(result.0, b"<!--comment > escaped-->");
        assert_eq!(result.1, WriterState::NodeExpected);

        // When a node is still open.
        let result = Token::Comment(comment, S)
            .write_new(WriterState::NodeOpen, &MockEscaper::default())?;
        assert_eq!(result.0, b"><!--comment > escaped-->");
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }

    #[test]
    fn unsupported_transition_results_in_error() -> TestResult {
        assert!(matches!(
            Token::AttrValue("".intern(), S).write(
                &mut vec![],
                WriterState::NodeExpected,
                &MockEscaper::default()
            ),
            Err(Error::UnexpectedToken(_, WriterState::NodeExpected)),
        ));

        Ok(())
    }

    // The cherry on top, just to demonstrate how this will be used in
    // practice.
    #[test]
    fn test_valid_sequence_of_tokens() -> TestResult {
        let root: QName = ("r", "root").unwrap_into();

        let result = vec![
            open(root, S),
            Token::AttrName(("an", "attr").unwrap_into(), S),
            Token::AttrValue("value".intern(), S),
            Token::Text("text".intern(), S),
            open(("c", "child"), S),
            close_empty(S),
            close(Some(root), S),
        ]
        .into_iter()
        .write_new(Default::default(), &MockEscaper::default())?;

        assert_eq!(
            result.0,
            br#"<r:root an:attr="value:ESC">text:ESC<c:child/></r:root>"#
        );
        assert_eq!(result.1, WriterState::NodeExpected);

        Ok(())
    }
}
|