455 lines
14 KiB
Rust
455 lines
14 KiB
Rust
// XIR reader tests
|
|
//
|
|
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
|
//
|
|
// This file is part of TAME.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
use std::borrow::Cow;
|
|
|
|
use super::*;
|
|
use crate::sym::GlobalSymbolIntern;
|
|
use crate::{
|
|
convert::ExpectInto,
|
|
span::DUMMY_SPAN,
|
|
xir::{Error, Token},
|
|
};
|
|
|
|
/// These tests use [`quick_xml`] directly,
|
|
/// rather than mocking it,
|
|
/// because parsing XML isn't a simple matter and we want to be sure that
|
|
/// our assumptions of how `quick_xml` performs its parsing are accurate.
|
|
/// Consequently,
|
|
/// these act more like integration tests than unit tests.
|
|
///
|
|
/// This means that `quick_xml` breakages will break these tests,
|
|
/// and that is (unlike with unit tests) exactly what we want to happen
|
|
/// here;
|
|
/// we _complement_ the behavior of quick-xml,
|
|
/// both by reimplementing certain functionality
|
|
/// (like namespace management)
|
|
/// and by relying on certain parsing behavior to eliminate
|
|
/// redundant checks.
|
|
|
|
// Alias through which `new_sut!` constructs the reader exercised below.
type Sut<'a, B, S> = XmlXirReader<'a, B, S>;
|
|
|
|
/// Stateless test double that `new_sut!` hands to the reader as its
///   `Escaper`.
#[derive(Default, Debug)]
struct MockEscaper;
|
|
|
|
// Simply adds ":UNESC" as a suffix to the provided byte slice.
|
|
impl Escaper for MockEscaper {
|
|
fn escape_bytes(_: &[u8]) -> Cow<[u8]> {
|
|
unreachable!("Reader should not be escaping!")
|
|
}
|
|
|
|
fn unescape_bytes(value: &[u8]) -> result::Result<Cow<[u8]>, Error> {
|
|
let mut unesc = value.to_owned();
|
|
unesc.extend_from_slice(b":UNESC");
|
|
|
|
Ok(Cow::Owned(unesc))
|
|
}
|
|
}
|
|
|
|
/// A single byte that does not form valid UTF-8,
///   whether it is the last byte of the input or is followed by a byte
///   that is not permitted in that position.
const INVALID_UTF8_BYTE: u8 = 0xC0;
|
|
|
|
// SAFETY: We want an invalid UTF-8 str for tests.
|
|
// (We can use raw bytes and avoid `unsafe`,
|
|
// but this is more convenient.)
|
|
const INVALID_STR: &str =
|
|
unsafe { std::str::from_utf8_unchecked(&[INVALID_UTF8_BYTE]) };
|
|
|
|
// Binds `$sut` to a new reader over `$data` that uses a `MockEscaper`.
//
//   - `new_sut!(sut = data)` reads from `data.as_bytes()`;
//   - `new_sut!(b sut = data)` hands `data` to the reader unchanged.
//
// This expands to `let` statements,
//   so the escaper lives in the caller's scope alongside `$sut`.
macro_rules! new_sut {
    (b $sut:ident = $data:expr) => {
        let mock_escaper = MockEscaper::default();
        let $sut = Sut::new($data, &mock_escaper);
    };

    ($sut:ident = $data:expr) => {
        new_sut!(b $sut = $data.as_bytes())
    };
}
|
|
|
|
// A lone self-closing element produces an open token,
//   the end of its (empty) attribute list,
//   and a close token that carries no name.
#[test]
fn empty_node_without_prefix_or_attributes() {
    new_sut!(sut = "<empty-node />");

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("empty-node".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// XIR does not concern itself with namespace resolution,
//   so a default namespace declaration is reported like any other
//   attribute.
#[test]
fn does_not_resolve_xmlns() {
    new_sut!(sut = r#"<no-ns xmlns="noresolve" />"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("no-ns".unwrap_into(), DUMMY_SPAN),
        // @xmlns was not interpreted, so it remains a plain attribute.
        Token::AttrName("xmlns".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// XIR does not concern itself with namespace resolution,
//   so prefixed names are reported as written.
#[test]
fn empty_node_with_prefix_without_attributes_unresolved() {
    new_sut!(sut = r#"<x:empty-node xmlns:x="noresolve" />"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    // Both the element and the declaration keep their _unresolved_ QNames.
    let expected = vec![
        Token::Open(("x", "empty-node").unwrap_into(), DUMMY_SPAN),
        Token::AttrName(("xmlns", "x").unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// A prefix followed by a colon and nothing else is rejected as an
//   invalid QName.
//
// TODO: Enough information for error recovery and reporting.
#[test]
fn prefix_with_empty_local_name_invalid_qname() {
    // Trailing colon: there is no local name after the prefix.
    new_sut!(sut = r#"<x: xmlns:x="testns" />"#);

    let given = match sut.collect::<Result<Vec<_>>>() {
        Err(e) => e,
        Ok(_) => panic!("expected failure"),
    };

    assert_eq!(Error::InvalidQName("x:".into()), given);
}
|
|
|
|
// Attributes must be emitted in the order in which they were written.
#[test]
fn multiple_attrs_ordered() {
    new_sut!(sut = r#"<ele foo="a" bar="b" b:baz="c" />"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("ele".unwrap_into(), DUMMY_SPAN),
        Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("a:UNESC".intern(), DUMMY_SPAN),
        Token::AttrName("bar".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
        Token::AttrName(("b", "baz").unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("c:UNESC".intern(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Duplicate attributes violate the specification,
//   but rejecting them is XIRT's job;
//     the reader has to let them through to support e.g. recovery,
//     code formatting, and LSPs.
#[test]
fn permits_duplicate_attrs() {
    new_sut!(sut = r#"<dup attr="a" attr="b" />"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("dup".unwrap_into(), DUMMY_SPAN),
        Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("a:UNESC".intern(), DUMMY_SPAN),
        Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// A self-closing child closes without a name,
//   whereas its parent's explicit end tag closes with one.
#[test]
fn child_node_self_closing() {
    new_sut!(sut = r#"<root><child /></root>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Two self-closing siblings each produce their own open/close pair
//   inside the parent.
#[test]
fn sibling_nodes() {
    new_sut!(sut = r#"<root><child /><child /></root>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        // First sibling.
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        // Second sibling.
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Attributes of a nested element appear between that element's open
//   token and its attribute-end token.
#[test]
fn child_node_with_attrs() {
    new_sut!(sut = r#"<root><child foo="bar" /></root>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
        Token::AttrValue("bar:UNESC".intern(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Text content is reported as a single text token after having been
//   passed through the escaper
//     (hence the ":UNESC" suffix).
#[test]
fn child_text() {
    new_sut!(sut = r#"<text>foo bar</text>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("text".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Text("foo bar:UNESC".into(), DUMMY_SPAN),
        Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Text and element children may be interleaved within the same parent.
#[test]
fn mixed_child_content() {
    new_sut!(sut = r#"<text>foo<em>bar</em></text>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("text".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Text("foo:UNESC".into(), DUMMY_SPAN),
        Token::Open("em".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Text("bar:UNESC".into(), DUMMY_SPAN),
        Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
        Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// This is how XML is typically written; people don't perceive it as mixed,
//   even though it is.
// This intentionally adds newlines before and after the opening and
//   closing tags of the root node.
#[test]
fn mixed_child_content_with_newlines() {
    // The whitespace inside this literal is significant:
    //   every run of it between tags is expected back as a text token.
    // In particular,
    //   `<child />` is preceded by a newline and exactly one space,
    //     matching the "\n " text token expected below.
    new_sut!(
        sut = r#"
<root>
 <child />
</root>
"#
    );

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        // Newline preceding the root's opening tag.
        Token::Text("\n:UNESC".into(), DUMMY_SPAN),
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        // Newline and indentation preceding the child.
        Token::Text("\n :UNESC".into(), DUMMY_SPAN),
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        // Newline preceding the root's closing tag.
        Token::Text("\n:UNESC".into(), DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
        // Newline following the root's closing tag.
        Token::Text("\n:UNESC".into(), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Comments are reported both outside and inside the root element.
// Their contents are expected verbatim:
//   markup within a comment stays comment text,
//     and no ":UNESC" suffix appears on it.
#[test]
fn comment() {
    new_sut!(sut = r#"<!--root--><root><!--<child>--></root>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Comment("root".into(), DUMMY_SPAN),
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Comment("<child>".into(), DUMMY_SPAN),
        Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// A comment spanning several lines is reported as one token with its
//   newlines intact;
//     the newline that follows it is ordinary text.
#[test]
fn comment_multiline() {
    // The line breaks inside this literal are part of the input.
    new_sut!(
        sut = r#"<mult><!--comment
on multiple
lines-->
</mult>"#
    );

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("mult".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Comment("comment\non multiple\nlines".into(), DUMMY_SPAN),
        Token::Text("\n:UNESC".into(), DUMMY_SPAN),
        Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// Detecting mismatched tags is left to XIRT,
//   so XIR must explicitly let them through:
//     the end tag's name is reported exactly as written.
#[test]
fn permits_mismatched_tags() {
    new_sut!(sut = r#"<root><child /></mismatch>"#);

    let tokens = sut
        .collect::<Result<Vec<_>>>()
        .expect("parsing failed");

    let expected = vec![
        Token::Open("root".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Open("child".unwrap_into(), DUMMY_SPAN),
        Token::AttrEnd(DUMMY_SPAN),
        Token::Close(None, DUMMY_SPAN),
        Token::Close(Some("mismatch".unwrap_into()), DUMMY_SPAN),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
// An element name that is not valid UTF-8 fails with the offending
//   bytes attached to the error.
//
// TODO: Enough information for error recovery and reporting.
#[test]
fn node_name_invalid_utf8() {
    let input: &[u8] = &[b'<', INVALID_UTF8_BYTE, b'/', b'>'];
    new_sut!(b sut = input);

    match sut.collect::<Result<Vec<_>>>() {
        Err(Error::InvalidUtf8(_, given)) => {
            assert_eq!(given, &[INVALID_UTF8_BYTE]);
        }
        Ok(_) => panic!("expected failure"),
        Err(_) => panic!("unexpected failure"),
    }
}
|
|
|
|
// An attribute name that is not valid UTF-8 fails with the offending
//   bytes attached to the error.
//
// TODO: Enough information for error recovery and reporting.
#[test]
fn attr_name_invalid_utf8() {
    // Assemble the input as raw bytes.
    // A `String` must never hold invalid UTF-8,
    //   so the malformed byte is pushed onto a byte buffer instead and
    //   the reader is created through the `b` form of `new_sut!`.
    let mut input = b"<a ".to_vec();
    input.push(INVALID_UTF8_BYTE);
    input.extend_from_slice(br#"="value"/>"#);

    new_sut!(b sut = input.as_slice());

    let result = sut.collect::<Result<Vec<_>>>();

    match result {
        Ok(_) => panic!("expected failure"),
        Err(Error::InvalidUtf8(_, given)) => {
            assert_eq!(given, &[INVALID_UTF8_BYTE]);
        }
        _ => panic!("unexpected failure"),
    }
}
|
|
|
|
// An attribute value that is not valid UTF-8 fails with the whole
//   offending value attached to the error.
//
// TODO: Enough information for error recovery and reporting.
#[test]
fn attr_value_invalid_utf8() {
    // Assemble the input as raw bytes.
    // A `String` must never hold invalid UTF-8,
    //   so the malformed byte is pushed onto a byte buffer instead and
    //   the reader is created through the `b` form of `new_sut!`.
    let mut input = br#"<a attr="bad"#.to_vec();
    input.push(INVALID_UTF8_BYTE);
    input.extend_from_slice(br#""/>"#);

    new_sut!(b sut = input.as_slice());

    let result = sut.collect::<Result<Vec<_>>>();

    match result {
        Ok(_) => panic!("expected failure"),
        Err(Error::InvalidUtf8(_, given)) => {
            // Doesn't make it to the Escaper,
            //   so there is no ":UNESC" suffix on the reported bytes.
            assert_eq!(given, &[b'b', b'a', b'd', INVALID_UTF8_BYTE]);
        }
        _ => panic!("unexpected failure"),
    }
}
|