630 lines
17 KiB
Rust
630 lines
17 KiB
Rust
// Test XIRF representation
|
||
//
|
||
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
|
||
//
|
||
// This file is part of TAME.
|
||
//
|
||
// This program is free software: you can redistribute it and/or modify
|
||
// it under the terms of the GNU General Public License as published by
|
||
// the Free Software Foundation, either version 3 of the License, or
|
||
// (at your option) any later version.
|
||
//
|
||
// This program is distributed in the hope that it will be useful,
|
||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
// GNU General Public License for more details.
|
||
//
|
||
// You should have received a copy of the GNU General Public License
|
||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
//! Integration tests for XIRF parser.
|
||
//!
|
||
//! These tests take place within the context of the XIR parsing framework,
|
||
//! so they are one layer of abstraction away from unit tests.
|
||
|
||
use std::assert_matches::assert_matches;
|
||
|
||
use super::*;
|
||
use crate::convert::ExpectInto;
|
||
use crate::parse::{FinalizeError, ParseError, Parsed};
|
||
use crate::span::dummy::*;
|
||
use crate::sym::GlobalSymbolIntern;
|
||
use crate::xir::test::{
|
||
close as xir_close, close_empty as xir_close_empty, open as xir_open,
|
||
};
|
||
use std::fmt::Debug;
|
||
|
||
/// Hastily and lazily produce a [`XirfToken::Open`].
|
||
///
|
||
/// This function is not suitable for production use as it does not produce
|
||
/// a complete [`OpenSpan`].
|
||
pub fn open<Q: TryInto<QName>, S: Into<OpenSpan>, T: TextType>(
|
||
qname: Q,
|
||
span: S,
|
||
depth: Depth,
|
||
) -> XirfToken<T>
|
||
where
|
||
<Q as TryInto<QName>>::Error: Debug,
|
||
{
|
||
XirfToken::Open(qname.unwrap_into(), span.into(), depth)
|
||
}
|
||
|
||
/// Hastily and lazily produce a [`XirfToken::Close`] for an empty tag.
|
||
///
|
||
/// This is [`close`] with the omission of the `qname` argument;
|
||
/// the type parameter `Q` cannot be inferred if the value is [`None`].
|
||
///
|
||
/// This function is not suitable for production use as it does not produce
|
||
/// a complete [`OpenSpan`].
|
||
pub fn close_empty<S: Into<CloseSpan>, T: TextType>(
|
||
span: S,
|
||
depth: Depth,
|
||
) -> XirfToken<T> {
|
||
XirfToken::Close(None, span.into(), depth)
|
||
}
|
||
|
||
/// Hastily and lazily produce a [`XirfToken::Close`].
|
||
///
|
||
/// See also [`close_empty`] if `Q` cannot be inferred.
|
||
///
|
||
/// This function is not suitable for production use as it does not produce
|
||
/// a complete [`OpenSpan`].
|
||
pub fn close<Q: TryInto<QName>, S: Into<CloseSpan>, T: TextType>(
|
||
qname: Option<Q>,
|
||
span: S,
|
||
depth: Depth,
|
||
) -> XirfToken<T>
|
||
where
|
||
<Q as TryInto<QName>>::Error: Debug,
|
||
{
|
||
XirfToken::Close(qname.map(ExpectInto::unwrap_into), span.into(), depth)
|
||
}
|
||
|
||
/// Hastily and lazily produce a [`XirfToken::Attr`].
|
||
///
|
||
/// This function is intended for testing only.
|
||
pub fn attr<Q: TryInto<QName>, T: TextType>(
|
||
qname: Q,
|
||
value: SymbolId,
|
||
spans: (Span, Span),
|
||
) -> XirfToken<T>
|
||
where
|
||
<Q as TryInto<QName>>::Error: Debug,
|
||
{
|
||
XirfToken::Attr(Attr::new(qname.unwrap_into(), value, spans))
|
||
}
|
||
|
||
// A self-closed root element is expected to yield an open/close pair,
// both at depth zero.
#[test]
fn empty_element_self_close() {
    let qname = ("ns", "elem");

    let toks = [xir_open(qname, S1), xir_close_empty(S2)].into_iter();
    let sut = parse::<1, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(qname, S1, Depth(0))),
        Parsed::Object(close_empty(S2, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// An empty root element closed with an explicit, matching closing tag
// (rather than a self-closing one) is expected to yield an open/close
// pair at depth zero, with the close carrying the element name.
#[test]
fn empty_element_balanced_close() {
    let qname = ("ns", "openclose");

    let toks = [xir_open(qname, S1), xir_close(Some(qname), S2)].into_iter();
    let sut = parse::<1, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(qname, S1, Depth(0))),
        Parsed::Object(close(Some(qname), S2, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// More closing tags than opening.
|
||
//
|
||
// We cannot keep the token and throw our own error because this tag may be
|
||
// part of a parent context.
|
||
#[test]
|
||
fn extra_closing_tag() {
|
||
let name = ("ns", "openclose");
|
||
let toks = [
|
||
// We need an opening tag to actually begin document parsing.
|
||
xir_open(name, S1),
|
||
xir_close(Some(name), S2),
|
||
xir_close(Some(name), S3),
|
||
]
|
||
.into_iter();
|
||
|
||
let sut = parse::<1, Text>(toks);
|
||
|
||
assert_matches!(
|
||
sut.collect::<Result<Vec<Parsed<_>>, _>>(),
|
||
Err(ParseError::UnexpectedToken(
|
||
XirToken::Close(Some(given_name), given_span),
|
||
_
|
||
)) if given_name == name.unwrap_into() && given_span == S3.into()
|
||
);
|
||
}
|
||
|
||
// This should never happen, but let's operate in a sane way in case it ever
|
||
// does, since that's not the user's fault (that is, we shouldn't have
|
||
// gotten to XIRF).
|
||
#[test]
|
||
fn extra_self_closing_tag() {
|
||
let name = ("ns", "openclose");
|
||
let toks = [
|
||
// We need an opening tag to actually begin document parsing.
|
||
xir_open(name, S1),
|
||
xir_close_empty(S2),
|
||
xir_close_empty(S3),
|
||
]
|
||
.into_iter();
|
||
|
||
let sut = parse::<1, Text>(toks);
|
||
|
||
assert_matches!(
|
||
sut.collect::<Result<Vec<Parsed<_>>, _>>(),
|
||
Err(ParseError::UnexpectedToken(XirToken::Close(None, given_span), _))
|
||
if given_span == S3.into(),
|
||
);
|
||
}
|
||
|
||
// A closing tag whose name differs from the open element must produce an
// `UnbalancedTag` error.
// What happens _after_ the error is not covered here.
#[test]
fn empty_element_unbalanced_close() {
    let opened_as = "open".unwrap_into();
    let closed_as = "unbalanced_name".unwrap_into();

    let toks =
        [xir_open(opened_as, S1), xir_close(Some(closed_as), S2)].into_iter();

    let mut sut = parse::<1, Text>(toks);

    // The opening tag itself is fine.
    let first = sut.next();
    assert_eq!(
        first,
        Some(Ok(Parsed::Object(open(opened_as, S1, Depth(0)))))
    );

    // The mismatched close is where the error surfaces.
    let second = sut.next();
    assert_eq!(
        second,
        Some(Err(ParseError::StateError(XirToXirfError::UnbalancedTag {
            open: (opened_as, S1),
            close: (closed_as, S2),
        })))
    );
}
|
||
|
||
// Checks the depth value:
// a child of the root is expected at depth one,
// and the root's own tags at depth zero.
#[test]
fn single_empty_child() {
    let root = ("ns", "openclose");
    let kid = "child";

    let toks = [
        xir_open(root, S1),
        xir_open(kid, S2),
        xir_close_empty(S3),
        xir_close(Some(root), S4),
    ]
    .into_iter();

    let sut = parse::<2, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(root, S1, Depth(0))),
        Parsed::Object(open(kid, S2, Depth(1))),
        Parsed::Object(close_empty(S3, Depth(1))),
        Parsed::Object(close(Some(root), S4, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Opening an element beyond the parser's maximum depth must produce a
// `MaxDepthExceeded` error naming the offending tag and the limit.
#[test]
fn depth_exceeded() {
    let root = ("ns", "openclose");
    let too_deep = "exceed".unwrap_into();

    let toks = [
        xir_open(root, S1),
        // This one exceeds the max depth, ...
        xir_open(too_deep, S2),
    ]
    .into_iter();

    // ...which is set here: MAX_DEPTH here is 1
    let mut sut = parse::<1, Text>(toks);

    let expected_root = Some(Ok(Parsed::Object(open(root, S1, Depth(0)))));
    let given_root = sut.next();
    assert_eq!(expected_root, given_root);

    let expected_err = Some(Err(ParseError::StateError(
        XirToXirfError::MaxDepthExceeded {
            open: (too_deep, S2),
            max: Depth(1),
        },
    )));
    let given_err = sut.next();
    assert_eq!(expected_err, given_err);
}
|
||
|
||
// Attribute name/value token pairs on an element are expected to be
// combined into single `XirfToken::Attr` objects,
// with each attribute name token yielding `Parsed::Incomplete` first.
#[test]
fn empty_element_with_attrs() {
    let elem = ("ns", "elem");
    let name_a = "a".unwrap_into();
    let name_b = "b".unwrap_into();
    let value_a = "val1".intern();
    let value_b = "val2".intern();

    let toks = [
        xir_open(elem, S1),
        XirToken::AttrName(name_a, S2),
        XirToken::AttrValue(value_a, S3),
        XirToken::AttrName(name_b, S3),
        XirToken::AttrValue(value_b, S4),
        xir_close_empty(S4),
    ]
    .into_iter();

    let sut = parse::<2, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(elem, S1, Depth(0))),
        Parsed::Incomplete,
        Parsed::Object(XirfToken::Attr(Attr::new(name_a, value_a, (S2, S3)))),
        Parsed::Incomplete,
        Parsed::Object(XirfToken::Attr(Attr::new(name_b, value_b, (S3, S4)))),
        Parsed::Object(close_empty(S4, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// A child element that follows its parent's attributes is expected at
// depth one, after the parent's attribute has been emitted.
#[test]
fn child_element_after_attrs() {
    let elem = ("ns", "elem");
    let kid = "child";
    let attr_name = "a".unwrap_into();
    let attr_value = "val".intern();

    let toks = [
        xir_open(elem, S1),
        XirToken::AttrName(attr_name, S1),
        XirToken::AttrValue(attr_value, S2),
        xir_open(kid, S1),
        xir_close_empty(S2),
        xir_close(Some(elem), S3),
    ]
    .into_iter();

    let sut = parse::<2, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(elem, S1, Depth(0))),
        Parsed::Incomplete,
        Parsed::Object(XirfToken::Attr(Attr::new(
            attr_name,
            attr_value,
            (S1, S2),
        ))),
        Parsed::Object(open(kid, S1, Depth(1))),
        Parsed::Object(close_empty(S2, Depth(1))),
        Parsed::Object(close(Some(elem), S3, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Two empty sibling children are both expected at depth one,
// with the parent's tags at depth zero.
#[test]
fn element_with_empty_sibling_children() {
    let root = "parent";
    let first = "childa";
    let second = "childb";

    let toks = [
        xir_open(root, S1),
        xir_open(first, S2),
        xir_close_empty(S3),
        xir_open(second, S2),
        xir_close_empty(S3),
        xir_close(Some(root), S2),
    ]
    .into_iter();

    let sut = parse::<2, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(root, S1, Depth(0))),
        Parsed::Object(open(first, S2, Depth(1))),
        Parsed::Object(close_empty(S3, Depth(1))),
        Parsed::Object(open(second, S2, Depth(1))),
        Parsed::Object(close_empty(S3, Depth(1))),
        Parsed::Object(close(Some(root), S2, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Ensures that attributes on a child do not cause the parent context to
// be lost:
// the parent's closing tag must still balance at depth zero.
#[test]
fn element_with_child_with_attributes() {
    let root = "parent";
    let kid = "child";
    let attr_name = "attr".unwrap_into();
    let attr_value = "attr value".intern();

    let toks = [
        xir_open(root, S1),
        xir_open(kid, S1),
        XirToken::AttrName(attr_name, S1),
        XirToken::AttrValue(attr_value, S2),
        xir_close_empty(S3),
        xir_close(Some(root), S3),
    ]
    .into_iter();

    let sut = parse::<2, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(root, S1, Depth(0))),
        Parsed::Object(open(kid, S1, Depth(1))),
        Parsed::Incomplete,
        Parsed::Object(XirfToken::Attr(Attr::new(
            attr_name,
            attr_value,
            (S1, S2),
        ))),
        Parsed::Object(close_empty(S3, Depth(1))),
        Parsed::Object(close(Some(root), S3, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Text within an element is expected as a `XirfToken::Text` at a depth
// one greater than that of the enclosing element.
#[test]
fn element_with_text() {
    let root = "parent";
    let inner = "inner text".into();

    let toks = [
        xir_open(root, S1),
        XirToken::Text(inner, S2),
        xir_close(Some(root), S3),
    ]
    .into_iter();

    let sut = parse::<1, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(open(root, S1, Depth(0))),
        Parsed::Object(XirfToken::Text(Text(inner, S2), Depth(1))),
        Parsed::Object(close(Some(root), S3, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
#[test]
|
||
fn not_accepting_state_if_element_open() {
|
||
let name = "unclosed";
|
||
let toks = [xir_open(name, S1)].into_iter();
|
||
|
||
let mut sut = parse::<1, Text>(toks);
|
||
|
||
assert_eq!(
|
||
Some(Ok(Parsed::Object(open(name, S1, Depth(0))))),
|
||
sut.next()
|
||
);
|
||
|
||
// Element was not closed.
|
||
assert_matches!(
|
||
sut.next(),
|
||
Some(Err(ParseError::FinalizeError(
|
||
FinalizeError::UnexpectedEof(..)
|
||
)))
|
||
);
|
||
}
|
||
|
||
// XML permits comment nodes before and after the document root element;
// both are expected to be emitted at depth zero.
#[test]
fn comment_before_or_after_root_ok() {
    let root = "root";
    let leading = "start comment".intern();
    let trailing = "end comment".intern();

    let toks = [
        XirToken::Comment(leading, S1),
        xir_open(root, S2),
        xir_close_empty(S3),
        XirToken::Comment(trailing, S4),
    ]
    .into_iter();

    let sut = parse::<1, Text>(toks);

    let expected = Ok(vec![
        Parsed::Object(XirfToken::Comment(leading, S1, Depth(0))),
        Parsed::Object(open(root, S2, Depth(0))),
        Parsed::Object(close_empty(S3, Depth(0))),
        Parsed::Object(XirfToken::Comment(trailing, S4, Depth(0))),
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Whitespace text before and after the document root element is accepted
// too;
// each such token is expected to yield only `Parsed::Incomplete`.
#[test]
fn whitespace_before_or_after_root_ok() {
    let root = "root";
    let blank = " ".unwrap_into();

    let toks = [
        XirToken::Text(blank, S1),
        xir_open(root, S2),
        xir_close_empty(S3),
        XirToken::Text(blank, S4),
    ]
    .into_iter();

    let sut = parse::<1, RefinedText>(toks);

    let expected = Ok(vec![
        Parsed::Incomplete,
        Parsed::Object(open(root, S2, Depth(0))),
        Parsed::Object(close_empty(S3, Depth(0))),
        Parsed::Incomplete,
    ]);
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// But there must be no content at the end of the document after the closing
|
||
// root node.
|
||
// This does not test every applicable token;
|
||
// you can easily verify the actual implementation at a glance.
|
||
//
|
||
// This is just a dead parser state,
|
||
// since it's possible for XIRF to be composed and we want to return to
|
||
// the parent parser.
|
||
#[test]
|
||
fn content_after_root_close_error() {
|
||
let name = "root".unwrap_into();
|
||
|
||
let toks = [
|
||
xir_open(name, S1),
|
||
xir_close_empty(S2),
|
||
// Document ends here
|
||
xir_open(name, S3),
|
||
]
|
||
.into_iter();
|
||
|
||
let sut = parse::<1, Text>(toks);
|
||
|
||
assert_matches!(
|
||
sut.collect(),
|
||
Result::<Vec<Parsed<_>>, _>::Err(ParseError::UnexpectedToken(
|
||
XirToken::Open(given_name, given_span),
|
||
_)) if given_name == name && given_span == S3.into()
|
||
);
|
||
}
|
||
|
||
// Non-comment nodes cannot appear before the opening root tag;
// a leading text node must produce `RootOpenExpected` carrying the
// offending token.
#[test]
fn content_before_root_open_error() {
    let stray = "foo".intern();

    let toks = [XirToken::Text(stray, S1)].into_iter();
    let sut = parse::<1, Text>(toks);

    let expected: Result<Vec<Parsed<_>>, _> = Err(ParseError::StateError(
        XirToXirfError::RootOpenExpected(XirToken::Text(stray, S1)),
    ));
    let given: Result<Vec<_>, _> = sut.collect();

    assert_eq!(expected, given);
}
|
||
|
||
// Text nodes are refined into `RefinedText::Whitespace` or left as
// `RefinedText::Unrefined`.
#[test]
fn whitespace_refinement() {
    // Nothing exhaustive;
    // just check some notable examples.
    //
    // Each case pairs the text of a node with whether that node is
    // expected to be refined into whitespace.
    let cases = [
        ("".into(), true),
        (" ".into(), true),
        ("\n".into(), true),
        ("\n\n\t ".into(), true),
        (" foo ".into(), false),
        ("\n .".into(), false),
        (".\n ".into(), false),
    ];

    for (given, expected) in cases {
        let mut sut = parse::<1, RefinedText>(
            [xir_open("root", S1), XirToken::Text(given, S1)].into_iter(),
        );

        let _ = sut.next(); // discard root

        match sut.next().unwrap().unwrap() {
            Parsed::Object(XirfToken::Text(
                RefinedText::Whitespace(Whitespace(Text(ws, span))),
                Depth(1),
            )) => {
                assert_eq!(ws, given);
                assert_eq!(span, S1);
                assert!(expected);
            }

            Parsed::Object(XirfToken::Text(
                RefinedText::Unrefined(Text(text, span)),
                Depth(1),
            )) => {
                assert_eq!(text, given);
                assert_eq!(span, S1);
                assert!(!expected);
            }

            unexpected => panic!("unexpected token: {unexpected:?}"),
        }
    }
}
|
||
|
||
// Basic sanity check;
// the implementation is simple enough to verify almost at a glance,
// but the attribute deconstruction with lookahead is easy to overlook,
// so a single worked example is worth having.
#[test]
fn xirf_to_xir() {
    use crate::parse::Lower;

    // The lowering pipeline below requires compatible errors.
    // The conversion is never expected to run in this test.
    impl From<ParseError<XirfToken<Text>, XirfToXirError>>
        for ParseError<XirToken, XirToXirfError>
    {
        fn from(_value: ParseError<XirfToken<Text>, XirfToXirError>) -> Self {
            unreachable!()
        }
    }

    let xir_toks = vec![
        XirToken::Open("a".unwrap_into(), S1.into()),
        XirToken::AttrName("attr".unwrap_into(), S2),
        XirToken::AttrValue("value".into(), S3),
        XirToken::Comment("comment".into(), S4),
        XirToken::Text("text".into(), S5),
        XirToken::CData("cdata".into(), S6),
        XirToken::Close(Some("a".unwrap_into()), S7.into()),
    ];

    // The round trip is expected to reproduce the input tokens verbatim.
    let expected =
        Ok(xir_toks.clone().into_iter().map(Parsed::Object).collect());

    // This type incantation is unwieldy because,
    // at the time of writing,
    // the lowering pipeline is still in need of further abstraction.
    // All it does is parse XIR -> XirToXirf -> XirfToXir -> XIR,
    // discarding the `Parsed::Incomplete` results along the way.
    let given = Lower::<XirToXirf<1, Text>, XirfToXir<Text>, _>::lower(
        &mut parse::<1, Text>(xir_toks.into_iter()),
        |out| {
            out.filter(|x| !matches!(x, Ok(Parsed::Incomplete)))
                .collect::<Result<Vec<_>, _>>()
        },
    );

    assert_eq!(expected, given);
}
|