tamer: xir: Introduce {Ele,Open,Close}Span

This isn't conceptually all that significant of a change, but there was a lot
to modify to get it working.  I would generally separate this into a commit
for the implementation and another commit for the integration, but I decided
to keep things together.

This serves a role similar to AttrSpan---this allows deriving a span
representing the element name from a span representing the entire XIR
token.  This will provide more useful context for errors---including the tag
delimiter(s) means that we care about the fact that an element is in that
position (as opposed to some other type of node) within the context of an
error.  However, if we are expecting an element but take issue with the
element name itself, we want to place emphasis on that instead.

This also starts to consider the issue of span contexts---a blob of detached
data that is `Span` is useful for error context, but it's not useful for
manipulation or deriving additional information.  For that, we need to
encode additional context, and this is an attempt at that.

I am interested in the concept of providing Spans that are guaranteed to
actually make sense---that are instantiated and manipulated with APIs that
ensure consistency.  But such a thing buys us very little, practically
speaking, over what I have now for TAMER, and so I don't expect to actually
implement that for this project; I'll leave that for a personal
project.  TAMER's already taken a lot of my personal interests and it can
cause me a lot of grief sometimes (with regards to letting my aspirations
cause me more work).

DEV-7145
main
Mike Gerwitz 2022-06-24 13:51:49 -04:00
parent 873e5fc761
commit c671bf6a9c
17 changed files with 767 additions and 424 deletions

View File

@ -138,7 +138,7 @@ mod writer {
Writer as QuickXmlWriter,
};
use std::borrow::Cow;
use tamer::xir::{writer::XmlWriter, Escaper};
use tamer::xir::{writer::XmlWriter, CloseSpan, Escaper, OpenSpan};
use tamer::{span::Span, xir::DefaultEscaper};
const FRAGMENT: &str = r#"<fragment>
@ -214,12 +214,12 @@ This is pretend fragment text. We need a lot of it.</fragment>
bench.iter(|| {
(0..1000).for_each(|_| {
vec![
Token::Open(name, span),
Token::Open(name, OpenSpan::without_name_span(span)),
Token::AttrName(attr1, span),
Token::AttrValue(val1.into(), span),
Token::AttrName(attr2, span),
Token::AttrValue(val2.into(), span),
Token::Close(None, span),
Token::Close(None, CloseSpan::empty(span)),
]
.into_iter()
.write(&mut buf, Default::default(), &escaper)

View File

@ -36,7 +36,7 @@ use crate::{
xir::{
iter::{elem_wrap, ElemWrapIter},
st::qname::*,
QName, Token,
CloseSpan, OpenSpan, QName, Token,
},
};
use arrayvec::ArrayVec;
@ -126,7 +126,7 @@ impl<'a> DepListIter<'a> {
),
}
}).and_then(|(sym, kind, src)| {
self.toks.push(Token::Close(None, LSPAN));
self.toks.push(Token::Close(None, CloseSpan::empty(LSPAN)));
self.toks_push_attr(QN_DESC, src.desc);
self.toks_push_attr(QN_YIELDS, src.yields);
@ -148,7 +148,7 @@ impl<'a> DepListIter<'a> {
self.toks_push_attr(QN_NAME, Some(sym));
self.toks_push_obj_attrs(kind);
Some(Token::Open(QN_P_SYM, LSPAN))
Some(Token::Open(QN_P_SYM, OpenSpan::without_name_span(LSPAN)))
})
}
@ -240,12 +240,12 @@ impl MapFromsIter {
#[inline]
fn refill_toks(&mut self) -> Option<Token> {
self.iter.next().and_then(|from| {
self.toks.push(Token::Close(None, LSPAN));
self.toks.push(Token::Close(None, CloseSpan::empty(LSPAN)));
self.toks.push(Token::AttrValue(from, LSPAN));
self.toks.push(Token::AttrName(QN_NAME, LSPAN));
Some(Token::Open(QN_L_FROM, LSPAN))
Some(Token::Open(QN_L_FROM, OpenSpan::without_name_span(LSPAN)))
})
}
}

View File

@ -58,7 +58,7 @@ fn test_produces_header() -> TestResult {
})
.collect::<Vec<Token>>();
assert_eq!(Token::Open(QN_PACKAGE, LSPAN), result[0]);
assert_eq!(Token::Open(QN_PACKAGE, LSPAN.into()), result[0]);
Ok(())
}
@ -69,7 +69,7 @@ fn test_closes_package() -> TestResult {
let result = lower_iter(empty, "foo".intern(), "relroot".intern()).last();
assert_eq!(Some(Token::Close(Some(QN_PACKAGE), LSPAN)), result);
assert_eq!(Some(Token::Close(Some(QN_PACKAGE), LSPAN.into())), result);
Ok(())
}

View File

@ -78,9 +78,6 @@ pub mod global;
#[macro_use]
extern crate static_assertions;
#[cfg(test)]
#[macro_use]
extern crate lazy_static;
#[macro_use]
pub mod xir;

View File

@ -33,7 +33,7 @@ use crate::{
attr::{Attr, AttrSpan},
flat::XirfToken as Xirf,
st::qname::*,
QName,
EleSpan, QName,
},
};
@ -186,7 +186,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
match (self, tok) {
(Ready, Xirf::Open(QN_LV_PACKAGE | QN_PACKAGE, span, ..)) => {
Transition(Package(span)).incomplete()
Transition(Package(span.tag_span())).incomplete()
}
(Ready, tok) => {
@ -215,7 +215,8 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
(Package(_), Xirf::Close(..)) => Transition(Done).incomplete(),
(Package(_), Xirf::Open(QN_SYMTABLE, span, ..)) => {
Transition(Symtable(span, SS::default())).incomplete()
Transition(Symtable(span.tag_span(), SS::default()))
.incomplete()
}
(Symtable(_, ss), Xirf::Close(Some(QN_SYMTABLE), ..))
@ -231,7 +232,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
(SymDepsExpected, Xirf::Open(QN_SYM_DEPS, span, _)) => {
Transition(SymDeps(span, SD::default())).incomplete()
Transition(SymDeps(span.tag_span(), SD::default())).incomplete()
}
(SymDeps(_, sd), Xirf::Close(None | Some(QN_SYM_DEPS), ..))
@ -245,13 +246,16 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
}
(FragmentsExpected, Xirf::Open(QN_FRAGMENTS, span, _)) => {
Transition(Fragments(span, SF::default())).incomplete()
Transition(Fragments(span.tag_span(), SF::default()))
.incomplete()
}
(
Fragments(_, sf),
Xirf::Close(None | Some(QN_FRAGMENTS), span, _),
) if sf.is_accepting() => Transition(Eoh).ok(XmloToken::Eoh(span)),
) if sf.is_accepting() => {
Transition(Eoh).ok(XmloToken::Eoh(span.tag_span()))
}
(Fragments(span, sf), tok) => {
sf.delegate(ctx, tok, |sf| Fragments(span, sf))
@ -332,7 +336,8 @@ impl ParseState for SymtableState {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
(Ready, Xirf::Open(QN_SYM, span, _)) => {
Transition(Sym(span, None, SymAttrs::default())).incomplete()
Transition(Sym(span.tag_span(), None, SymAttrs::default()))
.incomplete()
}
(Sym(span, None, attrs), Xirf::Close(..)) => {
@ -365,8 +370,13 @@ impl ParseState for SymtableState {
) if attrs.ty == Some(SymType::Map)
|| attrs.ty == Some(SymType::RetMap) =>
{
Transition(SymMapFrom(span_sym, name, attrs, span_from))
.incomplete()
Transition(SymMapFrom(
span_sym,
name,
attrs,
span_from.tag_span(),
))
.incomplete()
}
(
@ -395,9 +405,8 @@ impl ParseState for SymtableState {
(
Sym(span_sym, Some(name), attrs),
Xirf::Open(QN_SYM_REF, span_ref, _),
) => {
Transition(SymRef(span_sym, name, attrs, span_ref)).incomplete()
}
) => Transition(SymRef(span_sym, name, attrs, span_ref.tag_span()))
.incomplete(),
(SymRef(span_sym, name, attrs, _), Xirf::Close(..)) => {
Transition(Sym(span_sym, Some(name), attrs)).incomplete()
@ -613,7 +622,7 @@ impl ParseState for SymDepsState {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
(Ready, Xirf::Open(QN_SYM_DEP, span, _)) => {
Transition(SymUnnamed(span)).incomplete()
Transition(SymUnnamed(span.tag_span())).incomplete()
}
(SymUnnamed(span), Xirf::Attr(Attr(QN_NAME, name, _))) => {
@ -625,7 +634,8 @@ impl ParseState for SymDepsState {
.err(XmloError::UnassociatedSymDep(span)),
(Sym(span, name), Xirf::Open(QN_SYM_REF, span_ref, _)) => {
Transition(SymRefUnnamed(span, name, span_ref)).incomplete()
Transition(SymRefUnnamed(span, name, span_ref.tag_span()))
.incomplete()
}
(
@ -723,7 +733,7 @@ impl ParseState for FragmentsState {
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
(Ready, Xirf::Open(QN_FRAGMENT, span, _)) => {
Transition(FragmentUnnamed(span)).incomplete()
Transition(FragmentUnnamed(span.tag_span())).incomplete()
}
// TODO: For whitespace, which can be stripped by XIRF.

View File

@ -29,7 +29,10 @@ use crate::{
sym::GlobalSymbolIntern,
xir::{
attr::Attr,
flat::{Depth, XirfToken as Xirf},
flat::{
test::{close, close_empty, open},
Depth, XirfToken as Xirf,
},
QName,
},
};
@ -44,8 +47,7 @@ type Sut = XmloReader;
#[test]
fn fails_on_invalid_root() {
let tok =
Xirf::Open("not-a-valid-package-node".unwrap_into(), S1, Depth(0));
let tok = open("not-a-valid-package-node", S1, Depth(0));
let mut sut = Sut::parse([tok.clone()].into_iter());
@ -61,7 +63,7 @@ fn common_parses_package_attrs(package: QName) {
let elig = "elig-class-yields".into();
let toks = [
Xirf::Open(package, S1, Depth(0)),
open(package, S1, Depth(0)),
Xirf::Attr(Attr::new("name".unwrap_into(), name, (S2, S3))),
Xirf::Attr(Attr::new("__rootpath".unwrap_into(), relroot, (S2, S3))),
Xirf::Attr(Attr::new(
@ -74,7 +76,7 @@ fn common_parses_package_attrs(package: QName) {
elig,
(S3, S4),
)),
Xirf::Close(Some(package), S2, Depth(0)),
close(Some(package), S2, Depth(0)),
]
.into_iter();
@ -116,11 +118,11 @@ fn ignores_unknown_package_attr() {
let name = "pkgroot".into();
let toks = [
Xirf::Open(QN_PACKAGE, S1, Depth(0)),
open(QN_PACKAGE, S1, Depth(0)),
Xirf::Attr(Attr::new("name".unwrap_into(), name, (S2, S3))),
// This is ignored.
Xirf::Attr(Attr::new("unknown".unwrap_into(), name, (S2, S3))),
Xirf::Close(Some(QN_PACKAGE), S2, Depth(0)),
close(Some(QN_PACKAGE), S2, Depth(0)),
]
.into_iter();
@ -140,9 +142,9 @@ fn ignores_unknown_package_attr() {
#[test]
fn symtable_err_missing_sym_name() {
let toks = [
Xirf::Open(QN_SYM, S1, Depth(0)),
open(QN_SYM, S1, Depth(0)),
// No attributes, but importantly, no name.
Xirf::Close(Some(QN_SYMTABLE), S2, Depth(0)),
close(Some(QN_SYMTABLE), S2, Depth(0)),
]
.into_iter();
@ -169,7 +171,7 @@ macro_rules! symtable_tests {
let name = stringify!($name).intern();
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
$(
Xirf::Attr(Attr(
@ -178,7 +180,7 @@ macro_rules! symtable_tests {
AttrSpan(S3, SATTRVAL)
)),
)*
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
@ -328,14 +330,14 @@ fn symtable_sym_generated_true() {
let name = "generated_true".into();
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(
("preproc", "generated").unwrap_into(),
raw::L_TRUE,
AttrSpan(S3, S4),
)),
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
@ -363,15 +365,15 @@ fn symtable_map_from() {
let map_from = "from-a".into();
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
open(QN_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, map_from, AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
@ -400,15 +402,15 @@ fn symtable_map_from_missing_name() {
let name = "sym-map-from-missing".into();
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
open(QN_FROM, S2, Depth(1)),
// @name missing
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
@ -425,20 +427,20 @@ fn symtable_map_from_multiple() {
let name = "sym-map-from-missing".into();
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
open(QN_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "ok".into(), AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
// <preproc:from> again (err)
Xirf::Open(QN_FROM, S3, Depth(1)),
open(QN_FROM, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "bad".into(), AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
@ -456,19 +458,19 @@ fn sym_dep_event() {
let dep2 = "dep2".into();
let toks = [
Xirf::Open(QN_SYM_DEP, S1, Depth(0)),
open(QN_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
// <preproc:sym-ref
Xirf::Open(QN_SYM_REF, S2, Depth(1)),
open(QN_SYM_REF, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep1, AttrSpan(S3, S4))),
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
// <preproc:sym-ref
Xirf::Open(QN_SYM_REF, S3, Depth(1)),
open(QN_SYM_REF, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep2, AttrSpan(S4, S5))),
Xirf::Close(None, S4, Depth(1)),
close_empty(S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM_DEP), S5, Depth(0)),
close(Some(QN_SYM_DEP), S5, Depth(0)),
]
.into_iter();
@ -491,9 +493,9 @@ fn sym_dep_event() {
#[test]
fn sym_dep_missing_name() {
let toks = [
Xirf::Open(QN_SYM_DEP, S1, Depth(0)),
open(QN_SYM_DEP, S1, Depth(0)),
// missing @name, causes error
Xirf::Open(QN_SYM_REF, S2, Depth(1)),
open(QN_SYM_REF, S2, Depth(1)),
]
.into_iter();
@ -509,11 +511,11 @@ fn sym_ref_missing_name() {
let name = "depsym".into();
let toks = [
Xirf::Open(QN_SYM_DEP, S1, Depth(0)),
open(QN_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Open(QN_SYM_REF, S2, Depth(1)),
open(QN_SYM_REF, S2, Depth(1)),
// missing @name, causes error
Xirf::Close(None, S3, Depth(1)),
close_empty(S3, Depth(1)),
]
.into_iter();
@ -533,15 +535,15 @@ fn sym_fragment_event() {
let toks = [
// first
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
open(QN_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
Xirf::Text(frag1, S4),
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
close(Some(QN_FRAGMENT), S5, Depth(0)),
// second
Xirf::Open(QN_FRAGMENT, S2, Depth(0)),
open(QN_FRAGMENT, S2, Depth(0)),
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
Xirf::Text(frag2, S5),
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
close(Some(QN_FRAGMENT), S5, Depth(0)),
]
.into_iter();
@ -563,7 +565,7 @@ fn sym_fragment_event() {
#[test]
fn sym_fragment_missing_id() {
let toks = [
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
open(QN_FRAGMENT, S1, Depth(0)),
// missing @id
Xirf::Text("text".into(), S4),
]
@ -580,7 +582,7 @@ fn sym_fragment_missing_id() {
#[test]
fn sym_fragment_empty_id() {
let toks = [
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
open(QN_FRAGMENT, S1, Depth(0)),
// empty @id
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
Xirf::Text("text".into(), S4),
@ -601,10 +603,10 @@ fn _sym_fragment_missing_text() {
let id = "fragsym".into();
let toks = [
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
open(QN_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id, AttrSpan(S3, S4))),
// missing text
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
close(Some(QN_FRAGMENT), S5, Depth(0)),
]
.into_iter();
@ -629,34 +631,34 @@ fn xmlo_composite_parsers_header() {
let frag = "fragment text".into();
let toks_header = [
Xirf::Open(QN_PACKAGE, S1, Depth(0)),
open(QN_PACKAGE, S1, Depth(0)),
// <preproc:symtable>
Xirf::Open(QN_SYMTABLE, S2, Depth(1)),
open(QN_SYMTABLE, S2, Depth(1)),
// <preproc:sym
Xirf::Open(QN_SYM, S3, Depth(2)),
open(QN_SYM, S3, Depth(2)),
Xirf::Attr(Attr(QN_NAME, sym_name, AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(2)),
close_empty(S4, Depth(2)),
// />
Xirf::Close(Some(QN_SYMTABLE), S4, Depth(1)),
close(Some(QN_SYMTABLE), S4, Depth(1)),
// </preproc:symtable>
// <preproc:sym-deps>
Xirf::Open(QN_SYM_DEPS, S2, Depth(1)),
open(QN_SYM_DEPS, S2, Depth(1)),
// <preproc:sym-dep
Xirf::Open(QN_SYM_DEP, S3, Depth(3)),
open(QN_SYM_DEP, S3, Depth(3)),
Xirf::Attr(Attr(QN_NAME, symdep_name, AttrSpan(S2, S3))),
Xirf::Close(Some(QN_SYM_DEP), S4, Depth(3)),
close(Some(QN_SYM_DEP), S4, Depth(3)),
// </preproc:sym-dep>
Xirf::Close(Some(QN_SYM_DEPS), S3, Depth(1)),
close(Some(QN_SYM_DEPS), S3, Depth(1)),
// </preproc:sym-deps>
// <preproc:fragments>
Xirf::Open(QN_FRAGMENTS, S2, Depth(1)),
open(QN_FRAGMENTS, S2, Depth(1)),
// <preproc:fragment
Xirf::Open(QN_FRAGMENT, S4, Depth(2)),
open(QN_FRAGMENT, S4, Depth(2)),
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
Xirf::Text(frag, S5),
Xirf::Close(Some(QN_FRAGMENT), S4, Depth(2)),
close(Some(QN_FRAGMENT), S4, Depth(2)),
// </preproc:fragment>
Xirf::Close(Some(QN_FRAGMENTS), S3, Depth(1)),
close(Some(QN_FRAGMENTS), S3, Depth(1)),
// </preproc:fragments>
// No closing root node:
// ensure that we can just end at the header without parsing further.

View File

@ -28,8 +28,12 @@
//!
//! To parse an entire XML document,
//! see [`reader`].
//!
//! _Note:_ XIR refers to "opening" and "closing" tags,
//! as opposed to "start" and "end" as used in the XML specification.
//! TAMER uses a uniform terminology for all delimited data.
use crate::span::Span;
use crate::span::{Span, SpanLenSize};
use crate::sym::{
st_as_sym, GlobalSymbolIntern, GlobalSymbolInternBytes, SymbolId,
};
@ -396,6 +400,162 @@ impl Display for QName {
}
}
/// A span representing an opening (starting) element tag.
///
/// The first field is the span of the tag token and the second is the
/// length of the element name within it.
///
/// See [`EleSpan`] for more information.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct OpenSpan(Span, EleNameLen);
impl OpenSpan {
/// Create an [`OpenSpan`] from a tag span alone,
/// recording an element name length of zero.
///
/// [`EleSpan::name_span`] on the result is a zero-length span
/// positioned one byte past the start of `span`.
pub fn without_name_span(span: Span) -> Self {
Self(span, 0)
}
}
/// A span representing a closing (ending) element tag.
///
/// The first field is the span of the tag token and the second is the
/// length of the element name within it,
/// with zero meaning that there is no name.
///
/// See [`EleSpan`] for more information.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CloseSpan(Span, EleNameLen);
impl CloseSpan {
/// A [`CloseSpan`] representing the closing of an empty tag.
///
/// This type of span has no element name.
pub fn empty(span: Span) -> Self {
Self::without_name_span(span)
}
/// Create a [`CloseSpan`] from a tag span alone,
/// recording an element name length of zero.
///
/// The result is the same value that [`CloseSpan::empty`] produces,
/// and so [`EleSpan::name_span`] on it returns `span` itself.
pub fn without_name_span(span: Span) -> Self {
Self(span, 0)
}
}
/// Length of an element name,
/// in bytes,
/// as stored in [`OpenSpan`] and [`CloseSpan`].
///
/// A length of zero indicates that no element name span is available.
pub type EleNameLen = SpanLenSize;
/// Spans associated with an element opening or closing tag.
///
/// The diagram below illustrates the behavior of [`EleSpan`].
/// Spans are represented by `[---]` intervals,
/// with the byte offset at each end,
/// and the single-letter span name centered below the interval.
///
/// ```text
/// <open > <open ...> </close > <empty ' />
/// |[--] | |[--] | [---] | |[---] ' []
/// |1 4 | |1 4 | 2 6 | |1 5 ' 9`10
/// | N | | N | | N | | N | ' T
/// | | | | | | | | '
/// [------] [---] [--------] [----] '
/// 0 7 0 4 0 9 0 5 '
/// T T T T '
/// ```
///
/// Above we have
///
/// - `T` = [`EleSpan::span`]; and
/// - `N` = [`EleSpan::name_span`].
///
/// The purpose of the `T` span is to represent the entire token that has
/// been emitted by XIR.
/// If an opening tag does not contain any attributes,
/// then `T` represents the entire opening tag with both the opening and
/// closing angle brackets.
/// If an opening tag is expected to contain attributes,
/// then only the opening angle bracket is included.
/// A closing tag is entirely contained by `T`.
///
/// The empty tag is separated into two tokens in XIR---a
/// [`Token::Open`] and a [`Token::Close`] with a [`None`] for the name.
/// Unlike a typical closing tag,
/// there is no `N` span available for the closing token,
/// and so requesting one via [`EleSpan::name_span`] will simply
/// return the `T` span,
/// rather than complicating the API with an [`Option`].
/// It is generally assumed that reporting on element names will occur
/// within the context of the _opening_ tag.
///
/// The tag may contain whitespace following the element name,
/// as permitted by `STag` and `ETag` in the
/// [XML specification][xmlspec-tag].
///
/// [xmlspec-tag]: https://www.w3.org/TR/xml/#dt-stag
pub trait EleSpan {
/// A [`Span`] encompassing the entire element tag token,
/// whether opening or closing.
///
/// Note that what exactly this token represents varies.
fn span(&self) -> Span;
/// Span representing the relevant portion of the element tag.
///
/// This is a more descriptive alias of [`EleSpan::span`] that may be
/// appropriate in certain contexts.
/// The default implementation simply returns [`EleSpan::span`].
fn tag_span(&self) -> Span {
self.span()
}
/// A [`Span`] representing only the element name,
/// if available.
///
/// An element name is _not_ available for the closing token of an
/// empty tag.
/// Rather than complicating the API with [`Option`],
/// [`EleSpan::span`] is returned instead.
fn name_span(&self) -> Span;
}
impl EleSpan for OpenSpan {
    // The tag span is stored verbatim as the first field.
    fn span(&self) -> Span {
        let Self(tag, _) = *self;
        tag
    }

    // The element name begins immediately after the `<` that starts the
    // tag token and runs for the stored name length:
    //
    //   <open ...>
    //    ^^^^
    //
    // A stored length of 0 produces a 0-length span at the position
    // where the name ought to begin,
    //   which is still a useful location to report.
    // Tokens read using XIR should always carry a name length,
    //   but system-generated tokens may not.
    fn name_span(&self) -> Span {
        let Self(tag, name_len) = *self;

        let ctx = tag.context();
        let name_start = tag.offset().saturating_add(1);

        ctx.span(name_start, name_len)
    }
}
impl EleSpan for CloseSpan {
    // The tag span is stored verbatim as the first field.
    fn span(&self) -> Span {
        let Self(tag, _) = *self;
        tag
    }

    fn name_span(&self) -> Span {
        let Self(tag, name_len) = *self;

        // A name length of 0 means that this closes an empty tag,
        //   which has no element name of its own,
        //   and so the tag span is all that we have to offer:
        //
        //   <foo ' />
        //        ' ^^
        if name_len == 0 {
            return tag;
        }

        // Otherwise the name follows the `</` that starts the tag token:
        //
        //   </close >
        //     ^^^^^
        let ctx = tag.context();
        let name_start = tag.offset().saturating_add(2);

        ctx.span(name_start, name_len)
    }
}
/// Lightly-structured XML tokens with associated [`Span`]s.
///
/// This is a streamable IR for XML.
@ -406,7 +566,7 @@ impl Display for QName {
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
/// Opening tag of an element.
Open(QName, Span),
Open(QName, OpenSpan),
/// Closing tag of an element.
///
@ -430,7 +590,7 @@ pub enum Token {
/// given especially that bindings after `@` in patterns have not
/// yet been stabilized at the time of writing (but are very
/// close!).
Close(Option<QName>, Span),
Close(Option<QName>, CloseSpan),
/// Element attribute name.
AttrName(QName, Span),
@ -518,8 +678,8 @@ impl crate::parse::Token for Token {
use Token::*;
match self {
Open(_, span)
| Close(_, span)
Open(_, OpenSpan(span, _))
| Close(_, CloseSpan(span, _))
| AttrName(_, span)
| AttrValue(_, span)
| AttrValueFragment(_, span)
@ -534,16 +694,68 @@ impl crate::parse::Token for Token {
impl crate::parse::Object for Token {}
#[cfg(test)]
mod test {
pub mod test {
use super::*;
use crate::convert::ExpectInto;
use crate::sym::GlobalSymbolIntern;
use std::convert::TryInto;
use std::fmt::Debug;
type TestResult = Result<(), Box<dyn std::error::Error>>;
lazy_static! {
static ref S: Span =
Span::from_byte_interval((0, 0), "test case".intern());
// Test convenience allowing a bare [`Span`] to stand in for an
// [`OpenSpan`];
// the conversion goes through [`OpenSpan::without_name_span`],
// so the result records no element name length.
//
// Prefer [`open`] below when possible.
impl From<Span> for OpenSpan {
fn from(span: Span) -> Self {
Self::without_name_span(span)
}
}
// Test convenience allowing a bare [`Span`] to stand in for a
// [`CloseSpan`];
// the conversion goes through [`CloseSpan::without_name_span`],
// so the result records no element name length.
//
// Prefer [`close`] below when possible.
impl From<Span> for CloseSpan {
fn from(span: Span) -> Self {
Self::without_name_span(span)
}
}
/// Quickly construct a [`Token::Open`] for use in tests.
///
/// `qname` is converted into a [`QName`] using `unwrap_into` and `span`
/// is converted into an [`OpenSpan`] using [`Into`].
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into an [`OpenSpan`] that lacks an element
/// name length.
pub fn open<Q: TryInto<QName>, S: Into<OpenSpan>>(
    qname: Q,
    span: S,
) -> Token
where
    <Q as TryInto<QName>>::Error: Debug,
{
    let name: QName = qname.unwrap_into();
    let open_span: OpenSpan = span.into();

    Token::Open(name, open_span)
}
/// Quickly construct a nameless [`Token::Close`],
/// as used to close an empty tag,
/// for use in tests.
///
/// This is [`close`] without its `qname` argument,
/// since the type parameter `Q` of [`close`] cannot be inferred when
/// the name is [`None`].
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into a [`CloseSpan`] that lacks an element
/// name length.
pub fn close_empty<S: Into<CloseSpan>>(span: S) -> Token {
    let close_span: CloseSpan = span.into();

    Token::Close(None, close_span)
}
/// Quickly construct a [`Token::Close`] for use in tests.
///
/// When `qname` is [`Some`],
/// its value is converted into a [`QName`] using `unwrap_into`;
/// `span` is converted into a [`CloseSpan`] using [`Into`].
/// Use [`close_empty`] instead if `Q` cannot be inferred.
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into a [`CloseSpan`] that lacks an element
/// name length.
pub fn close<Q: TryInto<QName>, S: Into<CloseSpan>>(
    qname: Option<Q>,
    span: S,
) -> Token
where
    <Q as TryInto<QName>>::Error: Debug,
{
    let name: Option<QName> = qname.map(ExpectInto::unwrap_into);
    let close_span: CloseSpan = span.into();

    Token::Close(name, close_span)
}
mod name {
@ -635,4 +847,58 @@ mod test {
Ok(())
}
// Tests for [`EleSpan`] as implemented by [`OpenSpan`] and [`CloseSpan`].
//
// Each case constructs a span from a tag span `T` and an element name
// length,
// and then checks what `span()` and `name_span()` report.
mod ele_span {
use super::*;
use crate::span::DUMMY_CONTEXT as DC;
// The name span of an opening tag begins one byte into the tag span.
#[test]
fn open_without_attrs() {
// See docblock for [`EleSpan`].
const T: Span = DC.span(0, 8); // Relevant portion of tag
const N: Span = DC.span(1, 4); // Element name
let sut = OpenSpan(T, N.len());
assert_eq!(sut.span(), T);
assert_eq!(sut.name_span(), N);
}
// Same as above but with a shorter tag span;
// the name span is unaffected by the length of the tag span.
#[test]
fn open_with_attrs() {
// See docblock for [`EleSpan`].
const T: Span = DC.span(0, 5); // Relevant portion of tag
const N: Span = DC.span(1, 4); // Element name
let sut = OpenSpan(T, N.len());
assert_eq!(sut.span(), T);
assert_eq!(sut.name_span(), N);
}
// The name span of a closing tag begins two bytes into the tag span.
#[test]
fn close() {
// See docblock for [`EleSpan`].
const T: Span = DC.span(0, 10); // Relevant portion of tag
const N: Span = DC.span(2, 5); // Element name
let sut = CloseSpan(T, N.len());
assert_eq!(sut.span(), T);
assert_eq!(sut.name_span(), N);
}
// With a name length of zero,
// the name span falls back to the tag span.
#[test]
fn close_empty() {
// See docblock for [`EleSpan`].
const T: Span = DC.span(9, 2); // Relevant portion of tag
let sut = CloseSpan(T, 0);
assert_eq!(sut.span(), T);
// There is no name,
// only Zuul.
assert_eq!(sut.name_span(), T);
}
}
}

View File

@ -149,6 +149,7 @@ mod test {
convert::ExpectInto,
parse::{EmptyContext, ParseStatus, Parsed},
sym::GlobalSymbolIntern,
xir::test::{close_empty, open},
};
const S: Span = crate::span::DUMMY_SPAN;
@ -156,7 +157,7 @@ mod test {
#[test]
fn dead_if_first_token_is_non_attr() {
let tok = XirToken::Open("foo".unwrap_into(), S);
let tok = open("foo", S);
let sut = AttrParseState::default();
@ -206,14 +207,10 @@ mod test {
// But we provide something else unexpected.
let TransitionResult(Transition(sut), result) =
sut.parse_token(XirToken::Close(None, S2), &mut EmptyContext);
sut.parse_token(close_empty(S2), &mut EmptyContext);
assert_eq!(
result,
Err(AttrParseError::AttrValueExpected(
attr,
S,
XirToken::Close(None, S2)
))
Err(AttrParseError::AttrValueExpected(attr, S, close_empty(S2)))
);
// We should not be in an accepting state,

View File

@ -40,7 +40,7 @@
use super::{
attr::{Attr, AttrParseError, AttrParseState},
QName, Token as XirToken, TokenStream, Whitespace,
CloseSpan, OpenSpan, QName, Token as XirToken, TokenStream, Whitespace,
};
use crate::{
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
@ -50,6 +50,7 @@ use crate::{
},
span::Span,
sym::SymbolId,
xir::EleSpan,
};
use arrayvec::ArrayVec;
use std::{error::Error, fmt::Display};
@ -75,7 +76,7 @@ impl Display for Depth {
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XirfToken {
/// Opening tag of an element.
Open(QName, Span, Depth),
Open(QName, OpenSpan, Depth),
/// Closing tag of an element.
///
@ -84,7 +85,7 @@ pub enum XirfToken {
/// If the name is [`Some`],
/// then the tag is guaranteed to be balanced
/// (matching the depth of its opening tag).
Close(Option<QName>, Span, Depth),
Close(Option<QName>, CloseSpan, Depth),
/// An attribute and its value.
///
@ -120,8 +121,8 @@ impl Token for XirfToken {
use XirfToken::*;
match self {
Open(_, span, _)
| Close(_, span, _)
Open(_, OpenSpan(span, _), _)
| Close(_, CloseSpan(span, _), _)
| Comment(_, span)
| Text(_, span)
| CData(_, span)
@ -287,14 +288,14 @@ where
match tok {
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
Transition(NodeExpected).err(XirToXirfError::MaxDepthExceeded {
open: (qname, span),
open: (qname, span.tag_span()),
max: Depth(MAX_DEPTH),
})
}
XirToken::Open(qname, span) => {
let depth = stack.len();
stack.push((qname, span));
stack.push((qname, span.tag_span()));
// Delegate to the attribute parser until it is complete.
Transition(AttrExpected(SA::default())).ok(Open(
@ -314,7 +315,7 @@ where
Transition(NodeExpected).err(
XirToXirfError::UnbalancedTag {
open: (open_qname, open_span),
close: (qname, close_span),
close: (qname, close_span.tag_span()),
},
)
}
@ -481,4 +482,4 @@ impl From<AttrParseError> for XirToXirfError {
}
#[cfg(test)]
mod test;
pub mod test;

View File

@ -29,6 +29,53 @@ use crate::convert::ExpectInto;
use crate::parse::{ParseError, Parsed};
use crate::span::DUMMY_SPAN;
use crate::sym::GlobalSymbolIntern;
use crate::xir::test::{
close as xir_close, close_empty as xir_close_empty, open as xir_open,
};
use std::fmt::Debug;
/// Quickly construct a [`XirfToken::Open`] at the given `depth` for use
/// in tests.
///
/// `qname` is converted into a [`QName`] using `unwrap_into` and `span`
/// is converted into an [`OpenSpan`] using [`Into`].
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into an [`OpenSpan`] that lacks an element
/// name length.
pub fn open<Q: TryInto<QName>, S: Into<OpenSpan>>(
    qname: Q,
    span: S,
    depth: Depth,
) -> XirfToken
where
    <Q as TryInto<QName>>::Error: Debug,
{
    let name: QName = qname.unwrap_into();
    let open_span: OpenSpan = span.into();

    XirfToken::Open(name, open_span, depth)
}
/// Quickly construct a nameless [`XirfToken::Close`] at the given `depth`,
/// as used to close an empty tag,
/// for use in tests.
///
/// This is [`close`] without its `qname` argument,
/// since the type parameter `Q` of [`close`] cannot be inferred when
/// the name is [`None`].
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into a [`CloseSpan`] that lacks an element
/// name length.
pub fn close_empty<S: Into<CloseSpan>>(span: S, depth: Depth) -> XirfToken {
    let close_span: CloseSpan = span.into();

    XirfToken::Close(None, close_span, depth)
}
/// Quickly construct a [`XirfToken::Close`] at the given `depth` for use
/// in tests.
///
/// When `qname` is [`Some`],
/// its value is converted into a [`QName`] using `unwrap_into`;
/// `span` is converted into a [`CloseSpan`] using [`Into`].
/// Use [`close_empty`] instead if `Q` cannot be inferred.
///
/// This is not suitable for production use:
/// a bare [`Span`] converts into a [`CloseSpan`] that lacks an element
/// name length.
pub fn close<Q: TryInto<QName>, S: Into<CloseSpan>>(
    qname: Option<Q>,
    span: S,
    depth: Depth,
) -> XirfToken
where
    <Q as TryInto<QName>>::Error: Debug,
{
    let name: Option<QName> = qname.map(ExpectInto::unwrap_into);
    let close_span: CloseSpan = span.into();

    XirfToken::Close(name, close_span, depth)
}
const S: Span = DUMMY_SPAN;
const S2: Span = S.offset_add(1).unwrap();
@ -37,16 +84,16 @@ const S4: Span = S3.offset_add(1).unwrap();
#[test]
fn empty_element_self_close() {
let name = ("ns", "elem").unwrap_into();
let name = ("ns", "elem");
let toks = [XirToken::Open(name, S), XirToken::Close(None, S2)].into_iter();
let toks = [xir_open(name, S), xir_close_empty(S2)].into_iter();
let sut = parse::<1>(toks);
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(name, S, Depth(0))),
Parsed::Object(XirfToken::Close(None, S2, Depth(0))),
Parsed::Object(open(name, S, Depth(0))),
Parsed::Object(close_empty(S2, Depth(0))),
]),
sut.collect(),
);
@ -56,17 +103,16 @@ fn empty_element_self_close() {
// closing.
#[test]
fn empty_element_balanced_close() {
let name = ("ns", "openclose").unwrap_into();
let name = ("ns", "openclose");
let toks =
[XirToken::Open(name, S), XirToken::Close(Some(name), S2)].into_iter();
let toks = [xir_open(name, S), xir_close(Some(name), S2)].into_iter();
let sut = parse::<1>(toks);
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(name, S, Depth(0))),
Parsed::Object(XirfToken::Close(Some(name), S2, Depth(0))),
Parsed::Object(open(name, S, Depth(0))),
Parsed::Object(close(Some(name), S2, Depth(0))),
]),
sut.collect(),
);
@ -78,12 +124,12 @@ fn empty_element_balanced_close() {
// part of a parent context.
#[test]
fn extra_closing_tag() {
let name = ("ns", "openclose").unwrap_into();
let name = ("ns", "openclose");
let toks = [
// We need an opening tag to actually begin document parsing.
XirToken::Open(name, S),
XirToken::Close(Some(name), S2),
XirToken::Close(Some(name), S3),
xir_open(name, S),
xir_close(Some(name), S2),
xir_close(Some(name), S3),
]
.into_iter();
@ -94,7 +140,7 @@ fn extra_closing_tag() {
Err(ParseError::UnexpectedToken(
XirToken::Close(Some(given_name), given_span),
_
)) if given_name == name && given_span == S3
)) if given_name == name.unwrap_into() && given_span == S3.into()
);
}
@ -103,12 +149,12 @@ fn extra_closing_tag() {
// gotten to XIRF).
#[test]
fn extra_self_closing_tag() {
let name = ("ns", "openclose").unwrap_into();
let name = ("ns", "openclose");
let toks = [
// We need an opening tag to actually begin document parsing.
XirToken::Open(name, S),
XirToken::Close(None, S2),
XirToken::Close(None, S3),
xir_open(name, S),
xir_close_empty(S2),
xir_close_empty(S3),
]
.into_iter();
@ -117,7 +163,7 @@ fn extra_self_closing_tag() {
assert_matches!(
sut.collect::<Result<Vec<Parsed<XirfToken>>, _>>(),
Err(ParseError::UnexpectedToken(XirToken::Close(None, given_span), _))
if given_span == S3,
if given_span == S3.into(),
);
}
@ -128,17 +174,14 @@ fn empty_element_unbalanced_close() {
let open_name = "open".unwrap_into();
let close_name = "unbalanced_name".unwrap_into();
let toks = [
XirToken::Open(open_name, S),
XirToken::Close(Some(close_name), S2),
]
.into_iter();
let toks =
[xir_open(open_name, S), xir_close(Some(close_name), S2)].into_iter();
let mut sut = parse::<1>(toks);
assert_eq!(
sut.next(),
Some(Ok(Parsed::Object(XirfToken::Open(open_name, S, Depth(0)))))
Some(Ok(Parsed::Object(open(open_name, S, Depth(0)))))
);
assert_eq!(
sut.next(),
@ -152,14 +195,14 @@ fn empty_element_unbalanced_close() {
// Testing depth value.
#[test]
fn single_empty_child() {
let name = ("ns", "openclose").unwrap_into();
let child = "child".unwrap_into();
let name = ("ns", "openclose");
let child = "child";
let toks = [
XirToken::Open(name, S),
XirToken::Open(child, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(name), S4),
xir_open(name, S),
xir_open(child, S2),
xir_close_empty(S3),
xir_close(Some(name), S4),
]
.into_iter();
@ -167,10 +210,10 @@ fn single_empty_child() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(name, S, Depth(0))),
Parsed::Object(XirfToken::Open(child, S2, Depth(1))),
Parsed::Object(XirfToken::Close(None, S3, Depth(1))),
Parsed::Object(XirfToken::Close(Some(name), S4, Depth(0))),
Parsed::Object(open(name, S, Depth(0))),
Parsed::Object(open(child, S2, Depth(1))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(close(Some(name), S4, Depth(0))),
]),
sut.collect(),
);
@ -178,13 +221,13 @@ fn single_empty_child() {
#[test]
fn depth_exceeded() {
let name = ("ns", "openclose").unwrap_into();
let name = ("ns", "openclose");
let exceed = "exceed".unwrap_into();
let toks = [
XirToken::Open(name, S),
xir_open(name, S),
// This one exceeds the max depth, ...
XirToken::Open(exceed, S2),
xir_open(exceed, S2),
]
.into_iter();
@ -192,7 +235,7 @@ fn depth_exceeded() {
let mut sut = parse::<1>(toks);
assert_eq!(
Some(Ok(Parsed::Object(XirfToken::Open(name, S, Depth(0))))),
Some(Ok(Parsed::Object(open(name, S, Depth(0))))),
sut.next()
);
assert_eq!(
@ -208,19 +251,19 @@ fn depth_exceeded() {
#[test]
fn empty_element_with_attrs() {
let name = ("ns", "elem").unwrap_into();
let name = ("ns", "elem");
let attr1 = "a".unwrap_into();
let attr2 = "b".unwrap_into();
let val1 = "val1".intern();
let val2 = "val2".intern();
let toks = [
XirToken::Open(name, S),
xir_open(name, S),
XirToken::AttrName(attr1, S2),
XirToken::AttrValue(val1, S3),
XirToken::AttrName(attr2, S3),
XirToken::AttrValue(val2, S4),
XirToken::Close(None, S4),
xir_close_empty(S4),
]
.into_iter();
@ -228,12 +271,12 @@ fn empty_element_with_attrs() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(name, S, Depth(0))),
Parsed::Object(open(name, S, Depth(0))),
Parsed::Incomplete,
Parsed::Object(XirfToken::Attr(Attr::new(attr1, val1, (S2, S3)))),
Parsed::Incomplete,
Parsed::Object(XirfToken::Attr(Attr::new(attr2, val2, (S3, S4)))),
Parsed::Object(XirfToken::Close(None, S4, Depth(0))),
Parsed::Object(close_empty(S4, Depth(0))),
]),
sut.collect(),
);
@ -241,18 +284,18 @@ fn empty_element_with_attrs() {
#[test]
fn child_element_after_attrs() {
let name = ("ns", "elem").unwrap_into();
let child = "child".unwrap_into();
let name = ("ns", "elem");
let child = "child";
let attr = "a".unwrap_into();
let val = "val".intern();
let toks = [
XirToken::Open(name, S),
xir_open(name, S),
XirToken::AttrName(attr, S),
XirToken::AttrValue(val, S2),
XirToken::Open(child, S),
XirToken::Close(None, S2),
XirToken::Close(Some(name), S3),
xir_open(child, S),
xir_close_empty(S2),
xir_close(Some(name), S3),
]
.into_iter();
@ -260,12 +303,12 @@ fn child_element_after_attrs() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(name, S, Depth(0))),
Parsed::Object(open(name, S, Depth(0))),
Parsed::Incomplete,
Parsed::Object(XirfToken::Attr(Attr::new(attr, val, (S, S2)))),
Parsed::Object(XirfToken::Open(child, S, Depth(1))),
Parsed::Object(XirfToken::Close(None, S2, Depth(1))),
Parsed::Object(XirfToken::Close(Some(name), S3, Depth(0))),
Parsed::Object(open(child, S, Depth(1))),
Parsed::Object(close_empty(S2, Depth(1))),
Parsed::Object(close(Some(name), S3, Depth(0))),
]),
sut.collect(),
);
@ -273,17 +316,17 @@ fn child_element_after_attrs() {
#[test]
fn element_with_empty_sibling_children() {
let parent = "parent".unwrap_into();
let childa = "childa".unwrap_into();
let childb = "childb".unwrap_into();
let parent = "parent";
let childa = "childa";
let childb = "childb";
let toks = [
XirToken::Open(parent, S),
XirToken::Open(childa, S2),
XirToken::Close(None, S3),
XirToken::Open(childb, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(parent), S2),
xir_open(parent, S),
xir_open(childa, S2),
xir_close_empty(S3),
xir_open(childb, S2),
xir_close_empty(S3),
xir_close(Some(parent), S2),
]
.into_iter();
@ -291,12 +334,12 @@ fn element_with_empty_sibling_children() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(parent, S, Depth(0))),
Parsed::Object(XirfToken::Open(childa, S2, Depth(1))),
Parsed::Object(XirfToken::Close(None, S3, Depth(1))),
Parsed::Object(XirfToken::Open(childb, S2, Depth(1))),
Parsed::Object(XirfToken::Close(None, S3, Depth(1))),
Parsed::Object(XirfToken::Close(Some(parent), S2, Depth(0))),
Parsed::Object(open(parent, S, Depth(0))),
Parsed::Object(open(childa, S2, Depth(1))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(open(childb, S2, Depth(1))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(close(Some(parent), S2, Depth(0))),
]),
sut.collect(),
);
@ -305,18 +348,18 @@ fn element_with_empty_sibling_children() {
// Ensures that attributes do not cause the parent context to be lost.
#[test]
fn element_with_child_with_attributes() {
let parent = "parent".unwrap_into();
let child = "child".unwrap_into();
let parent = "parent";
let child = "child";
let attr = "attr".unwrap_into();
let value = "attr value".intern();
let toks = [
XirToken::Open(parent, S),
XirToken::Open(child, S),
xir_open(parent, S),
xir_open(child, S),
XirToken::AttrName(attr, S),
XirToken::AttrValue(value, S2),
XirToken::Close(None, S3),
XirToken::Close(Some(parent), S3),
xir_close_empty(S3),
xir_close(Some(parent), S3),
]
.into_iter();
@ -324,12 +367,12 @@ fn element_with_child_with_attributes() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(parent, S, Depth(0))),
Parsed::Object(XirfToken::Open(child, S, Depth(1))),
Parsed::Object(open(parent, S, Depth(0))),
Parsed::Object(open(child, S, Depth(1))),
Parsed::Incomplete,
Parsed::Object(XirfToken::Attr(Attr::new(attr, value, (S, S2)))),
Parsed::Object(XirfToken::Close(None, S3, Depth(1))),
Parsed::Object(XirfToken::Close(Some(parent), S3, Depth(0))),
Parsed::Object(close_empty(S3, Depth(1))),
Parsed::Object(close(Some(parent), S3, Depth(0))),
]),
sut.collect(),
);
@ -337,13 +380,13 @@ fn element_with_child_with_attributes() {
#[test]
fn element_with_text() {
let parent = "parent".unwrap_into();
let parent = "parent";
let text = "inner text".into();
let toks = [
XirToken::Open(parent, S),
xir_open(parent, S),
XirToken::Text(text, S2),
XirToken::Close(Some(parent), S3),
xir_close(Some(parent), S3),
]
.into_iter();
@ -351,9 +394,9 @@ fn element_with_text() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Open(parent, S, Depth(0))),
Parsed::Object(open(parent, S, Depth(0))),
Parsed::Object(XirfToken::Text(text, S2)),
Parsed::Object(XirfToken::Close(Some(parent), S3, Depth(0))),
Parsed::Object(close(Some(parent), S3, Depth(0))),
]),
sut.collect(),
);
@ -361,13 +404,13 @@ fn element_with_text() {
#[test]
fn not_accepting_state_if_element_open() {
let name = "unclosed".unwrap_into();
let toks = [XirToken::Open(name, S)].into_iter();
let name = "unclosed";
let toks = [xir_open(name, S)].into_iter();
let mut sut = parse::<1>(toks);
assert_eq!(
Some(Ok(Parsed::Object(XirfToken::Open(name, S, Depth(0))))),
Some(Ok(Parsed::Object(open(name, S, Depth(0))))),
sut.next()
);
@ -378,14 +421,14 @@ fn not_accepting_state_if_element_open() {
// XML permits comment nodes before and after the document root element.
#[test]
fn comment_before_or_after_root_ok() {
let name = "root".unwrap_into();
let name = "root";
let cstart = "start comment".intern();
let cend = "end comment".intern();
let toks = [
XirToken::Comment(cstart, S),
XirToken::Open(name, S2),
XirToken::Close(None, S3),
xir_open(name, S2),
xir_close_empty(S3),
XirToken::Comment(cend, S4),
]
.into_iter();
@ -395,8 +438,8 @@ fn comment_before_or_after_root_ok() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Comment(cstart, S)),
Parsed::Object(XirfToken::Open(name, S2, Depth(0))),
Parsed::Object(XirfToken::Close(None, S3, Depth(0))),
Parsed::Object(open(name, S2, Depth(0))),
Parsed::Object(close_empty(S3, Depth(0))),
Parsed::Object(XirfToken::Comment(cend, S4)),
]),
sut.collect(),
@ -416,10 +459,10 @@ fn content_after_root_close_error() {
let name = "root".unwrap_into();
let toks = [
XirToken::Open(name, S),
XirToken::Close(None, S2),
xir_open(name, S),
xir_close_empty(S2),
// Document ends here
XirToken::Open(name, S3),
xir_open(name, S3),
]
.into_iter();
@ -429,7 +472,7 @@ fn content_after_root_close_error() {
sut.collect(),
Result::<Vec<Parsed<XirfToken>>, _>::Err(ParseError::UnexpectedToken(
XirToken::Open(given_name, given_span),
_)) if given_name == name && given_span == S3
_)) if given_name == name && given_span == S3.into()
);
}

View File

@ -25,7 +25,7 @@
//! - [`elem_wrap`] wraps a token stream iterator as the body of an
//! element of the given name.
use super::{QName, Token, TokenStream};
use super::{CloseSpan, OpenSpan, QName, Token, TokenStream};
use crate::span::Span;
use std::iter::{once, Chain, Once};
@ -46,10 +46,14 @@ where
{
let twospan: (Span, Span) = span.into();
// TODO: These tokens won't be able to derive name spans,
// but this is only used by the linker at the time of writing for
// generated tokens,
// where the provided span is a dummy linker span anyway.
ElemWrapIter::new(
Token::Open(name, twospan.0),
Token::Open(name, OpenSpan::without_name_span(twospan.0)),
inner,
Token::Close(Some(name), twospan.1),
Token::Close(Some(name), CloseSpan::without_name_span(twospan.1)),
)
}
@ -87,8 +91,8 @@ mod test {
#[test]
fn elem_wrap_iter() {
let inner = vec![
Token::Open("foo".unwrap_into(), DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Open("foo".unwrap_into(), DUMMY_SPAN.into()),
Token::Close(None, DUMMY_SPAN.into()),
];
let elem_name = "element".unwrap_into();
@ -102,10 +106,10 @@ mod test {
assert_eq!(
result.collect::<Vec<_>>(),
vec![
Token::Open(elem_name, twospan.0),
Token::Open(elem_name, twospan.0.into()),
inner[0].clone(),
inner[1].clone(),
Token::Close(Some(elem_name), twospan.1),
Token::Close(Some(elem_name), twospan.1.into()),
]
);
}

View File

@ -388,7 +388,7 @@ mod test {
span::{Span, DUMMY_SPAN},
xir::{
attr::{Attr, AttrSpan},
flat::{Depth, XirfToken},
flat::{test::close_empty, Depth, XirfToken},
st::qname::*,
},
};
@ -443,7 +443,7 @@ mod test {
let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
XirfToken::Attr(attr_name.clone()),
@ -478,7 +478,7 @@ mod test {
let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
// @yields then @name just to emphasize that order does not matter.
let toks = vec![
@ -512,7 +512,7 @@ mod test {
let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
let attr_yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
XirfToken::Attr(attr_name.clone()),
@ -543,7 +543,7 @@ mod test {
}
}
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
// Will cause dead state:
@ -575,7 +575,7 @@ mod test {
let attr_name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
// `name` and `src` but no optional `yields`.
@ -619,7 +619,7 @@ mod test {
#[test]
fn required_missing_values() {
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
XirfToken::Attr(ATTR_NAME),
@ -670,7 +670,7 @@ mod test {
// The dead token doesn't matter;
// it needs to be present but is otherwise ignored for this test.
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let err = AttrParseError::MissingRequired(tok_dead, partial);
// When represented as a string,
@ -706,7 +706,7 @@ mod test {
// The dead token doesn't matter;
// it needs to be present but is otherwise ignored for this test.
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let err = AttrParseError::MissingRequired(tok_dead, partial);
let desc = err.describe();
@ -745,7 +745,7 @@ mod test {
let attr_unexpected =
Attr(QN_TYPE, "unexpected".into(), AttrSpan(S1, S2));
let attr_src = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));
let tok_dead = XirfToken::Close(None, S3, Depth(0));
let tok_dead = close_empty(S3, Depth(0));
let toks = vec![
// This is expected:

View File

@ -21,7 +21,10 @@
//!
//! This uses [`quick_xml`] as the parser.
use super::{error::SpanlessError, DefaultEscaper, Error, Escaper, Token};
use super::{
error::SpanlessError, CloseSpan, DefaultEscaper, Error, Escaper, OpenSpan,
Token,
};
use crate::{
parse::{ParseError, Parsed, ParsedObject, ParsedResult},
span::Context,
@ -149,7 +152,10 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
// Tag is self-closing, but this does not yet
// handle whitespace before the `/`
// (as indicated in the span above).
self.tokbuf.push_front(Token::Close(None, span));
self.tokbuf.push_front(Token::Close(
None,
CloseSpan::empty(span),
));
Ok(open)
}),
@ -171,13 +177,22 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
//
// </foo >
// [------] name + '<' + '/' + " >"
let len = self.reader.buffer_position() - prev_pos;
let span = ctx.span_or_zz(prev_pos, len);
let pos_delta = self.reader.buffer_position() - prev_pos;
let span = ctx.span_or_zz(prev_pos, pos_delta);
let name_len = ele.name().len();
ele.name()
.try_into()
.map_err(Error::from_with_span(span))
.and_then(|qname| Ok(Token::Close(Some(qname), span)))
.and_then(|qname| {
// The second `CloseSpan` field is the byte length of the element
// name (`ele.name().len()` above),
// recorded alongside the span of the whole closing tag.
//
// NOTE(review): a name length that does not fit the target
// integer type silently becomes 0 here,
// which is also the value the reader tests expect for
// self-closing tags (`CloseSpan(_, 0)`);
// confirm that this fallback is intended rather than an error.
Ok(Token::Close(
Some(qname),
CloseSpan(
span,
name_len.try_into().unwrap_or(0),
),
))
})
}),
// quick_xml emits a useless text event if the first byte is
@ -404,7 +419,10 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
// The first token will be immediately returned
// via the Iterator.
Ok(Token::Open(qname, span))
// NOTE(review): `len` is defined outside the visible hunk;
// presumably it is the element name length,
// since the reader tests expect e.g. `OpenSpan(_, 4)` for `root`.
// A failed integer conversion falls back to 0 rather than
// producing an error; confirm that is intended.
Ok(Token::Open(
qname,
OpenSpan(span, len.try_into().unwrap_or(0)),
))
})
}

View File

@ -20,6 +20,7 @@
use super::*;
use crate::parse::UnknownToken;
use crate::sym::GlobalSymbolIntern;
use crate::xir::{CloseSpan, OpenSpan};
use crate::{
convert::ExpectInto,
parse::{
@ -105,8 +106,8 @@ fn empty_node_without_prefix_or_attributes_or_whitespace() {
assert_eq!(
Ok(vec![
O(Token::Open("empty-node".unwrap_into(), a)),
O(Token::Close(None, b)),
O(Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10))),
O(Token::Close(None, CloseSpan(b, 0))),
]),
sut.collect(),
);
@ -127,8 +128,8 @@ fn empty_node_without_prefix_or_attributes() {
assert_eq!(
Ok(vec![
O(Token::Open("empty-node".unwrap_into(), a)),
O(Token::Close(None, b)),
O(Token::Open("empty-node".unwrap_into(), OpenSpan(a, 10))),
O(Token::Close(None, CloseSpan(b, 0))),
]),
sut.collect(),
);
@ -149,11 +150,11 @@ fn does_not_resolve_xmlns() {
assert_eq!(
Ok(vec![
O(Token::Open("no-ns".unwrap_into(), a)),
O(Token::Open("no-ns".unwrap_into(), OpenSpan(a, 5))),
// Since we didn't parse @xmlns, it's still an attribute.
O(Token::AttrName("xmlns".unwrap_into(), b)),
O(Token::AttrValue("noresolve:UNESC".intern(), c)),
O(Token::Close(None, d)),
O(Token::Close(None, CloseSpan(d, 0))),
]),
sut.collect(),
);
@ -175,10 +176,13 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
// Should be the QName, _unresolved_.
assert_eq!(
Ok(vec![
O(Token::Open(("x", "empty-node").unwrap_into(), a)),
O(Token::Open(
("x", "empty-node").unwrap_into(),
OpenSpan(a, 12)
)),
O(Token::AttrName(("xmlns", "x").unwrap_into(), b)),
O(Token::AttrValue("noresolve:UNESC".intern(), c)),
O(Token::Close(None, d)),
O(Token::Close(None, CloseSpan(d, 0))),
]),
sut.collect(),
);
@ -224,14 +228,14 @@ fn multiple_attrs_ordered() {
assert_eq!(
Ok(vec![
O(Token::Open("ele".unwrap_into(), a)),
O(Token::Open("ele".unwrap_into(), OpenSpan(a, 3))),
O(Token::AttrName("foo".unwrap_into(), b)),
O(Token::AttrValue("a:UNESC".intern(), c)),
O(Token::AttrName("bar".unwrap_into(), d)),
O(Token::AttrValue("b:UNESC".intern(), e)),
O(Token::AttrName(("b", "baz").unwrap_into(), f)),
O(Token::AttrValue("c:UNESC".intern(), g)),
O(Token::Close(None, h)),
O(Token::Close(None, CloseSpan(h, 0))),
]),
sut.collect(),
);
@ -254,10 +258,10 @@ fn empty_attr_value() {
assert_eq!(
Ok(vec![
O(Token::Open("ele".unwrap_into(), a)),
O(Token::Open("ele".unwrap_into(), OpenSpan(a, 3))),
O(Token::AttrName("empty".unwrap_into(), b)),
O(Token::AttrValue(":UNESC".intern(), c)),
O(Token::Close(None, d)),
O(Token::Close(None, CloseSpan(d, 0))),
]),
sut.collect(),
);
@ -282,12 +286,12 @@ fn permits_duplicate_attrs() {
assert_eq!(
Ok(vec![
O(Token::Open("dup".unwrap_into(), a)),
O(Token::Open("dup".unwrap_into(), OpenSpan(a, 3))),
O(Token::AttrName("attr".unwrap_into(), b)),
O(Token::AttrValue("a:UNESC".intern(), c)),
O(Token::AttrName("attr".unwrap_into(), d)),
O(Token::AttrValue("b:UNESC".intern(), e)),
O(Token::Close(None, f)),
O(Token::Close(None, CloseSpan(f, 0))),
]),
sut.collect(),
);
@ -308,8 +312,8 @@ fn open_close_no_child() {
assert_eq!(
Ok(vec![
O(Token::Open("nochild".unwrap_into(), a)),
O(Token::Close(Some("nochild".unwrap_into()), b)),
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))),
]),
sut.collect(),
);
@ -329,8 +333,8 @@ fn open_close_no_child_open_tag_whitespace() {
assert_eq!(
Ok(vec![
O(Token::Open("nochild".unwrap_into(), a)),
O(Token::Close(Some("nochild".unwrap_into()), b)),
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))),
]),
sut.collect(),
);
@ -350,8 +354,8 @@ fn open_close_no_child_close_tag_whitespace() {
assert_eq!(
Ok(vec![
O(Token::Open("nochild".unwrap_into(), a)),
O(Token::Close(Some("nochild".unwrap_into()), b)),
O(Token::Open("nochild".unwrap_into(), OpenSpan(a, 7))),
O(Token::Close(Some("nochild".unwrap_into()), CloseSpan(b, 7))),
]),
sut.collect(),
);
@ -374,10 +378,10 @@ fn child_node_self_closing() {
assert_eq!(
Ok(vec![
O(Token::Open("root".unwrap_into(), a)),
O(Token::Open("child".unwrap_into(), b)),
O(Token::Close(None, c)),
O(Token::Close(Some("root".unwrap_into()), d)),
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))),
O(Token::Close(None, CloseSpan(c, 0))),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4))),
]),
sut.collect(),
);
@ -399,12 +403,12 @@ fn sibling_nodes() {
assert_eq!(
Ok(vec![
O(Token::Open("root".unwrap_into(), a)),
O(Token::Open("child".unwrap_into(), b)),
O(Token::Close(None, c)),
O(Token::Open("child".unwrap_into(), d)),
O(Token::Close(None, e)),
O(Token::Close(Some("root".unwrap_into()), f)),
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))),
O(Token::Close(None, CloseSpan(c, 0))),
O(Token::Open("child".unwrap_into(), OpenSpan(d, 5))),
O(Token::Close(None, CloseSpan(e, 0))),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4))),
]),
sut.collect(),
);
@ -426,12 +430,12 @@ fn child_node_with_attrs() {
assert_eq!(
Ok(vec![
O(Token::Open("root".unwrap_into(), a)),
O(Token::Open("child".unwrap_into(), b)),
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))),
O(Token::AttrName("foo".unwrap_into(), c)),
O(Token::AttrValue("bar:UNESC".intern(), d)),
O(Token::Close(None, e)),
O(Token::Close(Some("root".unwrap_into()), f)),
O(Token::Close(None, CloseSpan(e, 0))),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(f, 4))),
]),
sut.collect(),
);
@ -450,9 +454,9 @@ fn child_text() {
assert_eq!(
Ok(vec![
O(Token::Open("text".unwrap_into(), a)),
O(Token::Open("text".unwrap_into(), OpenSpan(a, 4))),
O(Token::Text("foo bar:UNESC".into(), b)),
O(Token::Close(Some("text".unwrap_into()), c)),
O(Token::Close(Some("text".unwrap_into()), CloseSpan(c, 4))),
]),
sut.collect(),
);
@ -474,12 +478,12 @@ fn mixed_child_content() {
assert_eq!(
Ok(vec![
O(Token::Open("text".unwrap_into(), a)),
O(Token::Open("text".unwrap_into(), OpenSpan(a, 4))),
O(Token::Text("foo:UNESC".into(), b)),
O(Token::Open("em".unwrap_into(), c)),
O(Token::Open("em".unwrap_into(), OpenSpan(c, 2))),
O(Token::Text("bar:UNESC".into(), d)),
O(Token::Close(Some("em".unwrap_into()), e)),
O(Token::Close(Some("text".unwrap_into()), f)),
O(Token::Close(Some("em".unwrap_into()), CloseSpan(e, 2))),
O(Token::Close(Some("text".unwrap_into()), CloseSpan(f, 4))),
]),
sut.collect(),
);
@ -515,12 +519,12 @@ fn mixed_child_content_with_newlines() {
assert_eq!(
Ok(vec![
O(Token::Text("\n:UNESC".into(), a)),
O(Token::Open("root".unwrap_into(), b)),
O(Token::Open("root".unwrap_into(), OpenSpan(b, 4))),
O(Token::Text("\n :UNESC".into(), c)),
O(Token::Open("child".unwrap_into(), d)),
O(Token::Close(None, e)),
O(Token::Open("child".unwrap_into(), OpenSpan(d, 5))),
O(Token::Close(None, CloseSpan(e, 0))),
O(Token::Text("\n:UNESC".into(), f)),
O(Token::Close(Some("root".unwrap_into()), g)),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(g, 4))),
O(Token::Text("\n:UNESC".into(), h)),
]),
sut.collect(),
@ -542,9 +546,9 @@ fn comment() {
assert_eq!(
Ok(vec![
O(Token::Comment("root".into(), a)),
O(Token::Open("root".unwrap_into(), b)),
O(Token::Open("root".unwrap_into(), OpenSpan(b, 4))),
O(Token::Comment("<child>".into(), c)),
O(Token::Close(Some("root".unwrap_into()), d)),
O(Token::Close(Some("root".unwrap_into()), CloseSpan(d, 4))),
]),
sut.collect(),
);
@ -570,10 +574,10 @@ lines-->
assert_eq!(
Ok(vec![
O(Token::Open("mult".unwrap_into(), a)),
O(Token::Open("mult".unwrap_into(), OpenSpan(a, 4))),
O(Token::Comment("comment\non multiple\nlines".into(), b)),
O(Token::Text("\n:UNESC".into(), c)),
O(Token::Close(Some("mult".unwrap_into()), d)),
O(Token::Close(Some("mult".unwrap_into()), CloseSpan(d, 4))),
]),
sut.collect(),
);
@ -594,10 +598,13 @@ fn permits_mismatched_tags() {
assert_eq!(
Ok(vec![
O(Token::Open("root".unwrap_into(), a)),
O(Token::Open("child".unwrap_into(), b)),
O(Token::Close(None, c)),
O(Token::Close(Some("mismatch".unwrap_into()), d)),
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))),
O(Token::Open("child".unwrap_into(), OpenSpan(b, 5))),
O(Token::Close(None, CloseSpan(c, 0))),
O(Token::Close(
Some("mismatch".unwrap_into()),
CloseSpan(d, 8)
)),
]),
sut.collect(),
);
@ -682,8 +689,8 @@ fn valid_xml_decl_no_encoding() {
assert_eq!(
Ok(vec![
O(Token::Open("root".unwrap_into(), a)),
O(Token::Close(None, b)),
O(Token::Open("root".unwrap_into(), OpenSpan(a, 4))),
O(Token::Close(None, CloseSpan(b, 0))),
]),
sut.collect()
);

View File

@ -186,6 +186,7 @@ use crate::{
},
span::Span,
sym::SymbolId,
xir::{CloseSpan, OpenSpan},
};
use std::{error::Error, fmt::Display, result};
@ -530,23 +531,24 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
match (self, tok) {
// Open a root element (or lack of context).
(Empty, Token::Open(name, span)) => {
(Empty, Token::Open(name, OpenSpan(span, _))) => {
Self::begin_attrs(name, span, None)
}
// Open a child element.
(BuddingElement(pstack), Token::Open(name, span)) => {
(BuddingElement(pstack), Token::Open(name, OpenSpan(span, _))) => {
Self::begin_attrs(name, span, Some(pstack.store()))
}
// Open a child element in attribute parsing context.
(BuddingAttrList(pstack, attr_list), Token::Open(name, span)) => {
Self::begin_attrs(
name,
span,
Some(pstack.consume_attrs(attr_list).store()),
)
}
(
BuddingAttrList(pstack, attr_list),
Token::Open(name, OpenSpan(span, _)),
) => Self::begin_attrs(
name,
span,
Some(pstack.consume_attrs(attr_list).store()),
),
// Attribute parsing.
(AttrState(estack, attrs, sa), tok) => {
@ -569,20 +571,8 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
}
}
(BuddingElement(stack), Token::Close(name, span)) => stack
.try_close(name, span)
.map(ElementStack::consume_child_or_complete)
.map(|new_stack| match new_stack {
Stack::ClosedElement(ele) => {
Transition(Empty).ok(Tree::Element(ele))
}
_ => Transition(new_stack).incomplete(),
})
.unwrap_or_else(|err| Transition(Empty).err(err)),
(BuddingAttrList(stack, attr_list), Token::Close(name, span)) => {
(BuddingElement(stack), Token::Close(name, CloseSpan(span, _))) => {
stack
.consume_attrs(attr_list)
.try_close(name, span)
.map(ElementStack::consume_child_or_complete)
.map(|new_stack| match new_stack {
@ -594,6 +584,21 @@ impl<SA: StackAttrParseState> ParseState for Stack<SA> {
.unwrap_or_else(|err| Transition(Empty).err(err))
}
(
BuddingAttrList(stack, attr_list),
Token::Close(name, CloseSpan(span, _)),
) => stack
.consume_attrs(attr_list)
.try_close(name, span)
.map(ElementStack::consume_child_or_complete)
.map(|new_stack| match new_stack {
Stack::ClosedElement(ele) => {
Transition(Empty).ok(Tree::Element(ele))
}
_ => Transition(new_stack).incomplete(),
})
.unwrap_or_else(|err| Transition(Empty).err(err)),
(BuddingElement(mut ele), Token::Text(value, span)) => {
ele.element.children.push(Tree::Text(value, span));

View File

@ -22,16 +22,13 @@ use std::assert_matches::assert_matches;
use super::*;
use crate::convert::ExpectInto;
use crate::parse::ParseError;
use crate::span::DUMMY_SPAN;
use crate::sym::GlobalSymbolIntern;
use crate::xir::test::{close, close_empty, open};
lazy_static! {
static ref S: Span =
Span::from_byte_interval((0, 0), "test case, 1".intern());
static ref S2: Span =
Span::from_byte_interval((0, 0), "test case, 2".intern());
static ref S3: Span =
Span::from_byte_interval((0, 0), "test case, 3".intern());
}
const S1: Span = DUMMY_SPAN;
const S2: Span = S1.offset_add(1).unwrap();
const S3: Span = S2.offset_add(1).unwrap();
mod tree {
use super::*;
@ -42,7 +39,7 @@ mod tree {
name: "foo".unwrap_into(),
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
};
let tree = Tree::Element(ele.clone());
@ -54,7 +51,7 @@ mod tree {
#[test]
fn text_from_tree() {
let text = "foo".intern();
let tree = Tree::Text(text, *S);
let tree = Tree::Text(text, S1);
assert!(!tree.is_element());
assert_eq!(None, tree.as_element());
@ -72,8 +69,8 @@ mod attrs {
let a = "a".unwrap_into();
let b = "b".unwrap_into();
let attra = Attr::new(a, "a value".intern(), (*S, *S2));
let attrb = Attr::new(b, "b value".intern(), (*S, *S2));
let attra = Attr::new(a, "a value".intern(), (S1, S2));
let attrb = Attr::new(b, "b value".intern(), (S1, S2));
let attrs = AttrList::from([attra.clone(), attrb.clone()]);
@ -88,13 +85,13 @@ mod attrs {
fn empty_element_self_close_from_toks() {
let name = ("ns", "elem").unwrap_into();
let toks = [Token::Open(name, *S), Token::Close(None, *S2)].into_iter();
let toks = [open(name, S1), close_empty(S2)].into_iter();
let expected = Element {
name,
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
};
let mut sut = parse(toks);
@ -113,14 +110,13 @@ fn empty_element_self_close_from_toks() {
fn empty_element_balanced_close_from_toks() {
let name = ("ns", "openclose").unwrap_into();
let toks =
[Token::Open(name, *S), Token::Close(Some(name), *S2)].into_iter();
let toks = [open(name, S1), close(Some(name), S2)].into_iter();
let expected = Element {
name,
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
};
let mut sut = parse(toks);
@ -140,11 +136,7 @@ fn empty_element_unbalanced_close_from_toks() {
let open_name = "open".unwrap_into();
let close_name = "unbalanced_name".unwrap_into();
let toks = [
Token::Open(open_name, *S),
Token::Close(Some(close_name), *S2),
]
.into_iter();
let toks = [open(open_name, S1), close(Some(close_name), S2)].into_iter();
let mut sut = parse(toks);
@ -152,8 +144,8 @@ fn empty_element_unbalanced_close_from_toks() {
assert_eq!(
sut.next(),
Some(Err(ParseError::StateError(StackError::UnbalancedTag {
open: (open_name, *S),
close: (close_name, *S2),
open: (open_name, S1),
close: (close_name, S2),
})))
);
@ -170,23 +162,23 @@ fn empty_element_with_attrs_from_toks() {
let val2 = "val2".intern();
let toks = [
Token::Open(name, *S),
Token::AttrName(attr1, *S),
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S),
Token::AttrValue(val2, *S3),
Token::Close(None, *S2),
open(name, S1),
Token::AttrName(attr1, S1),
Token::AttrValue(val1, S2),
Token::AttrName(attr2, S1),
Token::AttrValue(val2, S3),
close_empty(S2),
]
.into_iter();
let expected = Element {
name,
attrs: AttrList::from(vec![
Attr::new(attr1, val1, (*S, *S2)),
Attr::new(attr2, val2, (*S, *S3)),
Attr::new(attr1, val1, (S1, S2)),
Attr::new(attr2, val2, (S1, S3)),
]),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
};
let mut sut = parse(toks);
@ -211,25 +203,25 @@ fn child_element_after_attrs() {
let val = "val".intern();
let toks = [
Token::Open(name, *S),
Token::AttrName(attr, *S),
Token::AttrValue(val, *S2),
Token::Open(child, *S),
Token::Close(None, *S2),
Token::Close(Some(name), *S3),
open(name, S1),
Token::AttrName(attr, S1),
Token::AttrValue(val, S2),
open(child, S1),
close_empty(S2),
close(Some(name), S3),
]
.into_iter();
let expected = Element {
name,
attrs: AttrList::from(vec![Attr::new(attr, val, (*S, *S2))]),
attrs: AttrList::from(vec![Attr::new(attr, val, (S1, S2))]),
children: vec![Tree::Element(Element {
name: child,
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
})],
span: (*S, *S3),
span: (S1, S3),
};
let mut sut = parse(toks);
@ -253,12 +245,12 @@ fn element_with_empty_sibling_children() {
let childb = "childb".unwrap_into();
let toks = [
Token::Open(parent, *S),
Token::Open(childa, *S),
Token::Close(None, *S2),
Token::Open(childb, *S),
Token::Close(None, *S2),
Token::Close(Some(parent), *S2),
open(parent, S1),
open(childa, S1),
close_empty(S2),
open(childb, S1),
close_empty(S2),
close(Some(parent), S2),
]
.into_iter();
@ -270,16 +262,16 @@ fn element_with_empty_sibling_children() {
name: childa,
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
}),
Tree::Element(Element {
name: childb,
attrs: AttrList::new(),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
}),
],
span: (*S, *S2),
span: (S1, S2),
};
let mut sut = parser_from(toks);
@ -297,12 +289,12 @@ fn element_with_child_with_attributes() {
let value = "attr value".intern();
let toks = [
Token::Open(parent, *S),
Token::Open(child, *S),
Token::AttrName(attr, *S),
Token::AttrValue(value, *S2),
Token::Close(None, *S3),
Token::Close(Some(parent), *S3),
open(parent, S1),
open(child, S1),
Token::AttrName(attr, S1),
Token::AttrValue(value, S2),
close_empty(S3),
close(Some(parent), S3),
]
.into_iter();
@ -311,11 +303,11 @@ fn element_with_child_with_attributes() {
attrs: AttrList::new(),
children: vec![Tree::Element(Element {
name: child,
attrs: AttrList::from([Attr::new(attr, value, (*S, *S2))]),
attrs: AttrList::from([Attr::new(attr, value, (S1, S2))]),
children: vec![],
span: (*S, *S3),
span: (S1, S3),
})],
span: (*S, *S3),
span: (S1, S3),
};
let mut sut = parser_from(toks);
@ -330,17 +322,17 @@ fn element_with_text() {
let text = "inner text".into();
let toks = [
Token::Open(parent, *S),
Token::Text(text, *S2),
Token::Close(Some(parent), *S3),
open(parent, S1),
Token::Text(text, S2),
close(Some(parent), S3),
]
.into_iter();
let expected = Element {
name: parent,
attrs: AttrList::new(),
children: vec![Tree::Text(text, *S2)],
span: (*S, *S3),
children: vec![Tree::Text(text, S2)],
span: (S1, S3),
};
let mut sut = parser_from(toks);
@ -356,18 +348,18 @@ fn parser_from_filters_incomplete() {
let val = "val1".intern();
let toks = [
Token::Open(name, *S),
Token::AttrName(attr, *S),
Token::AttrValue(val, *S2),
Token::Close(None, *S2),
open(name, S1),
Token::AttrName(attr, S1),
Token::AttrValue(val, S2),
close_empty(S2),
]
.into_iter();
let expected = Element {
name,
attrs: AttrList::from([Attr::new(attr, val, (*S, *S2))]),
attrs: AttrList::from([Attr::new(attr, val, (S1, S2))]),
children: vec![],
span: (*S, *S2),
span: (S1, S2),
};
let mut sut = parser_from(toks);
@ -382,14 +374,14 @@ fn parser_from_filters_incomplete() {
// Feeds the attribute parser a single `Open` token,
//   which is not an attribute token,
//   and checks that the first item it yields is an
//   `UnexpectedToken` error wrapping that same token.
#[test]
fn attr_parser_with_non_attr_token() {
let name = "unexpected".unwrap_into();
// NOTE(review): two consecutive `let mut toks` bindings (and, below, two
// consecutive `if` guards) appear here; this looks like the before/after
// forms of the same line from a rendered diff -- confirm against the real
// file, where only one of each should exist.
let mut toks = [Token::Open(name, *S)].into_iter();
let mut toks = [open(name, S1)].into_iter();
let mut sut = attr_parser_from(&mut toks);
// The error must carry the offending token back unchanged:
//   the guard compares both its name and its span to what was fed in.
assert_matches!(
sut.next(),
Some(Err(ParseError::UnexpectedToken(Token::Open(given_name, given_span), _)))
if given_name == name && given_span == *S
if given_name == name && given_span == S1.into()
);
}
@ -401,19 +393,19 @@ fn parser_attr_multiple() {
let val2 = "val2".intern();
let mut toks = [
Token::AttrName(attr1, *S),
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S2),
Token::AttrValue(val2, *S3),
Token::AttrName(attr1, S1),
Token::AttrValue(val1, S2),
Token::AttrName(attr2, S2),
Token::AttrValue(val2, S3),
// Token that we should _not_ hit.
Token::Text("nohit".into(), *S),
Token::Text("nohit".into(), S1),
]
.into_iter();
let mut sut = attr_parser_from(&mut toks);
assert_eq!(sut.next(), Some(Ok(Attr::new(attr1, val1, (*S, *S2)))));
assert_eq!(sut.next(), Some(Ok(Attr::new(attr2, val2, (*S2, *S3)))));
assert_eq!(sut.next(), Some(Ok(Attr::new(attr1, val1, (S1, S2)))));
assert_eq!(sut.next(), Some(Ok(Attr::new(attr2, val2, (S2, S3)))));
// Parsing must stop after the last attribute,
// after which some other parser can continue on the same token
@ -424,6 +416,6 @@ fn parser_attr_multiple() {
Some(Err(ParseError::UnexpectedToken(Token::Text(
given_name,
given_span,
), _))) if given_name == "nohit".into() && given_span == *S
), _))) if given_name == "nohit".into() && given_span == S1
);
}

View File

@ -321,9 +321,13 @@ mod test {
use super::*;
use crate::{
convert::ExpectInto,
span::Span,
span::{Span, DUMMY_SPAN},
sym::GlobalSymbolIntern,
xir::{error::SpanlessError, QName},
xir::{
error::SpanlessError,
test::{close, close_empty, open},
QName,
},
};
type TestResult = std::result::Result<(), Error>;
@ -347,15 +351,12 @@ mod test {
}
}
// Span value `S` that the tests in this module attach to the tokens they
// construct.
// NOTE(review): both a `lazy_static!` definition and a `const` definition
// of `S` appear here; this looks like the before/after of a rendered diff --
// only one can exist in the real file, confirm which.
lazy_static! {
static ref S: Span =
Span::from_byte_interval((0, 0), "test case".intern());
}
const S: Span = DUMMY_SPAN;
#[test]
fn writes_beginning_node_tag_without_prefix() -> TestResult {
let name = QName::new_local("no-prefix".unwrap_into());
let result = Token::Open(name, *S)
let result = open(name, S)
.write_new(Default::default(), &MockEscaper::default())?;
assert_eq!(result.0, b"<no-prefix");
@ -366,8 +367,8 @@ mod test {
#[test]
fn writes_beginning_node_tag_with_prefix() -> TestResult {
let name = ("prefix", "element-name").unwrap_into();
let result = Token::Open(name, *S)
let name = ("prefix", "element-name");
let result = open(name, S)
.write_new(Default::default(), &MockEscaper::default())?;
assert_eq!(result.0, b"<prefix:element-name");
@ -378,8 +379,8 @@ mod test {
#[test]
fn closes_open_node_when_opening_another() -> TestResult {
let name = ("p", "another-element").unwrap_into();
let result = Token::Open(name, *S)
let name = ("p", "another-element");
let result = open(name, S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"><p:another-element");
@ -390,7 +391,7 @@ mod test {
#[test]
fn closes_open_node_as_empty_element() -> TestResult {
let result = Token::Close(None, *S)
let result = close_empty(S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"/>");
@ -401,9 +402,9 @@ mod test {
#[test]
fn closing_tag_when_node_expected() -> TestResult {
let name = ("a", "closed-element").unwrap_into();
let name = ("a", "closed-element");
let result = Token::Close(Some(name), *S)
let result = close(Some(name), S)
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
assert_eq!(result.0, b"</a:closed-element>");
@ -416,9 +417,9 @@ mod test {
// to explicitly support outputting malformed XML.
#[test]
fn closes_open_node_with_closing_tag() -> TestResult {
let name = ("b", "closed-element").unwrap_into();
let name = ("b", "closed-element");
let result = Token::Close(Some(name), *S)
let result = close(Some(name), S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"></b:closed-element>");
@ -430,7 +431,7 @@ mod test {
// Intended for alignment of attributes, primarily.
#[test]
fn whitespace_within_open_node() -> TestResult {
let result = Token::Whitespace(" \t ".unwrap_into(), *S)
let result = Token::Whitespace(" \t ".unwrap_into(), S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b" \t ");
@ -445,13 +446,13 @@ mod test {
let name_local = "nons".unwrap_into();
// Namespace prefix
let result = Token::AttrName(name_ns, *S)
let result = Token::AttrName(name_ns, S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b" some:attr");
assert_eq!(result.1, WriterState::AttrNameAdjacent);
// No namespace prefix
let result = Token::AttrName(name_local, *S)
let result = Token::AttrName(name_local, S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b" nons");
assert_eq!(result.1, WriterState::AttrNameAdjacent);
@ -463,7 +464,7 @@ mod test {
fn writes_attr_value_when_adjacent_to_attr() -> TestResult {
let value = "test str".intern();
let result = Token::AttrValue(value, *S).write_new(
let result = Token::AttrValue(value, S).write_new(
WriterState::AttrNameAdjacent,
&MockEscaper::default(),
)?;
@ -481,9 +482,9 @@ mod test {
let value_right = " right".intern();
let result = vec![
Token::AttrValueFragment(value_left, *S),
Token::AttrValueFragment(value_mid, *S),
Token::AttrValue(value_right, *S),
Token::AttrValueFragment(value_left, S),
Token::AttrValueFragment(value_mid, S),
Token::AttrValue(value_right, S),
]
.into_iter()
.write_new(WriterState::AttrNameAdjacent, &MockEscaper::default())?;
@ -499,13 +500,13 @@ mod test {
let text = "test unescaped".intern();
// When a node is expected.
let result = Token::Text(text, *S)
let result = Token::Text(text, S)
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
assert_eq!(result.0, b"test unescaped:ESC");
assert_eq!(result.1, WriterState::NodeExpected);
// When a node is still open.
let result = Token::Text(text, *S)
let result = Token::Text(text, S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b">test unescaped:ESC");
assert_eq!(result.1, WriterState::NodeExpected);
@ -520,13 +521,13 @@ mod test {
let comment = "comment > escaped".intern();
// When a node is expected.
let result = Token::Comment(comment, *S)
let result = Token::Comment(comment, S)
.write_new(WriterState::NodeExpected, &MockEscaper::default())?;
assert_eq!(result.0, b"<!--comment > escaped-->");
assert_eq!(result.1, WriterState::NodeExpected);
// When a node is still open.
let result = Token::Comment(comment, *S)
let result = Token::Comment(comment, S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"><!--comment > escaped-->");
assert_eq!(result.1, WriterState::NodeExpected);
@ -537,7 +538,7 @@ mod test {
#[test]
fn unsupported_transition_results_in_error() -> TestResult {
assert!(matches!(
Token::AttrValue("".intern(), *S).write(
Token::AttrValue("".intern(), S).write(
&mut vec![],
WriterState::NodeExpected,
&MockEscaper::default()
@ -555,14 +556,14 @@ mod test {
let root: QName = ("r", "root").unwrap_into();
let result = vec![
Token::Open(root, *S),
Token::AttrName(("an", "attr").unwrap_into(), *S),
Token::AttrValue("value".intern(), *S),
Token::Text("text".intern(), *S),
Token::Open(("c", "child").unwrap_into(), *S),
Token::Whitespace(" ".unwrap_into(), *S),
Token::Close(None, *S),
Token::Close(Some(root), *S),
open(root, S),
Token::AttrName(("an", "attr").unwrap_into(), S),
Token::AttrValue("value".intern(), S),
Token::Text("text".intern(), S),
open(("c", "child"), S),
Token::Whitespace(" ".unwrap_into(), S),
close_empty(S),
close(Some(root), S),
]
.into_iter()
.write_new(Default::default(), &MockEscaper::default())?;