tamer: xir::flat: Expose depth for all node-related tokens

Previously a `Depth` was provided only for `Open` and `Close`.  This depth
information, for example, will be used by NIR to quickly determine whether a
given parser ought to assert ownership of a text/comment token rather than
delegating it.

This involved modifying a number of test cases, but it's worth repeating in
these commits that this is intentional---I've been bitten in the past by
using `..` in contexts where I really do want to know if variant fields
change so that I can consider whether and how that change may affect the
code utilizing that variant.

DEV-7145
main
Mike Gerwitz 2022-07-29 15:27:42 -04:00
parent b3c0bdc786
commit 8779abe2bb
6 changed files with 70 additions and 52 deletions

View File

@ -775,7 +775,7 @@ impl ParseState for FragmentsState {
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
.err(XmloError::UnassociatedFragment(span)),
(Fragment(span, id), Xirf::Text(Text(text, _))) => {
(Fragment(span, id), Xirf::Text(Text(text, _), _)) => {
Transition(FragmentDone(span, id))
.ok(XmloToken::Fragment(id, text, span))
}

View File

@ -531,12 +531,12 @@ fn sym_fragment_event() {
// first
open(QN_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
Xirf::Text(Text(frag1, S4)),
Xirf::Text(Text(frag1, S4), Depth(1)),
close(Some(QN_FRAGMENT), S5, Depth(0)),
// second
open(QN_FRAGMENT, S2, Depth(0)),
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
Xirf::Text(Text(frag2, S5)),
Xirf::Text(Text(frag2, S5), Depth(1)),
close(Some(QN_FRAGMENT), S5, Depth(0)),
]
.into_iter();
@ -561,7 +561,7 @@ fn sym_fragment_missing_id() {
let toks = [
open(QN_FRAGMENT, S1, Depth(0)),
// missing @id
Xirf::Text(Text("text".into(), S4)),
Xirf::Text(Text("text".into(), S4), Depth(1)),
]
.into_iter();
@ -579,7 +579,7 @@ fn sym_fragment_empty_id() {
open(QN_FRAGMENT, S1, Depth(0)),
// empty @id
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
Xirf::Text(Text("text".into(), S4)),
Xirf::Text(Text("text".into(), S4), Depth(1)),
]
.into_iter();
@ -649,7 +649,7 @@ fn xmlo_composite_parsers_header() {
// <preproc:fragment
open(QN_FRAGMENT, S4, Depth(2)),
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
Xirf::Text(Text(frag, S5)),
Xirf::Text(Text(frag, S5), Depth(3)),
close(Some(QN_FRAGMENT), S4, Depth(2)),
// </preproc:fragment>
close(Some(QN_FRAGMENTS), S3, Depth(1)),

View File

@ -27,7 +27,7 @@
//!
//! 1. All closing tags must correspond to a matching opening tag at the
//! same depth;
//! 2. [`XirfToken`] exposes the [`Depth`] of each opening/closing tag;
//! 2. [`XirfToken`] exposes the [`Depth`] of each node-related token;
//! 3. Attribute tokens are parsed into [`Attr`] objects;
//! 4. Documents must begin with an element and end with the closing of
//! that element;
@ -80,6 +80,11 @@ impl Display for Depth {
/// Other objects retain the same format as their underlying token,
/// but are still validated to ensure that they are well-formed and that
/// the XML is well-structured.
///
/// Each token representing a child node contains a numeric [`Depth`]
/// indicating the nesting depth;
/// this can be used by downstream parsers to avoid maintaining their
/// own stack in certain cases.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XirfToken<T: TextType> {
/// Opening tag of an element.
@ -101,12 +106,12 @@ pub enum XirfToken<T: TextType> {
Attr(Attr),
/// Comment node.
Comment(SymbolId, Span),
Comment(SymbolId, Span, Depth),
/// Character data as part of an element.
///
/// See also [`CData`](XirfToken::CData) variant.
Text(T),
Text(T, Depth),
/// CData node (`<![CDATA[...]]>`).
///
@ -115,7 +120,7 @@ pub enum XirfToken<T: TextType> {
/// This is intended for reading existing XML data where CData is
/// already present,
/// not for producing new CData safely!
CData(SymbolId, Span),
CData(SymbolId, Span, Depth),
}
impl<T: TextType> Token for XirfToken<T> {
@ -129,10 +134,10 @@ impl<T: TextType> Token for XirfToken<T> {
match self {
Open(_, OpenSpan(span, _), _)
| Close(_, CloseSpan(span, _), _)
| Comment(_, span)
| CData(_, span) => *span,
| Comment(_, span, _)
| CData(_, span, _) => *span,
Text(text) => text.span(),
Text(text, _) => text.span(),
Attr(attr) => attr.span(),
}
}
@ -152,11 +157,13 @@ impl<T: TextType> Display for XirfToken<T> {
Display::fmt(&XirToken::Close(*oqname, *span), f)
}
Attr(attr) => Display::fmt(&attr, f),
Comment(sym, span) => {
Comment(sym, span, _) => {
Display::fmt(&XirToken::Comment(*sym, *span), f)
}
Text(text) => Display::fmt(text, f),
CData(sym, span) => Display::fmt(&XirToken::CData(*sym, *span), f),
Text(text, _) => Display::fmt(text, f),
CData(sym, span, _) => {
Display::fmt(&XirToken::CData(*sym, *span), f)
}
}
}
}
@ -364,7 +371,8 @@ where
match (self, tok) {
// Comments are permitted before and after the first root element.
(st @ (PreRoot(_) | Done), XirToken::Comment(sym, span)) => {
Transition(st).ok(XirfToken::Comment(sym, span))
let depth = Depth(stack.len());
Transition(st).ok(XirfToken::Comment(sym, span, depth))
}
// Ignore whitespace before or after root.
@ -438,6 +446,8 @@ where
) -> TransitionResult<Self> {
use XirToXirf::{AttrExpected, Done, NodeExpected};
let depth = Depth(stack.len());
match tok {
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
Transition(NodeExpected).err(XirToXirfError::MaxDepthExceeded {
@ -447,15 +457,11 @@ where
}
XirToken::Open(qname, span) => {
let depth = stack.len();
stack.push((qname, span.tag_span()));
// Delegate to the attribute parser until it is complete.
Transition(AttrExpected(SA::default())).ok(XirfToken::Open(
qname,
span,
Depth(depth),
))
Transition(AttrExpected(SA::default()))
.ok(XirfToken::Open(qname, span, depth))
}
XirToken::Close(close_oqname, close_span) => {
@ -490,13 +496,14 @@ where
}
}
XirToken::Comment(sym, span) => {
Transition(NodeExpected).ok(XirfToken::Comment(sym, span))
}
XirToken::Comment(sym, span) => Transition(NodeExpected)
.ok(XirfToken::Comment(sym, span, depth)),
XirToken::Text(sym, span) => Transition(NodeExpected)
.ok(XirfToken::Text(T::from(Text(sym, span)))),
.ok(XirfToken::Text(T::from(Text(sym, span)), depth)),
XirToken::CData(sym, span) => {
Transition(NodeExpected).ok(XirfToken::CData(sym, span))
Transition(NodeExpected).ok(XirfToken::CData(sym, span, depth))
}
// We should transition to `State::Attr` before encountering any

View File

@ -393,7 +393,7 @@ fn element_with_text() {
assert_eq!(
Ok(vec![
Parsed::Object(open(parent, S1, Depth(0))),
Parsed::Object(XirfToken::Text(Text(text, S2))),
Parsed::Object(XirfToken::Text(Text(text, S2), Depth(1))),
Parsed::Object(close(Some(parent), S3, Depth(0))),
]),
sut.collect(),
@ -435,10 +435,10 @@ fn comment_before_or_after_root_ok() {
assert_eq!(
Ok(vec![
Parsed::Object(XirfToken::Comment(cstart, S1)),
Parsed::Object(XirfToken::Comment(cstart, S1, Depth(0))),
Parsed::Object(open(name, S2, Depth(0))),
Parsed::Object(close_empty(S3, Depth(0))),
Parsed::Object(XirfToken::Comment(cend, S4)),
Parsed::Object(XirfToken::Comment(cend, S4, Depth(0))),
]),
sut.collect(),
);
@ -541,18 +541,19 @@ fn whitespace_refinement() {
let _ = sut.next(); // discard root
match sut.next().unwrap().unwrap() {
Parsed::Object(XirfToken::Text(RefinedText::Whitespace(
Whitespace(Text(ws, span)),
))) => {
Parsed::Object(XirfToken::Text(
RefinedText::Whitespace(Whitespace(Text(ws, span))),
Depth(1),
)) => {
assert_eq!(ws, given);
assert_eq!(span, S1);
assert!(expected == true)
}
Parsed::Object(XirfToken::Text(RefinedText::Unrefined(Text(
text,
span,
)))) => {
Parsed::Object(XirfToken::Text(
RefinedText::Unrefined(Text(text, span)),
Depth(1),
)) => {
assert_eq!(text, given);
assert_eq!(span, S1);
assert!(expected == false)

View File

@ -475,7 +475,12 @@ macro_rules! ele_parse {
Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
},
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
// Depth check is unnecessary since _all_ xir::parse
// parsers
// (at least at the time of writing)
// ignore whitespace,
// so may as well return early.
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
Transition(st).incomplete()
}
@ -730,7 +735,12 @@ macro_rules! ele_parse {
};
match (self, tok) {
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
// Depth check is unnecessary since _all_ xir::parse
// parsers
// (at least at the time of writing)
// ignore whitespace,
// so may as well return early.
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
Transition(st).incomplete()
}

View File

@ -489,10 +489,10 @@ fn whitespace_ignored_between_elements() {
}
}
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
" ".unwrap_into(),
S1,
))));
let tok_ws = XirfToken::Text(
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
Depth(0),
);
let toks = vec![
// Whitespace before start tag.
@ -702,10 +702,10 @@ fn child_error_and_recovery_at_close() {
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
// Let's mix it up a bit with some text and make sure that is
// ignored too.
XirfToken::Text(RefinedText::Unrefined(Text(
"unexpected text".unwrap_into(),
S5,
))),
XirfToken::Text(
RefinedText::Unrefined(Text("unexpected text".unwrap_into(), S5)),
Depth(1),
),
// Having recovered from the above tokens,
// this will end parsing for `Sut` as expected.
XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
@ -871,10 +871,10 @@ fn sum_nonterminal_accepts_whitespace() {
use Parsed::*;
use XirfToken::{Close, Open};
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
" ".unwrap_into(),
S1,
))));
let tok_ws = XirfToken::Text(
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
Depth(0),
);
// Try each in turn with a fresh instance of `Sut`.
let toks = vec![