tamer: xir::flat: Expose depth for all node-related tokens
Previously a `Depth` was provided only for `Open` and `Close`. This depth information, for example, will be used by NIR to quickly determine whether a given parser ought to assert ownership of a text/comment token rather than delegating it. This involved modifying a number of test cases, but it's worth repeating in these commits that this is intentional---I've been bitten in the past using `..` in contexts where I really do want to know if variant fields change so that I can consider whether and how that change may affect the code utilizing that variant. DEV-7145 main
parent
b3c0bdc786
commit
8779abe2bb
|
@ -775,7 +775,7 @@ impl ParseState for FragmentsState {
|
|||
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
|
||||
.err(XmloError::UnassociatedFragment(span)),
|
||||
|
||||
(Fragment(span, id), Xirf::Text(Text(text, _))) => {
|
||||
(Fragment(span, id), Xirf::Text(Text(text, _), _)) => {
|
||||
Transition(FragmentDone(span, id))
|
||||
.ok(XmloToken::Fragment(id, text, span))
|
||||
}
|
||||
|
|
|
@ -531,12 +531,12 @@ fn sym_fragment_event() {
|
|||
// first
|
||||
open(QN_FRAGMENT, S1, Depth(0)),
|
||||
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
|
||||
Xirf::Text(Text(frag1, S4)),
|
||||
Xirf::Text(Text(frag1, S4), Depth(1)),
|
||||
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
||||
// second
|
||||
open(QN_FRAGMENT, S2, Depth(0)),
|
||||
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
|
||||
Xirf::Text(Text(frag2, S5)),
|
||||
Xirf::Text(Text(frag2, S5), Depth(1)),
|
||||
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
||||
]
|
||||
.into_iter();
|
||||
|
@ -561,7 +561,7 @@ fn sym_fragment_missing_id() {
|
|||
let toks = [
|
||||
open(QN_FRAGMENT, S1, Depth(0)),
|
||||
// missing @id
|
||||
Xirf::Text(Text("text".into(), S4)),
|
||||
Xirf::Text(Text("text".into(), S4), Depth(1)),
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
|
@ -579,7 +579,7 @@ fn sym_fragment_empty_id() {
|
|||
open(QN_FRAGMENT, S1, Depth(0)),
|
||||
// empty @id
|
||||
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
|
||||
Xirf::Text(Text("text".into(), S4)),
|
||||
Xirf::Text(Text("text".into(), S4), Depth(1)),
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
|
@ -649,7 +649,7 @@ fn xmlo_composite_parsers_header() {
|
|||
// <preproc:fragment
|
||||
open(QN_FRAGMENT, S4, Depth(2)),
|
||||
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
|
||||
Xirf::Text(Text(frag, S5)),
|
||||
Xirf::Text(Text(frag, S5), Depth(3)),
|
||||
close(Some(QN_FRAGMENT), S4, Depth(2)),
|
||||
// </preproc:fragment>
|
||||
close(Some(QN_FRAGMENTS), S3, Depth(1)),
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
//!
|
||||
//! 1. All closing tags must correspond to a matching opening tag at the
|
||||
//! same depth;
|
||||
//! 2. [`XirfToken`] exposes the [`Depth`] of each opening/closing tag;
|
||||
//! 2. [`XirfToken`] exposes the [`Depth`] of each node-related token;
|
||||
//! 3. Attribute tokens are parsed into [`Attr`] objects;
|
||||
//! 4. Documents must begin with an element and end with the closing of
|
||||
//! that element;
|
||||
|
@ -80,6 +80,11 @@ impl Display for Depth {
|
|||
/// Other objects retain the same format as their underlying token,
|
||||
/// but are still validated to ensure that they are well-formed and that
|
||||
/// the XML is well-structured.
|
||||
///
|
||||
/// Each token representing a child node contains a numeric [`Depth`]
|
||||
/// indicating the nesting depth;
|
||||
/// this can be used by downstream parsers to avoid maintaining their
|
||||
/// own stack in certain cases.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum XirfToken<T: TextType> {
|
||||
/// Opening tag of an element.
|
||||
|
@ -101,12 +106,12 @@ pub enum XirfToken<T: TextType> {
|
|||
Attr(Attr),
|
||||
|
||||
/// Comment node.
|
||||
Comment(SymbolId, Span),
|
||||
Comment(SymbolId, Span, Depth),
|
||||
|
||||
/// Character data as part of an element.
|
||||
///
|
||||
/// See also [`CData`](XirfToken::CData) variant.
|
||||
Text(T),
|
||||
Text(T, Depth),
|
||||
|
||||
/// CData node (`<![CDATA[...]]>`).
|
||||
///
|
||||
|
@ -115,7 +120,7 @@ pub enum XirfToken<T: TextType> {
|
|||
/// This is intended for reading existing XML data where CData is
|
||||
/// already present,
|
||||
/// not for producing new CData safely!
|
||||
CData(SymbolId, Span),
|
||||
CData(SymbolId, Span, Depth),
|
||||
}
|
||||
|
||||
impl<T: TextType> Token for XirfToken<T> {
|
||||
|
@ -129,10 +134,10 @@ impl<T: TextType> Token for XirfToken<T> {
|
|||
match self {
|
||||
Open(_, OpenSpan(span, _), _)
|
||||
| Close(_, CloseSpan(span, _), _)
|
||||
| Comment(_, span)
|
||||
| CData(_, span) => *span,
|
||||
| Comment(_, span, _)
|
||||
| CData(_, span, _) => *span,
|
||||
|
||||
Text(text) => text.span(),
|
||||
Text(text, _) => text.span(),
|
||||
Attr(attr) => attr.span(),
|
||||
}
|
||||
}
|
||||
|
@ -152,11 +157,13 @@ impl<T: TextType> Display for XirfToken<T> {
|
|||
Display::fmt(&XirToken::Close(*oqname, *span), f)
|
||||
}
|
||||
Attr(attr) => Display::fmt(&attr, f),
|
||||
Comment(sym, span) => {
|
||||
Comment(sym, span, _) => {
|
||||
Display::fmt(&XirToken::Comment(*sym, *span), f)
|
||||
}
|
||||
Text(text) => Display::fmt(text, f),
|
||||
CData(sym, span) => Display::fmt(&XirToken::CData(*sym, *span), f),
|
||||
Text(text, _) => Display::fmt(text, f),
|
||||
CData(sym, span, _) => {
|
||||
Display::fmt(&XirToken::CData(*sym, *span), f)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -364,7 +371,8 @@ where
|
|||
match (self, tok) {
|
||||
// Comments are permitted before and after the first root element.
|
||||
(st @ (PreRoot(_) | Done), XirToken::Comment(sym, span)) => {
|
||||
Transition(st).ok(XirfToken::Comment(sym, span))
|
||||
let depth = Depth(stack.len());
|
||||
Transition(st).ok(XirfToken::Comment(sym, span, depth))
|
||||
}
|
||||
|
||||
// Ignore whitespace before or after root.
|
||||
|
@ -438,6 +446,8 @@ where
|
|||
) -> TransitionResult<Self> {
|
||||
use XirToXirf::{AttrExpected, Done, NodeExpected};
|
||||
|
||||
let depth = Depth(stack.len());
|
||||
|
||||
match tok {
|
||||
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
|
||||
Transition(NodeExpected).err(XirToXirfError::MaxDepthExceeded {
|
||||
|
@ -447,15 +457,11 @@ where
|
|||
}
|
||||
|
||||
XirToken::Open(qname, span) => {
|
||||
let depth = stack.len();
|
||||
stack.push((qname, span.tag_span()));
|
||||
|
||||
// Delegate to the attribute parser until it is complete.
|
||||
Transition(AttrExpected(SA::default())).ok(XirfToken::Open(
|
||||
qname,
|
||||
span,
|
||||
Depth(depth),
|
||||
))
|
||||
Transition(AttrExpected(SA::default()))
|
||||
.ok(XirfToken::Open(qname, span, depth))
|
||||
}
|
||||
|
||||
XirToken::Close(close_oqname, close_span) => {
|
||||
|
@ -490,13 +496,14 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
XirToken::Comment(sym, span) => {
|
||||
Transition(NodeExpected).ok(XirfToken::Comment(sym, span))
|
||||
}
|
||||
XirToken::Comment(sym, span) => Transition(NodeExpected)
|
||||
.ok(XirfToken::Comment(sym, span, depth)),
|
||||
|
||||
XirToken::Text(sym, span) => Transition(NodeExpected)
|
||||
.ok(XirfToken::Text(T::from(Text(sym, span)))),
|
||||
.ok(XirfToken::Text(T::from(Text(sym, span)), depth)),
|
||||
|
||||
XirToken::CData(sym, span) => {
|
||||
Transition(NodeExpected).ok(XirfToken::CData(sym, span))
|
||||
Transition(NodeExpected).ok(XirfToken::CData(sym, span, depth))
|
||||
}
|
||||
|
||||
// We should transition to `State::Attr` before encountering any
|
||||
|
|
|
@ -393,7 +393,7 @@ fn element_with_text() {
|
|||
assert_eq!(
|
||||
Ok(vec![
|
||||
Parsed::Object(open(parent, S1, Depth(0))),
|
||||
Parsed::Object(XirfToken::Text(Text(text, S2))),
|
||||
Parsed::Object(XirfToken::Text(Text(text, S2), Depth(1))),
|
||||
Parsed::Object(close(Some(parent), S3, Depth(0))),
|
||||
]),
|
||||
sut.collect(),
|
||||
|
@ -435,10 +435,10 @@ fn comment_before_or_after_root_ok() {
|
|||
|
||||
assert_eq!(
|
||||
Ok(vec![
|
||||
Parsed::Object(XirfToken::Comment(cstart, S1)),
|
||||
Parsed::Object(XirfToken::Comment(cstart, S1, Depth(0))),
|
||||
Parsed::Object(open(name, S2, Depth(0))),
|
||||
Parsed::Object(close_empty(S3, Depth(0))),
|
||||
Parsed::Object(XirfToken::Comment(cend, S4)),
|
||||
Parsed::Object(XirfToken::Comment(cend, S4, Depth(0))),
|
||||
]),
|
||||
sut.collect(),
|
||||
);
|
||||
|
@ -541,18 +541,19 @@ fn whitespace_refinement() {
|
|||
let _ = sut.next(); // discard root
|
||||
|
||||
match sut.next().unwrap().unwrap() {
|
||||
Parsed::Object(XirfToken::Text(RefinedText::Whitespace(
|
||||
Whitespace(Text(ws, span)),
|
||||
))) => {
|
||||
Parsed::Object(XirfToken::Text(
|
||||
RefinedText::Whitespace(Whitespace(Text(ws, span))),
|
||||
Depth(1),
|
||||
)) => {
|
||||
assert_eq!(ws, given);
|
||||
assert_eq!(span, S1);
|
||||
assert!(expected == true)
|
||||
}
|
||||
|
||||
Parsed::Object(XirfToken::Text(RefinedText::Unrefined(Text(
|
||||
text,
|
||||
span,
|
||||
)))) => {
|
||||
Parsed::Object(XirfToken::Text(
|
||||
RefinedText::Unrefined(Text(text, span)),
|
||||
Depth(1),
|
||||
)) => {
|
||||
assert_eq!(text, given);
|
||||
assert_eq!(span, S1);
|
||||
assert!(expected == false)
|
||||
|
|
|
@ -475,7 +475,12 @@ macro_rules! ele_parse {
|
|||
Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
|
||||
},
|
||||
|
||||
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
|
||||
// Depth check is unnecessary since _all_ xir::parse
|
||||
// parsers
|
||||
// (at least at the time of writing)
|
||||
// ignore whitespace,
|
||||
// so may as well return early.
|
||||
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
|
||||
Transition(st).incomplete()
|
||||
}
|
||||
|
||||
|
@ -730,7 +735,12 @@ macro_rules! ele_parse {
|
|||
};
|
||||
|
||||
match (self, tok) {
|
||||
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
|
||||
// Depth check is unnecessary since _all_ xir::parse
|
||||
// parsers
|
||||
// (at least at the time of writing)
|
||||
// ignore whitespace,
|
||||
// so may as well return early.
|
||||
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
|
||||
Transition(st).incomplete()
|
||||
}
|
||||
|
||||
|
|
|
@ -489,10 +489,10 @@ fn whitespace_ignored_between_elements() {
|
|||
}
|
||||
}
|
||||
|
||||
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
|
||||
" ".unwrap_into(),
|
||||
S1,
|
||||
))));
|
||||
let tok_ws = XirfToken::Text(
|
||||
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
|
||||
Depth(0),
|
||||
);
|
||||
|
||||
let toks = vec![
|
||||
// Whitespace before start tag.
|
||||
|
@ -702,10 +702,10 @@ fn child_error_and_recovery_at_close() {
|
|||
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
|
||||
// Let's mix it up a bit with some text and make sure that is
|
||||
// ignored too.
|
||||
XirfToken::Text(RefinedText::Unrefined(Text(
|
||||
"unexpected text".unwrap_into(),
|
||||
S5,
|
||||
))),
|
||||
XirfToken::Text(
|
||||
RefinedText::Unrefined(Text("unexpected text".unwrap_into(), S5)),
|
||||
Depth(1),
|
||||
),
|
||||
// Having recovered from the above tokens,
|
||||
// this will end parsing for `Sut` as expected.
|
||||
XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
|
||||
|
@ -871,10 +871,10 @@ fn sum_nonterminal_accepts_whitespace() {
|
|||
use Parsed::*;
|
||||
use XirfToken::{Close, Open};
|
||||
|
||||
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
|
||||
" ".unwrap_into(),
|
||||
S1,
|
||||
))));
|
||||
let tok_ws = XirfToken::Text(
|
||||
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
|
||||
Depth(0),
|
||||
);
|
||||
|
||||
// Try each in turn with a fresh instance of `Sut`.
|
||||
let toks = vec![
|
||||
|
|
Loading…
Reference in New Issue