tamer: xir::flat: Expose depth for all node-related tokens
Previously a `Depth` was provided only for `Open` and `Close`. This depth information, for example, will be used by NIR to quickly determine whether a given parser ought to assert ownership of a text/comment token rather than delegating it. This involved modifying a number of test cases, but it's worth repeating in these commits that this is intentional---I've been bitten in the past by using `..` in contexts where I really do want to know if variant fields change so that I can consider whether and how that change may affect the code utilizing that variant. DEV-7145
main
parent
b3c0bdc786
commit
8779abe2bb
|
@ -775,7 +775,7 @@ impl ParseState for FragmentsState {
|
||||||
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
|
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
|
||||||
.err(XmloError::UnassociatedFragment(span)),
|
.err(XmloError::UnassociatedFragment(span)),
|
||||||
|
|
||||||
(Fragment(span, id), Xirf::Text(Text(text, _))) => {
|
(Fragment(span, id), Xirf::Text(Text(text, _), _)) => {
|
||||||
Transition(FragmentDone(span, id))
|
Transition(FragmentDone(span, id))
|
||||||
.ok(XmloToken::Fragment(id, text, span))
|
.ok(XmloToken::Fragment(id, text, span))
|
||||||
}
|
}
|
||||||
|
|
|
@ -531,12 +531,12 @@ fn sym_fragment_event() {
|
||||||
// first
|
// first
|
||||||
open(QN_FRAGMENT, S1, Depth(0)),
|
open(QN_FRAGMENT, S1, Depth(0)),
|
||||||
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
|
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
|
||||||
Xirf::Text(Text(frag1, S4)),
|
Xirf::Text(Text(frag1, S4), Depth(1)),
|
||||||
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
||||||
// second
|
// second
|
||||||
open(QN_FRAGMENT, S2, Depth(0)),
|
open(QN_FRAGMENT, S2, Depth(0)),
|
||||||
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
|
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
|
||||||
Xirf::Text(Text(frag2, S5)),
|
Xirf::Text(Text(frag2, S5), Depth(1)),
|
||||||
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
close(Some(QN_FRAGMENT), S5, Depth(0)),
|
||||||
]
|
]
|
||||||
.into_iter();
|
.into_iter();
|
||||||
|
@ -561,7 +561,7 @@ fn sym_fragment_missing_id() {
|
||||||
let toks = [
|
let toks = [
|
||||||
open(QN_FRAGMENT, S1, Depth(0)),
|
open(QN_FRAGMENT, S1, Depth(0)),
|
||||||
// missing @id
|
// missing @id
|
||||||
Xirf::Text(Text("text".into(), S4)),
|
Xirf::Text(Text("text".into(), S4), Depth(1)),
|
||||||
]
|
]
|
||||||
.into_iter();
|
.into_iter();
|
||||||
|
|
||||||
|
@ -579,7 +579,7 @@ fn sym_fragment_empty_id() {
|
||||||
open(QN_FRAGMENT, S1, Depth(0)),
|
open(QN_FRAGMENT, S1, Depth(0)),
|
||||||
// empty @id
|
// empty @id
|
||||||
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
|
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
|
||||||
Xirf::Text(Text("text".into(), S4)),
|
Xirf::Text(Text("text".into(), S4), Depth(1)),
|
||||||
]
|
]
|
||||||
.into_iter();
|
.into_iter();
|
||||||
|
|
||||||
|
@ -649,7 +649,7 @@ fn xmlo_composite_parsers_header() {
|
||||||
// <preproc:fragment
|
// <preproc:fragment
|
||||||
open(QN_FRAGMENT, S4, Depth(2)),
|
open(QN_FRAGMENT, S4, Depth(2)),
|
||||||
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
|
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
|
||||||
Xirf::Text(Text(frag, S5)),
|
Xirf::Text(Text(frag, S5), Depth(3)),
|
||||||
close(Some(QN_FRAGMENT), S4, Depth(2)),
|
close(Some(QN_FRAGMENT), S4, Depth(2)),
|
||||||
// </preproc:fragment>
|
// </preproc:fragment>
|
||||||
close(Some(QN_FRAGMENTS), S3, Depth(1)),
|
close(Some(QN_FRAGMENTS), S3, Depth(1)),
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
//!
|
//!
|
||||||
//! 1. All closing tags must correspond to a matching opening tag at the
|
//! 1. All closing tags must correspond to a matching opening tag at the
|
||||||
//! same depth;
|
//! same depth;
|
||||||
//! 2. [`XirfToken`] exposes the [`Depth`] of each opening/closing tag;
|
//! 2. [`XirfToken`] exposes the [`Depth`] of each node-related token;
|
||||||
//! 3. Attribute tokens are parsed into [`Attr`] objects;
|
//! 3. Attribute tokens are parsed into [`Attr`] objects;
|
||||||
//! 4. Documents must begin with an element and end with the closing of
|
//! 4. Documents must begin with an element and end with the closing of
|
||||||
//! that element;
|
//! that element;
|
||||||
|
@ -80,6 +80,11 @@ impl Display for Depth {
|
||||||
/// Other objects retain the same format as their underlying token,
|
/// Other objects retain the same format as their underlying token,
|
||||||
/// but are still validated to ensure that they are well-formed and that
|
/// but are still validated to ensure that they are well-formed and that
|
||||||
/// the XML is well-structured.
|
/// the XML is well-structured.
|
||||||
|
///
|
||||||
|
/// Each token representing a child node contains a numeric [`Depth`]
|
||||||
|
/// indicating the nesting depth;
|
||||||
|
/// this can be used by downstream parsers to avoid maintaining their
|
||||||
|
/// own stack in certain cases.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum XirfToken<T: TextType> {
|
pub enum XirfToken<T: TextType> {
|
||||||
/// Opening tag of an element.
|
/// Opening tag of an element.
|
||||||
|
@ -101,12 +106,12 @@ pub enum XirfToken<T: TextType> {
|
||||||
Attr(Attr),
|
Attr(Attr),
|
||||||
|
|
||||||
/// Comment node.
|
/// Comment node.
|
||||||
Comment(SymbolId, Span),
|
Comment(SymbolId, Span, Depth),
|
||||||
|
|
||||||
/// Character data as part of an element.
|
/// Character data as part of an element.
|
||||||
///
|
///
|
||||||
/// See also [`CData`](XirfToken::CData) variant.
|
/// See also [`CData`](XirfToken::CData) variant.
|
||||||
Text(T),
|
Text(T, Depth),
|
||||||
|
|
||||||
/// CData node (`<![CDATA[...]]>`).
|
/// CData node (`<![CDATA[...]]>`).
|
||||||
///
|
///
|
||||||
|
@ -115,7 +120,7 @@ pub enum XirfToken<T: TextType> {
|
||||||
/// This is intended for reading existing XML data where CData is
|
/// This is intended for reading existing XML data where CData is
|
||||||
/// already present,
|
/// already present,
|
||||||
/// not for producing new CData safely!
|
/// not for producing new CData safely!
|
||||||
CData(SymbolId, Span),
|
CData(SymbolId, Span, Depth),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: TextType> Token for XirfToken<T> {
|
impl<T: TextType> Token for XirfToken<T> {
|
||||||
|
@ -129,10 +134,10 @@ impl<T: TextType> Token for XirfToken<T> {
|
||||||
match self {
|
match self {
|
||||||
Open(_, OpenSpan(span, _), _)
|
Open(_, OpenSpan(span, _), _)
|
||||||
| Close(_, CloseSpan(span, _), _)
|
| Close(_, CloseSpan(span, _), _)
|
||||||
| Comment(_, span)
|
| Comment(_, span, _)
|
||||||
| CData(_, span) => *span,
|
| CData(_, span, _) => *span,
|
||||||
|
|
||||||
Text(text) => text.span(),
|
Text(text, _) => text.span(),
|
||||||
Attr(attr) => attr.span(),
|
Attr(attr) => attr.span(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -152,11 +157,13 @@ impl<T: TextType> Display for XirfToken<T> {
|
||||||
Display::fmt(&XirToken::Close(*oqname, *span), f)
|
Display::fmt(&XirToken::Close(*oqname, *span), f)
|
||||||
}
|
}
|
||||||
Attr(attr) => Display::fmt(&attr, f),
|
Attr(attr) => Display::fmt(&attr, f),
|
||||||
Comment(sym, span) => {
|
Comment(sym, span, _) => {
|
||||||
Display::fmt(&XirToken::Comment(*sym, *span), f)
|
Display::fmt(&XirToken::Comment(*sym, *span), f)
|
||||||
}
|
}
|
||||||
Text(text) => Display::fmt(text, f),
|
Text(text, _) => Display::fmt(text, f),
|
||||||
CData(sym, span) => Display::fmt(&XirToken::CData(*sym, *span), f),
|
CData(sym, span, _) => {
|
||||||
|
Display::fmt(&XirToken::CData(*sym, *span), f)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -364,7 +371,8 @@ where
|
||||||
match (self, tok) {
|
match (self, tok) {
|
||||||
// Comments are permitted before and after the first root element.
|
// Comments are permitted before and after the first root element.
|
||||||
(st @ (PreRoot(_) | Done), XirToken::Comment(sym, span)) => {
|
(st @ (PreRoot(_) | Done), XirToken::Comment(sym, span)) => {
|
||||||
Transition(st).ok(XirfToken::Comment(sym, span))
|
let depth = Depth(stack.len());
|
||||||
|
Transition(st).ok(XirfToken::Comment(sym, span, depth))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore whitespace before or after root.
|
// Ignore whitespace before or after root.
|
||||||
|
@ -438,6 +446,8 @@ where
|
||||||
) -> TransitionResult<Self> {
|
) -> TransitionResult<Self> {
|
||||||
use XirToXirf::{AttrExpected, Done, NodeExpected};
|
use XirToXirf::{AttrExpected, Done, NodeExpected};
|
||||||
|
|
||||||
|
let depth = Depth(stack.len());
|
||||||
|
|
||||||
match tok {
|
match tok {
|
||||||
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
|
XirToken::Open(qname, span) if stack.len() == MAX_DEPTH => {
|
||||||
Transition(NodeExpected).err(XirToXirfError::MaxDepthExceeded {
|
Transition(NodeExpected).err(XirToXirfError::MaxDepthExceeded {
|
||||||
|
@ -447,15 +457,11 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
XirToken::Open(qname, span) => {
|
XirToken::Open(qname, span) => {
|
||||||
let depth = stack.len();
|
|
||||||
stack.push((qname, span.tag_span()));
|
stack.push((qname, span.tag_span()));
|
||||||
|
|
||||||
// Delegate to the attribute parser until it is complete.
|
// Delegate to the attribute parser until it is complete.
|
||||||
Transition(AttrExpected(SA::default())).ok(XirfToken::Open(
|
Transition(AttrExpected(SA::default()))
|
||||||
qname,
|
.ok(XirfToken::Open(qname, span, depth))
|
||||||
span,
|
|
||||||
Depth(depth),
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
XirToken::Close(close_oqname, close_span) => {
|
XirToken::Close(close_oqname, close_span) => {
|
||||||
|
@ -490,13 +496,14 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
XirToken::Comment(sym, span) => {
|
XirToken::Comment(sym, span) => Transition(NodeExpected)
|
||||||
Transition(NodeExpected).ok(XirfToken::Comment(sym, span))
|
.ok(XirfToken::Comment(sym, span, depth)),
|
||||||
}
|
|
||||||
XirToken::Text(sym, span) => Transition(NodeExpected)
|
XirToken::Text(sym, span) => Transition(NodeExpected)
|
||||||
.ok(XirfToken::Text(T::from(Text(sym, span)))),
|
.ok(XirfToken::Text(T::from(Text(sym, span)), depth)),
|
||||||
|
|
||||||
XirToken::CData(sym, span) => {
|
XirToken::CData(sym, span) => {
|
||||||
Transition(NodeExpected).ok(XirfToken::CData(sym, span))
|
Transition(NodeExpected).ok(XirfToken::CData(sym, span, depth))
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should transition to `State::Attr` before encountering any
|
// We should transition to `State::Attr` before encountering any
|
||||||
|
|
|
@ -393,7 +393,7 @@ fn element_with_text() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
Parsed::Object(open(parent, S1, Depth(0))),
|
Parsed::Object(open(parent, S1, Depth(0))),
|
||||||
Parsed::Object(XirfToken::Text(Text(text, S2))),
|
Parsed::Object(XirfToken::Text(Text(text, S2), Depth(1))),
|
||||||
Parsed::Object(close(Some(parent), S3, Depth(0))),
|
Parsed::Object(close(Some(parent), S3, Depth(0))),
|
||||||
]),
|
]),
|
||||||
sut.collect(),
|
sut.collect(),
|
||||||
|
@ -435,10 +435,10 @@ fn comment_before_or_after_root_ok() {
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
Parsed::Object(XirfToken::Comment(cstart, S1)),
|
Parsed::Object(XirfToken::Comment(cstart, S1, Depth(0))),
|
||||||
Parsed::Object(open(name, S2, Depth(0))),
|
Parsed::Object(open(name, S2, Depth(0))),
|
||||||
Parsed::Object(close_empty(S3, Depth(0))),
|
Parsed::Object(close_empty(S3, Depth(0))),
|
||||||
Parsed::Object(XirfToken::Comment(cend, S4)),
|
Parsed::Object(XirfToken::Comment(cend, S4, Depth(0))),
|
||||||
]),
|
]),
|
||||||
sut.collect(),
|
sut.collect(),
|
||||||
);
|
);
|
||||||
|
@ -541,18 +541,19 @@ fn whitespace_refinement() {
|
||||||
let _ = sut.next(); // discard root
|
let _ = sut.next(); // discard root
|
||||||
|
|
||||||
match sut.next().unwrap().unwrap() {
|
match sut.next().unwrap().unwrap() {
|
||||||
Parsed::Object(XirfToken::Text(RefinedText::Whitespace(
|
Parsed::Object(XirfToken::Text(
|
||||||
Whitespace(Text(ws, span)),
|
RefinedText::Whitespace(Whitespace(Text(ws, span))),
|
||||||
))) => {
|
Depth(1),
|
||||||
|
)) => {
|
||||||
assert_eq!(ws, given);
|
assert_eq!(ws, given);
|
||||||
assert_eq!(span, S1);
|
assert_eq!(span, S1);
|
||||||
assert!(expected == true)
|
assert!(expected == true)
|
||||||
}
|
}
|
||||||
|
|
||||||
Parsed::Object(XirfToken::Text(RefinedText::Unrefined(Text(
|
Parsed::Object(XirfToken::Text(
|
||||||
text,
|
RefinedText::Unrefined(Text(text, span)),
|
||||||
span,
|
Depth(1),
|
||||||
)))) => {
|
)) => {
|
||||||
assert_eq!(text, given);
|
assert_eq!(text, given);
|
||||||
assert_eq!(span, S1);
|
assert_eq!(span, S1);
|
||||||
assert!(expected == false)
|
assert!(expected == false)
|
||||||
|
|
|
@ -475,7 +475,12 @@ macro_rules! ele_parse {
|
||||||
Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
|
Transition(RecoverEleIgnoreClosed_(qname, span)).incomplete()
|
||||||
},
|
},
|
||||||
|
|
||||||
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
|
// Depth check is unnecessary since _all_ xir::parse
|
||||||
|
// parsers
|
||||||
|
// (at least at the time of writing)
|
||||||
|
// ignore whitespace,
|
||||||
|
// so may as well return early.
|
||||||
|
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
|
||||||
Transition(st).incomplete()
|
Transition(st).incomplete()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -730,7 +735,12 @@ macro_rules! ele_parse {
|
||||||
};
|
};
|
||||||
|
|
||||||
match (self, tok) {
|
match (self, tok) {
|
||||||
(st, XirfToken::Text(RefinedText::Whitespace(..))) => {
|
// Depth check is unnecessary since _all_ xir::parse
|
||||||
|
// parsers
|
||||||
|
// (at least at the time of writing)
|
||||||
|
// ignore whitespace,
|
||||||
|
// so may as well return early.
|
||||||
|
(st, XirfToken::Text(RefinedText::Whitespace(..), _)) => {
|
||||||
Transition(st).incomplete()
|
Transition(st).incomplete()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -489,10 +489,10 @@ fn whitespace_ignored_between_elements() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
|
let tok_ws = XirfToken::Text(
|
||||||
" ".unwrap_into(),
|
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
|
||||||
S1,
|
Depth(0),
|
||||||
))));
|
);
|
||||||
|
|
||||||
let toks = vec![
|
let toks = vec![
|
||||||
// Whitespace before start tag.
|
// Whitespace before start tag.
|
||||||
|
@ -702,10 +702,10 @@ fn child_error_and_recovery_at_close() {
|
||||||
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
|
XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)),
|
||||||
// Let's mix it up a bit with some text and make sure that is
|
// Let's mix it up a bit with some text and make sure that is
|
||||||
// ignored too.
|
// ignored too.
|
||||||
XirfToken::Text(RefinedText::Unrefined(Text(
|
XirfToken::Text(
|
||||||
"unexpected text".unwrap_into(),
|
RefinedText::Unrefined(Text("unexpected text".unwrap_into(), S5)),
|
||||||
S5,
|
Depth(1),
|
||||||
))),
|
),
|
||||||
// Having recovered from the above tokens,
|
// Having recovered from the above tokens,
|
||||||
// this will end parsing for `Sut` as expected.
|
// this will end parsing for `Sut` as expected.
|
||||||
XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
|
XirfToken::Close(Some(QN_PACKAGE), CloseSpan(S6, N), Depth(0)),
|
||||||
|
@ -871,10 +871,10 @@ fn sum_nonterminal_accepts_whitespace() {
|
||||||
use Parsed::*;
|
use Parsed::*;
|
||||||
use XirfToken::{Close, Open};
|
use XirfToken::{Close, Open};
|
||||||
|
|
||||||
let tok_ws = XirfToken::Text(RefinedText::Whitespace(Whitespace(Text(
|
let tok_ws = XirfToken::Text(
|
||||||
" ".unwrap_into(),
|
RefinedText::Whitespace(Whitespace(Text(" ".unwrap_into(), S1))),
|
||||||
S1,
|
Depth(0),
|
||||||
))));
|
);
|
||||||
|
|
||||||
// Try each in turn with a fresh instance of `Sut`.
|
// Try each in turn with a fresh instance of `Sut`.
|
||||||
let toks = vec![
|
let toks = vec![
|
||||||
|
|
Loading…
Reference in New Issue