tamer: xir::Token::span: New method
This permits retrieving a Span from any Token variant. To support this, rather than having the method return an Option, Token::AttrEnd was augmented with a Span; this results in a much simpler and friendlier API.
DEV-11268
main
parent
77c18d0615
commit
325c3167ee
|
@ -56,7 +56,7 @@ fn parses_package_attrs() {
|
|||
Token::AttrValue(raw::L_TRUE, DS),
|
||||
Token::AttrName(("preproc", "elig-class-yields").unwrap_into(), DS),
|
||||
Token::AttrValue(elig, DS),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DS),
|
||||
]
|
||||
.into_iter(),
|
||||
);
|
||||
|
@ -84,7 +84,7 @@ fn parses_package_attrs_with_ns_prefix() {
|
|||
Token::Open(("lv", "package").unwrap_into(), DS),
|
||||
Token::AttrName("name".unwrap_into(), DS),
|
||||
Token::AttrValue(name, DS),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DS),
|
||||
]
|
||||
.into_iter(),
|
||||
);
|
||||
|
|
|
@ -521,7 +521,15 @@ pub enum Token {
|
|||
/// but _writers must ignore it and not require it to be present_,
|
||||
/// allowing for the reduction of token counts for generated XIR in
|
||||
/// situations where we know that it will not be further parsed.
|
||||
AttrEnd,
|
||||
///
|
||||
/// The [`Span`] ought to be the final byte of the preceding attribute,
|
||||
/// and is required only so that we can guarantee an API that can
|
||||
/// produce a [`Span`] for any given [`Token`].
|
||||
/// The span position cannot be after the preceding attribute because,
|
||||
/// if attributes are parsed in isolation,
|
||||
/// the following byte is outside of the context that we are permitted
|
||||
/// to parse.
|
||||
AttrEnd(Span),
|
||||
|
||||
/// Comment node.
|
||||
Comment(SymbolId, Span),
|
||||
|
@ -567,7 +575,7 @@ impl Display for Token {
|
|||
Self::AttrValueFragment(attr_val, span) => {
|
||||
write!(f, "attribute value fragment `{}` at {}", attr_val, span)
|
||||
}
|
||||
Self::AttrEnd => write!(f, "end of attributes"),
|
||||
Self::AttrEnd(span) => write!(f, "end of attributes at {}", span),
|
||||
// TODO: Safe truncated comment.
|
||||
Self::Comment(_, span) => write!(f, "comment at {}", span),
|
||||
// TODO: Safe truncated text.
|
||||
|
@ -579,6 +587,34 @@ impl Display for Token {
|
|||
}
|
||||
}
|
||||
|
||||
impl Token {
|
||||
/// Retrieve the [`Span`] associated with a given [`Token`].
|
||||
///
|
||||
/// Every token has an associated span.
/// The span is copied out of the borrowed token,
///   so the token itself is left untouched.
|
||||
pub fn span(&self) -> Span {
|
||||
use Token::*;
|
||||
|
||||
match self {
|
||||
// Each variant binds its span in a single or-pattern.
// There is deliberately no wildcard arm,
//   so a variant missing from this list is a compile-time error
//   rather than a silent fallback.
Open(_, span)
|
||||
| Close(_, span)
|
||||
| AttrName(_, span)
|
||||
| AttrValue(_, span)
|
||||
| AttrValueFragment(_, span)
|
||||
| AttrEnd(span)
|
||||
| Comment(_, span)
|
||||
| Text(_, span)
|
||||
| CData(_, span)
|
||||
| Whitespace(_, span) => *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Token> for Span {
|
||||
/// Consume the token and return the span that [`Token::span`] reports
///   for it.
fn from(tok: Token) -> Self {
|
||||
tok.span()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -229,7 +229,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
|
|||
// Indicate the end of attributes even if no attributes were output.
|
||||
// This allows for a reliable delimiter that can be used without
|
||||
// lookahead for streaming attribute parsing.
|
||||
tokbuf.push_front(Token::AttrEnd);
|
||||
tokbuf.push_front(Token::AttrEnd(DUMMY_SPAN));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ fn empty_node_without_prefix_or_attributes() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("empty-node".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -115,7 +115,7 @@ fn does_not_resolve_xmlns() {
|
|||
// Since we didn't parse @xmlns, it's still an attribute.
|
||||
Token::AttrName("xmlns".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -135,7 +135,7 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
|
|||
Token::Open(("x", "empty-node").unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrName(("xmlns", "x").unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -174,7 +174,7 @@ fn multiple_attrs_ordered() {
|
|||
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrName(("b", "baz").unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue("c:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -196,7 +196,7 @@ fn permits_duplicate_attrs() {
|
|||
Token::AttrValue("a:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
],
|
||||
);
|
||||
|
@ -212,9 +212,9 @@ fn child_node_self_closing() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -231,12 +231,12 @@ fn sibling_nodes() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -253,11 +253,11 @@ fn child_node_with_attrs() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrValue("bar:UNESC".intern(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -274,7 +274,7 @@ fn child_text() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Text("foo bar:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -291,10 +291,10 @@ fn mixed_child_content() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("text".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Text("foo:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("em".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Text("bar:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
|
||||
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -322,10 +322,10 @@ fn mixed_child_content_with_newlines() {
|
|||
vec![
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Text("\n :UNESC".into(), DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -345,7 +345,7 @@ fn comment() {
|
|||
vec![
|
||||
Token::Comment("root".into(), DUMMY_SPAN),
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Comment("<child>".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
@ -367,7 +367,7 @@ lines-->
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("mult".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Comment("comment\non multiple\nlines".into(), DUMMY_SPAN),
|
||||
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
|
||||
Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
|
||||
|
@ -386,9 +386,9 @@ fn permits_mismatched_tags() {
|
|||
result.expect("parsing failed"),
|
||||
vec![
|
||||
Token::Open("root".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Open("child".unwrap_into(), DUMMY_SPAN),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(DUMMY_SPAN),
|
||||
Token::Close(None, DUMMY_SPAN),
|
||||
Token::Close(Some("mismatch".unwrap_into()), DUMMY_SPAN),
|
||||
],
|
||||
|
|
|
@ -741,7 +741,7 @@ impl ParserState {
|
|||
Token::Close(name, span) => stack.close_element(name, span),
|
||||
Token::AttrName(name, span) => stack.open_attr(name, span),
|
||||
Token::AttrValue(value, span) => stack.close_attr(value, span),
|
||||
Token::AttrEnd => stack.end_attrs(),
|
||||
Token::AttrEnd(_) => stack.end_attrs(),
|
||||
Token::Text(value, span) => stack.text(value, span),
|
||||
|
||||
// This parse is being rewritten, so we'll address this with a
|
||||
|
|
|
@ -195,8 +195,12 @@ mod test {
|
|||
|
||||
// But we provide something else unexpected.
|
||||
assert_eq!(
|
||||
sut.parse_token(Token::AttrEnd),
|
||||
Err(AttrParseError::AttrValueExpected(attr, *S, Token::AttrEnd))
|
||||
sut.parse_token(Token::AttrEnd(*S2)),
|
||||
Err(AttrParseError::AttrValueExpected(
|
||||
attr,
|
||||
*S,
|
||||
Token::AttrEnd(*S2)
|
||||
))
|
||||
);
|
||||
|
||||
// We should not be in an accepting state,
|
||||
|
|
|
@ -282,7 +282,7 @@ pub mod test {
|
|||
use std::assert_matches::assert_matches;
|
||||
|
||||
use super::*;
|
||||
use crate::span::DUMMY_SPAN;
|
||||
use crate::span::DUMMY_SPAN as DS;
|
||||
|
||||
/// Preferred [`TokenStreamParser`].
|
||||
///
|
||||
|
@ -307,7 +307,7 @@ pub mod test {
|
|||
|
||||
fn parse_token(&mut self, tok: Token) -> TokenStreamStateResult<Self> {
|
||||
match tok {
|
||||
Token::AttrEnd => {
|
||||
Token::AttrEnd(..) => {
|
||||
*self = Self::Done;
|
||||
}
|
||||
Token::Close(..) => {
|
||||
|
@ -346,13 +346,13 @@ pub mod test {
|
|||
#[test]
|
||||
fn permits_end_of_stream_in_accepting_state() {
|
||||
// EchoState is placed into a Done state given AttrEnd.
|
||||
let mut toks = [Token::AttrEnd].into_iter();
|
||||
let mut toks = [Token::AttrEnd(DS)].into_iter();
|
||||
|
||||
let mut sut = Sut::from(&mut toks);
|
||||
|
||||
// The first token should be processed normally.
|
||||
// EchoState proxies the token back.
|
||||
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd))), sut.next());
|
||||
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
|
||||
|
||||
// This is now the end of the token stream,
|
||||
// which should be okay provided that the first token put us into
|
||||
|
@ -380,7 +380,7 @@ pub mod test {
|
|||
#[test]
|
||||
fn returns_state_specific_error() {
|
||||
// Token::Close causes EchoState to produce an error.
|
||||
let errtok = Token::Close(None, DUMMY_SPAN);
|
||||
let errtok = Token::Close(None, DS);
|
||||
let mut toks = [errtok.clone()].into_iter();
|
||||
|
||||
let mut sut = Sut::from(&mut toks);
|
||||
|
@ -403,7 +403,7 @@ pub mod test {
|
|||
fn fails_when_parser_is_finalized_in_non_accepting_state() {
|
||||
// Set up so that we have a single token that we can use for
|
||||
// recovery as part of the same iterator.
|
||||
let mut toks = [Token::AttrEnd].into_iter();
|
||||
let mut toks = [Token::AttrEnd(DS)].into_iter();
|
||||
|
||||
let sut = Sut::from(&mut toks);
|
||||
|
||||
|
@ -421,7 +421,7 @@ pub mod test {
|
|||
// `toks` above is set up already for this,
|
||||
// which allows us to assert that we received back the same `sut`.
|
||||
let mut sut = result.unwrap_err().0;
|
||||
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd))), sut.next());
|
||||
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
|
||||
|
||||
// And so we should now be in an accepting state,
|
||||
// able to finalize.
|
||||
|
|
|
@ -431,7 +431,7 @@ fn parse_attrs_isolated() {
|
|||
Token::AttrValue(val1, *S2),
|
||||
Token::AttrName(attr2, *S2),
|
||||
Token::AttrValue(val2, *S3),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(*S3),
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
|
@ -468,7 +468,7 @@ fn parser_attr_multiple() {
|
|||
Token::AttrValue(val1, *S2),
|
||||
Token::AttrName(attr2, *S2),
|
||||
Token::AttrValue(val2, *S3),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(*S3),
|
||||
// Token that we should _not_ hit.
|
||||
Token::Text("nohit".into(), *S),
|
||||
]
|
||||
|
|
|
@ -256,7 +256,7 @@ impl<S: Escaper> XmlWriter<S> for Token {
|
|||
}
|
||||
|
||||
// AttrEnd is ignored by the writer (and is optional).
|
||||
(Self::AttrEnd, x) => Ok(x),
|
||||
(Self::AttrEnd(..), x) => Ok(x),
|
||||
|
||||
// TODO: We have no way of knowing if text should be formatted
|
||||
// as CData,
|
||||
|
@ -501,7 +501,7 @@ mod test {
|
|||
// just ignore it entirely.
|
||||
#[test]
|
||||
fn ignores_attr_end() -> TestResult {
|
||||
let result = Token::AttrEnd
|
||||
let result = Token::AttrEnd(*S)
|
||||
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
|
||||
assert_eq!(result.0, b"");
|
||||
assert_eq!(result.1, WriterState::NodeOpen);
|
||||
|
@ -573,7 +573,7 @@ mod test {
|
|||
Token::Open(root, *S),
|
||||
Token::AttrName(("an", "attr").try_into()?, *S),
|
||||
Token::AttrValue("value".intern(), *S),
|
||||
Token::AttrEnd,
|
||||
Token::AttrEnd(*S),
|
||||
Token::Text("text".intern(), *S),
|
||||
Token::Open(("c", "child").try_into()?, *S),
|
||||
Token::Whitespace(" ".try_into()?, *S),
|
||||
|
|
Loading…
Reference in New Issue