tamer: xir::Token::span: New method

This permits retrieving a Span from any Token variant.  To support this
without having Token::span return an Option, Token::AttrEnd was augmented
with a Span; this results in a much simpler and friendlier API.

DEV-11268
main
Mike Gerwitz 2021-12-06 14:48:55 -05:00
parent 77c18d0615
commit 325c3167ee
9 changed files with 81 additions and 41 deletions

View File

@ -56,7 +56,7 @@ fn parses_package_attrs() {
Token::AttrValue(raw::L_TRUE, DS),
Token::AttrName(("preproc", "elig-class-yields").unwrap_into(), DS),
Token::AttrValue(elig, DS),
Token::AttrEnd,
Token::AttrEnd(DS),
]
.into_iter(),
);
@ -84,7 +84,7 @@ fn parses_package_attrs_with_ns_prefix() {
Token::Open(("lv", "package").unwrap_into(), DS),
Token::AttrName("name".unwrap_into(), DS),
Token::AttrValue(name, DS),
Token::AttrEnd,
Token::AttrEnd(DS),
]
.into_iter(),
);

View File

@ -521,7 +521,15 @@ pub enum Token {
/// but _writers must ignore it and not require it to be present_,
/// allowing for the reduction of token counts for generated XIR in
/// situations where we know that it will not be further parsed.
AttrEnd,
///
/// The [`Span`] ought to be the final byte of the preceding attribute,
/// and is required only so that we can guarantee an API that can
/// produce a [`Span`] for any given [`Token`].
/// The span position cannot be after the preceding attribute because,
/// if attributes are parsed in isolation,
/// the following byte is outside of the context that we are permitted
/// to parse.
AttrEnd(Span),
/// Comment node.
Comment(SymbolId, Span),
@ -567,7 +575,7 @@ impl Display for Token {
Self::AttrValueFragment(attr_val, span) => {
write!(f, "attribute value fragment `{}` at {}", attr_val, span)
}
Self::AttrEnd => write!(f, "end of attributes"),
Self::AttrEnd(span) => write!(f, "end of attributes at {}", span),
// TODO: Safe truncated comment.
Self::Comment(_, span) => write!(f, "comment at {}", span),
// TODO: Safe truncated text.
@ -579,6 +587,34 @@ impl Display for Token {
}
}
impl Token {
/// Retrieve the [`Span`] associated with a given [`Token`].
///
/// Every token has an associated span.
pub fn span(&self) -> Span {
use Token::*;
match self {
Open(_, span)
| Close(_, span)
| AttrName(_, span)
| AttrValue(_, span)
| AttrValueFragment(_, span)
| AttrEnd(span)
| Comment(_, span)
| Text(_, span)
| CData(_, span)
| Whitespace(_, span) => *span,
}
}
}
impl From<Token> for Span {
fn from(tok: Token) -> Self {
tok.span()
}
}
#[cfg(test)]
mod test {
use super::*;

View File

@ -229,7 +229,7 @@ impl<'s, B: BufRead, S: Escaper> XmlXirReader<'s, B, S> {
// Indicate the end of attributes even if no attributes were output.
// This allows for a reliable delimiter that can be used without
// lookahead for streaming attribute parsing.
tokbuf.push_front(Token::AttrEnd);
tokbuf.push_front(Token::AttrEnd(DUMMY_SPAN));
Ok(())
}

View File

@ -95,7 +95,7 @@ fn empty_node_without_prefix_or_attributes() {
result.expect("parsing failed"),
vec![
Token::Open("empty-node".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@ -115,7 +115,7 @@ fn does_not_resolve_xmlns() {
// Since we didn't parse @xmlns, it's still an attribute.
Token::AttrName("xmlns".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@ -135,7 +135,7 @@ fn empty_node_with_prefix_without_attributes_unresolved() {
Token::Open(("x", "empty-node").unwrap_into(), DUMMY_SPAN),
Token::AttrName(("xmlns", "x").unwrap_into(), DUMMY_SPAN),
Token::AttrValue("noresolve:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@ -174,7 +174,7 @@ fn multiple_attrs_ordered() {
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
Token::AttrName(("b", "baz").unwrap_into(), DUMMY_SPAN),
Token::AttrValue("c:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@ -196,7 +196,7 @@ fn permits_duplicate_attrs() {
Token::AttrValue("a:UNESC".intern(), DUMMY_SPAN),
Token::AttrName("attr".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("b:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
],
);
@ -212,9 +212,9 @@ fn child_node_self_closing() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@ -231,12 +231,12 @@ fn sibling_nodes() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@ -253,11 +253,11 @@ fn child_node_with_attrs() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrName("foo".unwrap_into(), DUMMY_SPAN),
Token::AttrValue("bar:UNESC".intern(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@ -274,7 +274,7 @@ fn child_text() {
result.expect("parsing failed"),
vec![
Token::Open("text".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Text("foo bar:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
],
@ -291,10 +291,10 @@ fn mixed_child_content() {
result.expect("parsing failed"),
vec![
Token::Open("text".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Text("foo:UNESC".into(), DUMMY_SPAN),
Token::Open("em".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Text("bar:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("em".unwrap_into()), DUMMY_SPAN),
Token::Close(Some("text".unwrap_into()), DUMMY_SPAN),
@ -322,10 +322,10 @@ fn mixed_child_content_with_newlines() {
vec![
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Text("\n :UNESC".into(), DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
@ -345,7 +345,7 @@ fn comment() {
vec![
Token::Comment("root".into(), DUMMY_SPAN),
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Comment("<child>".into(), DUMMY_SPAN),
Token::Close(Some("root".unwrap_into()), DUMMY_SPAN),
],
@ -367,7 +367,7 @@ lines-->
result.expect("parsing failed"),
vec![
Token::Open("mult".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Comment("comment\non multiple\nlines".into(), DUMMY_SPAN),
Token::Text("\n:UNESC".into(), DUMMY_SPAN),
Token::Close(Some("mult".unwrap_into()), DUMMY_SPAN),
@ -386,9 +386,9 @@ fn permits_mismatched_tags() {
result.expect("parsing failed"),
vec![
Token::Open("root".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Open("child".unwrap_into(), DUMMY_SPAN),
Token::AttrEnd,
Token::AttrEnd(DUMMY_SPAN),
Token::Close(None, DUMMY_SPAN),
Token::Close(Some("mismatch".unwrap_into()), DUMMY_SPAN),
],

View File

@ -741,7 +741,7 @@ impl ParserState {
Token::Close(name, span) => stack.close_element(name, span),
Token::AttrName(name, span) => stack.open_attr(name, span),
Token::AttrValue(value, span) => stack.close_attr(value, span),
Token::AttrEnd => stack.end_attrs(),
Token::AttrEnd(_) => stack.end_attrs(),
Token::Text(value, span) => stack.text(value, span),
// This parse is being rewritten, so we'll address this with a

View File

@ -195,8 +195,12 @@ mod test {
// But we provide something else unexpected.
assert_eq!(
sut.parse_token(Token::AttrEnd),
Err(AttrParseError::AttrValueExpected(attr, *S, Token::AttrEnd))
sut.parse_token(Token::AttrEnd(*S2)),
Err(AttrParseError::AttrValueExpected(
attr,
*S,
Token::AttrEnd(*S2)
))
);
// We should not be in an accepting state,

View File

@ -282,7 +282,7 @@ pub mod test {
use std::assert_matches::assert_matches;
use super::*;
use crate::span::DUMMY_SPAN;
use crate::span::DUMMY_SPAN as DS;
/// Preferred [`TokenStreamParser`].
///
@ -307,7 +307,7 @@ pub mod test {
fn parse_token(&mut self, tok: Token) -> TokenStreamStateResult<Self> {
match tok {
Token::AttrEnd => {
Token::AttrEnd(..) => {
*self = Self::Done;
}
Token::Close(..) => {
@ -346,13 +346,13 @@ pub mod test {
#[test]
fn permits_end_of_stream_in_accepting_state() {
// EchoState is placed into a Done state given AttrEnd.
let mut toks = [Token::AttrEnd].into_iter();
let mut toks = [Token::AttrEnd(DS)].into_iter();
let mut sut = Sut::from(&mut toks);
// The first token should be processed normally.
// EchoState proxies the token back.
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd))), sut.next());
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
// This is now the end of the token stream,
// which should be okay provided that the first token put us into
@ -380,7 +380,7 @@ pub mod test {
#[test]
fn returns_state_specific_error() {
// Token::Close causes EchoState to produce an error.
let errtok = Token::Close(None, DUMMY_SPAN);
let errtok = Token::Close(None, DS);
let mut toks = [errtok.clone()].into_iter();
let mut sut = Sut::from(&mut toks);
@ -403,7 +403,7 @@ pub mod test {
fn fails_when_parser_is_finalized_in_non_accepting_state() {
// Set up so that we have a single token that we can use for
// recovery as part of the same iterator.
let mut toks = [Token::AttrEnd].into_iter();
let mut toks = [Token::AttrEnd(DS)].into_iter();
let sut = Sut::from(&mut toks);
@ -421,7 +421,7 @@ pub mod test {
// `toks` above is set up already for this,
// which allows us to assert that we received back the same `sut`.
let mut sut = result.unwrap_err().0;
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd))), sut.next());
assert_eq!(Some(Ok(Parsed::Object(Token::AttrEnd(DS)))), sut.next());
// And so we should now be in an accepting state,
// able to finalize.

View File

@ -431,7 +431,7 @@ fn parse_attrs_isolated() {
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S2),
Token::AttrValue(val2, *S3),
Token::AttrEnd,
Token::AttrEnd(*S3),
]
.into_iter();
@ -468,7 +468,7 @@ fn parser_attr_multiple() {
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S2),
Token::AttrValue(val2, *S3),
Token::AttrEnd,
Token::AttrEnd(*S3),
// Token that we should _not_ hit.
Token::Text("nohit".into(), *S),
]

View File

@ -256,7 +256,7 @@ impl<S: Escaper> XmlWriter<S> for Token {
}
// AttrEnd is ignored by the writer (and is optional).
(Self::AttrEnd, x) => Ok(x),
(Self::AttrEnd(..), x) => Ok(x),
// TODO: We have no way of knowing if text should be formatted
// as CData,
@ -501,7 +501,7 @@ mod test {
// just ignore it entirely.
#[test]
fn ignores_attr_end() -> TestResult {
let result = Token::AttrEnd
let result = Token::AttrEnd(*S)
.write_new(WriterState::NodeOpen, &MockEscaper::default())?;
assert_eq!(result.0, b"");
assert_eq!(result.1, WriterState::NodeOpen);
@ -573,7 +573,7 @@ mod test {
Token::Open(root, *S),
Token::AttrName(("an", "attr").try_into()?, *S),
Token::AttrValue("value".intern(), *S),
Token::AttrEnd,
Token::AttrEnd(*S),
Token::Text("text".intern(), *S),
Token::Open(("c", "child").try_into()?, *S),
Token::Whitespace(" ".try_into()?, *S),