From bba181f573de49a08e6312731697e163f09fc20e Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Mon, 6 Jun 2022 11:05:41 -0400 Subject: [PATCH] tamer: xir::attr::Attr: Introduce AttrSpan This replaces a tuple with a tuple struct that allows for calculating more complete span information, such as the span encompassing the entire attribute and the value span including the surrounding quotes. This includes logic that ought to be abstracted into `Span` itself, and it's not as formal as I'd like it to be (e.g. not ensuring context), but this is a good starting point. Note that parsers call `Token::span`, which in turn calculates the attribute span, each time an attribute is encountered during lowering. But Rust does a good job at optimizing away unnecessary operations, so this didn't have an observable impact on time. DEV-7145 --- tamer/src/obj/xmlo/reader.rs | 11 ++- tamer/src/obj/xmlo/reader/test.rs | 48 +++++----- tamer/src/span.rs | 8 +- tamer/src/xir/attr.rs | 140 ++++++++++++++++++++++++++---- 4 files changed, 163 insertions(+), 44 deletions(-) diff --git a/tamer/src/obj/xmlo/reader.rs b/tamer/src/obj/xmlo/reader.rs index 3d504602..f89468f8 100644 --- a/tamer/src/obj/xmlo/reader.rs +++ b/tamer/src/obj/xmlo/reader.rs @@ -29,7 +29,12 @@ use crate::{ }, span::Span, sym::{st::raw, SymbolId}, - xir::{attr::Attr, flat::XirfToken as Xirf, st::qname::*, QName}, + xir::{ + attr::{Attr, AttrSpan}, + flat::XirfToken as Xirf, + st::qname::*, + QName, + }, }; /// `xmlo` reader events. @@ -345,7 +350,7 @@ impl ParseState for SymtableState { ( Sym(span_sym, name, mut attrs), - Xirf::Attr(Attr(key, value, (_, span_attrval))), + Xirf::Attr(Attr(key, value, AttrSpan(_, span_attrval))), ) => Self::parse_sym_attr(&mut attrs, key, value, span_attrval) .transition(Sym(span_sym, name, attrs)), @@ -623,7 +628,7 @@ impl ParseState for SymDepsState { ( SymRefUnnamed(span, name, span_ref), - Xirf::Attr(Attr(QN_NAME, ref_name, (_, span_ref_name))), + Xirf::Attr(Attr(QN_NAME, ref_name, AttrSpan(_, span_ref_name))), ) => Transition(SymRefDone(span, name, span_ref)) .ok(XmloToken::Symbol(ref_name, span_ref_name)), diff --git a/tamer/src/obj/xmlo/reader/test.rs b/tamer/src/obj/xmlo/reader/test.rs index 0e33ddf5..783e23b5 100644 --- a/tamer/src/obj/xmlo/reader/test.rs +++ b/tamer/src/obj/xmlo/reader/test.rs @@ -170,12 +170,12 @@ macro_rules! symtable_tests { let toks = [ Xirf::Open(QN_SYM, SSYM, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), $( Xirf::Attr(Attr( stringify!($key).unwrap_into(), $val.unwrap_into(), - (S3, SATTRVAL) + AttrSpan(S3, SATTRVAL) )), )* Xirf::Close(Some(QN_SYM), S2, Depth(0)), @@ -329,11 +329,11 @@ fn symtable_sym_generated_true() { let toks = [ Xirf::Open(QN_SYM, SSYM, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), Xirf::Attr(Attr( ("preproc", "generated").unwrap_into(), raw::L_TRUE, - (S3, S4), + AttrSpan(S3, S4), )), Xirf::Close(Some(QN_SYM), S2, Depth(0)), ] @@ -364,11 +364,11 @@ fn symtable_map_from() { let toks = [ Xirf::Open(QN_SYM, SSYM, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), - Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), + Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))), // Xirf::Open(QN_FROM, S2, Depth(1)), - Xirf::Attr(Attr(QN_NAME, map_from, (S2, S3))), + Xirf::Attr(Attr(QN_NAME, map_from, AttrSpan(S2, S3))), Xirf::Close(None, S4, Depth(1)), // /> Xirf::Close(Some(QN_SYM), S2, Depth(0)), @@ -401,8 +401,8 @@ fn symtable_map_from_missing_name() { let toks = [ Xirf::Open(QN_SYM, SSYM, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), - Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), + Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))), // Xirf::Open(QN_FROM, S2, Depth(1)), // @name missing @@ -426,16 +426,16 @@ fn symtable_map_from_multiple() { let toks = [ Xirf::Open(QN_SYM, SSYM, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), - Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), + Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))), // Xirf::Open(QN_FROM, S2, Depth(1)), - Xirf::Attr(Attr(QN_NAME, "ok".into(), (S2, S3))), + Xirf::Attr(Attr(QN_NAME, "ok".into(), AttrSpan(S2, S3))), Xirf::Close(None, S4, Depth(1)), // /> // again (err) Xirf::Open(QN_FROM, S3, Depth(1)), - Xirf::Attr(Attr(QN_NAME, "bad".into(), (S2, S3))), + Xirf::Attr(Attr(QN_NAME, "bad".into(), AttrSpan(S2, S3))), Xirf::Close(None, S4, Depth(1)), // /> Xirf::Close(Some(QN_SYM), S2, Depth(0)), @@ -457,15 +457,15 @@ fn sym_dep_event() { let toks = [ Xirf::Open(QN_SYM_DEP, S1, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), // // Xirf::Close(Some(QN_SYM_DEP), S5, Depth(0)), @@ -510,7 +510,7 @@ fn sym_ref_missing_name() { let toks = [ Xirf::Open(QN_SYM_DEP, S1, Depth(0)), - Xirf::Attr(Attr(QN_NAME, name, (S2, S3))), + Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))), Xirf::Open(QN_SYM_REF, S2, Depth(1)), // missing @name, causes error Xirf::Close(None, S3, Depth(1)), @@ -534,12 +534,12 @@ fn sym_fragment_event() { let toks = [ // first Xirf::Open(QN_FRAGMENT, S1, Depth(0)), - Xirf::Attr(Attr(QN_ID, id1, (S2, S3))), + Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))), Xirf::Text(frag1, S4), Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)), // second Xirf::Open(QN_FRAGMENT, S2, Depth(0)), - Xirf::Attr(Attr(QN_ID, id2, (S3, S4))), + Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))), Xirf::Text(frag2, S5), Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)), ] @@ -582,7 +582,7 @@ fn sym_fragment_empty_id() { let toks = [ Xirf::Open(QN_FRAGMENT, S1, Depth(0)), // empty @id - Xirf::Attr(Attr(QN_ID, "".into(), (S3, S4))), + Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))), Xirf::Text("text".into(), S4), ] .into_iter(); @@ -602,7 +602,7 @@ fn _sym_fragment_missing_text() { let toks = [ Xirf::Open(QN_FRAGMENT, S1, Depth(0)), - Xirf::Attr(Attr(QN_ID, id, (S3, S4))), + Xirf::Attr(Attr(QN_ID, id, AttrSpan(S3, S4))), // missing text Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)), ] @@ -634,7 +634,7 @@ fn xmlo_composite_parsers_header() { Xirf::Open(QN_SYMTABLE, S2, Depth(1)), // Xirf::Close(Some(QN_SYMTABLE), S4, Depth(1)), @@ -643,7 +643,7 @@ fn xmlo_composite_parsers_header() { Xirf::Open(QN_SYM_DEPS, S2, Depth(1)), // Xirf::Close(Some(QN_SYM_DEPS), S3, Depth(1)), @@ -652,7 +652,7 @@ fn xmlo_composite_parsers_header() { Xirf::Open(QN_FRAGMENTS, S2, Depth(1)), // diff --git a/tamer/src/span.rs b/tamer/src/span.rs index 0e597be9..f4fae0de 100644 --- a/tamer/src/span.rs +++ b/tamer/src/span.rs @@ -514,8 +514,12 @@ pub struct Context(PathSymbolId); impl Context { /// Produce a [`Span`] within the given context. #[inline] - pub fn span(self, offset: SpanOffsetSize, len: SpanLenSize) -> Span { - Span::new(offset, len, self) + pub const fn span(self, offset: SpanOffsetSize, len: SpanLenSize) -> Span { + Span { + ctx: self, + offset, + len, + } } /// Attempt to produce a [`Span`] of the given length at the given diff --git a/tamer/src/xir/attr.rs b/tamer/src/xir/attr.rs index 2b8247d9..bc50da85 100644 --- a/tamer/src/xir/attr.rs +++ b/tamer/src/xir/attr.rs @@ -26,21 +26,109 @@ mod parse; use super::QName; -use crate::{parse::Token, span::Span, sym::SymbolId}; +use crate::{ + parse::Token, + span::{Span, SpanLenSize}, + sym::SymbolId, +}; use std::fmt::Display; pub use parse::{AttrParseError, AttrParseState}; /// Element attribute. -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct Attr(pub QName, pub SymbolId, pub (Span, Span)); +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Attr(pub QName, pub SymbolId, pub AttrSpan); + +/// Spans associated with attribute key and value. +/// +/// The diagram below illustrates the behavior of `AttrSpan`. +/// Note that the extra spaces surrounding the `=` are intentional to +/// illustrate what the behavior ought to be. +/// Spans are represented by `|---|` intervals, +/// with the byte offset at each end, +/// and the single-letter span name centered below the interval. +/// `+` represents intersecting `-` and `|` lines. +/// +/// ```text +/// +/// |-| |+-+| +/// 5 7 13| |17 +/// |K |Q| +/// | 14 16 +/// | V | +/// |-----------| +/// A +/// ``` +/// +/// Above we have +/// +/// - `A` = [`AttrSpan::span`]; +/// - `K` = [`AttrSpan::key_span`]; +/// - `V` = [`AttrSpan::value_span`]; and +/// - `Q` = [`AttrSpan::value_span_with_quotes`]. +/// +/// Note that this object assumes that the key and value span are adjacent +/// to one-another in the same [`span::Context`](crate::span::Context). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AttrSpan(pub Span, pub Span); + +impl AttrSpan { + /// A [`Span`] covering the entire attribute token, + /// including the key, + /// _quoted_ value, + /// and everything in-between. + pub fn span(&self) -> Span { + let AttrSpan(k, _) = self; + + // TODO: Move much of this into `Span`. + k.context().span( + k.offset(), + self.value_span_with_quotes() + .endpoints_saturated() + .1 + .offset() + .saturating_sub(k.offset()) + .try_into() + .unwrap_or(SpanLenSize::MAX), + ) + } + + /// The span associated with the name of the key. + /// + /// This does _not_ include the following `=` or any surrounding + /// whitespace. + pub fn key_span(&self) -> Span { + let AttrSpan(k, _) = self; + *k + } + + /// The span associated with the string value _inside_ the quotes, + /// not including the quotes themselves. + /// + /// See [`AttrSpan`]'s documentation for an example. + pub fn value_span(&self) -> Span { + let AttrSpan(_, v) = self; + *v + } + + /// The span associated with the string value _including_ the + /// surrounding quotes. + /// + /// See [`AttrSpan`]'s documentation for an example. + pub fn value_span_with_quotes(&self) -> Span { + let AttrSpan(_, v) = self; + + v.context() + .span(v.offset().saturating_sub(1), v.len().saturating_add(2)) + } +} impl Attr { /// Construct a new simple attribute with a name, value, and respective /// [`Span`]s. #[inline] pub fn new(name: QName, value: SymbolId, span: (Span, Span)) -> Self { - Self(name, value, span) + Self(name, value, AttrSpan(span.0, span.1)) } /// Attribute name. @@ -61,17 +149,8 @@ impl Attr { impl Token for Attr { fn span(&self) -> Span { - // TODO: This ought to represent the _entire_ token. - // However, - // this is complicated by the closing quote, - // which is not present in _either_ span, - // so we'll need to formalize that first; - // simply adding a single byte offset seems unwise, - // given that this is not responsible for producing the - // spans to begin with; - // I'd prefer help from XIR. match self { - Attr(.., (span, _)) => *span, + Attr(.., attr_span) => attr_span.span(), } } } @@ -147,4 +226,35 @@ impl From<[Attr; N]> for AttrList { } } -// See [`super::test`] for tests related to attributes. +#[cfg(test)] +mod test { + use crate::span::DUMMY_CONTEXT as DC; + + use super::*; + + // See docblock for [`AttrSpan`]. + const A: Span = DC.span(5, 13); // Entire attribute token + const K: Span = DC.span(5, 3); // Key + const V: Span = DC.span(14, 3); // Value without quotes + const Q: Span = DC.span(13, 5); // Value with quotes + + #[test] + fn attr_span_token() { + assert_eq!(AttrSpan(K, V).span(), A); + } + + #[test] + fn attr_span_value_with_quotes() { + assert_eq!(AttrSpan(K, V).value_span_with_quotes(), Q); + } + + #[test] + fn attr_span_key() { + assert_eq!(AttrSpan(K, V).key_span(), K); + } + + #[test] + fn attr_span_value() { + assert_eq!(AttrSpan(K, V).value_span(), V); + } +}