tamer: xir::attr::Attr: Introduce AttrSpan

This replaces a tuple with a tuple struct that allows for calculating more
complete span information, such as the span encompassing the entire
attribute and the value span including the surrounding quotes.

This includes logic that ought to be abstracted into `Span` itself, and it's
not as formal as I'd like it to be (e.g. not ensuring context), but this is
a good starting point.

Note that parsers call `Token::span`, which in turn calculates the attribute
span, each time an attribute is encountered during lowering.  But Rust does
a good job at optimizing away unnecessary operations, so this didn't have an
observable impact on time.

DEV-7145
main
Mike Gerwitz 2022-06-06 11:05:41 -04:00
parent 2b8e7e6031
commit bba181f573
4 changed files with 163 additions and 44 deletions

View File

@ -29,7 +29,12 @@ use crate::{
},
span::Span,
sym::{st::raw, SymbolId},
xir::{attr::Attr, flat::XirfToken as Xirf, st::qname::*, QName},
xir::{
attr::{Attr, AttrSpan},
flat::XirfToken as Xirf,
st::qname::*,
QName,
},
};
/// `xmlo` reader events.
@ -345,7 +350,7 @@ impl ParseState for SymtableState {
(
Sym(span_sym, name, mut attrs),
Xirf::Attr(Attr(key, value, (_, span_attrval))),
Xirf::Attr(Attr(key, value, AttrSpan(_, span_attrval))),
) => Self::parse_sym_attr(&mut attrs, key, value, span_attrval)
.transition(Sym(span_sym, name, attrs)),
@ -623,7 +628,7 @@ impl ParseState for SymDepsState {
(
SymRefUnnamed(span, name, span_ref),
Xirf::Attr(Attr(QN_NAME, ref_name, (_, span_ref_name))),
Xirf::Attr(Attr(QN_NAME, ref_name, AttrSpan(_, span_ref_name))),
) => Transition(SymRefDone(span, name, span_ref))
.ok(XmloToken::Symbol(ref_name, span_ref_name)),

View File

@ -170,12 +170,12 @@ macro_rules! symtable_tests {
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
$(
Xirf::Attr(Attr(
stringify!($key).unwrap_into(),
$val.unwrap_into(),
(S3, SATTRVAL)
AttrSpan(S3, SATTRVAL)
)),
)*
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
@ -329,11 +329,11 @@ fn symtable_sym_generated_true() {
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(
("preproc", "generated").unwrap_into(),
raw::L_TRUE,
(S3, S4),
AttrSpan(S3, S4),
)),
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
]
@ -364,11 +364,11 @@ fn symtable_map_from() {
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, map_from, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, map_from, AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
@ -401,8 +401,8 @@ fn symtable_map_from_missing_name() {
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
// @name missing
@ -426,16 +426,16 @@ fn symtable_map_from_multiple() {
let toks = [
Xirf::Open(QN_SYM, SSYM, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, (S3, S4))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Attr(Attr(QN_TYPE, raw::L_MAP, AttrSpan(S3, S4))),
// <preproc:from>
Xirf::Open(QN_FROM, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "ok".into(), (S2, S3))),
Xirf::Attr(Attr(QN_NAME, "ok".into(), AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
// />
// <preproc:from> again (err)
Xirf::Open(QN_FROM, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, "bad".into(), (S2, S3))),
Xirf::Attr(Attr(QN_NAME, "bad".into(), AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
@ -457,15 +457,15 @@ fn sym_dep_event() {
let toks = [
Xirf::Open(QN_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
// <preproc:sym-ref
Xirf::Open(QN_SYM_REF, S2, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep1, (S3, S4))),
Xirf::Attr(Attr(QN_NAME, dep1, AttrSpan(S3, S4))),
Xirf::Close(None, S4, Depth(1)),
// />
// <preproc:sym-ref
Xirf::Open(QN_SYM_REF, S3, Depth(1)),
Xirf::Attr(Attr(QN_NAME, dep2, (S4, S5))),
Xirf::Attr(Attr(QN_NAME, dep2, AttrSpan(S4, S5))),
Xirf::Close(None, S4, Depth(1)),
// />
Xirf::Close(Some(QN_SYM_DEP), S5, Depth(0)),
@ -510,7 +510,7 @@ fn sym_ref_missing_name() {
let toks = [
Xirf::Open(QN_SYM_DEP, S1, Depth(0)),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, name, AttrSpan(S2, S3))),
Xirf::Open(QN_SYM_REF, S2, Depth(1)),
// missing @name, causes error
Xirf::Close(None, S3, Depth(1)),
@ -534,12 +534,12 @@ fn sym_fragment_event() {
let toks = [
// first
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id1, (S2, S3))),
Xirf::Attr(Attr(QN_ID, id1, AttrSpan(S2, S3))),
Xirf::Text(frag1, S4),
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
// second
Xirf::Open(QN_FRAGMENT, S2, Depth(0)),
Xirf::Attr(Attr(QN_ID, id2, (S3, S4))),
Xirf::Attr(Attr(QN_ID, id2, AttrSpan(S3, S4))),
Xirf::Text(frag2, S5),
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
]
@ -582,7 +582,7 @@ fn sym_fragment_empty_id() {
let toks = [
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
// empty @id
Xirf::Attr(Attr(QN_ID, "".into(), (S3, S4))),
Xirf::Attr(Attr(QN_ID, "".into(), AttrSpan(S3, S4))),
Xirf::Text("text".into(), S4),
]
.into_iter();
@ -602,7 +602,7 @@ fn _sym_fragment_missing_text() {
let toks = [
Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
Xirf::Attr(Attr(QN_ID, id, (S3, S4))),
Xirf::Attr(Attr(QN_ID, id, AttrSpan(S3, S4))),
// missing text
Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
]
@ -634,7 +634,7 @@ fn xmlo_composite_parsers_header() {
Xirf::Open(QN_SYMTABLE, S2, Depth(1)),
// <preproc:sym
Xirf::Open(QN_SYM, S3, Depth(2)),
Xirf::Attr(Attr(QN_NAME, sym_name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, sym_name, AttrSpan(S2, S3))),
Xirf::Close(None, S4, Depth(2)),
// />
Xirf::Close(Some(QN_SYMTABLE), S4, Depth(1)),
@ -643,7 +643,7 @@ fn xmlo_composite_parsers_header() {
Xirf::Open(QN_SYM_DEPS, S2, Depth(1)),
// <preproc:sym-dep
Xirf::Open(QN_SYM_DEP, S3, Depth(3)),
Xirf::Attr(Attr(QN_NAME, symdep_name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, symdep_name, AttrSpan(S2, S3))),
Xirf::Close(Some(QN_SYM_DEP), S4, Depth(3)),
// </preproc:sym-dep>
Xirf::Close(Some(QN_SYM_DEPS), S3, Depth(1)),
@ -652,7 +652,7 @@ fn xmlo_composite_parsers_header() {
Xirf::Open(QN_FRAGMENTS, S2, Depth(1)),
// <preproc:fragment
Xirf::Open(QN_FRAGMENT, S4, Depth(2)),
Xirf::Attr(Attr(QN_ID, symfrag_id, (S2, S3))),
Xirf::Attr(Attr(QN_ID, symfrag_id, AttrSpan(S2, S3))),
Xirf::Text(frag, S5),
Xirf::Close(Some(QN_FRAGMENT), S4, Depth(2)),
// </preproc:fragment>

View File

@ -514,8 +514,12 @@ pub struct Context(PathSymbolId);
impl Context {
/// Produce a [`Span`] within the given context.
#[inline]
pub fn span(self, offset: SpanOffsetSize, len: SpanLenSize) -> Span {
Span::new(offset, len, self)
pub const fn span(self, offset: SpanOffsetSize, len: SpanLenSize) -> Span {
Span {
ctx: self,
offset,
len,
}
}
/// Attempt to produce a [`Span`] of the given length at the given

View File

@ -26,21 +26,109 @@
mod parse;
use super::QName;
use crate::{parse::Token, span::Span, sym::SymbolId};
use crate::{
parse::Token,
span::{Span, SpanLenSize},
sym::SymbolId,
};
use std::fmt::Display;
pub use parse::{AttrParseError, AttrParseState};
/// Element attribute.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Attr(pub QName, pub SymbolId, pub (Span, Span));
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Attr(pub QName, pub SymbolId, pub AttrSpan);
/// Spans associated with attribute key and value.
///
/// The diagram below illustrates the behavior of `AttrSpan`.
/// Note that the extra spaces surrounding the `=` are intentional to
/// illustrate what the behavior ought to be.
/// Spans are represented by `|---|` intervals,
/// with the byte offset at each end,
/// and the single-letter span name centered below the interval.
/// `+` represents intersecting `-` and `|` lines.
///
/// ```text
/// <foo bar = "baz" />
/// |-| |+-+|
/// 5 7 13| |17
/// |K |Q|
/// | 14 16
/// | V |
/// |-----------|
/// A
/// ```
///
/// Above we have
///
/// - `A` = [`AttrSpan::span`];
/// - `K` = [`AttrSpan::key_span`];
/// - `V` = [`AttrSpan::value_span`]; and
/// - `Q` = [`AttrSpan::value_span_with_quotes`].
///
/// Note that this object assumes that the key and value span are adjacent
/// to one-another in the same [`span::Context`](crate::span::Context).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttrSpan(pub Span, pub Span);
impl AttrSpan {
/// A [`Span`] covering the entire attribute token,
/// including the key,
/// _quoted_ value,
/// and everything in-between.
pub fn span(&self) -> Span {
let AttrSpan(k, _) = self;
// TODO: Move much of this into `Span`.
k.context().span(
k.offset(),
self.value_span_with_quotes()
.endpoints_saturated()
.1
.offset()
.saturating_sub(k.offset())
.try_into()
.unwrap_or(SpanLenSize::MAX),
)
}
/// The span associated with the name of the key.
///
/// This does _not_ include the following `=` or any surrounding
/// whitespace.
pub fn key_span(&self) -> Span {
let AttrSpan(k, _) = self;
*k
}
/// The span associated with the string value _inside_ the quotes,
/// not including the quotes themselves.
///
/// See [`AttrSpan`]'s documentation for an example.
pub fn value_span(&self) -> Span {
let AttrSpan(_, v) = self;
*v
}
/// The span associated with the string value _including_ the
/// surrounding quotes.
///
/// See [`AttrSpan`]'s documentation for an example.
pub fn value_span_with_quotes(&self) -> Span {
let AttrSpan(_, v) = self;
v.context()
.span(v.offset().saturating_sub(1), v.len().saturating_add(2))
}
}
impl Attr {
/// Construct a new simple attribute with a name, value, and respective
/// [`Span`]s.
#[inline]
pub fn new(name: QName, value: SymbolId, span: (Span, Span)) -> Self {
Self(name, value, span)
Self(name, value, AttrSpan(span.0, span.1))
}
/// Attribute name.
@ -61,17 +149,8 @@ impl Attr {
impl Token for Attr {
fn span(&self) -> Span {
// TODO: This ought to represent the _entire_ token.
// However,
// this is complicated by the closing quote,
// which is not present in _either_ span,
// so we'll need to formalize that first;
// simply adding a single byte offset seems unwise,
// given that this is not responsible for producing the
// spans to begin with;
// I'd prefer help from XIR.
match self {
Attr(.., (span, _)) => *span,
Attr(.., attr_span) => attr_span.span(),
}
}
}
@ -147,4 +226,35 @@ impl<const N: usize> From<[Attr; N]> for AttrList {
}
}
// See [`super::test`] for tests related to attributes.
#[cfg(test)]
mod test {
use crate::span::DUMMY_CONTEXT as DC;
use super::*;
// See docblock for [`AttrSpan`].
const A: Span = DC.span(5, 13); // Entire attribute token
const K: Span = DC.span(5, 3); // Key
const V: Span = DC.span(14, 3); // Value without quotes
const Q: Span = DC.span(13, 5); // Value with quotes
#[test]
fn attr_span_token() {
assert_eq!(AttrSpan(K, V).span(), A);
}
#[test]
fn attr_span_value_with_quotes() {
assert_eq!(AttrSpan(K, V).value_span_with_quotes(), Q);
}
#[test]
fn attr_span_key() {
assert_eq!(AttrSpan(K, V).key_span(), K);
}
#[test]
fn attr_span_value() {
assert_eq!(AttrSpan(K, V).value_span(), V);
}
}