tamer: xir: Remove Attr::Extensible

This removes XIRT support for attribute fragments.  The reason is that
because this is a write-only operation---fragments are used to concatenate
SymbolIds without reallocation, which can only happen if we are generating
XIR internally.

Given that this cannot happen during read, it was a mistake to complicate
the parsers.  But it makes sense why I did originally, given that the XIRT
parser was written for simplifying test cases.  But now that we want parsers
for real, and are writing production-quality parsers, this extra complexity
is very undesirable.

As a bonus, we also avoid any potential for heap allocations related to
attributes.  Granted, they didn't _really_ exist to begin with, but it was
part of XIRT, and was ugly.

DEV-11268
main
Mike Gerwitz 2021-12-06 14:26:58 -05:00
parent 42b5007402
commit 77c18d0615
6 changed files with 102 additions and 442 deletions

View File

@ -18,16 +18,16 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::*;
use crate::convert::ExpectInto;
use crate::ld::xmle::section::PushResult;
use crate::ld::xmle::Sections;
use crate::obj::xmlo::SymDtype;
use crate::sym::{GlobalSymbolIntern, GlobalSymbolResolve};
use crate::xir::tree::merge_attr_fragments;
use crate::{
asg::{Dim, IdentKind, Source},
xir::{
pred::{not, open},
tree::{parser_from, Attr},
tree::parser_from,
},
};
use std::collections::HashSet;
@ -37,7 +37,7 @@ type TestResult = Result<(), Box<dyn std::error::Error>>;
macro_rules! assert_attr{
($attrs:ident, $name:ident, $expected:expr, $($args:expr),*) => {
assert_eq!(
$attrs.find($name).map(|a| a.value_atom()),
$attrs.find($name).map(|a| a.value()),
$expected,
$($args),*
)
@ -219,10 +219,10 @@ fn test_writes_deps() -> TestResult {
deps: objs.iter().collect(),
};
let mut iter = parser_from(
lower_iter(sections, "pkg".intern(), relroot)
.skip_while(not(open(QN_L_DEP))),
);
let mut lower_iter = lower_iter(sections, "pkg".intern(), relroot)
.skip_while(not(open(QN_L_DEP)));
let mut iter = parser_from(merge_attr_fragments(&mut lower_iter));
let given = iter
.next()
@ -254,17 +254,14 @@ fn test_writes_deps() -> TestResult {
let ident = &objs[i];
let attrs = ele.attrs().unwrap();
assert_eq!(
attrs.find(QN_NAME).map(|a| a.value_atom()),
Some(ident.name()),
);
assert_eq!(attrs.find(QN_NAME).map(|a| a.value()), Some(ident.name()),);
assert_eq!(
attrs.find(QN_TYPE).map(|a| a.value_atom()),
attrs.find(QN_TYPE).map(|a| a.value()),
Some(ident.kind().unwrap().as_sym())
);
let generated = attrs.find(QN_GENERATED).map(|a| a.value_atom());
let generated = attrs.find(QN_GENERATED).map(|a| a.value());
if let Some(Source {
generated: true, ..
@ -287,19 +284,7 @@ fn test_writes_deps() -> TestResult {
desc: Some(desc), ..
}) = ident.src()
{
// We must take extra effort to compare these, since desc is
// uninterned and therefore cannot be compared as a
// `SymbolId`. Once the reader takes care of creating the
// symbol, we'll have no such problem.
match attrs.find(QN_DESC).map(|a| a.value_atom()) {
Some(given) => {
assert_eq!(
desc.lookup_str(),
Into::<SymbolId>::into(given).lookup_str()
);
}
invalid => panic!("unexpected desc: {:?}", invalid),
}
assert_attr!(attrs, QN_DESC, Some(*desc),);
}
if let Some(Source {
@ -307,15 +292,13 @@ fn test_writes_deps() -> TestResult {
..
}) = ident.src()
{
match attrs.find("src".unwrap_into()) {
Some(Attr::Extensible(parts)) => {
assert_eq!(
parts.value_fragments(),
&vec![(relroot, LSPAN), (*pkg_name, LSPAN),]
);
}
invalid => panic!("unexpected desc: {:?}", invalid),
}
let expected = [relroot, *pkg_name]
.iter()
.map(GlobalSymbolResolve::lookup_str)
.collect::<String>()
.intern();
assert_attr!(attrs, QN_SRC, Some(expected),);
}
// Object-specific attributes
@ -430,7 +413,7 @@ fn test_writes_map_froms() -> TestResult {
.unwrap()
.find(QN_NAME)
.expect("expecting @name")
.value_atom(),
.value(),
);
});

View File

@ -179,7 +179,7 @@ impl TryFromIterator<Attr> for PackageAttrs {
let mut attrs: Self = Default::default();
for attr in iter.into_iter() {
let val = attr.value_atom();
let val = attr.value();
match attr.name() {
QN_NAME => attrs.name = Some(val),

View File

@ -500,6 +500,13 @@ pub enum Token {
/// Since each fragment contains a span,
/// this also potentially gives higher resolution for the origin of
/// components of generated attribute values.
///
/// _This should be used only for writing._
/// These will never be encountered during reading,
/// and so to keep the parsers and IRs simple,
/// there is no support for fragments beyond XIR.
/// (There was in the past,
/// but it was removed.)
AttrValueFragment(SymbolId, Span),
/// A delimiter indicating that attribute processing has ended and the

View File

@ -170,30 +170,6 @@
//! is, our stack is fully type-safe.
//!
//! [state machine]: https://en.wikipedia.org/wiki/Finite-state_machine
//!
//! High-Resolution Attributes
//! --------------------------
//! XIRT supports [`Token::AttrValueFragment`],
//! which can produce concatenated attribute values that retain the
//! [`Span`] of each of their constituent parts.
//! This could allow,
//! for example,
//! creating an LSP server that would expose all of the TAME templates and
//! source inputs used to generate an identifier.
//!
//! However,
//! note that the XIR token stream introduced [`Token::AttrValueFragment`]
//! primarily to eliminate the need for unnecessary [symbol
//! lookups](crate::sym), copying, and heap allocations.
//! XIRT must perform extra heap allocations to process these fragments.
//! Once processed,
//! an [`Attr::Extensible`] object is produced;
//! the value is _not_ concatenated and interned,
//! allowing it to be cheaply converted back into a [`Token`] stream
//! for writing without unnecessary overhead.
//!
//! For more information,
//! see [`AttrParts`].
mod attr;
mod parse;
@ -202,7 +178,7 @@ use super::{QName, Token, TokenResultStream, TokenStream};
use crate::{span::Span, sym::SymbolId};
use std::{error::Error, fmt::Display, iter, mem::take};
pub use attr::{Attr, AttrList, AttrParts, SimpleAttr};
pub use attr::{Attr, AttrList};
/// A XIR tree (XIRT).
///
@ -509,10 +485,6 @@ pub enum Stack {
/// after which it will be attached to an element.
AttrName(Option<(Option<ElementStack>, AttrList)>, QName, Span),
/// An attribute whose value is being constructed of value fragments,
/// after which it will be attached to an element.
AttrFragments(Option<(Option<ElementStack>, AttrList)>, AttrParts),
/// A completed [`AttrList`] without any [`Element`] context.
IsolatedAttrList(AttrList),
@ -639,43 +611,8 @@ impl Stack {
})
}
/// Push a value fragment onto an attribute.
///
/// This begins to build an attribute out of value fragments,
/// which is also completed by [`Stack::close_attr`].
/// The attribute information that was previously held in
/// [`Stack::AttrName`] is moved into a [`AttrParts`] if that has not
/// already happend,
/// which is responsible for managing future fragments.
///
/// This will cause heap allocation.
fn push_attr_value(self, value: SymbolId, span: Span) -> Result<Self> {
Ok(match self {
Self::AttrName(head, name, open_span) => {
// This initial capacity can be adjusted after we observe
// empirically what we most often parse, or we can make it
// configurable.
let mut parts = AttrParts::with_capacity(name, open_span, 2);
parts.push_value(value, span);
Self::AttrFragments(head, parts)
}
Self::AttrFragments(head, mut parts) => {
parts.push_value(value, span);
Self::AttrFragments(head, parts)
}
_ => todo! {},
})
}
/// Assigns a value to an opened attribute and attaches to the parent
/// element.
///
/// If the attribute is composed of fragments ([`Stack::AttrFragments`]),
/// this serves as the final fragment and will yield an
/// [`Attr::Extensible`] with no further processing.
fn close_attr(self, value: SymbolId, span: Span) -> Result<Self> {
Ok(match self {
Self::AttrName(Some((ele_stack, attr_list)), name, open_span) => {
@ -685,15 +622,6 @@ impl Stack {
)
}
Self::AttrFragments(Some((ele_stack, attr_list)), mut parts) => {
parts.push_value(value, span);
Stack::BuddingAttrList(
ele_stack,
attr_list.push(Attr::Extensible(parts)),
)
}
// Isolated single attribute.
Self::AttrName(None, name, open_span) => {
Stack::IsolatedAttr(Attr::new(name, value, (open_span, span)))
@ -812,13 +740,16 @@ impl ParserState {
Token::Open(name, span) => stack.open_element(name, span),
Token::Close(name, span) => stack.close_element(name, span),
Token::AttrName(name, span) => stack.open_attr(name, span),
Token::AttrValueFragment(value, span) => {
stack.push_attr_value(value, span)
}
Token::AttrValue(value, span) => stack.close_attr(value, span),
Token::AttrEnd => stack.end_attrs(),
Token::Text(value, span) => stack.text(value, span),
// This parse is being rewritten, so we'll address this with a
// proper error then.
Token::AttrValueFragment(..) => {
panic!("AttrValueFragment is not parsable")
}
Token::Comment(..) | Token::CData(..) | Token::Whitespace(..) => {
Err(ParseError::Todo(tok, stack))
}
@ -1148,5 +1079,50 @@ pub fn attr_parser_from<'a>(
})
}
#[cfg(test)]
pub fn merge_attr_fragments<'a>(
toks: &'a mut impl TokenStream,
) -> impl TokenStream + 'a {
use crate::sym::{GlobalSymbolIntern, GlobalSymbolResolve};
let mut stack = Vec::with_capacity(4);
iter::from_fn(move || {
loop {
match toks.next() {
// Collect fragments and continue iterating until we find
// the final `Token::AttrValue`.
Some(Token::AttrValueFragment(frag, ..)) => {
stack.push(frag);
}
// An AttrValue without any stack is just a normal value.
// We are not interested in it.
val @ Some(Token::AttrValue(..)) if stack.len() == 0 => {
return val;
}
// But if we have a stack,
// allocate a new string that concatenates each of the
// symbols and return a newly allocated symbol.
Some(Token::AttrValue(last, span)) if stack.len() > 0 => {
stack.push(last);
let merged = stack
.iter()
.map(|frag| frag.lookup_str())
.collect::<String>()
.intern();
stack.clear();
return Some(Token::AttrValue(merged, span));
}
other => return other,
}
}
})
}
#[cfg(test)]
mod test;

View File

@ -29,254 +29,45 @@ use std::fmt::Display;
mod parse;
/// An attribute.
///
/// Attributes come in two flavors:
/// attributes with simple atoms ([`SimpleAttr`]),
/// and extensible attributes composed of a list of fragments with
/// associated spans ([`AttrParts`]).
///
/// If you do not care about the distinction between the two types,
/// use the API provided by this enum for common functionality.
/// Element attribute.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Attr {
Simple(SimpleAttr),
Extensible(AttrParts),
}
impl Attr {
/// Construct a new simple attribute with a name, value, and respective
/// [`Span`]s.
///
/// This attribute's value cannot be extended,
/// but it can be cheaply converted into [`Attr::Extensible`] via
/// [`Attr::parts`] or [`From`].
#[inline]
pub fn new(name: QName, value: SymbolId, span: (Span, Span)) -> Self {
Self::Simple(SimpleAttr::new(name, value, span))
}
/// Construct a new attribute whose value will be incrementally
/// constructed.
///
/// This is intended for use with
/// [`Token::AttrValueFragment`](super::Token::AttrValueFragment),
/// which provides for string concatenation while maintaining
/// [`Span`] resolution and being zero-copy.
#[inline]
pub fn new_extensible_with_capacity(
name: QName,
name_span: Span,
capacity: usize,
) -> Self {
Self::Extensible(AttrParts::with_capacity(name, name_span, capacity))
}
/// Create an attribute from a list of value fragments and their spans.
///
/// This is intended not only for convenience,
/// but also to permit pre-allocating buffers,
/// or re-using them in conjunction with [`AttrParts::into_fragments`].
#[inline]
pub fn from_fragments(
name: QName,
name_span: Span,
frags: Vec<(SymbolId, Span)>,
) -> Self {
Self::Extensible(AttrParts {
name,
name_span,
value_frags: frags,
})
}
/// The inner [`AttrParts`] representing an attribute and its value
/// fragments.
///
/// This provides the inner [`AttrParts`] needed to begin pushing value
/// fragments.
///
/// If the attribute has no parts (is a [`SimpleAttr`]),
/// it will be converted into an extensible attribute with one value
/// fragment and then returned.
#[inline]
pub fn parts(self) -> AttrParts {
match self {
Self::Simple(attr) => attr.into(),
Self::Extensible(parts) => parts,
}
}
/// Attribute name.
#[inline]
pub fn name(&self) -> QName {
match self {
Self::Simple(attr) => attr.name,
Self::Extensible(attr) => attr.name,
}
}
/// Retrieve an atom from the attribute.
///
/// An atom is cost-free if either
/// (a) this is [`Attr::Simple`]; or
/// (b) this is [`Attr::Extensible`] with one fragment.
/// Otherwise,
/// this panics with a TODO,
/// since we haven't had a need for merging attributes \[yet\].
///
/// Since [`SymbolId`] implements [`Copy`],
/// this returns an owned value.
#[inline]
pub fn value_atom(&self) -> SymbolId {
match self {
Self::Simple(attr) => attr.value,
Self::Extensible(attr) if attr.value_frags.len() == 1 => {
attr.value_frags[0].0
}
// We'll probably want to just merge and generate a new
// SymbolId,
// possibly having a separate variant of this method if we
// want to guarantee a cost-free conversion as a Result.
Self::Extensible(attr) => todo!(
"Multi-fragment attribute atom requested: {:?}",
attr.value_frags
),
}
}
}
impl Display for Attr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Simple(attr) => attr.fmt(f),
Self::Extensible(parts) => parts.fmt(f),
}
}
}
/// Element attribute with an atomic value.
///
/// This should be used in place of [`AttrParts`] whenever the attribute is
/// a simple [`QName`]/[`SymbolId`] pair.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct SimpleAttr {
pub struct Attr {
name: QName,
value: SymbolId,
/// Spans for the attribute name and value respectively.
span: (Span, Span),
}
impl SimpleAttr {
impl Attr {
/// Construct a new simple attribute with a name, value, and respective
/// [`Span`]s.
#[inline]
pub fn new(name: QName, value: SymbolId, span: (Span, Span)) -> Self {
Self { name, value, span }
}
/// Attribute name.
#[inline]
pub fn name(&self) -> QName {
self.name
}
/// Retrieve the value from the attribute.
///
/// Since [`SymbolId`] implements [`Copy`],
/// this returns an owned value.
#[inline]
pub fn value(&self) -> SymbolId {
self.value
}
}
impl Display for SimpleAttr {
impl Display for Attr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "`@{}=\"{}\"` at {}", self.name, self.value, self.span.0)
}
}
/// Element attribute with a value composed of multiple fragments.
///
/// This should be used when one or more of these properties is desirable:
/// 1. Zero-copy concatenation with respect to [symbols](crate::sym);
/// 2. High-resolution [`Span`]s for each constituent fragment; and/or
/// 3. You need to parse a XIR stream with
/// [`Token::AttrValueFragment`](super::Token::AttrValueFragment).
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AttrParts {
name: QName,
name_span: Span,
/// Ordered value fragments and their associated [`Span`]s.
///
/// When writing,
/// fragments will be concatenated in order without any delimiters.
value_frags: Vec<(SymbolId, Span)>,
}
impl AttrParts {
/// Construct a new simple attribute with a name, value, and respective
/// [`Span`]s.
#[inline]
pub fn with_capacity(
name: QName,
name_span: Span,
capacity: usize,
) -> Self {
Self {
name,
name_span,
value_frags: Vec::with_capacity(capacity),
}
}
}
impl AttrParts {
/// Append a new value fragment and its associated span.
///
/// Value fragments are intended to be concatenated on write without a
/// delimiter,
/// and are associated with
/// [`Token::AttrValueFragment`](super::Token::AttrValueFragment).
#[inline]
pub fn push_value(&mut self, value: SymbolId, span: Span) {
self.value_frags.push((value, span));
}
/// Retrieve a read-only list of ordered value fragments and their
/// associated spans.
///
/// If you want to consume the vector to re-use it for future
/// [`AttrParts`],
/// see [`into_fragments`](AttrParts::into_fragments).
#[inline]
pub fn value_fragments(&self) -> &Vec<(SymbolId, Span)> {
&self.value_frags
}
/// Consume [`AttrParts`],
/// yielding its internal fragment buffer.
///
/// This allows the buffer to be re-used for future [`AttrParts`],
/// avoiding additional heap allocations.
#[inline]
pub fn into_fragments(self) -> Vec<(SymbolId, Span)> {
self.value_frags
}
}
impl From<SimpleAttr> for AttrParts {
fn from(attr: SimpleAttr) -> Self {
Self {
name: attr.name,
name_span: attr.span.0,
value_frags: vec![(attr.value, attr.span.1)],
}
}
}
impl From<Attr> for AttrParts {
fn from(attr: Attr) -> Self {
match attr {
Attr::Simple(inner) => inner.into(),
Attr::Extensible(inner) => inner,
}
}
}
impl Display for AttrParts {
fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
todo!("<AttrParts as std::fmt::Display>::fmt")
}
}
/// List of attributes.
///
/// Attributes are ordered in XIR so that this IR will be suitable for code
@ -334,90 +125,4 @@ impl<const N: usize> From<[Attr; N]> for AttrList {
}
}
// See also [`super::test`] for many more tests related to attributes.
#[cfg(test)]
mod test {
use super::*;
use crate::{convert::ExpectInto, sym::GlobalSymbolIntern};
lazy_static! {
static ref S: Span =
Span::from_byte_interval((0, 0), "test case, 1".intern());
static ref S2: Span =
Span::from_byte_interval((0, 0), "test case, 2".intern());
}
#[test]
fn attr_into_attr_parts() {
let name = "attr".unwrap_into();
let value = "value".intern();
let attr = SimpleAttr {
name,
value,
span: (*S, *S2),
};
let result = attr.clone().into();
assert_eq!(
AttrParts {
name,
name_span: *S,
value_frags: vec![(value, *S2)],
},
result,
);
// Enum should also be able to do it
assert_eq!(result, Attr::Simple(attr.clone()).into(),);
assert_eq!(result, Attr::Simple(attr).parts(),);
}
#[test]
fn push_attr_part() {
let name = "pushattr".unwrap_into();
let value1 = "first".intern();
let value2 = "second".intern();
let mut attr = Attr::new_extensible_with_capacity(name, *S, 2).parts();
attr.push_value(value1, *S);
attr.push_value(value2, *S2);
assert_eq!(&vec![(value1, *S), (value2, *S2)], attr.value_fragments());
}
#[test]
fn attr_from_parts() {
let name = "pushattr".unwrap_into();
let value1 = "first".intern();
let value2 = "second".intern();
let attr =
Attr::from_fragments(name, *S, vec![(value1, *S), (value2, *S2)])
.parts();
assert_eq!(&vec![(value1, *S), (value2, *S2)], attr.value_fragments());
}
#[test]
fn into_fragments_to_reuse_buffer_for_parts() {
let name = "partbuffer".unwrap_into();
let value1 = "first".intern();
let value2 = "second".intern();
let value3 = "third".intern();
let frags = vec![(value1, *S2), (value2, *S)];
let mut attr1 = Attr::from_fragments(name, *S, frags).parts();
attr1.push_value(value3, *S2);
// Notice that the value is owned, and so we can call
// `from_fragments` again to re-use the buffer.
assert_eq!(
vec![(value1, *S2), (value2, *S), (value3, *S2)],
attr1.into_fragments(),
);
}
}
// See [`super::test`] for tests related to attributes.

View File

@ -158,19 +158,14 @@ fn empty_element_with_attrs_from_toks() {
let attr1 = "a".unwrap_into();
let attr2 = "b".unwrap_into();
let val1 = "val1".intern();
let val2a = "val2a".intern();
let val2b = "val2b".intern();
let val2c = "val2b".intern();
let val2 = "val2".intern();
let toks = [
Token::Open(name, *S),
Token::AttrName(attr1, *S),
Token::AttrValue(val1, *S2),
Token::AttrName(attr2, *S),
// More than one fragment to ensure we handle that state
Token::AttrValueFragment(val2a, *S),
Token::AttrValueFragment(val2b, *S2),
Token::AttrValue(val2c, *S3),
Token::AttrValue(val2, *S3),
Token::Close(None, *S2),
]
.into_iter();
@ -179,11 +174,7 @@ fn empty_element_with_attrs_from_toks() {
name,
attrs: Some(AttrList::from(vec![
Attr::new(attr1, val1, (*S, *S2)),
Attr::from_fragments(
attr2,
*S,
vec![(val2a, *S), (val2b, *S2), (val2c, *S3)],
),
Attr::new(attr2, val2, (*S, *S3)),
])),
children: vec![],
span: (*S, *S2),
@ -195,8 +186,6 @@ fn empty_element_with_attrs_from_toks() {
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrName
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValueFragment
assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete))); // AttrValue
assert_eq!(sut.next(), Some(Ok(Parsed::Tree(Tree::Element(expected)))));
assert_eq!(sut.next(), None);