tamer: tameld: Use uninterned symbols for reader

Fragments were previously represented by `String` to avoid the cost of
interning (hashing and copying).  This change modifies it to use uninterned
symbols, which does still have a copy overhead but it does not hash.

Initial tests shows a small performance decrease of about 15% and a small
memory increase of similar proportion.  However, once I realized that I was
not clearing buffers from quick_xml events and implemented that change in a
previous commit, this change ended up being approximately on par with
`String`, despite the copying of some pretty large fragments.

YMMV, though, and perhaps on less powerful systems time may increase
slightly.

The upcoming XIR (XML IR) was originally going to support both owned strings
and symbols, but now we'll just use uninterned symbols; I can't rationalize
complicating the API at this time when it will provide an almost
imperceivable performance benefit.  If ever that changes in the future,
that change will be entertained.

The end result is that the fate of a fragment's underlying memory is
determined by whatever is processing the data, _not_ by the API itself---the
API was previously forcing use of a String, whereas now it's up to the
caller to determine whether we want comparable interns.  For fragments,
that's not likely ever to be the case, especially considering that the
representation will change so drastically in the future.
main
Mike Gerwitz 2021-08-16 14:01:20 -04:00
parent d96dcad7d8
commit f97141f5c5
8 changed files with 56 additions and 41 deletions

View File

@ -149,7 +149,7 @@ mod interner {
let strs = gen_strs(1000);
bench.iter(|| {
let sut = ArenaInterner::<FxBuildHasher, u32>::new();
let _sut = ArenaInterner::<FxBuildHasher, u32>::new();
strs.iter().map(|s| String::from(s)).for_each(drop);
});
}

View File

@ -231,7 +231,7 @@ where
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
text: FragmentText<Ix>,
) -> AsgResult<ObjectRef<Ix>> {
self.with_ident(identi, |obj| obj.set_fragment(text))
}
@ -410,7 +410,7 @@ mod test {
given_declare: Option<SymbolId<Ix>>,
given_extern: Option<(IdentKind, Source<Ix>)>,
given_resolve: Option<(IdentKind, Source<Ix>)>,
given_set_fragment: Option<FragmentText>,
given_set_fragment: Option<FragmentText<Ix>>,
fail_redeclare: RefCell<Option<TransitionError>>,
fail_extern: RefCell<Option<TransitionError>>,
fail_set_fragment: RefCell<Option<TransitionError>>,
@ -430,7 +430,7 @@ mod test {
None
}
fn fragment(&self) -> Option<&FragmentText> {
fn fragment(&self) -> Option<&FragmentText<Ix>> {
None
}
@ -485,7 +485,7 @@ mod test {
fn set_fragment(
mut self,
text: FragmentText,
text: FragmentText<Ix>,
) -> TransitionResult<StubIdentObject> {
if self.fail_set_fragment.borrow().is_some() {
let err = self.fail_set_fragment.replace(None).unwrap();
@ -730,8 +730,8 @@ mod test {
};
let node = sut.declare(sym, IdentKind::Meta, src.clone())?;
let fragment = "a fragment".to_string();
let node_with_frag = sut.set_fragment(node, fragment.clone())?;
let fragment = "a fragment".intern();
let node_with_frag = sut.set_fragment(node, fragment)?;
// Attaching a fragment should _replace_ the node, not create a
// new one

View File

@ -124,7 +124,7 @@ where
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
text: FragmentText<Ix>,
) -> AsgResult<ObjectRef<Ix>>;
/// Retrieve an object from the graph by [`ObjectRef`].

View File

@ -77,7 +77,7 @@ pub enum IdentObject<Ix: SymbolIndexSize> {
/// They are produced by the compiler and it is the job of the
/// [linker][crate::ld] to put them into the correct order for the
/// final executable.
IdentFragment(SymbolId<Ix>, IdentKind, Source<Ix>, FragmentText),
IdentFragment(SymbolId<Ix>, IdentKind, Source<Ix>, FragmentText<Ix>),
}
/// Retrieve information about an [`IdentObject`].
@ -120,7 +120,7 @@ pub trait IdentObjectData<Ix: SymbolIndexSize> {
///
/// If the object does not have an associated code fragment,
/// [`None`] is returned.
fn fragment(&self) -> Option<&FragmentText>;
fn fragment(&self) -> Option<&FragmentText<Ix>>;
/// IdentObject as an identifier ([`IdentObject`]).
///
@ -166,7 +166,7 @@ where
}
}
fn fragment(&self) -> Option<&FragmentText> {
fn fragment(&self) -> Option<&FragmentText<Ix>> {
match self {
Self::Missing(_) | Self::Ident(_, _, _) | Self::Extern(_, _, _) => {
None
@ -249,7 +249,7 @@ where
/// Note, however, that an identifier's fragment may be cleared under
/// certain circumstances (such as symbol overrides),
/// making way for a new fragment to be set.
fn set_fragment(self, text: FragmentText) -> TransitionResult<T>;
fn set_fragment(self, text: FragmentText<Ix>) -> TransitionResult<T>;
}
impl<Ix> IdentObjectState<Ix, IdentObject<Ix>> for IdentObject<Ix>
@ -434,7 +434,7 @@ where
fn set_fragment(
self,
text: FragmentText,
text: FragmentText<Ix>,
) -> TransitionResult<IdentObject<Ix>> {
match self {
IdentObject::Ident(sym, kind, src) => {
@ -608,7 +608,7 @@ impl std::error::Error for UnresolvedError {
/// Compiled fragment for identifier.
///
/// This represents the text associated with an identifier.
pub type FragmentText = String;
pub type FragmentText<Ix> = SymbolId<Ix>;
/// Metadata about the source of an object.
///
@ -742,7 +742,7 @@ mod test {
sym,
IdentKind::Meta,
Source::default(),
FragmentText::default(),
"".intern()
)
.name()
);
@ -772,7 +772,7 @@ mod test {
sym,
kind.clone(),
Source::default(),
FragmentText::default()
"".intern()
)
.kind()
);
@ -804,7 +804,7 @@ mod test {
sym,
IdentKind::Meta,
src.clone(),
FragmentText::default()
"".intern()
)
.src()
);
@ -813,7 +813,7 @@ mod test {
#[test]
fn ident_object_fragment() {
let sym: PkgSymbolId = "sym".intern();
let text: FragmentText = "foo".into();
let text = "foo".into();
assert_eq!(None, IdentObject::Missing(sym).fragment());
@ -835,7 +835,7 @@ mod test {
sym,
IdentKind::Meta,
Source::default(),
text.clone(),
text,
)
.fragment()
);
@ -1239,7 +1239,7 @@ mod test {
// Since it's already a fragment, this should fail.
let err = ident_with_frag
.clone()
.set_fragment("replacement".to_string())
.set_fragment("replacement".intern())
.expect_err("Expected failure");
match err {
@ -1633,8 +1633,8 @@ mod test {
.resolve(given, src.clone())
.unwrap();
let fragment = "a fragment".to_string();
let obj_with_frag = obj.set_fragment(fragment.clone());
let fragment = "a fragment".intern();
let obj_with_frag = obj.set_fragment(fragment);
assert_eq!(
Ok(IdentObject::IdentFragment(sym, expected, src, fragment)),

View File

@ -361,7 +361,7 @@ impl<W: Write> XmleWriter<W> {
match ident {
IdentObject::IdentFragment(_, _, _, frag) => {
self.writer.write_event(Event::Text(
BytesText::from_plain_str(frag),
BytesText::from_plain_str(&frag.lookup_str()),
))?;
}
// Cgen, Gen, and Lparam are not expected to be present, so we
@ -426,7 +426,7 @@ mod mock {
#[cfg(test)]
mod test {
use super::*;
use crate::ir::asg::{Dim, Section, Source};
use crate::ir::asg::{Dim, FragmentText, Section, Source};
use crate::ir::legacyir::SymAttrs;
use crate::sym::GlobalSymbolIntern;
use std::str;
@ -520,7 +520,7 @@ mod test {
"sym".intern(),
IdentKind::Meta,
Source::default(),
String::from(""),
FragmentText::from(""),
);
let mut section = Section::new();

View File

@ -54,7 +54,7 @@
//! use tamer::global;
//! use tamer::ir::legacyir::SymType;
//! use tamer::obj::xmlo::{XmloEvent, XmloReader};
//! use tamer::sym::GlobalSymbolIntern;
//! use tamer::sym::{GlobalSymbolIntern, GlobalSymbolResolve, PkgSymbolId};
//!
//! let xmlo = br#"<package name="foo">
//! <preproc:symtable>
@ -90,30 +90,34 @@
//! XmloEvent::Package(attrs) => pkgname = attrs.name,
//! XmloEvent::SymDecl(sym, attrs) => syms.push((sym, attrs.ty)),
//! XmloEvent::SymDeps(sym, symdeps) => deps.push((sym, symdeps)),
//! XmloEvent::Fragment(sym, text) => fragments.push((sym, text)),
//! XmloEvent::Fragment(sym, text) =>
//! fragments.push((sym, text.lookup_str().as_str())),
//!
//! // Do not read past end of header.
//! XmloEvent::Eoh => break,
//! }
//! }
//!
//! let syma = "syma".intern();
//! let symb = "symb".intern();
//!
//! assert_eq!(Some("foo".intern()), pkgname);
//!
//! assert_eq!(
//! vec![
//! ("syma".intern(), Some(SymType::Class)),
//! ("symb".intern(), Some(SymType::Cgen)),
//! (syma, Some(SymType::Class)),
//! (symb, Some(SymType::Cgen)),
//! ],
//! syms
//! );
//!
//! assert_eq!(
//! vec![
//! ("syma".intern(), vec![
//! (syma, vec![
//! "depa-1".intern(),
//! "depa-2".intern(),
//! ]),
//! ("symb".intern(), vec![
//! (symb, vec![
//! "depb-1".intern(),
//! ]),
//! ],
@ -122,8 +126,8 @@
//!
//! assert_eq!(
//! vec![
//! ("syma".intern(), "syma text".into()),
//! ("symb".intern(), "symb text".into()),
//! (syma, "syma text"),
//! (symb, "symb text"),
//! ],
//! fragments
//! );
@ -134,8 +138,8 @@
use crate::ir::legacyir::{PackageAttrs, SymAttrs, SymType};
use crate::sym::{
GlobalSymbolInternUnchecked, GlobalSymbolResolve, SymbolId,
SymbolIndexSize, SymbolStr,
GlobalSymbolIntern, GlobalSymbolInternUnchecked, GlobalSymbolResolve,
SymbolId, SymbolIndexSize, SymbolStr,
};
#[cfg(test)]
use crate::test::quick_xml::MockBytesStart as BytesStart;
@ -654,7 +658,7 @@ where
_ => err.into(),
})?;
Ok(XmloEvent::Fragment(id, text))
Ok(XmloEvent::Fragment(id, text.clone_uninterned()))
}
/// Convert single-character `@dim` to a [`u8`].
@ -744,7 +748,7 @@ pub enum XmloEvent<Ix: SymbolIndexSize> {
/// Given that fragments can be quite large,
/// a caller not interested in these data should choose to skip
/// fragments entirely rather than simply ignoring fragment events.
Fragment(SymbolId<Ix>, String),
Fragment(SymbolId<Ix>, SymbolId<Ix>),
/// End-of-header.
///

View File

@ -328,10 +328,11 @@ xmlo_tests! {
let result = sut.read_event()?;
assert_eq!(
XmloEvent::Fragment("fragsym".intern(), expected),
result
);
assert!(matches!(
result,
XmloEvent::Fragment(sym, given)
if sym == "fragsym".intern() && given.lookup_str() == expected
));
// argument provided to underlying read_text
assert_eq!(

View File

@ -386,6 +386,16 @@ impl<Ix: SymbolIndexSize> GlobalSymbolInternUnchecked<Ix> for &[u8] {
}
}
impl<T, Ix> From<T> for SymbolId<Ix>
where
T: Deref<Target = str>,
Ix: SymbolIndexSize,
{
fn from(value: T) -> Self {
value.intern()
}
}
#[cfg(test)]
mod test {
use super::*;