tamer: obj::xmle::xir: Consideration of simplified iterators

The previous iterators had to be used in a certain order because they mixed
concerns, out of concern for performance.  This attempts to chain even more
iterators to see how it may perform.

To be clear: this will be cleaned up.  This was just an experiment.

Here were profiles on the average of 50 runs of linking our largest program:

  Baseline, pre-XIR (with fragments removed from output)               0.8082
  XIR writer, pre-ElemWrap, no #[inline]                               0.7844s
  XIR writer, ElemWrap, no #[inline]                                   0.7918s
  XIR writer, ElemWrap, inlines in obj::xmle::xir                      0.7892s
  XIR writer, ElemWrap, inlines in obj::xmle::xir and ir::asg::section 0.7858s
  XIR writer, ElemWrap, inline in only ir::asg::section                0.781s
  Pre-ElemWrap, inlines in ir::asg::section                            0.7772s

These profiles are difficult, because they hit the filesystem so much.  I
write to /dev/null, but it reads 100s of xmlo files from disk.

It's clear that the impact is fairly modest and within a margin of error; as
such, I will continue down the path of writing code that's easier to grok
and maintain, since not doing so would be a micro-optimization relative to
the concerns of the rest of the system at this point.

But the purpose of all of this work was to determine whether an
iterator-based XIR would be viable.  It seems to be competitive.  I'll
finish up the writer reimplementation and move on.
main
Mike Gerwitz 2021-10-07 16:48:58 -04:00
parent 2821098b40
commit 75d2ecf4dd
2 changed files with 63 additions and 26 deletions

View File

@ -54,32 +54,38 @@ impl<'a, T> Section<'a, T> {
}
/// The length of the `Section`
#[inline]
pub fn len(&self) -> usize {
self.head.len() + self.body.len() + self.tail.len()
}
/// Check if the `Section` is empty
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Push an `IdentObject` into a `Section`'s head
#[inline]
pub fn push_head(&mut self, obj: &'a T) {
self.head.push(obj)
}
/// Push an `IdentObject` into a `Section`'s body
#[inline]
pub fn push_body(&mut self, obj: &'a T) {
self.body.push(obj)
}
/// Push an `IdentObject` into a `Section`'s tail
#[inline]
pub fn push_tail(&mut self, obj: &'a T) {
self.tail.push(obj)
}
/// Construct a new iterator visiting each head, body, and tail object
/// in order.
#[inline]
pub fn iter(&self) -> SectionIter<T> {
SectionIter(
self.head
@ -102,6 +108,7 @@ pub struct SectionIter<'a, T>(
impl<'a, T> Iterator for SectionIter<'a, T> {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.next().map(|x| *x)
}
@ -127,6 +134,7 @@ pub struct Sections<'a, T> {
impl<'a, T: IdentObjectData> Sections<'a, T> {
/// New collection of empty sections.
#[inline]
pub fn new() -> Self {
Self {
map: Section::new(),
@ -151,6 +159,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
/// At the time of writing,
/// they are chained in the same order in which they are defined
/// on the [`Sections`] struct.
#[inline]
pub fn iter_all(&self) -> SectionsIter<T> {
SectionsIter(SectionsIterType::All(
self.map
@ -176,6 +185,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
/// but you should not rely on the order that the sections themselves
/// appear in;
/// they may change or be combined in the future.
#[inline]
pub fn iter_static(&self) -> SectionsIter<T> {
SectionsIter(SectionsIterType::Static(
self.meta
@ -189,6 +199,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
}
/// Construct an iterator over the map section.
#[inline]
pub fn iter_map(&self) -> SectionsIter<T> {
SectionsIter(SectionsIterType::Single(self.map.iter()))
}
@ -197,6 +208,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
///
/// Multiple mappings may reference the same source field,
/// which would produce duplicate values if they are not filtered.
#[inline]
pub fn iter_map_froms_uniq(&self) -> hash_set::IntoIter<SymbolId> {
self.iter_map()
.filter_map(|ident| {
@ -207,11 +219,13 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
}
/// Construct an iterator over the return map section.
#[inline]
pub fn iter_retmap(&self) -> SectionsIter<T> {
SectionsIter(SectionsIterType::Single(self.retmap.iter()))
}
/// Construct an iterator over the executable `rater` section.
#[inline]
pub fn iter_exec(&self) -> SectionsIter<T> {
SectionsIter(SectionsIterType::Single(self.rater.iter()))
}
@ -246,6 +260,7 @@ pub struct SectionsIter<'a, T>(SectionsIterType<'a, T>);
impl<'a, T> Iterator for SectionsIter<'a, T> {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
match &mut self.0 {
SectionsIterType::All(inner) => inner.next(),
@ -254,6 +269,7 @@ impl<'a, T> Iterator for SectionsIter<'a, T> {
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
match &self.0 {
SectionsIterType::All(inner) => inner.size_hint(),

View File

@ -30,7 +30,7 @@ use crate::{
sym::{st::*, SymbolId},
};
use arrayvec::ArrayVec;
use std::iter::Chain;
use std::iter::{once, Chain, Once};
use std::{array, collections::hash_set};
qname_const! {
@ -56,11 +56,12 @@ qname_const! {
QN_L_FROM: L_L:L_FROM,
}
const HEADER_SIZE: usize = 16;
const HEADER_SIZE: usize = 14;
type HeaderIter = array::IntoIter<Token, HEADER_SIZE>;
/// Beginning [`Token`]s representing the root node of an `xmle` document
/// and its immediate child.
#[inline]
fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
let pkg_name_val = AttrValue::Escaped(pkg_name);
@ -69,7 +70,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
// edition 2018; 2021 will be out in a few months at the time of
// writing.
[
Token::Open(QN_PACKAGE, LSPAN),
Token::AttrName(QN_XMLNS, LSPAN),
Token::AttrValue(AttrValue::st_uri(URI_LV_RATER), LSPAN),
Token::AttrName(QN_XMLNS_PREPROC, LSPAN),
@ -84,7 +84,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
Token::AttrValue(pkg_name_val, LSPAN),
Token::AttrName(QN_UUROOTPATH, LSPAN),
Token::AttrValue(AttrValue::Escaped(relroot), LSPAN),
Token::Open(QN_L_DEP, LSPAN),
]
.into_iter()
}
@ -116,6 +115,7 @@ struct DepListIter<'a, T: IdentObjectData> {
}
impl<'a, T: IdentObjectData> DepListIter<'a, T> {
#[inline]
fn new(sections: &'a Sections<T>, relroot: SymbolId) -> Self {
Self {
iter: sections.iter_all(),
@ -232,6 +232,7 @@ impl<'a, T: IdentObjectData> DepListIter<'a, T> {
impl<'a, T: IdentObjectData> Iterator for DepListIter<'a, T> {
type Item = Token;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.toks.pop().or_else(|| self.refill_toks())
}
@ -251,20 +252,18 @@ struct MapFromsIter {
}
impl MapFromsIter {
#[inline]
fn new<'a, T: IdentObjectData>(sections: &'a Sections<T>) -> Self {
let mut iter = Self {
let iter = Self {
iter: sections.iter_map_froms_uniq(),
// Most of the time we have a single `from` (4 tokens).
toks: ArrayVec::new(),
};
// reverse
iter.toks.push(Token::Open(QN_L_MAP_FROM, LSPAN));
iter.toks.push(Token::Close(Some(QN_L_DEP), LSPAN));
iter
}
#[inline]
fn refill_toks(&mut self) -> Option<Token> {
self.iter.next().and_then(|from| {
self.toks.push(Token::Close(None, LSPAN));
@ -281,21 +280,30 @@ impl MapFromsIter {
impl Iterator for MapFromsIter {
type Item = Token;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.toks.pop().or_else(|| self.refill_toks())
}
}
const FOOTER_SIZE: usize = 2;
type FooterIter = array::IntoIter<Token, FOOTER_SIZE>;
pub struct ElemWrapIter<I: Iterator<Item = Token>>(
Chain<Chain<Once<Token>, I>, Once<Token>>,
);
#[inline]
fn footer() -> FooterIter {
[
Token::Close(Some(QN_L_MAP_FROM), LSPAN),
Token::Close(Some(QN_PACKAGE), LSPAN),
]
.into_iter()
impl<I: Iterator<Item = Token>> ElemWrapIter<I> {
#[inline]
fn new(open: Token, inner: I, close: Token) -> Self {
Self(once(open).chain(inner).chain(once(close)))
}
}
impl<I: Iterator<Item = Token>> Iterator for ElemWrapIter<I> {
type Item = Token;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
/// Iterator that lazily lowers `xmle` object files into XIR.
@ -305,9 +313,11 @@ fn footer() -> FooterIter {
/// since this iterator will receive over a million calls on larger
/// programs (and hundreds of thousands on smaller).
pub struct LowerIter<'a, T: IdentObjectData>(
Chain<
Chain<Chain<HeaderIter, DepListIter<'a, T>>, MapFromsIter>,
FooterIter,
ElemWrapIter<
Chain<
Chain<HeaderIter, ElemWrapIter<DepListIter<'a, T>>>,
ElemWrapIter<MapFromsIter>,
>,
>,
);
@ -321,22 +331,33 @@ impl<'a, T: IdentObjectData> Iterator for LowerIter<'a, T> {
/// but [`DepListIter`] buffers tokens before emitting them,
/// so certain `next` calls have a processing cost while others are
/// essentially free.
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
#[inline]
pub fn lower_iter<'a, T: IdentObjectData>(
sections: &'a Sections<T>,
pkg_name: SymbolId,
relroot: SymbolId,
) -> LowerIter<'a, T> {
LowerIter(
LowerIter(ElemWrapIter::new(
Token::Open(QN_PACKAGE, LSPAN),
header(pkg_name, relroot)
.chain(DepListIter::new(sections, relroot))
.chain(MapFromsIter::new(sections))
.chain(footer()),
)
.chain(ElemWrapIter::new(
Token::Open(QN_L_DEP, LSPAN),
DepListIter::new(sections, relroot),
Token::Close(Some(QN_L_DEP), LSPAN),
))
.chain(ElemWrapIter::new(
Token::Open(QN_L_MAP_FROM, LSPAN),
MapFromsIter::new(sections),
Token::Close(Some(QN_L_MAP_FROM), LSPAN),
)),
Token::Close(Some(QN_PACKAGE), LSPAN),
))
}
#[cfg(test)]