tamer: obj::xmle::xir: Consideration of simplified iterators
The previous iterators had to be used in a certain order because they mixed concerns, out of concern for performance. This attempts to chain even more iterators to see how it may perform. To be clear: this will be cleaned up. This was just an experiment. Here were profiles on the average of 50 runs of linking our largest program: Baseline, pre-XIR (with fragments removed from output) 0.8082 XIR writer, pre-ElemWrap, no #[inline] 0.7844s XIR writer, ElemWrap, no #[inline] 0.7918s XIR writer, ElemWrap, inlines in obj::xmle::xir 0.7892s XIR writer, ElemWrap, inlines in obj::xmle::xir and ir::asg::section 0.7858s XIR writer, ElemWrap, inline in only ir::asg::section 0.781s Pre-ElemWrap, inlines in ir::asg::section 0.7772s These profiles are difficult, because they hit the filesystem so much. I write to /dev/null, but it reads 100s of xmlo files from disk. It's clear that the impact is fairly modest and within a margin of error; as such, I will continue down the path of writing code that's easier to grok and maintain, since not doing so would be a micro-optimization relative to the concerns of the rest of the system at this point. But the purpose of all of this work was to determine whether an iterator-based XIR would be viable. It seems to be competitive. I'll finish up the writer reimplementation and move on.main
parent
2821098b40
commit
75d2ecf4dd
|
@ -54,32 +54,38 @@ impl<'a, T> Section<'a, T> {
|
|||
}
|
||||
|
||||
/// The length of the `Section`
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.head.len() + self.body.len() + self.tail.len()
|
||||
}
|
||||
|
||||
/// Check if the `Section` is empty
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Push an `IdentObject` into a `Section`'s head
|
||||
#[inline]
|
||||
pub fn push_head(&mut self, obj: &'a T) {
|
||||
self.head.push(obj)
|
||||
}
|
||||
|
||||
/// Push an `IdentObject` into a `Section`'s body
|
||||
#[inline]
|
||||
pub fn push_body(&mut self, obj: &'a T) {
|
||||
self.body.push(obj)
|
||||
}
|
||||
|
||||
/// Push an `IdentObject` into a `Section`'s tail
|
||||
#[inline]
|
||||
pub fn push_tail(&mut self, obj: &'a T) {
|
||||
self.tail.push(obj)
|
||||
}
|
||||
|
||||
/// Construct a new iterator visiting each head, body, and tail object
|
||||
/// in order.
|
||||
#[inline]
|
||||
pub fn iter(&self) -> SectionIter<T> {
|
||||
SectionIter(
|
||||
self.head
|
||||
|
@ -102,6 +108,7 @@ pub struct SectionIter<'a, T>(
|
|||
impl<'a, T> Iterator for SectionIter<'a, T> {
|
||||
type Item = &'a T;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next().map(|x| *x)
|
||||
}
|
||||
|
@ -127,6 +134,7 @@ pub struct Sections<'a, T> {
|
|||
|
||||
impl<'a, T: IdentObjectData> Sections<'a, T> {
|
||||
/// New collection of empty sections.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: Section::new(),
|
||||
|
@ -151,6 +159,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
|
|||
/// At the time of writing,
|
||||
/// they are chained in the same order in which they are defined
|
||||
/// on the [`Sections`] struct.
|
||||
#[inline]
|
||||
pub fn iter_all(&self) -> SectionsIter<T> {
|
||||
SectionsIter(SectionsIterType::All(
|
||||
self.map
|
||||
|
@ -176,6 +185,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
|
|||
/// but you should not rely on the order that the sections themselves
|
||||
/// appear in;
|
||||
/// they may change or be combined in the future.
|
||||
#[inline]
|
||||
pub fn iter_static(&self) -> SectionsIter<T> {
|
||||
SectionsIter(SectionsIterType::Static(
|
||||
self.meta
|
||||
|
@ -189,6 +199,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
|
|||
}
|
||||
|
||||
/// Construct an iterator over the map section.
|
||||
#[inline]
|
||||
pub fn iter_map(&self) -> SectionsIter<T> {
|
||||
SectionsIter(SectionsIterType::Single(self.map.iter()))
|
||||
}
|
||||
|
@ -197,6 +208,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
|
|||
///
|
||||
/// Multiple mappings may reference the same source field,
|
||||
/// which would produce duplicate values if they are not filtered.
|
||||
#[inline]
|
||||
pub fn iter_map_froms_uniq(&self) -> hash_set::IntoIter<SymbolId> {
|
||||
self.iter_map()
|
||||
.filter_map(|ident| {
|
||||
|
@ -207,11 +219,13 @@ impl<'a, T: IdentObjectData> Sections<'a, T> {
|
|||
}
|
||||
|
||||
/// Construct an iterator over the return map section.
|
||||
#[inline]
|
||||
pub fn iter_retmap(&self) -> SectionsIter<T> {
|
||||
SectionsIter(SectionsIterType::Single(self.retmap.iter()))
|
||||
}
|
||||
|
||||
/// Construct an iterator over the executable `rater` section.
|
||||
#[inline]
|
||||
pub fn iter_exec(&self) -> SectionsIter<T> {
|
||||
SectionsIter(SectionsIterType::Single(self.rater.iter()))
|
||||
}
|
||||
|
@ -246,6 +260,7 @@ pub struct SectionsIter<'a, T>(SectionsIterType<'a, T>);
|
|||
impl<'a, T> Iterator for SectionsIter<'a, T> {
|
||||
type Item = &'a T;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match &mut self.0 {
|
||||
SectionsIterType::All(inner) => inner.next(),
|
||||
|
@ -254,6 +269,7 @@ impl<'a, T> Iterator for SectionsIter<'a, T> {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
match &self.0 {
|
||||
SectionsIterType::All(inner) => inner.size_hint(),
|
||||
|
|
|
@ -30,7 +30,7 @@ use crate::{
|
|||
sym::{st::*, SymbolId},
|
||||
};
|
||||
use arrayvec::ArrayVec;
|
||||
use std::iter::Chain;
|
||||
use std::iter::{once, Chain, Once};
|
||||
use std::{array, collections::hash_set};
|
||||
|
||||
qname_const! {
|
||||
|
@ -56,11 +56,12 @@ qname_const! {
|
|||
QN_L_FROM: L_L:L_FROM,
|
||||
}
|
||||
|
||||
const HEADER_SIZE: usize = 16;
|
||||
const HEADER_SIZE: usize = 14;
|
||||
type HeaderIter = array::IntoIter<Token, HEADER_SIZE>;
|
||||
|
||||
/// Beginning [`Token`]s representing the root node of an `xmle` document
|
||||
/// and its immediate child.
|
||||
#[inline]
|
||||
fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
|
||||
let pkg_name_val = AttrValue::Escaped(pkg_name);
|
||||
|
||||
|
@ -69,7 +70,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
|
|||
// edition 2018; 2021 will be out in a few months at the time of
|
||||
// writing.
|
||||
[
|
||||
Token::Open(QN_PACKAGE, LSPAN),
|
||||
Token::AttrName(QN_XMLNS, LSPAN),
|
||||
Token::AttrValue(AttrValue::st_uri(URI_LV_RATER), LSPAN),
|
||||
Token::AttrName(QN_XMLNS_PREPROC, LSPAN),
|
||||
|
@ -84,7 +84,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter {
|
|||
Token::AttrValue(pkg_name_val, LSPAN),
|
||||
Token::AttrName(QN_UUROOTPATH, LSPAN),
|
||||
Token::AttrValue(AttrValue::Escaped(relroot), LSPAN),
|
||||
Token::Open(QN_L_DEP, LSPAN),
|
||||
]
|
||||
.into_iter()
|
||||
}
|
||||
|
@ -116,6 +115,7 @@ struct DepListIter<'a, T: IdentObjectData> {
|
|||
}
|
||||
|
||||
impl<'a, T: IdentObjectData> DepListIter<'a, T> {
|
||||
#[inline]
|
||||
fn new(sections: &'a Sections<T>, relroot: SymbolId) -> Self {
|
||||
Self {
|
||||
iter: sections.iter_all(),
|
||||
|
@ -232,6 +232,7 @@ impl<'a, T: IdentObjectData> DepListIter<'a, T> {
|
|||
impl<'a, T: IdentObjectData> Iterator for DepListIter<'a, T> {
|
||||
type Item = Token;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.toks.pop().or_else(|| self.refill_toks())
|
||||
}
|
||||
|
@ -251,20 +252,18 @@ struct MapFromsIter {
|
|||
}
|
||||
|
||||
impl MapFromsIter {
|
||||
#[inline]
|
||||
fn new<'a, T: IdentObjectData>(sections: &'a Sections<T>) -> Self {
|
||||
let mut iter = Self {
|
||||
let iter = Self {
|
||||
iter: sections.iter_map_froms_uniq(),
|
||||
// Most of the time we have a single `from` (4 tokens).
|
||||
toks: ArrayVec::new(),
|
||||
};
|
||||
|
||||
// reverse
|
||||
iter.toks.push(Token::Open(QN_L_MAP_FROM, LSPAN));
|
||||
iter.toks.push(Token::Close(Some(QN_L_DEP), LSPAN));
|
||||
|
||||
iter
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn refill_toks(&mut self) -> Option<Token> {
|
||||
self.iter.next().and_then(|from| {
|
||||
self.toks.push(Token::Close(None, LSPAN));
|
||||
|
@ -281,21 +280,30 @@ impl MapFromsIter {
|
|||
impl Iterator for MapFromsIter {
|
||||
type Item = Token;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.toks.pop().or_else(|| self.refill_toks())
|
||||
}
|
||||
}
|
||||
|
||||
const FOOTER_SIZE: usize = 2;
|
||||
type FooterIter = array::IntoIter<Token, FOOTER_SIZE>;
|
||||
pub struct ElemWrapIter<I: Iterator<Item = Token>>(
|
||||
Chain<Chain<Once<Token>, I>, Once<Token>>,
|
||||
);
|
||||
|
||||
#[inline]
|
||||
fn footer() -> FooterIter {
|
||||
[
|
||||
Token::Close(Some(QN_L_MAP_FROM), LSPAN),
|
||||
Token::Close(Some(QN_PACKAGE), LSPAN),
|
||||
]
|
||||
.into_iter()
|
||||
impl<I: Iterator<Item = Token>> ElemWrapIter<I> {
|
||||
#[inline]
|
||||
fn new(open: Token, inner: I, close: Token) -> Self {
|
||||
Self(once(open).chain(inner).chain(once(close)))
|
||||
}
|
||||
}
|
||||
|
||||
impl<I: Iterator<Item = Token>> Iterator for ElemWrapIter<I> {
|
||||
type Item = Token;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator that lazily lowers `xmle` object files into XIR.
|
||||
|
@ -305,9 +313,11 @@ fn footer() -> FooterIter {
|
|||
/// since this iterator will receive over a million calls on larger
|
||||
/// programs (and hundreds of thousands on smaller).
|
||||
pub struct LowerIter<'a, T: IdentObjectData>(
|
||||
Chain<
|
||||
Chain<Chain<HeaderIter, DepListIter<'a, T>>, MapFromsIter>,
|
||||
FooterIter,
|
||||
ElemWrapIter<
|
||||
Chain<
|
||||
Chain<HeaderIter, ElemWrapIter<DepListIter<'a, T>>>,
|
||||
ElemWrapIter<MapFromsIter>,
|
||||
>,
|
||||
>,
|
||||
);
|
||||
|
||||
|
@ -321,22 +331,33 @@ impl<'a, T: IdentObjectData> Iterator for LowerIter<'a, T> {
|
|||
/// but [`DepListIter`] buffers tokens before emitting them,
|
||||
/// so certain `next` calls have a processing cost while others are
|
||||
/// essentially free.
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn lower_iter<'a, T: IdentObjectData>(
|
||||
sections: &'a Sections<T>,
|
||||
pkg_name: SymbolId,
|
||||
relroot: SymbolId,
|
||||
) -> LowerIter<'a, T> {
|
||||
LowerIter(
|
||||
LowerIter(ElemWrapIter::new(
|
||||
Token::Open(QN_PACKAGE, LSPAN),
|
||||
header(pkg_name, relroot)
|
||||
.chain(DepListIter::new(sections, relroot))
|
||||
.chain(MapFromsIter::new(sections))
|
||||
.chain(footer()),
|
||||
)
|
||||
.chain(ElemWrapIter::new(
|
||||
Token::Open(QN_L_DEP, LSPAN),
|
||||
DepListIter::new(sections, relroot),
|
||||
Token::Close(Some(QN_L_DEP), LSPAN),
|
||||
))
|
||||
.chain(ElemWrapIter::new(
|
||||
Token::Open(QN_L_MAP_FROM, LSPAN),
|
||||
MapFromsIter::new(sections),
|
||||
Token::Close(Some(QN_L_MAP_FROM), LSPAN),
|
||||
)),
|
||||
Token::Close(Some(QN_PACKAGE), LSPAN),
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Reference in New Issue