From 75d2ecf4dd525dde4e5bf37065c3c37735a8f1b0 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Thu, 7 Oct 2021 16:48:58 -0400 Subject: [PATCH] tamer: obj::xmle::xir: Consideration of simplified iterators The previous iterators had to be used in a certain order because they mixed concerns, out of concern for performance. This attempts to chain even more iterators to see how it may perform. To be clear: this will be cleaned up. This was just an experiment. Here were profiles on the average of 50 runs of linking our largest program: Baseline, pre-XIR (with fragments removed from output) 0.8082 XIR writer, pre-ElemWrap, no #[inline] 0.7844s XIR writer, ElemWrap, no #[inline] 0.7918s XIR writer, ElemWrap, inlines in obj::xmle::xir 0.7892s XIR writer, ElemWrap, inlines in obj::xmle::xir and ir::asg::section 0.7858s XIR writer, ElemWrap, inline in only ir::asg::section 0.781s Pre-ElemWrap, inlines in ir::asg::section 0.7772s These profiles are difficult, because they hit the filesystem so much. I write to /dev/null, but it reads 100s of xmlo files from disk. It's clear that the impact is fairly modest and within a margin of error; as such, I will continue down the path of writing code that's easier to grok and maintain, since not doing so would be a micro-optimization relative to the concerns of the rest of the system at this point. But the purpose of all of this work was to determine whether an iterator-based XIR would be viable. It seems to be competitive. I'll finish up the writer reimplementation and move on. --- tamer/src/ir/asg/section.rs | 16 ++++++++ tamer/src/obj/xmle/xir.rs | 73 ++++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/tamer/src/ir/asg/section.rs b/tamer/src/ir/asg/section.rs index fc2b92e8..7c9331fe 100644 --- a/tamer/src/ir/asg/section.rs +++ b/tamer/src/ir/asg/section.rs @@ -54,32 +54,38 @@ impl<'a, T> Section<'a, T> { } /// The length of the `Section` + #[inline] pub fn len(&self) -> usize { self.head.len() + self.body.len() + self.tail.len() } /// Check if the `Section` is empty + #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Push an `IdentObject` into a `Section`'s head + #[inline] pub fn push_head(&mut self, obj: &'a T) { self.head.push(obj) } /// Push an `IdentObject` into a `Section`'s body + #[inline] pub fn push_body(&mut self, obj: &'a T) { self.body.push(obj) } /// Push an `IdentObject` into a `Section`'s tail + #[inline] pub fn push_tail(&mut self, obj: &'a T) { self.tail.push(obj) } /// Construct a new iterator visiting each head, body, and tail object /// in order. + #[inline] pub fn iter(&self) -> SectionIter { SectionIter( self.head @@ -102,6 +108,7 @@ pub struct SectionIter<'a, T>( impl<'a, T> Iterator for SectionIter<'a, T> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option { self.0.next().map(|x| *x) } @@ -127,6 +134,7 @@ pub struct Sections<'a, T> { impl<'a, T: IdentObjectData> Sections<'a, T> { /// New collection of empty sections. + #[inline] pub fn new() -> Self { Self { map: Section::new(), @@ -151,6 +159,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> { /// At the time of writing, /// they are chained in the same order in which they are defined /// on the [`Sections`] struct. + #[inline] pub fn iter_all(&self) -> SectionsIter { SectionsIter(SectionsIterType::All( self.map @@ -176,6 +185,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> { /// but you should not rely on the order that the sections themselves /// appear in; /// they may change or be combined in the future. + #[inline] pub fn iter_static(&self) -> SectionsIter { SectionsIter(SectionsIterType::Static( self.meta @@ -189,6 +199,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> { } /// Construct an iterator over the map section. + #[inline] pub fn iter_map(&self) -> SectionsIter { SectionsIter(SectionsIterType::Single(self.map.iter())) } @@ -197,6 +208,7 @@ impl<'a, T: IdentObjectData> Sections<'a, T> { /// /// Multiple mappings may reference the same source field, /// which would produce duplicate values if they are not filtered. + #[inline] pub fn iter_map_froms_uniq(&self) -> hash_set::IntoIter { self.iter_map() .filter_map(|ident| { @@ -207,11 +219,13 @@ impl<'a, T: IdentObjectData> Sections<'a, T> { } /// Construct an iterator over the return map section. + #[inline] pub fn iter_retmap(&self) -> SectionsIter { SectionsIter(SectionsIterType::Single(self.retmap.iter())) } /// Construct an iterator over the executable `rater` section. + #[inline] pub fn iter_exec(&self) -> SectionsIter { SectionsIter(SectionsIterType::Single(self.rater.iter())) } @@ -246,6 +260,7 @@ pub struct SectionsIter<'a, T>(SectionsIterType<'a, T>); impl<'a, T> Iterator for SectionsIter<'a, T> { type Item = &'a T; + #[inline] fn next(&mut self) -> Option { match &mut self.0 { SectionsIterType::All(inner) => inner.next(), @@ -254,6 +269,7 @@ impl<'a, T> Iterator for SectionsIter<'a, T> { } } + #[inline] fn size_hint(&self) -> (usize, Option) { match &self.0 { SectionsIterType::All(inner) => inner.size_hint(), diff --git a/tamer/src/obj/xmle/xir.rs b/tamer/src/obj/xmle/xir.rs index e83e1b7e..82fac247 100644 --- a/tamer/src/obj/xmle/xir.rs +++ b/tamer/src/obj/xmle/xir.rs @@ -30,7 +30,7 @@ use crate::{ sym::{st::*, SymbolId}, }; use arrayvec::ArrayVec; -use std::iter::Chain; +use std::iter::{once, Chain, Once}; use std::{array, collections::hash_set}; qname_const! { @@ -56,11 +56,12 @@ qname_const! { QN_L_FROM: L_L:L_FROM, } -const HEADER_SIZE: usize = 16; +const HEADER_SIZE: usize = 14; type HeaderIter = array::IntoIter; /// Beginning [`Token`]s representing the root node of an `xmle` document /// and its immediate child. +#[inline] fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter { let pkg_name_val = AttrValue::Escaped(pkg_name); @@ -69,7 +70,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter { // edition 2018; 2021 will be out in a few months at the time of // writing. [ - Token::Open(QN_PACKAGE, LSPAN), Token::AttrName(QN_XMLNS, LSPAN), Token::AttrValue(AttrValue::st_uri(URI_LV_RATER), LSPAN), Token::AttrName(QN_XMLNS_PREPROC, LSPAN), @@ -84,7 +84,6 @@ fn header(pkg_name: SymbolId, relroot: SymbolId) -> HeaderIter { Token::AttrValue(pkg_name_val, LSPAN), Token::AttrName(QN_UUROOTPATH, LSPAN), Token::AttrValue(AttrValue::Escaped(relroot), LSPAN), - Token::Open(QN_L_DEP, LSPAN), ] .into_iter() } @@ -116,6 +115,7 @@ struct DepListIter<'a, T: IdentObjectData> { } impl<'a, T: IdentObjectData> DepListIter<'a, T> { + #[inline] fn new(sections: &'a Sections, relroot: SymbolId) -> Self { Self { iter: sections.iter_all(), @@ -232,6 +232,7 @@ impl<'a, T: IdentObjectData> DepListIter<'a, T> { impl<'a, T: IdentObjectData> Iterator for DepListIter<'a, T> { type Item = Token; + #[inline] fn next(&mut self) -> Option { self.toks.pop().or_else(|| self.refill_toks()) } @@ -251,20 +252,18 @@ struct MapFromsIter { } impl MapFromsIter { + #[inline] fn new<'a, T: IdentObjectData>(sections: &'a Sections) -> Self { - let mut iter = Self { + let iter = Self { iter: sections.iter_map_froms_uniq(), // Most of the time we have a single `from` (4 tokens). toks: ArrayVec::new(), }; - // reverse - iter.toks.push(Token::Open(QN_L_MAP_FROM, LSPAN)); - iter.toks.push(Token::Close(Some(QN_L_DEP), LSPAN)); - iter } + #[inline] fn refill_toks(&mut self) -> Option { self.iter.next().and_then(|from| { self.toks.push(Token::Close(None, LSPAN)); @@ -281,21 +280,30 @@ impl MapFromsIter { impl Iterator for MapFromsIter { type Item = Token; + #[inline] fn next(&mut self) -> Option { self.toks.pop().or_else(|| self.refill_toks()) } } -const FOOTER_SIZE: usize = 2; -type FooterIter = array::IntoIter; +pub struct ElemWrapIter>( + Chain, I>, Once>, +); -#[inline] -fn footer() -> FooterIter { - [ - Token::Close(Some(QN_L_MAP_FROM), LSPAN), - Token::Close(Some(QN_PACKAGE), LSPAN), - ] - .into_iter() +impl> ElemWrapIter { + #[inline] + fn new(open: Token, inner: I, close: Token) -> Self { + Self(once(open).chain(inner).chain(once(close))) + } +} + +impl> Iterator for ElemWrapIter { + type Item = Token; + + #[inline] + fn next(&mut self) -> Option { + self.0.next() + } } /// Iterator that lazily lowers `xmle` object files into XIR. @@ -305,9 +313,11 @@ fn footer() -> FooterIter { /// since this iterator will receive over a million calls on larger /// programs (and hundreds of thousands on smaller). pub struct LowerIter<'a, T: IdentObjectData>( - Chain< - Chain>, MapFromsIter>, - FooterIter, + ElemWrapIter< + Chain< + Chain>>, + ElemWrapIter, + >, >, ); @@ -321,22 +331,33 @@ impl<'a, T: IdentObjectData> Iterator for LowerIter<'a, T> { /// but [`DepListIter`] buffers tokens before emitting them, /// so certain `next` calls have a processing cost while others are /// essentially free. + #[inline] fn next(&mut self) -> Option { self.0.next() } } +#[inline] pub fn lower_iter<'a, T: IdentObjectData>( sections: &'a Sections, pkg_name: SymbolId, relroot: SymbolId, ) -> LowerIter<'a, T> { - LowerIter( + LowerIter(ElemWrapIter::new( + Token::Open(QN_PACKAGE, LSPAN), header(pkg_name, relroot) - .chain(DepListIter::new(sections, relroot)) - .chain(MapFromsIter::new(sections)) - .chain(footer()), - ) + .chain(ElemWrapIter::new( + Token::Open(QN_L_DEP, LSPAN), + DepListIter::new(sections, relroot), + Token::Close(Some(QN_L_DEP), LSPAN), + )) + .chain(ElemWrapIter::new( + Token::Open(QN_L_MAP_FROM, LSPAN), + MapFromsIter::new(sections), + Token::Close(Some(QN_L_MAP_FROM), LSPAN), + )), + Token::Close(Some(QN_PACKAGE), LSPAN), + )) } #[cfg(test)]