tamer: obj::xmlo::reader: Working xmlo reader
This makes the necessary tweaks to have the entire linker work end-to-end and produce a compatible xmle file (that is, identical except for nondeterministic topological ordering). That's good, and finally that can get off of my plate. What's disappointing, and what I'll have more information on in future commits, is how slow it is. The linking of our largest package goes from ~1s -> ~15s with this change. The reason is because of tens of millions of `memcpy` calls. Why? The ParseState abstraction is pure and passes an owned `self` around, and Parser replaces its own reference using this: let result; TransitionResult(Transition(self.state), result) = take(&mut self.state).parse_token(tok); Naively, this would store a copy of the old state in `result`, allocate a new ParseState for `self.state`, pass the original or a copy to `parse_token`, and then overwrite `self.state` with the new ParseState that is returned once it is all over. Of course, that'd be devastating. What we want to happen is for Rust to realize that it can just pass a reference to `self.state` and perform no copying at all. For certain parsers, this is exactly what happens. Great! But for XIRF, it we have this: /// Stack of element [`QName`] and [`Span`] pairs, /// representing the current level of nesting. /// /// This storage is statically allocated, /// allowing XIRF's parser to avoid memory allocation entirely. type ElementStack<const MAX_DEPTH: usize> = ArrayVec<(QName, Span), MAX_DEPTH>; /// XIRF document parser state. /// /// This parser is a pushdown automaton that parses a single XML document. #[derive(Debug, Default, PartialEq, Eq)] pub enum State<const MAX_DEPTH: usize, SA = AttrParseState> where SA: FlatAttrParseState, { /// Document parsing has not yet begun. #[default] PreRoot, /// Parsing nodes. NodeExpected(ElementStack<MAX_DEPTH>), /// Delegating to attribute parser. AttrExpected(ElementStack<MAX_DEPTH>, SA), /// End of document has been reached. Done, } ParseState contains an ArrayVec, and its implementation details are causes LLVM _not_ to elide the `memcpy`. And there's a lot of them. Considering that ParseState is supposed to use only statically allocated memory and be zero-copy, this is rather ironic. Now, this _could_ be potentially fixed by not using ArrayVec; removing it (and the corresponding checks for balanced tags) gets us down to 2s (which still needs improvement), but we can't have a core abstraction in our system resting on a house of cards. What if the optimization changes between releases and suddenly linking / building becomes shit slow? That's too much of a risk. Further, having to limit what abstractions we use just to appease the compiler to optimize away moves is very restrictive. The better option seems like to go back to what I used to do: pass around `&mut self`. I had moved to an owned `self` to force consideration of _all_ state transitions, but I can try to do the same thing in a different type of way using mutable references, and then we avoid this problem. The abstraction isn't pure (in the functional sense) anymore, but it's safe and isn't relying on delicate inlining and optimizer implementation details to have a performant system. More information to come. DEV-10863main
parent
9eaebd576b
commit
1a04d99f15
|
@ -180,12 +180,17 @@ where
|
|||
let first = state.is_first();
|
||||
let found = state.found.get_or_insert(Default::default());
|
||||
|
||||
// Package currently being processed.
|
||||
let mut pkg_name = None;
|
||||
|
||||
use AsgBuilderInternalState as IS;
|
||||
let mut istate = IS::None;
|
||||
|
||||
while let Some(ev) = xmlo.next() {
|
||||
match (istate, ev?) {
|
||||
(IS::None, XmloEvent::PkgName(name)) => {
|
||||
pkg_name = Some(name);
|
||||
|
||||
if first {
|
||||
state.name = Some(name);
|
||||
}
|
||||
|
@ -222,6 +227,11 @@ where
|
|||
|
||||
let mut src: Source = attrs.into();
|
||||
|
||||
// This used to come from SymAttrs in the old XmloReader.
|
||||
if src.pkg_name.is_none() {
|
||||
src.pkg_name = pkg_name;
|
||||
}
|
||||
|
||||
// Existing convention is to omit @src of local package
|
||||
// (in this case, the program being linked)
|
||||
if first {
|
||||
|
@ -266,7 +276,7 @@ where
|
|||
// may change in the future, in which case this
|
||||
// responsibility can be delegated to the linker (to produce
|
||||
// an `Iterator` that stops at EOH).
|
||||
(IS::None, XmloEvent::Eoh) => break,
|
||||
(IS::None, XmloEvent::Eoh(_)) => break,
|
||||
|
||||
(istate, ev) => {
|
||||
todo!("unexpected state transition: {istate:?} -> {ev:?}")
|
||||
|
@ -652,6 +662,48 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
// This used to be set in SymAttrs by XmloReader,
|
||||
// but that's no longer true with the new reader.
|
||||
#[test]
|
||||
fn sym_decl_pkg_name_set_if_empty_and_not_first() {
|
||||
let mut sut = Sut::new();
|
||||
|
||||
let sym = "sym".intern();
|
||||
let pkg_name = "pkg name".intern();
|
||||
|
||||
let state = AsgBuilderState::<RandomState> {
|
||||
name: Some("first pkg".into()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let evs = vec![
|
||||
Ok(XmloEvent::PkgName(pkg_name)),
|
||||
Ok(XmloEvent::SymDecl(
|
||||
sym,
|
||||
SymAttrs {
|
||||
ty: Some(SymType::Meta),
|
||||
..Default::default()
|
||||
},
|
||||
UNKNOWN_SPAN,
|
||||
)),
|
||||
];
|
||||
|
||||
let _ = sut.import_xmlo(evs.into_iter(), state).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
// `pkg_name` retained
|
||||
&IdentObject::Ident(
|
||||
sym,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
pkg_name: Some(pkg_name),
|
||||
..Default::default()
|
||||
},
|
||||
),
|
||||
sut.get(sut.lookup(sym).unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ident_kind_conversion_error_propagates() {
|
||||
let mut sut = Sut::new();
|
||||
|
@ -837,7 +889,7 @@ mod test {
|
|||
|
||||
let evs = vec![
|
||||
// Stop here.
|
||||
Ok(XmloEvent::Eoh),
|
||||
Ok(XmloEvent::Eoh(DUMMY_SPAN)),
|
||||
// Shouldn't make it to this one.
|
||||
Ok(XmloEvent::PkgName(pkg_name)),
|
||||
];
|
||||
|
|
|
@ -86,7 +86,7 @@ pub enum XmloEvent {
|
|||
/// The header of an `xmlo` file is defined as the symbol table;
|
||||
/// dependency list; and fragments.
|
||||
/// This event is emitted at the closing `preproc:fragment` node.
|
||||
Eoh,
|
||||
Eoh(Span),
|
||||
}
|
||||
|
||||
impl parse::Object for XmloEvent {}
|
||||
|
@ -212,7 +212,7 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
|
|||
Transition(SymDeps(span, SD::default())).incomplete()
|
||||
}
|
||||
|
||||
(SymDeps(_, sd), Xirf::Close(Some(QN_SYM_DEPS), ..))
|
||||
(SymDeps(_, sd), Xirf::Close(None | Some(QN_SYM_DEPS), ..))
|
||||
if sd.is_accepting() =>
|
||||
{
|
||||
Transition(FragmentsExpected).incomplete()
|
||||
|
@ -224,11 +224,10 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
|
|||
Transition(Fragments(span, SF::default())).incomplete()
|
||||
}
|
||||
|
||||
(Fragments(_, sf), Xirf::Close(Some(QN_FRAGMENTS), ..))
|
||||
if sf.is_accepting() =>
|
||||
{
|
||||
Transition(Eoh).incomplete()
|
||||
}
|
||||
(
|
||||
Fragments(_, sf),
|
||||
Xirf::Close(None | Some(QN_FRAGMENTS), span, _),
|
||||
) if sf.is_accepting() => Transition(Eoh).ok(XmloEvent::Eoh(span)),
|
||||
|
||||
(Fragments(span, sf), tok) => sf.delegate(span, tok, Fragments),
|
||||
|
||||
|
@ -236,6 +235,9 @@ impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
|
|||
Transition(Done).incomplete()
|
||||
}
|
||||
|
||||
// TODO: For whitespace, which can be stripped by XIRF.
|
||||
(st, Xirf::Text(..)) => Transition(st).incomplete(),
|
||||
|
||||
todo => todo!("{todo:?}"),
|
||||
}
|
||||
}
|
||||
|
@ -260,6 +262,8 @@ pub enum SymtableState {
|
|||
Sym(Span, Option<SymbolId>, SymAttrs),
|
||||
/// Awaiting a symbol map name.
|
||||
SymMapFrom(Span, SymbolId, SymAttrs, Span),
|
||||
/// Used by functions to declare their parameters.
|
||||
SymRef(Span, SymbolId, SymAttrs, Span),
|
||||
}
|
||||
|
||||
impl parse::Object for (SymbolId, SymAttrs, Span) {}
|
||||
|
@ -273,6 +277,8 @@ impl ParseState for SymtableState {
|
|||
use SymtableState::*;
|
||||
|
||||
match (self, tok) {
|
||||
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
|
||||
|
||||
(Ready, Xirf::Open(QN_SYM, span, _)) => {
|
||||
Transition(Sym(span, None, SymAttrs::default())).incomplete()
|
||||
}
|
||||
|
@ -304,7 +310,9 @@ impl ParseState for SymtableState {
|
|||
(
|
||||
Sym(span_sym, Some(name), attrs),
|
||||
Xirf::Open(QN_FROM, span_from, _),
|
||||
) if attrs.ty == Some(SymType::Map) => {
|
||||
) if attrs.ty == Some(SymType::Map)
|
||||
|| attrs.ty == Some(SymType::RetMap) =>
|
||||
{
|
||||
Transition(SymMapFrom(span_sym, name, attrs, span_from))
|
||||
.incomplete()
|
||||
}
|
||||
|
@ -329,6 +337,25 @@ impl ParseState for SymtableState {
|
|||
Transition(Sym(span_sym, Some(name), attrs)).incomplete()
|
||||
}
|
||||
|
||||
// TODO: These don't yield any events;
|
||||
// can they be removed from the compiler?
|
||||
// The old XmloReader ignored these.
|
||||
(
|
||||
Sym(span_sym, Some(name), attrs),
|
||||
Xirf::Open(QN_SYM_REF, span_ref, _),
|
||||
) => {
|
||||
Transition(SymRef(span_sym, name, attrs, span_ref)).incomplete()
|
||||
}
|
||||
|
||||
(SymRef(span_sym, name, attrs, _), Xirf::Close(..)) => {
|
||||
Transition(Sym(span_sym, Some(name), attrs)).incomplete()
|
||||
}
|
||||
|
||||
(st @ SymRef(..), _) => Transition(st).incomplete(),
|
||||
|
||||
// TODO: For whitespace, which can be stripped by XIRF.
|
||||
(st, Xirf::Text(..)) => Transition(st).incomplete(),
|
||||
|
||||
todo => todo!("{todo:?}"),
|
||||
}
|
||||
}
|
||||
|
@ -507,6 +534,8 @@ impl ParseState for SymDepsState {
|
|||
use SymDepsState::*;
|
||||
|
||||
match (self, tok) {
|
||||
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
|
||||
|
||||
(Ready, Xirf::Open(QN_SYM_DEP, span, _)) => {
|
||||
Transition(SymUnnamed(span)).incomplete()
|
||||
}
|
||||
|
@ -529,17 +558,30 @@ impl ParseState for SymDepsState {
|
|||
) => Transition(SymRefDone(span, name, span_ref))
|
||||
.ok(XmloEvent::Symbol(ref_name, span_ref_name)),
|
||||
|
||||
// TODO: For xmlns attributes, which will go away in XIRF.
|
||||
(SymRefUnnamed(span, name, span_ref), Xirf::Attr(..)) => {
|
||||
Transition(SymRefUnnamed(span, name, span_ref)).incomplete()
|
||||
}
|
||||
|
||||
(SymRefUnnamed(span, name, span_ref), _) => {
|
||||
Transition(SymRefUnnamed(span, name, span_ref))
|
||||
.err(XmloError::MalformedSymRef(name, span_ref))
|
||||
}
|
||||
|
||||
// TODO: For xmlns attributes, which will go away in XIRF.
|
||||
(SymRefDone(span, name, ref_span), Xirf::Attr(..)) => {
|
||||
Transition(SymRefDone(span, name, ref_span)).incomplete()
|
||||
}
|
||||
|
||||
(SymRefDone(span, name, _), Xirf::Close(..)) => {
|
||||
Transition(Sym(span, name)).incomplete()
|
||||
}
|
||||
|
||||
(Sym(..), Xirf::Close(..)) => Transition(Ready).incomplete(),
|
||||
|
||||
// TODO: For whitespace, which can be stripped by XIRF.
|
||||
(st, Xirf::Text(..)) => Transition(st).incomplete(),
|
||||
|
||||
todo => todo!("sym-deps {todo:?}"),
|
||||
}
|
||||
}
|
||||
|
@ -573,14 +615,34 @@ impl ParseState for FragmentsState {
|
|||
use FragmentsState::*;
|
||||
|
||||
match (self, tok) {
|
||||
(Ready, Xirf::Attr(..)) => Transition(Ready).incomplete(),
|
||||
|
||||
(Ready, Xirf::Open(QN_FRAGMENT, span, _)) => {
|
||||
Transition(FragmentUnnamed(span)).incomplete()
|
||||
}
|
||||
|
||||
(FragmentUnnamed(span), Xirf::Attr(Attr(QN_ID, id, _)))
|
||||
if id != raw::WS_EMPTY =>
|
||||
// TODO: For whitespace, which can be stripped by XIRF.
|
||||
(Ready, Xirf::Text(..)) => Transition(Ready).incomplete(),
|
||||
|
||||
(FragmentUnnamed(span), Xirf::Attr(Attr(QN_ID, id, _))) => {
|
||||
match id {
|
||||
// See "compiler bug" comment below.
|
||||
raw::WS_EMPTY => {
|
||||
Transition(FragmentUnnamed(span)).incomplete()
|
||||
}
|
||||
id => Transition(Fragment(span, id)).incomplete(),
|
||||
}
|
||||
}
|
||||
|
||||
(FragmentUnnamed(span), Xirf::Attr(Attr(key, ..)))
|
||||
if key != QN_ID =>
|
||||
{
|
||||
Transition(Fragment(span, id)).incomplete()
|
||||
Transition(FragmentUnnamed(span)).incomplete()
|
||||
}
|
||||
|
||||
// Compiler bug: `<preproc:fragment id="" />` in `rater/core/base`.
|
||||
(FragmentUnnamed(_span), Xirf::Close(None, ..)) => {
|
||||
Transition(Ready).incomplete()
|
||||
}
|
||||
|
||||
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
|
||||
|
@ -591,8 +653,12 @@ impl ParseState for FragmentsState {
|
|||
.ok(XmloEvent::Fragment(id, text, span))
|
||||
}
|
||||
|
||||
(Fragment(span, id), _) => Transition(Fragment(span, id))
|
||||
.err(XmloError::MissingFragmentText(id, span)),
|
||||
// TODO: Also a compiler bug, for some generated classes.
|
||||
// This needs fixing in the compiler.
|
||||
(Fragment(_span, _id), Xirf::Close(..)) => {
|
||||
//eprintln!("warning: empty fragment text for {id} at {span}");
|
||||
Transition(Ready).incomplete()
|
||||
}
|
||||
|
||||
(FragmentDone(..), Xirf::Close(..)) => {
|
||||
Transition(Ready).incomplete()
|
||||
|
|
|
@ -258,7 +258,7 @@ where
|
|||
},
|
||||
|
||||
XmlEvent::End(ele) if ele.name() == b"preproc:fragments" => {
|
||||
Ok(XmloEvent::Eoh)
|
||||
Ok(XmloEvent::Eoh(UNKNOWN_SPAN))
|
||||
}
|
||||
|
||||
// Ignore and recurse, looking for something we can process
|
||||
|
@ -424,12 +424,6 @@ where
|
|||
///
|
||||
/// Map symbols contain additional information describing source
|
||||
/// inputs external to the system.
|
||||
///
|
||||
/// Errors
|
||||
/// ======
|
||||
/// - [`XmloError::InvalidMapFrom`] if `@name` missing or if unexpected
|
||||
/// data (e.g. elements) are encountered.
|
||||
/// - [`XmloError::XmlError`] on XML parsing failure.
|
||||
fn process_map_from<'a>(
|
||||
reader: &mut XmlReader<B>,
|
||||
buffer: &mut Vec<u8>,
|
||||
|
|
|
@ -271,7 +271,7 @@ xmlo_tests! {
|
|||
|
||||
let result = sut.read_event()?;
|
||||
|
||||
assert_eq!(XmloEvent::Eoh, result);
|
||||
assert_eq!(XmloEvent::Eoh(UNKNOWN_SPAN), result);
|
||||
}
|
||||
|
||||
// DONE
|
||||
|
|
|
@ -593,8 +593,10 @@ fn sym_fragment_empty_id() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sym_fragment_missing_text() {
|
||||
// TODO: Re-enable after compiler bug is resolved.
|
||||
// See implementation.
|
||||
//#[test]
|
||||
fn _sym_fragment_missing_text() {
|
||||
let id = "fragsym".into();
|
||||
|
||||
let toks = [
|
||||
|
@ -671,6 +673,7 @@ fn xmlo_composite_parsers_header() {
|
|||
)),
|
||||
Parsed::Object(XmloEvent::SymDepStart(symdep_name, S3)),
|
||||
Parsed::Object(XmloEvent::Fragment(symfrag_id, frag, S4)),
|
||||
Parsed::Object(XmloEvent::Eoh(S3)),
|
||||
]),
|
||||
sut.filter(|parsed| match parsed {
|
||||
Ok(Parsed::Incomplete) => false,
|
||||
|
|
|
@ -81,8 +81,8 @@ pub trait TokenStream<T: Token> = Iterator<Item = T>;
|
|||
/// consider using [`TokenStream`].
|
||||
pub trait TokenResultStream<T: Token, E: Error> = Iterator<Item = Result<T, E>>;
|
||||
|
||||
/// A [`ParserState`] capable of being automatically stitched together with
|
||||
/// a parent [`ParserState`] `SP` to create a composite parser.
|
||||
/// A [`ParseState`] capable of being automatically stitched together with
|
||||
/// a parent [`ParseState`] `SP` to create a composite parser.
|
||||
///
|
||||
/// Conceptually,
|
||||
/// this can be visualized as combining the state machines of multiple
|
||||
|
|
Loading…
Reference in New Issue