tamer: obj::xmlo: Begin transition to streaming quick-xml reader

I'm finally back to TAMER development.

The original plan, some time ago, was to gate an entirely new XmloReader
behind a feature flag (wip-xmlo-xir-reader), and go from there, leaving the
existing implementation untouched.  Unfortunately, it became too difficult
and confusing to marry the old aggregate API with the new streaming one.

AsgBuilder is the only system interacting with XmloReader, so I decided (see
previous commits) to simply refactor the existing implementation in
place.  I'm not yet sure whether I'll continue to refactor it progressively
and eliminate the two separate implementations behind the flag, or whether
I'll first bring its API close to the streaming one and then keep the flag
and reimplement it.  But I'll know soon.

DEV-11449
main
Mike Gerwitz 2022-03-10 13:31:24 -05:00
parent 74ddc77adb
commit dcfae8a624
4 changed files with 77 additions and 20 deletions

View File

@ -152,6 +152,19 @@ where
) -> Result<S>;
}
/// State machine used internally by [`AsgBuilder`].
///
/// It exists only for the duration of the transition to the streaming
/// parsers,
///   and will probably be folded into [`AsgBuilderState`] at some point.
#[derive(Clone, Copy, Debug)]
enum AsgBuilderInternalState {
    /// No streaming-specific context is active;
    ///   this is the "old way" of doing things,
    ///   which has not yet been refactored.
    None,

    /// Dependencies of the contained symbol are being processed.
    SymDep(SymbolId),
}
impl<O, S, G> AsgBuilder<O, S> for G
where
O: IdentObjectState<O>,
@ -167,9 +180,12 @@ where
let first = state.is_first();
let found = state.found.get_or_insert(Default::default());
use AsgBuilderInternalState as IS;
let mut istate = IS::None;
while let Some(ev) = xmlo.next() {
match ev? {
XmloEvent::Package(attrs) => {
match (istate, ev?) {
(IS::None, XmloEvent::Package(attrs)) => {
if first {
state.name = attrs.name;
state.relroot = attrs.relroot;
@ -178,13 +194,15 @@ where
elig = attrs.elig;
}
XmloEvent::SymDeps(sym, deps) => {
for dep_sym in deps {
self.add_dep_lookup(sym, dep_sym);
}
(IS::None | IS::SymDep(_), XmloEvent::SymDepStart(sym)) => {
istate = IS::SymDep(sym);
}
XmloEvent::SymDecl(sym, attrs) => {
(IS::SymDep(sym), XmloEvent::Symbol(dep_sym)) => {
self.add_dep_lookup(sym, dep_sym);
}
(IS::None, XmloEvent::SymDecl(sym, attrs)) => {
if let Some(sym_src) = attrs.src {
found.insert(sym_src);
} else {
@ -218,7 +236,10 @@ where
}
}
XmloEvent::Fragment(sym, text) => {
// Fragments follow SymDeps.
(IS::None | IS::SymDep(_), XmloEvent::Fragment(sym, text)) => {
istate = IS::None;
let frag = self
.lookup(sym)
.ok_or(AsgBuilderError::MissingFragmentIdent(sym))?;
@ -231,7 +252,11 @@ where
// may change in the future, in which case this
// responsibility can be delegated to the linker (to produce
// an `Iterator` that stops at EOH).
XmloEvent::Eoh => break,
(IS::None, XmloEvent::Eoh) => break,
(istate, ev) => {
todo!("unexpected state transition: {istate:?} -> {ev:?}")
}
}
}
@ -393,8 +418,11 @@ mod test {
let sym_to1 = "to1".intern();
let sym_to2 = "to2".intern();
let evs =
vec![Ok(XmloEvent::SymDeps(sym_from, vec![sym_to1, sym_to2]))];
let evs = vec![
Ok(XmloEvent::SymDepStart(sym_from)),
Ok(XmloEvent::Symbol(sym_to1)),
Ok(XmloEvent::Symbol(sym_to2)),
];
let _ = sut
.import_xmlo(evs.into_iter(), SutState::new())

View File

@ -153,6 +153,14 @@ pub enum XmloEvent {
/// slice into an internal buffer.
SymDeps(SymbolId, Vec<SymbolId>),
/// Begin adjacency list for a given symbol and interpret subsequent
/// symbols as edges (dependencies).
SymDepStart(SymbolId),
/// A symbol reference whose interpretation is dependent on the current
/// state.
Symbol(SymbolId),
/// Text (compiled code) fragment for a given symbol.
///
/// This contains the compiler output for a given symbol,

View File

@ -62,6 +62,7 @@ use quick_xml::events::BytesStart;
use quick_xml::events::Event as XmlEvent;
#[cfg(not(test))]
use quick_xml::Reader as XmlReader;
use std::collections::VecDeque;
use std::convert::TryInto;
use std::io::BufRead;
use std::iter::Iterator;
@ -108,6 +109,14 @@ where
/// This is known after processing the root `package` element,
/// provided that it's a proper root node.
pkg_name: Option<SymbolId>,
/// Queue of events already processed,
/// to be returned on [`XmloReader::read_event`].
///
/// This exists as an incremental transition toward producing a
/// streaming API and will eventually be eliminated.
/// It does incur a small performance cost.
event_queue: VecDeque<XmloEvent>,
}
impl<B> XmloReader<B>
@ -128,6 +137,7 @@ where
sub_buffer: Vec::new(),
seen_root: false,
pkg_name: None,
event_queue: VecDeque::new(),
}
}
@ -158,6 +168,14 @@ where
self.buffer.clear();
self.sub_buffer.clear();
// Return queued events first before continuing processing.
// This allows us to begin to transition to a streaming API without
// many structural changes,
// but will eventually go away.
if let Some(event) = self.event_queue.pop_front() {
return Ok(event);
}
let event = self.reader.read_event(&mut self.buffer)?;
// Ensure that the first encountered node is something we expect
@ -196,6 +214,7 @@ where
&ele,
&mut self.reader,
&mut self.sub_buffer,
&mut self.event_queue,
),
b"preproc:fragment" => Self::process_fragment(
@ -475,6 +494,7 @@ where
ele: &'a BytesStart<'a>,
reader: &mut XmlReader<B>,
buffer: &mut Vec<u8>,
event_queue: &mut VecDeque<XmloEvent>,
) -> XmloResult<XmloEvent> {
let name = ele
.attributes()
@ -485,14 +505,14 @@ where
Ok(unsafe { attr.value.intern_utf8_unchecked() })
})?;
let mut deps = Vec::new();
event_queue.push_back(XmloEvent::SymDepStart(name));
loop {
match reader.read_event(buffer)? {
XmlEvent::Empty(symref)
if symref.name() == b"preproc:sym-ref" =>
{
deps.push(
event_queue.push_back(XmloEvent::Symbol(
symref
.attributes()
.with_checks(false)
@ -508,7 +528,7 @@ where
})
},
)?,
);
));
}
// We assume that elements are properly nested, so this must
@ -525,7 +545,7 @@ where
}
}
Ok(XmloEvent::SymDeps(name, deps))
Ok(event_queue.pop_front().unwrap())
}
/// Process `preproc:fragment` element.

View File

@ -227,13 +227,14 @@ xmlo_tests! {
)),
}));
let result = sut.read_event()?;
let result = sut.take(3).collect::<Result<Vec<_>, _>>()?;
assert_eq!(
XmloEvent::SymDeps(
"depsym".intern(),
vec!["dep1".intern(), "dep2".intern()]
),
vec![
XmloEvent::SymDepStart("depsym".intern()),
XmloEvent::Symbol("dep1".intern()),
XmloEvent::Symbol("dep2".intern()),
],
result
);
}