tamer: parse::Parser (lower_while_ok): New method

This introduces a WIP lowering operation, abstracting away quite a bit of
the manual wiring work, which is really important to providing an API that
provides the proper level of abstraction for actually understanding what the
system is doing.

This does not yet have tests associated with it---I had started, but it's a
lot of work and boilerplate for something that is going to
evolve.  Generally, I wouldn't use that as an excuse, but the robust type
definitions in play, combined with the tiny amount of actual logic, provide
a pretty high level of confidence.  It's very difficult to wire these types
together and produce something incorrect without doing something obviously
bad.

Similarly, I'm holding off on proper docs too, though I did write some
information here.

More to come, after I actually get to work on the XmloReader.

On a side note: I'm happy to have made progress on this, since this wiring
is something I've been dreading and wondering about since before the Parser
abstraction even existed.

Note also that this makes `Parser::feed_tok` private again---I don't intend
to support push parsers publicly yet, since they're only needed internally.
Maybe for error recovery, but I'll wait to decide until it's actually needed.

DEV-10863
main
Mike Gerwitz 2022-03-23 14:25:04 -04:00
parent b4a7591357
commit fbf786086a
3 changed files with 119 additions and 25 deletions

View File

@ -48,11 +48,11 @@ use crate::{
};
use fxhash::FxBuildHasher;
use petgraph_graphml::GraphMl;
use std::error::Error;
use std::fs;
use std::io::Write;
use std::io::{BufReader, BufWriter};
use std::path::{Path, PathBuf};
use std::{error::Error, iter};
type LinkerAsg = DefaultAsg<IdentObject>;
type LinkerAsgBuilderState = AsgBuilderState<FxBuildHasher>;
@ -201,28 +201,20 @@ fn load_xmlo<'a, P: AsRef<Path>, S: Escaper>(
// TODO: This entire block is a WIP and will be incrementally
// abstracted away.
into_iter_while_ok(XmlXirReader::new(file, escaper), |toks| {
into_iter_while_ok(flat::State::<64>::parse(toks), |xirf| {
let mut xmlo = XmloReader::parse(iter::empty());
let foo = xirf.map(|parsed| match parsed {
Parsed::Incomplete => Ok(Parsed::Incomplete),
Parsed::Object(obj) => {
let item: flat::Object = obj;
xmlo.feed_tok(item)
}
});
into_iter_while_ok(foo, |xmlo_out| {
// TODO: Transitionary---we do not want to filter.
depgraph.import_xmlo(
xmlo_out.filter_map(|parsed| match parsed {
Parsed::Incomplete => None,
Parsed::Object(obj) => Some(Ok(obj)),
}),
state,
)
})
})
flat::State::<64>::parse(toks).lower_while_ok::<XmloReader, _>(
|xirf| {
into_iter_while_ok(xirf, |xmlo_out| {
// TODO: Transitionary---we do not want to filter.
depgraph.import_xmlo(
xmlo_out.filter_map(|parsed| match parsed {
Parsed::Incomplete => None,
Parsed::Object(obj) => Some(Ok(obj)),
}),
state,
)
})
},
)
})????
}
};

View File

@ -39,7 +39,7 @@ pub type XmloResult<T> = Result<T, XmloError>;
#[cfg(feature = "wip-xmlo-xir-reader")]
mod new {
//! Re-implementation of `XmloReader` using a [`TokenStream`].
//! Re-implementation of `XmloReader` as a [`ParseState`].
//!
//! This module will be merged into [`super`] once complete;
//! it exists to make feature-flagging less confusing and error-prone.

View File

@ -21,8 +21,10 @@
//!
//! _TODO: Some proper docs and examples!_
use crate::iter::{TripIter, TrippableIterator};
use crate::span::Span;
use std::fmt::Debug;
use std::iter::{self, Empty};
use std::mem::take;
use std::{error::Error, fmt::Display};
@ -262,7 +264,12 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// [`Iterator`] implementation.
/// The pull system also uses this method to provide data to the
/// parser.
pub fn feed_tok(&mut self, tok: S::Token) -> ParsedResult<S> {
///
/// This method is intentionally private,
/// since push parsers are currently supported only internally.
/// The only thing preventing this being public is formalization and a
/// commitment to maintain it.
fn feed_tok(&mut self, tok: S::Token) -> ParsedResult<S> {
// Store the most recently encountered Span for error
// reporting in case we encounter an EOF.
self.last_span = Some(tok.span());
@ -282,6 +289,101 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
Err(e) => Err(e.into()),
}
}
/// Pipe the objects emitted by this [`Parser`] into a second parser
///   described by the [`ParseState`] `LS`,
///   thereby lowering one IR into another.
///
/// `Self` consumes tokens of type `S::Token` and emits objects of type
///   `S::Object`.
/// For any [`ParseState`] `LS` satisfying `LS::Token == S::Object`
///   (that is,
///     whatever this parser emits is exactly what `LS` consumes),
///   this method joins the two parsers and hands the closure `f` a single
///   iterator yielding `ParsedResult<LS>`.
///
/// From within `f`,
///   the [`LowerIter`] behaves as this pipeline:
///
/// ```text
/// (S::Token) -> (S::Object == LS::Token) -> (output of LS)
/// ```
///
/// The [`LowerIter`] exists only for the duration of `f`.
/// Its source is the [`TripIter`] supplied by `Self::while_ok`,
///   which has already removed the outer [`Result`] from each of
///   `Self`'s [`ParsedResult`]s.
/// Its sink is a push [`Parser`] for `LS`,
///   constructed here from an empty token stream so that tokens are
///   pushed into it rather than pulled by it;
///     callers never construct that parser themselves.
///
/// The value returned by `f` is passed back through the [`Result`]
///   produced by `Self::while_ok`.
///
/// _TODO_: There's no way to access the inner parser for error recovery
///   after tripping the [`TripIter`].
/// Consequently,
///   this API (likely the return type) will change.
#[inline]
pub fn lower_while_ok<LS, U>(
    &mut self,
    f: impl FnOnce(&mut LowerIter<S, I, LS>) -> U,
) -> Result<U, ParseError<S::Token, S::Error>>
where
    LS: ParseState<Token = S::Object>,
    <S as ParseState>::Object: Token,
{
    self.while_ok(|toks| {
        // TODO: This parser is not accessible after error recovery!
        let mut lowering = LowerIter {
            // Push parser: it has no token stream of its own to pull from.
            lower: LS::parse(iter::empty()),
            toks,
        };

        f(&mut lowering)
    })
}
}
/// Iterator joining two [`Parser`]s for IR lowering:
///   each object emitted by the upstream parser becomes a token fed to the
///   downstream one.
///
/// Instances are created by [`Parser::lower_while_ok`].
pub struct LowerIter<'a, 'b, S, I, LS>
where
    S: ParseState,
    S::Object: Token,
    I: TokenStream<S::Token>,
    LS: ParseState<Token = S::Object>,
{
    /// Downstream push [`Parser`] that receives each upstream object as a
    ///   token.
    lower: Parser<LS, Empty<LS::Token>>,

    /// Upstream [`Parser`] output,
    ///   wrapped in a [`TripIter`] that has already removed the outer
    ///   [`Result`] from each item.
    toks: &'a mut TripIter<
        'b,
        Parser<S, I>,
        Parsed<S::Object>,
        ParseError<S::Token, S::Error>,
    >,
}
impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
where
    S: ParseState,
    S::Object: Token,
    I: TokenStream<S::Token>,
    LS: ParseState<Token = S::Object>,
{
    type Item = ParsedResult<LS>;

    /// Advance the upstream [`Parser`] by one item and forward the result
    ///   to the lowering parser.
    ///
    /// - Upstream exhausted: yields [`None`].
    /// - Upstream [`Parsed::Incomplete`]: yields `Ok(Parsed::Incomplete)`
    ///     without touching the lowering parser.
    /// - Upstream [`Parsed::Object`]: the object is pushed into the
    ///     lowering parser as a token and its [`ParsedResult`] is yielded.
    ///
    /// NOTE(review): when upstream is exhausted,
    ///   nothing here tells the lowering parser that input has ended;
    ///     confirm whether that is intended.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.toks.next().map(|upstream| match upstream {
            Parsed::Incomplete => Ok(Parsed::Incomplete),
            Parsed::Object(tok) => self.lower.feed_tok(tok),
        })
    }
}
impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {