tamer: pipeline: Initial concept for declarative pipeline definition
This has been the ultimate goal for the pipeline for some time---the ability to declaratively define the lowering pipeline in a way that is clear, concise, and is correct by definition. The reason that the lowering pipeline required so much boilerplate was because of the robust types involved, which ensures that everything in the pipeline is compatible with one-another---it's not possible to construct a pipeline that will not work. Of course, there is nuance involved in some cases---I didn't want to include the `until` clause, which makes it fail the "obviously correct" criterion, but that can be improved over time. This only abstracts away `load_xmlo` and `parse_package_xml`; next I'll have to evolve the abstraction to support lifetimes for `lower_xmli`'s `AsgTreeToXirf`. That pipeline also ends with a custom sink that really ought to become its own parser, but I don't want to jump down that rabbit hole right now, so we may just support custom sinks for now with the intent of removing it in the future. This has been a long time coming. The ultimate goal is that you should be able to look at the parser pipelines to have a clear, high-level overview of how everything fits together. I'm not generating documentation yet, but that'll help serve as a guide as well. DEV-13162main
parent
f34f2644e9
commit
9c6b00a124
|
@ -145,9 +145,9 @@ fn compile<R: Reporter>(
|
|||
|
||||
// TODO: Determine a good default capacity once we have this populated
|
||||
// and can come up with some heuristics.
|
||||
let air_ctx = parse_package_xml(
|
||||
let (air_ctx,) = parse_package_xml(
|
||||
src,
|
||||
DefaultAsg::with_capacity(1024, 2048).into(),
|
||||
DefaultAsg::with_capacity(1024, 2048),
|
||||
report_err,
|
||||
)?;
|
||||
|
||||
|
|
|
@ -106,8 +106,8 @@ fn load_xmlo<P: AsRef<Path>, S: Escaper>(
|
|||
|
||||
let src = &mut lowerable(XmlXirReader::new(file, escaper, ctx));
|
||||
|
||||
let (mut air_ctx, mut state) =
|
||||
pipeline::load_xmlo::<TameldError, _>(src, air_ctx, state, identity)?;
|
||||
let (mut state, mut air_ctx) =
|
||||
pipeline::load_xmlo::<TameldError, _>(src, state, air_ctx, identity)?;
|
||||
|
||||
let mut dir = path;
|
||||
dir.pop();
|
||||
|
|
|
@ -43,18 +43,14 @@
|
|||
use std::convert::Infallible;
|
||||
|
||||
use crate::{
|
||||
asg::{
|
||||
air::{AirAggregate, AirAggregateCtx},
|
||||
visit::TreeWalkRel,
|
||||
Asg, AsgTreeToXirf,
|
||||
},
|
||||
asg::{air::AirAggregate, visit::TreeWalkRel, Asg, AsgTreeToXirf},
|
||||
diagnose::Diagnostic,
|
||||
iter::TrippableIterator,
|
||||
nir::{InterpolateNir, NirToAir, TplShortDesugar, XirfToNir},
|
||||
obj::xmlo::{XmloAirContext, XmloReader, XmloToAir, XmloToken},
|
||||
obj::xmlo::{XmloReader, XmloToAir, XmloToken},
|
||||
parse::{
|
||||
terminal, FinalizeError, FromParseError, Lower, LowerSource,
|
||||
ParseError, Parsed, ParsedObject, UnknownToken,
|
||||
ParseError, ParseState, Parsed, ParsedObject, UnknownToken,
|
||||
},
|
||||
xir::{
|
||||
autoclose::XirfAutoClose,
|
||||
|
@ -64,97 +60,199 @@ use crate::{
|
|||
},
|
||||
};
|
||||
|
||||
/// Load an `xmlo` file represented by `src` into the graph held
|
||||
/// by `air_ctx`.
|
||||
/// Declaratively define a lowering pipeline.
|
||||
///
|
||||
/// Loading an object file will result in opaque objects being added to the
|
||||
/// graph.
|
||||
/// A lowering pipeline stitches together parsers such that the objects of
|
||||
/// one become the tokens of the next;
|
||||
/// see the [module-level documentation](self) for more information.
|
||||
///
|
||||
/// TODO: To re-use this in `tamec` we want to be able to ignore fragments.
|
||||
/// Composing those types results in a significant amount of boilerplate.
|
||||
/// This macro is responsible for generating a function that,
|
||||
/// given a source and optional [`ParseState`] contexts,
|
||||
/// will carry out the lowering operation and invoke the provided sink on
|
||||
/// each object that comes out the end.
|
||||
///
|
||||
/// TODO: More documentation once this has been further cleaned up.
|
||||
pub fn load_xmlo<ER: Diagnostic, EU: Diagnostic>(
|
||||
src: impl LowerSource<UnknownToken, XirToken, XirError>,
|
||||
air_ctx: AirAggregateCtx,
|
||||
xmlo_ctx: XmloAirContext,
|
||||
report: impl FnMut(Result<(), ER>) -> Result<(), EU>,
|
||||
) -> Result<(AirAggregateCtx, XmloAirContext), EU>
|
||||
where
|
||||
ER: From<ParseError<UnknownToken, XirError>>
|
||||
+ FromParseError<PartialXirToXirf<4, Text>>
|
||||
+ FromParseError<XmloReader>
|
||||
+ FromParseError<XmloToAir>
|
||||
+ FromParseError<AirAggregate>,
|
||||
EU: From<FinalizeError>,
|
||||
{
|
||||
// TODO: This entire block is a WIP and will be incrementally
|
||||
// abstracted away.
|
||||
#[rustfmt::skip] // better visualize the structure despite the line length
|
||||
let (((), air_ctx), xmlo_ctx) = Lower::<
|
||||
ParsedObject<UnknownToken, XirToken, XirError>,
|
||||
PartialXirToXirf<4, Text>,
|
||||
ER,
|
||||
>::lower::<_, EU>(&mut src.map(|result| result.map_err(ER::from)), |toks| {
|
||||
Lower::<PartialXirToXirf<4, Text>, XmloReader, _>::lower(toks, |xmlo| {
|
||||
let mut iter = xmlo.scan(false, |st, rtok| match st {
|
||||
/// TODO: This is not yet finalized;
|
||||
/// more documentation is needed once the approach is solid.
|
||||
macro_rules! lower_pipeline {
|
||||
($(
|
||||
$(#[$meta:meta])*
|
||||
$vis:vis $fn:ident($srcobj:ty, $srcerr:ty)
|
||||
$(|> $lower:ty $([$ctx:ident])? $(, until ($until:pat))?)*;
|
||||
)*) => {$(
|
||||
$(#[$meta])*
|
||||
$vis fn $fn<ER: Diagnostic, EU: Diagnostic>(
|
||||
src: impl LowerSource<UnknownToken, $srcobj, $srcerr>,
|
||||
$(
|
||||
// Each parser may optionally receive context from an
|
||||
// earlier run.
|
||||
$($ctx: impl Into<<$lower as ParseState>::PubContext>,)?
|
||||
)*
|
||||
sink: impl FnMut(Result<(), ER>) -> Result<(), EU>,
|
||||
) -> Result<
|
||||
(
|
||||
$(
|
||||
// Any context that is passed in is also returned so
|
||||
// that individual pipelines can continue to build
|
||||
// upon state from previous pipelines.
|
||||
$( lower_pipeline!(@ret_ty $lower, $ctx), )?
|
||||
)*
|
||||
),
|
||||
EU
|
||||
>
|
||||
where
|
||||
// Recoverable errors (ER) are errors that could potentially be
|
||||
// handled by the sink.
|
||||
// Parsers are always expected to perform error recovery to the
|
||||
// best of their ability.
|
||||
// We need to support widening into this error type from every
|
||||
// individual ParseState in this pipeline,
|
||||
// plus the source.
|
||||
ER: From<ParseError<UnknownToken, $srcerr>>
|
||||
$(
|
||||
+ From<ParseError<
|
||||
<$lower as ParseState>::Token,
|
||||
<$lower as ParseState>::Error,
|
||||
>>
|
||||
)*,
|
||||
|
||||
// Unrecoverable errors (EU) are errors that the sink chooses
|
||||
// not to handle.
|
||||
// It is constructed explicitly from the sink,
|
||||
// so the only type conversion that we are concerned about
|
||||
// ourselves is producing it from a finalization error,
|
||||
// which is _not_ an error that parsers are expected to
|
||||
// recover from.
|
||||
EU: From<FinalizeError>,
|
||||
{
|
||||
let lower_pipeline!(@ret_pat $($($ctx)?)*) = lower_pipeline!(
|
||||
@body_head(src, $srcobj, $srcerr, sink)
|
||||
$((|> $lower $([$ctx])? $(, until ($until))?))*
|
||||
)?;
|
||||
|
||||
Ok(($(
|
||||
$($ctx,)?
|
||||
)*))
|
||||
}
|
||||
)*};
|
||||
|
||||
(@ret_ty $lower:ty, $_ctx:ident) => {
|
||||
<$lower as ParseState>::PubContext
|
||||
};
|
||||
|
||||
// Because of how the lowering pipeline composes,
|
||||
// contexts are nested with a terminal unit at the left,
|
||||
// as in `(((), B), A)` with `A` being a parser that appears earlier
|
||||
// in the pipeline than `B`.
|
||||
(@ret_pat $ctx:ident $($rest:tt)*) => {
|
||||
(lower_pipeline!(@ret_pat $($rest)*), $ctx)
|
||||
};
|
||||
(@ret_pat) => {
|
||||
()
|
||||
};
|
||||
|
||||
// First lowering operation from the source.
|
||||
//
|
||||
// This doesn't support context or `until`;
|
||||
// it can be added if ever it is needed.
|
||||
(
|
||||
@body_head($src:ident, $srcobj:ty, $srcerr:ty, $sink:ident)
|
||||
(|> $head:ty) $($rest:tt)*
|
||||
) => {
|
||||
Lower::<
|
||||
ParsedObject<UnknownToken, $srcobj, $srcerr>,
|
||||
$head,
|
||||
ER,
|
||||
>::lower::<_, EU>(&mut $src.map(|result| result.map_err(ER::from)), |next| {
|
||||
lower_pipeline!(
|
||||
@body_inner(next, $head, $sink)
|
||||
$($rest)*
|
||||
)
|
||||
})
|
||||
};
|
||||
|
||||
// Lower without context
|
||||
// (with the default context for the parser).
|
||||
//
|
||||
// This doesn't support `until`;
|
||||
// it can be added if needed.
|
||||
(
|
||||
@body_inner($next:ident, $lower_prev:ty, $sink:ident)
|
||||
(|> $lower:ty) $($rest:tt)*
|
||||
) => {
|
||||
Lower::<$lower_prev, $lower, _>::lower($next, |next| {
|
||||
lower_pipeline!(
|
||||
@body_inner(next, $lower, $sink)
|
||||
$($rest)*
|
||||
)
|
||||
})
|
||||
};
|
||||
|
||||
// Lower with a context provided by the caller,
|
||||
// optionally with an `until` clause that stops at (and includes) a
|
||||
// matching object from the previous parser.
|
||||
(
|
||||
@body_inner($next:ident, $lower_prev:ty, $sink:ident)
|
||||
(|> $lower:ty [$ctx:ident] $(, until ($until:pat))?) $($rest:tt)*
|
||||
) => {{
|
||||
// Shadow $next with a new iterator if `until` clause was provided.
|
||||
// `take_while` unfortunately doesn't include the first object that
|
||||
// does not match,
|
||||
// thus the more verbose `scan` which includes the first
|
||||
// non-match and then trips the iterator.
|
||||
$(
|
||||
let $next = &mut $next.scan(false, |st, rtok| match st {
|
||||
true => None,
|
||||
false => {
|
||||
*st =
|
||||
matches!(rtok, Ok(Parsed::Object(XmloToken::Eoh(..))));
|
||||
matches!(rtok, Ok(Parsed::Object($until)));
|
||||
Some(rtok)
|
||||
}
|
||||
});
|
||||
)?
|
||||
|
||||
Lower::<XmloReader, XmloToAir, _>::lower_with_context(&mut iter, xmlo_ctx, |air| {
|
||||
Lower::<XmloToAir, AirAggregate, _>::lower_with_context(air, air_ctx, |end| {
|
||||
terminal::<AirAggregate, _>(end).try_for_each(report)
|
||||
})
|
||||
})
|
||||
Lower::<$lower_prev, $lower, _>::lower_with_context($next, $ctx, |next| {
|
||||
lower_pipeline!(
|
||||
@body_inner(next, $lower, $sink)
|
||||
$($rest)*
|
||||
)
|
||||
})
|
||||
})?;
|
||||
}};
|
||||
|
||||
Ok((air_ctx, xmlo_ctx))
|
||||
// Terminal sink is applied after there are no more lowering operations
|
||||
// remaining in the pipeline definition.
|
||||
(@body_inner($next:ident, $lower_prev:ty, $sink:ident)) => {
|
||||
terminal::<$lower_prev, _>($next).try_for_each($sink)
|
||||
};
|
||||
}
|
||||
|
||||
/// Parse a source package into the [ASG](crate::asg) using TAME's XML
|
||||
/// source language.
|
||||
///
|
||||
/// TODO: More documentation once this has been further cleaned up.
|
||||
pub fn parse_package_xml<ER: Diagnostic, EU: Diagnostic>(
|
||||
src: impl LowerSource<UnknownToken, XirToken, XirError>,
|
||||
air_ctx: AirAggregateCtx,
|
||||
report: impl FnMut(Result<(), ER>) -> Result<(), EU>,
|
||||
) -> Result<AirAggregateCtx, EU>
|
||||
where
|
||||
ER: From<ParseError<UnknownToken, XirError>>
|
||||
+ FromParseError<XirToXirf<64, Text>>
|
||||
+ FromParseError<XirfToNir>
|
||||
+ FromParseError<TplShortDesugar>
|
||||
+ FromParseError<InterpolateNir>
|
||||
+ FromParseError<NirToAir>
|
||||
+ FromParseError<AirAggregate>,
|
||||
EU: From<FinalizeError>,
|
||||
{
|
||||
#[rustfmt::skip] // better visualize the structure despite the line length
|
||||
let ((), air_ctx) = Lower::<
|
||||
ParsedObject<UnknownToken, XirToken, XirError>,
|
||||
XirToXirf<64, RefinedText>,
|
||||
ER,
|
||||
>::lower::<_, EU>(&mut src.map(|result| result.map_err(ER::from)), |toks| {
|
||||
Lower::<XirToXirf<64, RefinedText>, XirfToNir, _>::lower(toks, |nir| {
|
||||
Lower::<XirfToNir, TplShortDesugar, _>::lower(nir, |nir| {
|
||||
Lower::<TplShortDesugar, InterpolateNir, _>::lower(nir, |nir| {
|
||||
Lower::<InterpolateNir, NirToAir, _>::lower(nir, |air| {
|
||||
Lower::<NirToAir, AirAggregate, _>::lower_with_context(air, air_ctx, |end| {
|
||||
terminal::<AirAggregate, _>(end).try_for_each(report)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
})?;
|
||||
lower_pipeline! {
|
||||
/// Load an `xmlo` file represented by `src` into the graph held
|
||||
/// by `air_ctx`.
|
||||
///
|
||||
/// Loading an object file will result in opaque objects being added to the
|
||||
/// graph.
|
||||
///
|
||||
/// TODO: To re-use this in `tamec` we want to be able to ignore fragments.
|
||||
///
|
||||
/// TODO: More documentation once this has been further cleaned up.
|
||||
pub load_xmlo(XirToken, XirError)
|
||||
|> PartialXirToXirf<4, Text>
|
||||
|> XmloReader
|
||||
|> XmloToAir[xmlo_ctx], until (XmloToken::Eoh(..))
|
||||
|> AirAggregate[air_ctx];
|
||||
|
||||
Ok(air_ctx)
|
||||
/// Parse a source package into the [ASG](crate::asg) using TAME's XML
|
||||
/// source language.
|
||||
///
|
||||
/// TODO: More documentation once this has been further cleaned up.
|
||||
pub parse_package_xml(XirToken, XirError)
|
||||
|> XirToXirf<64, RefinedText>
|
||||
|> XirfToNir
|
||||
|> TplShortDesugar
|
||||
|> InterpolateNir
|
||||
|> NirToAir
|
||||
|> AirAggregate[air_ctx];
|
||||
}
|
||||
|
||||
/// Lower an [`Asg`]-derived token stream into an `xmli` file.
|
||||
|
|
Loading…
Reference in New Issue