From 61d556c89e587628f83f6acdf0db8f12788bf071 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Mon, 12 Jun 2023 16:42:05 -0400 Subject: [PATCH] tamer: pipeline: Generate recoverable sum error types This was a significant undertaking, with a few thrown-out approaches. The documentation describes what approach was taken, but I'd also like to provide some insight into the approaches that I rejected for various reasons, or because they simply didn't work. The problem that this commit tries to solve is encapsulation of error types. Prior to the introduction of the lowering pipeline macro `lower_pipeline!`, all pipelines were written by hand using `Lower` and specifying the applicable types. This included creating sum types to accommodate each of the errors so that `Lower` could widen automatically. The introduction of the `lower_pipeline!` macro resolved the boilerplate and type complexity concerns for the parsers by allowing the pipeline to be concisely declared. However, it still accepted an error sum type `ER` for recoverable errors, which meant that we had to break a level of encapsulation, peering into the pipeline to know both what parsers were in play and what their error types were. These error sum types were also the source of a lot of tedious boilerplate that made adding new parsers to the pipeline unnecessarily unpleasant; the purpose of the macro is to make composition both easy and clear, and error types were undermining it. Another benefit of sum types per pipeline is that callers need only aggregate those pipeline types, if they care about them, rather than every error type used as a component of the pipeline. So, this commit generates the error types. Doing so was non-trivial. Associated Types and Lifetimes ------------------------------ Error types are associated with their `ParseState` as `ParseState::Error`. 
As described in this commit, TAMER's approach to errors is that they never contain non-static lifetimes; interning and copying are used to that effect. And, indeed, no errors in TAMER have lifetimes. But, some `ParseState`s may. In this case, `AsgTreeToXirf`: ``` impl<'a> ParseState for AsgTreeToXirf<'a> { // [...] type Error = AsgTreeToXirfError; // [...] } ``` Even though `AsgTreeToXirfError` does not have a lifetime, the `ParseState` it is associated with _does_. So to reference that type, we must use `<AsgTreeToXirf<'a> as ParseState>::Error`. So if we have a sum type: ``` enum Sum<'a> { // ^^ oh no! vv AsgTreeToXirfError(<AsgTreeToXirf<'a> as ParseState>::Error), } ``` There's no way to elide or make anonymous that lifetime, since it's not used, at the time of writing. `for<'a>` also cannot be used in this context. The solution in this commit is to use a macro (`lower_error_sum`) to rewrite lifetimes to `'static`: ``` enum Sum { AsgTreeToXirfError(<AsgTreeToXirf<'static> as ParseState>::Error), } ``` The `Error` associated type will resolve to `AsgTreeToXirfError` all the same either way, since it has no lifetimes of its own, let alone any referencing trait bounds. That's not to say that we _couldn't_ support lifetimes as long as they're attached to context, but we have no need to at the moment, and it adds significant cognitive overhead. Further, the diagnostic system doesn't deal in lifetimes, and so would need reworking as well. Not worth it. An alternative solution to this that was rejected is an explicit `Error` type in the macro application: ``` // in the lowering pipeline |> AsgTreeToXirf<'a> { // lifetime type Error = AsgTreeToXirfError; // no lifetime } ``` But this requires peeling back the `ParseState` to see what its error is and _duplicating_ it here. Silly, and it breaks encapsulation, since the lowering pipeline is supposed to return its own error type. 
Yet another option considered was to standardize a submodule convention whereby each `ParseState` would have a module exporting `Error`, among other types. This would decouple it from the parent type. However, we still have the duplication between that and an associated type. Further, there's no way to enforce this convention (effectively a module API)---the macro would just fail in obscure ways, at least with `macro_rules!`. It would have been an ugly kluge. Overlapping Error Types ----------------------- Another concern with generating the sum type, resolved in a previous commit, was overlapping error types, which prohibited `impl From<E> for ER` generation. The problem was that a number of `ParseState`s used `Infallible` as their `Error` type. This was resolved in a previous commit by creating Infallible-like newtypes (variantless enums). This was not the only option. `From` fits naturally into how TAMER handles sum types, and fits naturally into `Lower`'s `WidenedError`. The alternative is generating explicit `map_err`s in `lower_pipeline!`. This would have allowed for overlapping error types because the _caller_ knows what the correct target variant is in the sum type. The problem with an explicit `map_err` is that it places more power in `lower_pipeline!`, which is _supposed_ to be a macro that simply removes boilerplate; it's not supposed to increase expressiveness. It's also not fun dealing with complexity in macros; they're much more confusing than normal code. With the decided-upon approach (newtypes + `From`), hand-written `Lower` pipelines are just as expressive---just more verbose---as `lower_pipeline!`, and widening is still handled for you. Rust's type system will also handle the complexity of widening automatically for us without us having to reason about it in the macro. This is not always desirable, but in this case, I feel that it is. 
--- tamer/src/bin/tamec.rs | 182 +++++-------------------------- tamer/src/ld/poc.rs | 86 +++------------ tamer/src/parse.rs | 2 +- tamer/src/parse/lower.rs | 27 ++++- tamer/src/pipeline.rs | 69 ++++++++++-- tamer/src/pipeline/macro.rs | 206 ++++++++++++++++++++++++++++-------- 6 files changed, 288 insertions(+), 284 deletions(-) diff --git a/tamer/src/bin/tamec.rs b/tamer/src/bin/tamec.rs index cbbf7196..5801d25c 100644 --- a/tamer/src/bin/tamec.rs +++ b/tamer/src/bin/tamec.rs @@ -39,24 +39,13 @@ use std::{ path::Path, }; use tamer::{ - asg::{ - air::Air, visit::TreeWalkRel, AsgError, AsgTreeToXirfError, DefaultAsg, - }, + asg::DefaultAsg, diagnose::{ AnnotatedSpan, Diagnostic, FsSpanResolver, Reporter, VisualReporter, }, - nir::{ - InterpError, Nir, NirToAirError, TplShortDesugarError, XirfToNirError, - }, - parse::{lowerable, FinalizeError, ParseError, Token, UnknownToken}, - pipeline::parse_package_xml, - xir::{ - self, - autoclose::XirfAutoCloseError, - flat::{RefinedText, Text, XirToXirfError, XirfToXirError, XirfToken}, - reader::XmlXirReader, - DefaultEscaper, Token as XirToken, - }, + parse::{lowerable, FinalizeError, ParseError, Token}, + pipeline::{parse_package_xml, LowerXmliError, ParsePackageXmlError}, + xir::{self, reader::XmlXirReader, DefaultEscaper}, }; /// Types of commands @@ -93,7 +82,7 @@ fn src_reader<'a>( fn copy_xml_to<'e, W: io::Write + 'e>( mut fout: W, escaper: &'e DefaultEscaper, -) -> impl FnMut(&Result) + 'e { +) -> impl FnMut(&Result) + 'e { use tamer::xir::writer::XmlWriter; let mut xmlwriter = Default::default(); @@ -124,7 +113,7 @@ fn compile( let mut ebuf = String::new(); - let report_err = |result: Result<(), RecoverableError>| { + let report_err = |result: Result<(), ParsePackageXmlError<_>>| { result.or_else(|e| { // See below note about buffering. ebuf.clear(); @@ -205,11 +194,13 @@ fn derive_xmli( // Write failures should immediately bail out; // we can't skip writing portions of the file and // just keep going! 
- result.and_then(|tok| { - tok.write(&mut fout, st, escaper) - .map(|newst| st = newst) - .map_err(Into::::into) - }) + result + .map_err(Into::::into) + .and_then(|tok| { + tok.write(&mut fout, st, escaper) + .map(|newst| st = newst) + .map_err(Into::::into) + }) })?; Ok(()) @@ -312,17 +303,20 @@ fn parse_options(opts: Options, args: Vec) -> Result { /// /// These are errors that will result in aborting execution and exiting with /// a non-zero status. -/// Contrast this with [`RecoverableError`], +/// Contrast this with recoverable errors in [`tamer::pipeline`], /// which is reported real-time to the user and _does not_ cause the /// program to abort until the end of the compilation unit. +/// +/// Note that an recoverable error, +/// under a normal compilation strategy, +/// will result in an [`UnrecoverableError::ErrorsDuringLowering`] at the +/// end of the compilation unit. #[derive(Debug)] pub enum UnrecoverableError { Io(io::Error), Fmt(fmt::Error), XirWriterError(xir::writer::Error), - AsgTreeToXirfError(ParseError), - XirfAutoCloseError(ParseError, XirfAutoCloseError>), - XirfToXirError(ParseError, XirfToXirError>), + LowerXmliError(LowerXmliError), ErrorsDuringLowering(ErrorCount), FinalizeError(FinalizeError), } @@ -333,38 +327,6 @@ pub enum UnrecoverableError { /// have in your code. type ErrorCount = usize; -/// An error that occurs during the lowering pipeline that may be recovered -/// from to continue parsing and collection of additional errors. -/// -/// This represents the aggregation of all possible errors that can occur -/// during lowering. -/// This cannot include panics, -/// but efforts have been made to reduce panics to situations that -/// represent the equivalent of assertions. 
-/// -/// These errors are distinct from [`UnrecoverableError`], -/// which represents the errors that could be returned to the toplevel -/// `main`, -/// because these errors are intended to be reported to the user _and then -/// recovered from_ so that compilation may continue and more errors may -/// be collected; -/// nobody wants a compiler that reports one error at a time. -/// -/// Note that an recoverable error, -/// under a normal compilation strategy, -/// will result in an [`UnrecoverableError::ErrorsDuringLowering`] at the -/// end of the compilation unit. -#[derive(Debug)] -pub enum RecoverableError { - XirParseError(ParseError), - XirfParseError(ParseError), - NirParseError(ParseError, XirfToNirError>), - TplShortDesugarError(ParseError), - InterpError(ParseError), - NirToAirError(ParseError), - AirAggregateError(ParseError), -} - impl From for UnrecoverableError { fn from(e: io::Error) -> Self { Self::Io(e) @@ -383,23 +345,9 @@ impl From for UnrecoverableError { } } -impl From> for UnrecoverableError { - fn from(e: ParseError) -> Self { - Self::AsgTreeToXirfError(e) - } -} - -impl From, XirfToXirError>> for UnrecoverableError { - fn from(e: ParseError, XirfToXirError>) -> Self { - Self::XirfToXirError(e) - } -} - -impl From, XirfAutoCloseError>> - for UnrecoverableError -{ - fn from(e: ParseError, XirfAutoCloseError>) -> Self { - Self::XirfAutoCloseError(e) +impl From> for UnrecoverableError { + fn from(e: LowerXmliError) -> Self { + Self::LowerXmliError(e) } } @@ -417,50 +365,6 @@ impl From> for UnrecoverableError { } } -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - Self::XirParseError(e) - } -} - -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - Self::XirfParseError(e) - } -} - -impl From, XirfToNirError>> - for RecoverableError -{ - fn from(e: ParseError, XirfToNirError>) -> Self { - Self::NirParseError(e) - } -} - -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - 
Self::TplShortDesugarError(e) - } -} - -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - Self::InterpError(e) - } -} - -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - Self::NirToAirError(e) - } -} - -impl From> for RecoverableError { - fn from(e: ParseError) -> Self { - Self::AirAggregateError(e) - } -} - impl Display for UnrecoverableError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use UnrecoverableError::*; @@ -468,10 +372,8 @@ impl Display for UnrecoverableError { match self { Io(e) => Display::fmt(e, f), Fmt(e) => Display::fmt(e, f), + LowerXmliError(e) => Display::fmt(e, f), XirWriterError(e) => Display::fmt(e, f), - AsgTreeToXirfError(e) => Display::fmt(e, f), - XirfToXirError(e) => Display::fmt(e, f), - XirfAutoCloseError(e) => Display::fmt(e, f), FinalizeError(e) => Display::fmt(e, f), // TODO: Use formatter for dynamic "error(s)" @@ -482,22 +384,6 @@ impl Display for UnrecoverableError { } } -impl Display for RecoverableError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use RecoverableError::*; - - match self { - XirParseError(e) => Display::fmt(e, f), - XirfParseError(e) => Display::fmt(e, f), - NirParseError(e) => Display::fmt(e, f), - TplShortDesugarError(e) => Display::fmt(e, f), - InterpError(e) => Display::fmt(e, f), - NirToAirError(e) => Display::fmt(e, f), - AirAggregateError(e) => Display::fmt(e, f), - } - } -} - impl Error for UnrecoverableError {} impl Diagnostic for UnrecoverableError { @@ -505,9 +391,7 @@ impl Diagnostic for UnrecoverableError { use UnrecoverableError::*; match self { - AsgTreeToXirfError(e) => e.describe(), - XirfToXirError(e) => e.describe(), - XirfAutoCloseError(e) => e.describe(), + LowerXmliError(e) => e.describe(), FinalizeError(e) => e.describe(), // Fall back to `Display` @@ -518,22 +402,6 @@ impl Diagnostic for UnrecoverableError { } } -impl Diagnostic for RecoverableError { - fn describe(&self) -> Vec { - use 
RecoverableError::*; - - match self { - XirParseError(e) => e.describe(), - XirfParseError(e) => e.describe(), - NirParseError(e) => e.describe(), - TplShortDesugarError(e) => e.describe(), - InterpError(e) => e.describe(), - NirToAirError(e) => e.describe(), - AirAggregateError(e) => e.describe(), - } - } -} - #[cfg(test)] mod test { use super::*; diff --git a/tamer/src/ld/poc.rs b/tamer/src/ld/poc.rs index 4da7b896..edcaf5bc 100644 --- a/tamer/src/ld/poc.rs +++ b/tamer/src/ld/poc.rs @@ -26,30 +26,25 @@ use super::xmle::{ XmleSections, }; use crate::{ - asg::{ - air::{Air, AirAggregateCtx}, - AsgError, DefaultAsg, - }, + asg::{air::AirAggregateCtx, DefaultAsg}, diagnose::{AnnotatedSpan, Diagnostic}, fs::{ Filesystem, FsCanonicalizer, PathFile, VisitOnceFile, VisitOnceFilesystem, }, ld::xmle::Sections, - obj::xmlo::{XmloAirContext, XmloAirError, XmloError, XmloToken}, - parse::{lowerable, FinalizeError, ParseError, UnknownToken}, - pipeline, + obj::xmlo::XmloAirContext, + parse::{lowerable, FinalizeError}, + pipeline::{self, LoadXmloError}, sym::{GlobalSymbolResolve, SymbolId}, xir::{ - flat::{Text, XirToXirfError, XirfToken}, reader::XmlXirReader, writer::{Error as XirWriterError, XmlWriter}, - DefaultEscaper, Error as XirError, Escaper, Token as XirToken, + DefaultEscaper, Error as XirError, Escaper, }, }; use fxhash::FxBuildHasher; use std::{ - convert::identity, error::Error, fmt::{self, Display}, fs, @@ -107,9 +102,9 @@ fn load_xmlo, S: Escaper>( let src = &mut lowerable(XmlXirReader::new(file, escaper, ctx)); let (mut state, mut air_ctx) = - pipeline::load_xmlo::<_, TameldError, _, _, _>(state, air_ctx)( - src, identity, - )?; + pipeline::load_xmlo(state, air_ctx)(src, |result| { + result.map_err(TameldError::from) + })?; let mut dir = path; dir.pop(); @@ -160,11 +155,7 @@ fn output_xmle<'a, X: XmleSections<'a>, S: Escaper>( pub enum TameldError { Io(NeqIoError), SortError(SortError), - XirParseError(ParseError), - XirfParseError(ParseError), - 
XmloParseError(ParseError, XmloError>), - XmloLowerError(ParseError), - AirLowerError(ParseError), + LoadXmloError(LoadXmloError), XirWriterError(XirWriterError), FinalizeError(FinalizeError), Fmt(fmt::Error), @@ -207,33 +198,9 @@ impl From for TameldError { } } -impl From> for TameldError { - fn from(e: ParseError) -> Self { - Self::XirParseError(e) - } -} - -impl From, XmloError>> for TameldError { - fn from(e: ParseError, XmloError>) -> Self { - Self::XmloParseError(e) - } -} - -impl From> for TameldError { - fn from(e: ParseError) -> Self { - Self::XirfParseError(e) - } -} - -impl From> for TameldError { - fn from(e: ParseError) -> Self { - Self::XmloLowerError(e) - } -} - -impl From> for TameldError { - fn from(e: ParseError) -> Self { - Self::AirLowerError(e) +impl From> for TameldError { + fn from(e: LoadXmloError) -> Self { + Self::LoadXmloError(e) } } @@ -260,11 +227,7 @@ impl Display for TameldError { match self { Self::Io(e) => Display::fmt(e, f), Self::SortError(e) => Display::fmt(e, f), - Self::XirParseError(e) => Display::fmt(e, f), - Self::XirfParseError(e) => Display::fmt(e, f), - Self::XmloParseError(e) => Display::fmt(e, f), - Self::XmloLowerError(e) => Display::fmt(e, f), - Self::AirLowerError(e) => Display::fmt(e, f), + Self::LoadXmloError(e) => Display::fmt(e, f), Self::XirWriterError(e) => Display::fmt(e, f), Self::FinalizeError(e) => Display::fmt(e, f), Self::Fmt(e) => Display::fmt(e, f), @@ -272,31 +235,12 @@ impl Display for TameldError { } } -impl Error for TameldError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::Io(e) => Some(e), - Self::SortError(e) => Some(e), - Self::XirParseError(e) => Some(e), - Self::XirfParseError(e) => Some(e), - Self::XmloParseError(e) => Some(e), - Self::XmloLowerError(e) => Some(e), - Self::AirLowerError(e) => Some(e), - Self::XirWriterError(e) => Some(e), - Self::FinalizeError(e) => Some(e), - Self::Fmt(e) => Some(e), - } - } -} +impl Error for TameldError {} impl Diagnostic 
for TameldError { fn describe(&self) -> Vec { match self { - Self::XirParseError(e) => e.describe(), - Self::XirfParseError(e) => e.describe(), - Self::XmloParseError(e) => e.describe(), - Self::XmloLowerError(e) => e.describe(), - Self::AirLowerError(e) => e.describe(), + Self::LoadXmloError(e) => e.describe(), Self::FinalizeError(e) => e.describe(), Self::SortError(e) => e.describe(), diff --git a/tamer/src/parse.rs b/tamer/src/parse.rs index 4139bc45..07202025 100644 --- a/tamer/src/parse.rs +++ b/tamer/src/parse.rs @@ -32,7 +32,7 @@ pub mod util; pub use error::{FinalizeError, ParseError}; pub use lower::{ lowerable, terminal, FromParseError, Lower, LowerIter, LowerSource, - ParsedObject, + ParseStateError, ParsedObject, }; pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser}; pub use state::{ diff --git a/tamer/src/parse/lower.rs b/tamer/src/parse/lower.rs index 56d76eb3..2dc6c28f 100644 --- a/tamer/src/parse/lower.rs +++ b/tamer/src/parse/lower.rs @@ -221,6 +221,16 @@ where } } +/// Short-hand [`ParseError`] with types derived from the provided +/// [`ParseState`] `S`. +/// +/// The reason that [`ParseError`] does not accept [`ParseState`] is because +/// a [`ParseState`] may carry a lot of additional type baggage---​ +/// including lifetimes and other generics---​ +/// that are irrelevant to the error type. +pub type ParseStateError = + ParseError<::Token, ::Error>; + /// A [`Diagnostic`] error type common to both `S` and `LS`. /// /// This error type must be able to accommodate error variants from all @@ -235,9 +245,17 @@ where /// which may then decide what to do /// (e.g. report errors and permit recovery, /// or terminate at the first sign of trouble). 
-pub trait WidenedError = Diagnostic - + From::Token, ::Error>> - + From::Token, ::Error>>; +/// +/// Note that the [`From`] trait bound utilizing `S` is purely a development +/// aid to help guide the user (developer) in deriving the necessary +/// types, +/// since lowering pipelines are deeply complex with all the types +/// involved. +/// It can be safely removed in the future, +/// at least at the time of writing, +/// and have no effect on compilation. +pub trait WidenedError = + Diagnostic + From> + From>; /// Convenience trait for converting [`From`] a [`ParseError`] for the /// provided [`ParseState`] `S`. @@ -246,8 +264,7 @@ pub trait WidenedError = Diagnostic /// that is almost certainly already utilized, /// rather than having to either import more types or use the verbose /// associated type. -pub trait FromParseError = - From::Token, ::Error>>; +pub trait FromParseError = From>; /// A [`ParsedResult`](super::ParsedResult) with a [`WidenedError`]. pub type WidenedParsedResult = diff --git a/tamer/src/pipeline.rs b/tamer/src/pipeline.rs index 75f23528..151d7e18 100644 --- a/tamer/src/pipeline.rs +++ b/tamer/src/pipeline.rs @@ -42,15 +42,70 @@ //! //! Error Widening //! ============== +//! Every lowering pipeline will have an associated error sum type generated +//! for it; +//! this is necessary to maintain an appropriate level of encapsulation +//! and keep implementation details away from the caller. +//! All of the individual errors types is otherwise significant source of +//! complexity. +//! +//! Since all [`ParseState`]s in the lowering pipeline are expected to +//! support error recovery, +//! this generated error sum type represents a _recoverable_ error. +//! It is up to the sink to deermine whether the error should be promoted +//! into an unrecoverable error `EU`, +//! which is the error type yielded by the lowering operation. +//! Error reporting and recovery should be utilized whenever it makes sense +//! 
to present the user with as many errors as possible rather than +//! aborting the process immediately, +//! which would otherwise force the user to correct errors one at a +//! time. +//! +//! [`ParseState`] Requirements +//! --------------------------- //! Each [`ParseState`] in the pipeline is expected to have its own unique //! error type, //! utilizing newtypes if necessary; //! this ensures that errors are able to be uniquely paired with each //! [`ParseState`] that produced it without having to perform an //! explicit mapping at the call site. -//! To facilitate that automatic mapping/aggregation, -//! this uniqueness property also allows for generation of [`From`] -//! implementations that will not overlap. +//! This uniqueness property allows for generation of [`From`] +//! implementations that will not overlap, +//! and remains compatible with the API of [`Lower`]. +//! +//! [`ParseState::Error`] Lifetime Requirements and Workarounds +//! ----------------------------------------------------------- +//! Error types in TAMER _never_ have lifetime bounds; +//! this is necessary to allow error types to be propapgated all the way +//! up the stack regardless of dependencies.[^lifetime-alt] +//! +//! [^lifetime-alt]: Rather than utilizing references with lifetimes, +//! TAMER error types may hold symbols representing interned values, +//! or may instead [`Copy`] data that has no interner. +//! +//! However, +//! [`ParseState::Error`] is an associated type on [`ParseState`], +//! which _may_ have lifetimes.[^parse-state-lifetime-ex] +//! At the time of writing, +//! even though the associated error type does not utilize the lifetime +//! bounds of the [`ParseState`], +//! Rust still requires some lifetime specification and will not elide +//! it or allow for anonymous lifetimes. +//! +//! [^parse-state-lifetime-ex]: One example of a [`ParseState`] with +//! an associated lifetime is [`AsgTreeToXirf`]. +//! +//! 
We want to be able to derive error types from the provided +//! [`ParseState`]s along so that the caller does not have to peel back +//! layers of abstraction in order to determine how the error type ought +//! to be specified. +//! To handle this, +//! the `lower_pipeline!` macro will _rewrite all lifetimes to `'static`'_ +//! in the provided pipeline types. +//! Since no [`ParseState::Error`] type should have a lifetime, +//! and therefore should not reference the lifetime of its parent +//! [`ParseState`], +//! this should have no practical effect on the error type itself. use crate::{ asg::{air::AirAggregate, AsgTreeToXirf}, @@ -59,7 +114,7 @@ use crate::{ obj::xmlo::{XmloReader, XmloToAir, XmloToken}, parse::{ terminal, FinalizeError, Lower, LowerSource, ParseError, ParseState, - Parsed, ParsedObject, UnknownToken, + ParseStateError, Parsed, ParsedObject, UnknownToken, }, xir::{ autoclose::XirfAutoClose, @@ -82,7 +137,7 @@ lower_pipeline! { /// TODO: To re-use this in `tamec` we want to be able to ignore fragments. /// /// TODO: More documentation once this has been further cleaned up. - pub load_xmlo + pub load_xmlo -> LoadXmlo |> PartialXirToXirf<4, Text> |> XmloReader |> XmloToAir[xmlo_ctx], until (XmloToken::Eoh(..)) @@ -92,7 +147,7 @@ lower_pipeline! { /// source language. /// /// TODO: More documentation once this has been further cleaned up. - pub parse_package_xml + pub parse_package_xml -> ParsePackageXml |> XirToXirf<64, RefinedText> |> XirfToNir |> TplShortDesugar @@ -104,7 +159,7 @@ lower_pipeline! { /// `xmli` file. /// /// TODO: More documentation once this has been further cleaned up. - pub lower_xmli<'a> + pub lower_xmli<'a> -> LowerXmli |> AsgTreeToXirf<'a>[asg] |> XirfAutoClose |> XirfToXir; diff --git a/tamer/src/pipeline/macro.rs b/tamer/src/pipeline/macro.rs index d396f563..625b466c 100644 --- a/tamer/src/pipeline/macro.rs +++ b/tamer/src/pipeline/macro.rs @@ -25,6 +25,103 @@ //! and to see TAMER's pipelines, //! 
see the [parent module](super). +#[cfg(doc)] +use crate::parse::ParseState; + +/// Generate an error sum type for a lowering pipeline. +/// +/// Given a series of [`ParseState`] types, +/// this derives a sum type capable of representing the associated +/// [`ParseState::Error`] of each. +/// See the [parent module](super) for more information, +/// including the challenges/concerns with this approach. +/// In particular, +/// note that all lifetimes on the [`ParseState`] type are rewritten to be +/// `'static'; +/// all associated `Error` types must not contain non-static lifetimes, +/// as is the standard convention in TAMER. +macro_rules! lower_error_sum { + ( + $(#[$meta:meta])* + $vis:vis $name:ident = $( + $st:ident $(<$($l:lifetime,)* $($c:literal,)* $($t:ident,)*>)? + )+ + ) => { + // Pair `'static` with each lifetime so that it may be used to + // replace the respective lifetime in `@gen` + // (we need an iteration token). + lower_error_sum!( + @gen + $(#[$meta])* + $vis $name = $($st$(<$($l: 'static,)* $($c,)* $($t,)*>)?)+ + ); + }; + + ( + @gen + $(#[$meta:meta])* + $vis:vis $name:ident = $( + $st:ident $(< + $($_:lifetime: $l:lifetime,)* + // ^^ `'static` (see above) + $($c:literal,)* + $($t:ident,)* + >)? 
+ )+ + ) => { + $(#[$meta])* + #[derive(Debug, PartialEq)] + $vis enum $name { + Src(ParseError), + $( + $st(ParseStateError<$st$(<$($l,)* $($c,)* $($t),* >)?>) + // ^^ `'static` + ),+ + } + + impl From> + for $name + { + fn from(e: ParseError) -> Self { + Self::Src(e) + } + } + + $( + impl + From)?>> + for $name + { + fn from(e: ParseStateError<$st$(<$($l,)* $($c,)* $($t),*>)?>) -> Self { + Self::$st(e) + } + } + )+ + + impl std::fmt::Display for $name { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Src(e) => std::fmt::Display::fmt(e, f), + $( + Self::$st(e) => std::fmt::Display::fmt(e, f), + )+ + } + } + } + + impl Diagnostic for $name { + fn describe(&self) -> Vec { + match self { + Self::Src(e) => e.describe(), + $( + Self::$st(e) => e.describe(), + )+ + } + } + } + }; +} + /// Declaratively define a lowering pipeline. /// /// A lowering pipeline stitches together parsers such that the objects of @@ -42,9 +139,48 @@ macro_rules! lower_pipeline { ($( $(#[$meta:meta])* - $vis:vis $fn:ident$(<$l:lifetime>)? - $(|> $lower:ty $([$ctx:ident])? $(, until ($until:pat))?)*; + $vis:vis $fn:ident$(<$l:lifetime>)? -> $struct:ident + $(|> + $lower_name:tt$(<$($lower_t:tt),+>)? + $([$ctx:ident])? + $(, until ($until:pat))? + )* + ; )*) => {$( + paste::paste! { + lower_error_sum! { + /// Recoverable error for + #[doc=concat!("[`", stringify!($fn), "`]")] + /// lowering pipeline. + /// + /// This represents an error that occurred from one of the + /// [`ParseState`]s in the lowering pipeline. + /// Since all [`ParseState`]s are expected to attempt + /// recovery on failure, + /// this error represents a _recoverable_ error. + /// Whether or not the error should be treated as + /// recoverable is entirely at the discretion of the sink + /// provided to the pipeline; + /// a sink may choose to promote all errors to + /// unrecoverable. + $vis [<$struct Error>] = $( + $lower_name$(<$($lower_t,)+>)? 
+ )* + } + } + + lower_pipeline!( + @pipeline + $vis $fn$(<$l>)? -> $struct + $(|> $lower_name$(<$($lower_t),+>)? $([$ctx])? $(, until ($until))?)* + ); + )*}; + + (@pipeline + $(#[$meta:meta])* + $vis:vis $fn:ident$(<$l:lifetime>)? -> $struct:ident + $(|> $lower:ty $([$ctx:ident])? $(, until ($until:pat))?)* + ) => {paste::paste!{ $(#[$meta])* /// /// Pipeline Definition @@ -80,26 +216,21 @@ macro_rules! lower_pipeline { /// 2. The _source_ token stream is accepted by the closure, /// which consists of tokens expected by the first parser /// in the pipeline; - /// 4. A _sink_ serves as the final destination for the token + /// 3. A _sink_ serves as the final destination for the token /// stream. - /// 5. A [`Result`] consisting of the updated context that was + /// 4. A [`Result`] consisting of the updated context that was /// originally passed into the function, /// so that it may be utilized in future pipelines. - /// 6. A _recoverable error_ type `ER` that may be utilized when + /// 5. A _recoverable error_ type + #[doc=concat!("[`", stringify!([<$struct Error>]), "`]")] + /// that may be utilized when /// compilation should continue despite an error. - /// All parsers are expected to perform their own error - /// recovery in an attempt to continue parsing to discover - /// further errors; - /// as such, - /// this error type `ER` must be able to contain the - /// errors of any parser in the pipeline, - /// which is the reason for the large block of - /// [`From`]s in this function's `where` clause. - /// 7. An _unrecoverable error_ type `EU` that may be yielded by + /// See [`crate::pipeline`] for more information. + /// 6. An _unrecoverable error_ type `EU` that may be yielded by /// the sink to terminate compilation immediately. /// This is a component of the [`Result`] type that is /// ultimately yielded as the result of this function. - $vis fn $fn<$($l,)? ES: Diagnostic, ER: Diagnostic, EU: Diagnostic, SA, SB>( + $vis fn $fn<$($l,)? 
ES: Diagnostic + 'static, EU: Diagnostic, SA, SB>( $( // Each parser may optionally receive context from an // earlier run. @@ -117,21 +248,6 @@ macro_rules! lower_pipeline { EU > where - // Recoverable errors (ER) are errors that could potentially be - // handled by the sink. - // Parsers are always expected to perform error recovery to the - // best of their ability. - // We need to support widening into this error type from every - // individual ParseState in this pipeline, - // plus the source. - ER: From> - $( - + From::Token, - <$lower as ParseState>::Error, - >> - )*, - // Unrecoverable errors (EU) are errors that the sink chooses // not to handle. // It is constructed explicitly from the sink, @@ -148,21 +264,25 @@ macro_rules! lower_pipeline { >, SB: FnMut( - Result + Result]> ) -> Result<(), EU> { move |src, sink| { - let lower_pipeline!(@ret_pat $($($ctx)?)*) = lower_pipeline!( - @body_head(src, sink) - $((|> $lower $([$ctx])? $(, until ($until))?))* - )?; + // Recoverable error type (for brevity). + #[doc(hidden)] + type ER = [<$struct Error>]; - Ok(($( - $($ctx,)? - )*)) + let lower_pipeline!(@ret_pat $($($ctx)?)*) = lower_pipeline!( + @body_head(src, sink) + $((|> $lower $([$ctx])? $(, until ($until))?))* + )?; + + Ok(($( + $($ctx,)? + )*)) } } - )*}; + }}; (@ret_ctx_ty $lower:ty, $_ctx:ident) => { <$lower as ParseState>::PubContext @@ -205,8 +325,8 @@ macro_rules! lower_pipeline { Lower::< ParsedObject, $head, - ER, - >::lower::<_, EU>(&mut $src.map(|result| result.map_err(ER::from)), |next| { + ER, + >::lower::<_, EU>(&mut $src.map(|result| result.map_err(ER::Src)), |next| { lower_pipeline!( @body_inner(next, $head, $sink) $($rest)* @@ -222,9 +342,9 @@ macro_rules! lower_pipeline { Lower::< ParsedObject, $head, - ER, + ER, >::lower_with_context::<_, EU>( - &mut $src.map(|result| result.map_err(ER::from)), + &mut $src.map(|result| result.map_err(ER::Src)), $ctx, |next| { lower_pipeline!(