tamer: parse::error::ParseError: Extract some variants into FinalizeError

This helps to clarify the situations under which these errors can occur, and the generality also helps to show why the inner types are as they are (e.g. the use of `String`). More importantly, this allows for an error type in `finalize` that is detached from the `ParseState`, which can then be used in the lowering pipeline as a more general error that is distinguishable from other lowering errors. At the moment I'm maintaining backwards compatibility (BC) with the existing `ParseError` API, but a following commit will demonstrate the use case to introduce recoverable vs. non-recoverable errors. DEV-13158
2022-10-26 10:54:30 -04:00 · 2022-10-26 10:54:30 -04:00 · 26aaf6efc1
parent 2087672c47
commit 26aaf6efc1
7 changed files with 140 additions and 89 deletions
--- a/tamer/src/parse.rs
+++ b/tamer/src/parse.rs
@ -27,7 +27,7 @@ mod parser;
 mod state;
 mod trace;

-pub use error::ParseError;
+pub use error::{FinalizeError, ParseError};
 pub use lower::{Lower, LowerIter, ParsedObject};
 pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};
 pub use state::{
@ -271,11 +271,13 @@ pub mod test {
        //     state,
        //   we must fail when we encounter the end of the stream.
        assert_eq!(
-            Some(Err(ParseError::UnexpectedEof(
-                span.endpoints().1.unwrap(),
-                // All the states have the same string
-                //   (at time of writing).
-                EchoState::default().to_string(),
+            Some(Err(ParseError::FinalizeError(
+                FinalizeError::UnexpectedEof(
+                    span.endpoints().1.unwrap(),
+                    // All the states have the same string
+                    //   (at time of writing).
+                    EchoState::default().to_string(),
+                )
            ))),
            sut.next()
        );
@ -331,7 +333,8 @@ pub mod test {
        let result = sut.finalize();
        assert_matches!(
            result,
-            Err((_, ParseError::UnexpectedEof(s, _))) if s == span.endpoints().1.unwrap()
+            Err((_, FinalizeError::UnexpectedEof(s, _)))
+                if s == span.endpoints().1.unwrap()
        );

        // The sut should have been re-returned,
--- a/tamer/src/parse/error.rs
+++ b/tamer/src/parse/error.rs
@ -43,6 +43,98 @@ use super::{ParseState, ParseStatus, Parser};
 ///   [`StateError`][ParseError::StateError] variant.
 #[derive(Debug, PartialEq)]
 pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
+    /// The parser reached an unhandled dead state.
+    ///
+    /// For more information,
+    ///   see [`ParseState::delegate`] and [`Parser::feed_tok`].
+    ///
+    /// The string is intended to describe what was expected to have been
+    ///   available based on the current [`ParseState`].
+    /// It is a heap-allocated string so that a copy of [`ParseState`]
+    ///   needn't be stored.
+    UnexpectedToken(T, String),
+
+    /// A parser-specific error associated with an inner
+    ///   [`ParseState`].
+    StateError(E),
+
+    /// The parser has no more input,
+    ///   but it failed to automatically finalize.
+    ///
+    /// See [`Parser::finalize`] for more information.
+    FinalizeError(FinalizeError),
+}
+
+impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
+    pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
+        self,
+    ) -> ParseError<T, EB>
+    where
+        EA: Into<EB>,
+    {
+        use ParseError::*;
+        match self {
+            UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
+            StateError(e) => StateError(e.into()),
+            FinalizeError(e) => FinalizeError(e),
+        }
+    }
+}
+
+//impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
+//    fn from(e: E) -> Self {
+//        Self::StateError(e)
+//    }
+//}
+
+impl<T: Token, E: Diagnostic + PartialEq> From<FinalizeError>
+    for ParseError<T, E>
+{
+    fn from(e: FinalizeError) -> Self {
+        Self::FinalizeError(e)
+    }
+}
+
+impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::UnexpectedToken(tok, desc) => {
+                write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
+            }
+            Self::StateError(e) => Display::fmt(e, f),
+            Self::FinalizeError(e) => Display::fmt(e, f),
+        }
+    }
+}
+
+impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            Self::UnexpectedToken(_, _) => None,
+            Self::StateError(e) => Some(e),
+            Self::FinalizeError(e) => Some(e),
+        }
+    }
+}
+
+impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
+    for ParseError<T, E>
+{
+    fn describe(&self) -> Vec<AnnotatedSpan> {
+        use ParseError::*;
+
+        match self {
+            UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
+            // TODO: Is there any additional useful context we can augment
+            //   this with?
+            StateError(e) => e.describe(),
+            FinalizeError(e) => e.describe(),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum FinalizeError {
    /// Token stream ended unexpectedly.
    ///
    /// This error means that the parser was expecting more input before
@ -64,17 +156,6 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
    ///   needn't be stored.
    UnexpectedEof(Span, String),

-    /// The parser reached an unhandled dead state.
-    ///
-    /// For more information,
-    ///   see [`ParseState::delegate`] and [`Parser::feed_tok`].
-    ///
-    /// The string is intended to describe what was expected to have been
-    ///   available based on the current [`ParseState`].
-    /// It is a heap-allocated string so that a copy of [`ParseState`]
-    ///   needn't be stored.
-    UnexpectedToken(T, String),
-
    /// The parser contains an outstanding token of lookahead that is no
    ///   longer
    ///     (or possibly never was)
@ -88,44 +169,14 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
    ///
    /// See [`Parser::take_lookahead_tok`] for more information.
    Lookahead(Span, String),
-
-    /// A parser-specific error associated with an inner
-    ///   [`ParseState`].
-    StateError(E),
 }

-impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
-    pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
-        self,
-    ) -> ParseError<T, EB>
-    where
-        EA: Into<EB>,
-    {
-        use ParseError::*;
-        match self {
-            UnexpectedEof(span, desc) => UnexpectedEof(span, desc),
-            UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
-            Lookahead(span, desc) => Lookahead(span, desc),
-            StateError(e) => StateError(e.into()),
-        }
-    }
-}
-
-impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
-    fn from(e: E) -> Self {
-        Self::StateError(e)
-    }
-}
-
-impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl Display for FinalizeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::UnexpectedEof(_, desc) => {
                write!(f, "unexpected end of input while {desc}")
            }
-            Self::UnexpectedToken(tok, desc) => {
-                write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
-            }
            // This is not really something the user should have to deal
            //   with,
            //     but maybe this will provide enough information that the
@ -140,33 +191,19 @@ impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
                       outstanding token of lookahead while {desc}"
                )
            }
-            Self::StateError(e) => Display::fmt(e, f),
        }
    }
 }

-impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        match self {
-            Self::StateError(e) => Some(e),
-            _ => None,
-        }
-    }
-}
+impl Error for FinalizeError {}

-impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
-    for ParseError<T, E>
-{
+impl Diagnostic for FinalizeError {
    fn describe(&self) -> Vec<AnnotatedSpan> {
-        use ParseError::*;
+        use FinalizeError::*;

        match self {
            UnexpectedEof(span, desc) => span.error(desc).into(),
-            UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
            Lookahead(span, desc) => span.error(desc).into(),
-            // TODO: Is there any additional useful context we can augment
-            //   this with?
-            StateError(e) => e.describe(),
        }
    }
 }
--- a/tamer/src/parse/lower.rs
+++ b/tamer/src/parse/lower.rs
@ -63,7 +63,11 @@ where
    /// Consume inner parser and yield its context.
    #[inline]
    fn finalize(self) -> Result<FinalizedParser<LS>, E> {
-        self.lower.finalize().map_err(|(_, e)| e.into())
+        // TODO: Propagate `FinalizeError` rather than maintaining API BC
+        //   with `ParseError`.
+        self.lower.finalize().map_err(|(_, e)| {
+            ParseError::<LS::Token, LS::Error>::FinalizeError(e).into()
+        })
    }
 }

--- a/tamer/src/parse/parser.rs
+++ b/tamer/src/parse/parser.rs
@ -22,8 +22,8 @@
 use super::{
    state::ClosedParseState,
    trace::{self, ParserTrace},
-    ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition,
-    TransitionResult,
+    FinalizeError, ParseError, ParseResult, ParseState, ParseStatus,
+    TokenStream, Transition, TransitionResult,
 };
 use crate::{
    parse::state::{Lookahead, TransitionData},
@ -168,7 +168,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
    ///     since the parser will have no later opportunity to continue
    ///     parsing.
    /// Consequently,
-    ///   the caller should expect [`ParseError::UnexpectedEof`] if the
+    ///   the caller should expect [`FinalizeError::UnexpectedEof`] if the
    ///   parser is not in an accepting state.
    ///
    /// To re-use the context returned by this method,
@ -176,10 +176,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
    /// Note that whether the context is permitted to be reused,
    ///   or is useful independently to the caller,
    ///   is a decision made by the [`ParseState`].
-    pub fn finalize(
-        self,
-    ) -> Result<FinalizedParser<S>, (Self, ParseError<S::Token, S::Error>)>
-    {
+    pub fn finalize(self) -> Result<FinalizedParser<S>, (Self, FinalizeError)> {
        match self.assert_accepting() {
            Ok(()) => Ok(FinalizedParser(self.ctx)),
            Err(err) => Err((self, err)),
@ -188,19 +185,19 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {

    /// Return [`Ok`] if the parser both has no outstanding lookahead token
    ///   and is in an accepting state,
-    ///     otherwise [`Err`] with [`ParseError::UnexpectedEof`].
+    ///     otherwise [`Err`] with the applicable [`FinalizeError`].
    ///
    /// See [`finalize`](Self::finalize) for the public-facing method.
-    fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
+    fn assert_accepting(&self) -> Result<(), FinalizeError> {
        let st = self.state.as_ref().unwrap();

        if let Some(Lookahead(lookahead)) = &self.lookahead {
-            Err(ParseError::Lookahead(lookahead.span(), st.to_string()))
+            Err(FinalizeError::Lookahead(lookahead.span(), st.to_string()))
        } else if st.is_accepting(&self.ctx) {
            Ok(())
        } else {
            let endpoints = self.last_span.endpoints();
-            Err(ParseError::UnexpectedEof(
+            Err(FinalizeError::UnexpectedEof(
                endpoints.1.unwrap_or(endpoints.0),
                st.to_string(),
            ))
@ -318,7 +315,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {

                match result {
                    Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
-                    Err(e) => Err(ParseError::from(e)),
+                    Err(e) => Err(ParseError::StateError(e)),
                }
            }
        }
@ -382,7 +379,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
    ///
    /// If the underlying [`TokenStream`] yields [`None`],
    ///   then the [`ParseState`] must be in an accepting state;
-    ///     otherwise, [`ParseError::UnexpectedEof`] will occur.
+    ///     otherwise, [`ParseError::FinalizeError`] will occur.
    ///
    /// This is intended to be invoked by [`Iterator::next`].
    /// Accepting a token rather than the [`TokenStream`] allows the caller
@ -395,7 +392,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
        match otok {
            None => match self.assert_accepting() {
                Ok(()) => None,
-                Err(e) => Some(Err(e)),
+                Err(e) => Some(Err(e.into())),
            },

            Some(tok) => Some(self.feed_tok(tok)),
@ -689,7 +686,7 @@ pub mod test {
            .finalize()
            .expect_err("must not finalize with token of lookahead");

-        assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
+        assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
    }

    // Tests the above,
@ -715,7 +712,7 @@ pub mod test {
            .finalize()
            .expect_err("must not finalize with token of lookahead");

-        assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
+        assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);

        // The token of lookahead should still be available to the parser,
        //   and this should consume it.
--- a/tamer/src/parse/state.rs
+++ b/tamer/src/parse/state.rs
@ -222,7 +222,7 @@ where
    /// An accepting state represents a valid state to stop parsing.
    /// If parsing stops at a state that is _not_ accepting,
    ///   then the [`TokenStream`] has ended unexpectedly and should produce
-    ///   a [`ParseError::UnexpectedEof`].
+    ///   a [`ParseError::FinalizeError`].
    ///
    /// It makes sense for there to be exist multiple accepting states for a
    ///   parser.
--- a/tamer/src/xir/flat/test.rs
+++ b/tamer/src/xir/flat/test.rs
@ -26,7 +26,7 @@ use std::assert_matches::assert_matches;

 use super::*;
 use crate::convert::ExpectInto;
-use crate::parse::{ParseError, Parsed};
+use crate::parse::{FinalizeError, ParseError, Parsed};
 use crate::span::dummy::*;
 use crate::sym::GlobalSymbolIntern;
 use crate::xir::test::{
@ -413,7 +413,12 @@ fn not_accepting_state_if_element_open() {
    );

    // Element was not closed.
-    assert_matches!(sut.next(), Some(Err(ParseError::UnexpectedEof(..))));
+    assert_matches!(
+        sut.next(),
+        Some(Err(ParseError::FinalizeError(
+            FinalizeError::UnexpectedEof(..)
+        )))
+    );
 }

 // XML permits comment nodes before and after the document root element.
--- a/tamer/src/xir/parse/ele/test.rs
+++ b/tamer/src/xir/parse/ele/test.rs
@ -37,7 +37,9 @@ use std::{assert_matches::assert_matches, error::Error, fmt::Display};
 use crate::{
    convert::ExpectInto,
    diagnose::Diagnostic,
-    parse::{Object, ParseError, ParseState, Parsed, ParsedResult},
+    parse::{
+        FinalizeError, Object, ParseError, ParseState, Parsed, ParsedResult,
+    },
    span::{dummy::*, Span},
    sym::SymbolId,
    xir::{
@ -2446,7 +2448,10 @@ fn superstate_not_accepting_until_root_close() {
        .expect_err("child accepting must not be accepting for superstate");

    let err = sut.next().unwrap().unwrap_err();
-    assert_matches!(err, ParseError::UnexpectedEof(..),);
+    assert_matches!(
+        err,
+        ParseError::FinalizeError(FinalizeError::UnexpectedEof(..))
+    );
 }

 // Ensure that we can actually export the generated identifiers