tamer: parse::error::ParseError: Extract some variants into FinalizeError

This helps to clarify the situations under which these errors can occur, and
the generality also helps to show why the inner types are as they
are (e.g. use of `String`).

But more importantly, this allows for an error type in `finalize` that is
detached from the `ParseState`, which the lowering pipeline will be able to
use as a more general error distinguishable from other lowering errors.  At
the moment I'm maintaining backwards compatibility (BC), but a following
commit will demonstrate the use case by introducing recoverable vs.
non-recoverable errors.

DEV-13158
main
Mike Gerwitz 2022-10-26 10:54:30 -04:00
parent 2087672c47
commit 26aaf6efc1
7 changed files with 140 additions and 89 deletions

View File

@ -27,7 +27,7 @@ mod parser;
mod state; mod state;
mod trace; mod trace;
pub use error::ParseError; pub use error::{FinalizeError, ParseError};
pub use lower::{Lower, LowerIter, ParsedObject}; pub use lower::{Lower, LowerIter, ParsedObject};
pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser}; pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};
pub use state::{ pub use state::{
@ -271,11 +271,13 @@ pub mod test {
// state, // state,
// we must fail when we encounter the end of the stream. // we must fail when we encounter the end of the stream.
assert_eq!( assert_eq!(
Some(Err(ParseError::UnexpectedEof( Some(Err(ParseError::FinalizeError(
span.endpoints().1.unwrap(), FinalizeError::UnexpectedEof(
// All the states have the same string span.endpoints().1.unwrap(),
// (at time of writing). // All the states have the same string
EchoState::default().to_string(), // (at time of writing).
EchoState::default().to_string(),
)
))), ))),
sut.next() sut.next()
); );
@ -331,7 +333,8 @@ pub mod test {
let result = sut.finalize(); let result = sut.finalize();
assert_matches!( assert_matches!(
result, result,
Err((_, ParseError::UnexpectedEof(s, _))) if s == span.endpoints().1.unwrap() Err((_, FinalizeError::UnexpectedEof(s, _)))
if s == span.endpoints().1.unwrap()
); );
// The sut should have been re-returned, // The sut should have been re-returned,

View File

@ -43,6 +43,98 @@ use super::{ParseState, ParseStatus, Parser};
/// [`StateError`][ParseError::StateError] variant. /// [`StateError`][ParseError::StateError] variant.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum ParseError<T: Token, E: Diagnostic + PartialEq> { pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// The parser reached an unhandled dead state.
///
/// For more information,
/// see [`ParseState::delegate`] and [`Parser::feed_tok`].
///
/// The string is intended to describe what was expected to have been
/// available based on the current [`ParseState`].
/// It is a heap-allocated string so that a copy of [`ParseState`]
/// needn't be stored.
UnexpectedToken(T, String),
/// A parser-specific error associated with an inner
/// [`ParseState`].
StateError(E),
/// The parser has no more input,
/// but it failed to automatically finalize.
///
/// See [`Parser::finalize`] for more information.
FinalizeError(FinalizeError),
}
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
self,
) -> ParseError<T, EB>
where
EA: Into<EB>,
{
use ParseError::*;
match self {
UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
StateError(e) => StateError(e.into()),
FinalizeError(e) => FinalizeError(e),
}
}
}
//impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
// fn from(e: E) -> Self {
// Self::StateError(e)
// }
//}
impl<T: Token, E: Diagnostic + PartialEq> From<FinalizeError>
for ParseError<T, E>
{
fn from(e: FinalizeError) -> Self {
Self::FinalizeError(e)
}
}
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnexpectedToken(tok, desc) => {
write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
}
Self::StateError(e) => Display::fmt(e, f),
Self::FinalizeError(e) => Display::fmt(e, f),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::UnexpectedToken(_, _) => None,
Self::StateError(e) => Some(e),
Self::FinalizeError(e) => Some(e),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
for ParseError<T, E>
{
fn describe(&self) -> Vec<AnnotatedSpan> {
use ParseError::*;
match self {
UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
// TODO: Is there any additional useful context we can augment
// this with?
StateError(e) => e.describe(),
FinalizeError(e) => e.describe(),
}
}
}
#[derive(Debug, PartialEq)]
pub enum FinalizeError {
/// Token stream ended unexpectedly. /// Token stream ended unexpectedly.
/// ///
/// This error means that the parser was expecting more input before /// This error means that the parser was expecting more input before
@ -64,17 +156,6 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// needn't be stored. /// needn't be stored.
UnexpectedEof(Span, String), UnexpectedEof(Span, String),
/// The parser reached an unhandled dead state.
///
/// For more information,
/// see [`ParseState::delegate`] and [`Parser::feed_tok`].
///
/// The string is intended to describe what was expected to have been
/// available based on the current [`ParseState`].
/// It is a heap-allocated string so that a copy of [`ParseState`]
/// needn't be stored.
UnexpectedToken(T, String),
/// The parser contains an outstanding token of lookahead that is no /// The parser contains an outstanding token of lookahead that is no
/// longer /// longer
/// (or possibly never was) /// (or possibly never was)
@ -88,44 +169,14 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// ///
/// See [`Parser::take_lookahead_tok`] for more information. /// See [`Parser::take_lookahead_tok`] for more information.
Lookahead(Span, String), Lookahead(Span, String),
/// A parser-specific error associated with an inner
/// [`ParseState`].
StateError(E),
} }
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> { impl Display for FinalizeError {
pub fn inner_into<EB: Diagnostic + PartialEq + Eq>( fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
self,
) -> ParseError<T, EB>
where
EA: Into<EB>,
{
use ParseError::*;
match self {
UnexpectedEof(span, desc) => UnexpectedEof(span, desc),
UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
Lookahead(span, desc) => Lookahead(span, desc),
StateError(e) => StateError(e.into()),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
fn from(e: E) -> Self {
Self::StateError(e)
}
}
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::UnexpectedEof(_, desc) => { Self::UnexpectedEof(_, desc) => {
write!(f, "unexpected end of input while {desc}") write!(f, "unexpected end of input while {desc}")
} }
Self::UnexpectedToken(tok, desc) => {
write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
}
// This is not really something the user should have to deal // This is not really something the user should have to deal
// with, // with,
// but maybe this will provide enough information that the // but maybe this will provide enough information that the
@ -140,33 +191,19 @@ impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
outstanding token of lookahead while {desc}" outstanding token of lookahead while {desc}"
) )
} }
Self::StateError(e) => Display::fmt(e, f),
} }
} }
} }
impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> { impl Error for FinalizeError {}
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::StateError(e) => Some(e),
_ => None,
}
}
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic impl Diagnostic for FinalizeError {
for ParseError<T, E>
{
fn describe(&self) -> Vec<AnnotatedSpan> { fn describe(&self) -> Vec<AnnotatedSpan> {
use ParseError::*; use FinalizeError::*;
match self { match self {
UnexpectedEof(span, desc) => span.error(desc).into(), UnexpectedEof(span, desc) => span.error(desc).into(),
UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
Lookahead(span, desc) => span.error(desc).into(), Lookahead(span, desc) => span.error(desc).into(),
// TODO: Is there any additional useful context we can augment
// this with?
StateError(e) => e.describe(),
} }
} }
} }

View File

@ -63,7 +63,11 @@ where
/// Consume inner parser and yield its context. /// Consume inner parser and yield its context.
#[inline] #[inline]
fn finalize(self) -> Result<FinalizedParser<LS>, E> { fn finalize(self) -> Result<FinalizedParser<LS>, E> {
self.lower.finalize().map_err(|(_, e)| e.into()) // TODO: Propagate `FinalizeError` rather than maintaining API BC
// with `ParseError`.
self.lower.finalize().map_err(|(_, e)| {
ParseError::<LS::Token, LS::Error>::FinalizeError(e).into()
})
} }
} }

View File

@ -22,8 +22,8 @@
use super::{ use super::{
state::ClosedParseState, state::ClosedParseState,
trace::{self, ParserTrace}, trace::{self, ParserTrace},
ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition, FinalizeError, ParseError, ParseResult, ParseState, ParseStatus,
TransitionResult, TokenStream, Transition, TransitionResult,
}; };
use crate::{ use crate::{
parse::state::{Lookahead, TransitionData}, parse::state::{Lookahead, TransitionData},
@ -168,7 +168,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// since the parser will have no later opportunity to continue /// since the parser will have no later opportunity to continue
/// parsing. /// parsing.
/// Consequently, /// Consequently,
/// the caller should expect [`ParseError::UnexpectedEof`] if the /// the caller should expect [`FinalizeError::UnexpectedEof`] if the
/// parser is not in an accepting state. /// parser is not in an accepting state.
/// ///
/// To re-use the context returned by this method, /// To re-use the context returned by this method,
@ -176,10 +176,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Note that whether the context is permitted to be reused, /// Note that whether the context is permitted to be reused,
/// or is useful independently to the caller, /// or is useful independently to the caller,
/// is a decision made by the [`ParseState`]. /// is a decision made by the [`ParseState`].
pub fn finalize( pub fn finalize(self) -> Result<FinalizedParser<S>, (Self, FinalizeError)> {
self,
) -> Result<FinalizedParser<S>, (Self, ParseError<S::Token, S::Error>)>
{
match self.assert_accepting() { match self.assert_accepting() {
Ok(()) => Ok(FinalizedParser(self.ctx)), Ok(()) => Ok(FinalizedParser(self.ctx)),
Err(err) => Err((self, err)), Err(err) => Err((self, err)),
@ -188,19 +185,19 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Return [`Ok`] if the parser both has no outstanding lookahead token /// Return [`Ok`] if the parser both has no outstanding lookahead token
/// and is in an accepting state, /// and is in an accepting state,
/// otherwise [`Err`] with [`ParseError::UnexpectedEof`]. /// otherwise [`Err`] with [`FinalizeError::UnexpectedEof`].
/// ///
/// See [`finalize`](Self::finalize) for the public-facing method. /// See [`finalize`](Self::finalize) for the public-facing method.
fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> { fn assert_accepting(&self) -> Result<(), FinalizeError> {
let st = self.state.as_ref().unwrap(); let st = self.state.as_ref().unwrap();
if let Some(Lookahead(lookahead)) = &self.lookahead { if let Some(Lookahead(lookahead)) = &self.lookahead {
Err(ParseError::Lookahead(lookahead.span(), st.to_string())) Err(FinalizeError::Lookahead(lookahead.span(), st.to_string()))
} else if st.is_accepting(&self.ctx) { } else if st.is_accepting(&self.ctx) {
Ok(()) Ok(())
} else { } else {
let endpoints = self.last_span.endpoints(); let endpoints = self.last_span.endpoints();
Err(ParseError::UnexpectedEof( Err(FinalizeError::UnexpectedEof(
endpoints.1.unwrap_or(endpoints.0), endpoints.1.unwrap_or(endpoints.0),
st.to_string(), st.to_string(),
)) ))
@ -318,7 +315,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
match result { match result {
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()), Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
Err(e) => Err(ParseError::from(e)), Err(e) => Err(ParseError::StateError(e)),
} }
} }
} }
@ -382,7 +379,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
/// ///
/// If the underlying [`TokenStream`] yields [`None`], /// If the underlying [`TokenStream`] yields [`None`],
/// then the [`ParseState`] must be in an accepting state; /// then the [`ParseState`] must be in an accepting state;
/// otherwise, [`ParseError::UnexpectedEof`] will occur. /// otherwise, [`ParseError::FinalizeError`] will occur.
/// ///
/// This is intended to be invoked by [`Iterator::next`]. /// This is intended to be invoked by [`Iterator::next`].
/// Accepting a token rather than the [`TokenStream`] allows the caller /// Accepting a token rather than the [`TokenStream`] allows the caller
@ -395,7 +392,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
match otok { match otok {
None => match self.assert_accepting() { None => match self.assert_accepting() {
Ok(()) => None, Ok(()) => None,
Err(e) => Some(Err(e)), Err(e) => Some(Err(e.into())),
}, },
Some(tok) => Some(self.feed_tok(tok)), Some(tok) => Some(self.feed_tok(tok)),
@ -689,7 +686,7 @@ pub mod test {
.finalize() .finalize()
.expect_err("must not finalize with token of lookahead"); .expect_err("must not finalize with token of lookahead");
assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN); assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
} }
// Tests the above, // Tests the above,
@ -715,7 +712,7 @@ pub mod test {
.finalize() .finalize()
.expect_err("must not finalize with token of lookahead"); .expect_err("must not finalize with token of lookahead");
assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN); assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
// The token of lookahead should still be available to the parser, // The token of lookahead should still be available to the parser,
// and this should consume it. // and this should consume it.

View File

@ -222,7 +222,7 @@ where
/// An accepting state represents a valid state to stop parsing. /// An accepting state represents a valid state to stop parsing.
/// If parsing stops at a state that is _not_ accepting, /// If parsing stops at a state that is _not_ accepting,
/// then the [`TokenStream`] has ended unexpectedly and should produce /// then the [`TokenStream`] has ended unexpectedly and should produce
/// a [`ParseError::UnexpectedEof`]. /// a [`ParseError::FinalizeError`].
/// ///
/// It makes sense for there to exist multiple accepting states for a /// It makes sense for there to exist multiple accepting states for a
/// parser. /// parser.

View File

@ -26,7 +26,7 @@ use std::assert_matches::assert_matches;
use super::*; use super::*;
use crate::convert::ExpectInto; use crate::convert::ExpectInto;
use crate::parse::{ParseError, Parsed}; use crate::parse::{FinalizeError, ParseError, Parsed};
use crate::span::dummy::*; use crate::span::dummy::*;
use crate::sym::GlobalSymbolIntern; use crate::sym::GlobalSymbolIntern;
use crate::xir::test::{ use crate::xir::test::{
@ -413,7 +413,12 @@ fn not_accepting_state_if_element_open() {
); );
// Element was not closed. // Element was not closed.
assert_matches!(sut.next(), Some(Err(ParseError::UnexpectedEof(..)))); assert_matches!(
sut.next(),
Some(Err(ParseError::FinalizeError(
FinalizeError::UnexpectedEof(..)
)))
);
} }
// XML permits comment nodes before and after the document root element. // XML permits comment nodes before and after the document root element.

View File

@ -37,7 +37,9 @@ use std::{assert_matches::assert_matches, error::Error, fmt::Display};
use crate::{ use crate::{
convert::ExpectInto, convert::ExpectInto,
diagnose::Diagnostic, diagnose::Diagnostic,
parse::{Object, ParseError, ParseState, Parsed, ParsedResult}, parse::{
FinalizeError, Object, ParseError, ParseState, Parsed, ParsedResult,
},
span::{dummy::*, Span}, span::{dummy::*, Span},
sym::SymbolId, sym::SymbolId,
xir::{ xir::{
@ -2446,7 +2448,10 @@ fn superstate_not_accepting_until_root_close() {
.expect_err("child accepting must not be accepting for superstate"); .expect_err("child accepting must not be accepting for superstate");
let err = sut.next().unwrap().unwrap_err(); let err = sut.next().unwrap().unwrap_err();
assert_matches!(err, ParseError::UnexpectedEof(..),); assert_matches!(
err,
ParseError::FinalizeError(FinalizeError::UnexpectedEof(..))
);
} }
// Ensure that we can actually export the generated identifiers // Ensure that we can actually export the generated identifiers