tamer: parse::error::ParseError: Extract some variants into FinalizeError

This helps to clarify the situations under which these errors can occur, and
the generality also helps to show why the inner types are as they
are (e.g. use of `String`).

But more importantly, this allows for an error type in `finalize` that is
detached from the `ParseState`, which can then be utilized in the
lowering pipeline as a more general error distinguishable from other
lowering errors.  At the moment I'm maintaining backwards
compatibility (BC), but a following commit will demonstrate the use case to
introduce recoverable vs. non-recoverable errors.

DEV-13158
main
Mike Gerwitz 2022-10-26 10:54:30 -04:00
parent 2087672c47
commit 26aaf6efc1
7 changed files with 140 additions and 89 deletions

View File

@ -27,7 +27,7 @@ mod parser;
mod state;
mod trace;
pub use error::ParseError;
pub use error::{FinalizeError, ParseError};
pub use lower::{Lower, LowerIter, ParsedObject};
pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};
pub use state::{
@ -271,11 +271,13 @@ pub mod test {
// state,
// we must fail when we encounter the end of the stream.
assert_eq!(
Some(Err(ParseError::UnexpectedEof(
span.endpoints().1.unwrap(),
// All the states have the same string
// (at time of writing).
EchoState::default().to_string(),
Some(Err(ParseError::FinalizeError(
FinalizeError::UnexpectedEof(
span.endpoints().1.unwrap(),
// All the states have the same string
// (at time of writing).
EchoState::default().to_string(),
)
))),
sut.next()
);
@ -331,7 +333,8 @@ pub mod test {
let result = sut.finalize();
assert_matches!(
result,
Err((_, ParseError::UnexpectedEof(s, _))) if s == span.endpoints().1.unwrap()
Err((_, FinalizeError::UnexpectedEof(s, _)))
if s == span.endpoints().1.unwrap()
);
// The sut should have been re-returned,

View File

@ -43,6 +43,98 @@ use super::{ParseState, ParseStatus, Parser};
/// [`StateError`][ParseError::StateError] variant.
#[derive(Debug, PartialEq)]
pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// The parser reached an unhandled dead state.
///
/// For more information,
/// see [`ParseState::delegate`] and [`Parser::feed_tok`].
///
/// The first field is the token that was not expected;
/// its span is what the diagnostic for this variant is anchored to.
///
/// The string is intended to describe what was expected to have been
/// available based on the current [`ParseState`].
/// It is a heap-allocated string so that a copy of [`ParseState`]
/// needn't be stored.
UnexpectedToken(T, String),
/// A parser-specific error associated with an inner
/// [`ParseState`].
///
/// This is the only variant that holds a value of type `E`;
/// see [`ParseError::inner_into`] to convert that inner error type.
StateError(E),
/// The parser has no more input,
/// but it failed to automatically finalize.
///
/// See [`Parser::finalize`] for more information.
///
/// The wrapped [`FinalizeError`] holds the specific reason
/// (e.g. [`FinalizeError::UnexpectedEof`] or
/// [`FinalizeError::Lookahead`]),
/// and is independent of both `T` and `E`.
FinalizeError(FinalizeError),
}
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
self,
) -> ParseError<T, EB>
where
EA: Into<EB>,
{
use ParseError::*;
match self {
UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
StateError(e) => StateError(e.into()),
FinalizeError(e) => FinalizeError(e),
}
}
}
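// NOTE(review): This blanket conversion is left disabled,
//   presumably because `FinalizeError` itself satisfies
//   `Diagnostic + PartialEq`,
//     and so re-enabling it as written would overlap with the
//     `From<FinalizeError>` impl for `ParseError`.
// Construct `ParseError::StateError` explicitly instead.
//impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
// fn from(e: E) -> Self {
// Self::StateError(e)
// }
//}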
impl<T: Token, E: Diagnostic + PartialEq> From<FinalizeError>
for ParseError<T, E>
{
fn from(e: FinalizeError) -> Self {
Self::FinalizeError(e)
}
}
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
    /// Render a human-readable summary of the error.
    ///
    /// The wrapping variants defer entirely to the [`Display`] impl of
    ///   the error that they hold;
    ///     only [`ParseError::UnexpectedToken`] produces its own message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::StateError(inner) => Display::fmt(inner, f),
            Self::FinalizeError(fin) => Display::fmt(fin, f),
            Self::UnexpectedToken(tok, desc) => {
                let quoted = TtQuote::wrap(tok);
                write!(f, "unexpected {} while {desc}", quoted)
            }
        }
    }
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::UnexpectedToken(_, _) => None,
Self::StateError(e) => Some(e),
Self::FinalizeError(e) => Some(e),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
for ParseError<T, E>
{
fn describe(&self) -> Vec<AnnotatedSpan> {
use ParseError::*;
match self {
UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
// TODO: Is there any additional useful context we can augment
// this with?
StateError(e) => e.describe(),
FinalizeError(e) => e.describe(),
}
}
}
#[derive(Debug, PartialEq)]
pub enum FinalizeError {
/// Token stream ended unexpectedly.
///
/// This error means that the parser was expecting more input before
@ -64,17 +156,6 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
/// needn't be stored.
UnexpectedEof(Span, String),
/// The parser reached an unhandled dead state.
///
/// For more information,
/// see [`ParseState::delegate`] and [`Parser::feed_tok`].
///
/// The string is intended to describe what was expected to have been
/// available based on the current [`ParseState`].
/// It is a heap-allocated string so that a copy of [`ParseState`]
/// needn't be stored.
UnexpectedToken(T, String),
/// The parser contains an outstanding token of lookahead that is no
/// longer
/// (or possibly never was)
@ -88,44 +169,14 @@ pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
///
/// See [`Parser::take_lookahead_tok`] for more information.
Lookahead(Span, String),
/// A parser-specific error associated with an inner
/// [`ParseState`].
StateError(E),
}
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
self,
) -> ParseError<T, EB>
where
EA: Into<EB>,
{
use ParseError::*;
match self {
UnexpectedEof(span, desc) => UnexpectedEof(span, desc),
UnexpectedToken(x, desc) => UnexpectedToken(x, desc),
Lookahead(span, desc) => Lookahead(span, desc),
StateError(e) => StateError(e.into()),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
fn from(e: E) -> Self {
Self::StateError(e)
}
}
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl Display for FinalizeError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::UnexpectedEof(_, desc) => {
write!(f, "unexpected end of input while {desc}")
}
Self::UnexpectedToken(tok, desc) => {
write!(f, "unexpected {} while {desc}", TtQuote::wrap(tok))
}
// This is not really something the user should have to deal
// with,
// but maybe this will provide enough information that the
@ -140,33 +191,19 @@ impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
outstanding token of lookahead while {desc}"
)
}
Self::StateError(e) => Display::fmt(e, f),
}
}
}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::StateError(e) => Some(e),
_ => None,
}
}
}
impl Error for FinalizeError {}
impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
for ParseError<T, E>
{
impl Diagnostic for FinalizeError {
fn describe(&self) -> Vec<AnnotatedSpan> {
use ParseError::*;
use FinalizeError::*;
match self {
UnexpectedEof(span, desc) => span.error(desc).into(),
UnexpectedToken(tok, desc) => tok.span().error(desc).into(),
Lookahead(span, desc) => span.error(desc).into(),
// TODO: Is there any additional useful context we can augment
// this with?
StateError(e) => e.describe(),
}
}
}

View File

@ -63,7 +63,11 @@ where
/// Consume inner parser and yield its context.
#[inline]
fn finalize(self) -> Result<FinalizedParser<LS>, E> {
self.lower.finalize().map_err(|(_, e)| e.into())
// TODO: Propagate `FinalizeError` rather than maintaining API BC
// with `ParseError`.
self.lower.finalize().map_err(|(_, e)| {
ParseError::<LS::Token, LS::Error>::FinalizeError(e).into()
})
}
}

View File

@ -22,8 +22,8 @@
use super::{
state::ClosedParseState,
trace::{self, ParserTrace},
ParseError, ParseResult, ParseState, ParseStatus, TokenStream, Transition,
TransitionResult,
FinalizeError, ParseError, ParseResult, ParseState, ParseStatus,
TokenStream, Transition, TransitionResult,
};
use crate::{
parse::state::{Lookahead, TransitionData},
@ -168,7 +168,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// since the parser will have no later opportunity to continue
/// parsing.
/// Consequently,
/// the caller should expect [`ParseError::UnexpectedEof`] if the
/// the caller should expect [`FinalizeError::UnexpectedEof`] if the
/// parser is not in an accepting state.
///
/// To re-use the context returned by this method,
@ -176,10 +176,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Note that whether the context is permitted to be reused,
/// or is useful independently to the caller,
/// is a decision made by the [`ParseState`].
pub fn finalize(
self,
) -> Result<FinalizedParser<S>, (Self, ParseError<S::Token, S::Error>)>
{
pub fn finalize(self) -> Result<FinalizedParser<S>, (Self, FinalizeError)> {
match self.assert_accepting() {
Ok(()) => Ok(FinalizedParser(self.ctx)),
Err(err) => Err((self, err)),
@ -188,19 +185,19 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// Return [`Ok`] if the parser both has no outstanding lookahead token
/// and is in an accepting state,
/// otherwise [`Err`] with [`ParseError::UnexpectedEof`].
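/// otherwise [`Err`] with [`FinalizeError::Lookahead`] if a token of
/// lookahead is outstanding,
/// or with [`FinalizeError::UnexpectedEof`] if the state is not
/// accepting.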
///
/// See [`finalize`](Self::finalize) for the public-facing method.
fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
fn assert_accepting(&self) -> Result<(), FinalizeError> {
let st = self.state.as_ref().unwrap();
if let Some(Lookahead(lookahead)) = &self.lookahead {
Err(ParseError::Lookahead(lookahead.span(), st.to_string()))
Err(FinalizeError::Lookahead(lookahead.span(), st.to_string()))
} else if st.is_accepting(&self.ctx) {
Ok(())
} else {
let endpoints = self.last_span.endpoints();
Err(ParseError::UnexpectedEof(
Err(FinalizeError::UnexpectedEof(
endpoints.1.unwrap_or(endpoints.0),
st.to_string(),
))
@ -318,7 +315,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Parser<S, I> {
match result {
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
Err(e) => Err(ParseError::from(e)),
Err(e) => Err(ParseError::StateError(e)),
}
}
}
@ -382,7 +379,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
///
/// If the underlying [`TokenStream`] yields [`None`],
/// then the [`ParseState`] must be in an accepting state;
/// otherwise, [`ParseError::UnexpectedEof`] will occur.
/// otherwise, [`ParseError::FinalizeError`] will occur.
///
/// This is intended to be invoked by [`Iterator::next`].
/// Accepting a token rather than the [`TokenStream`] allows the caller
@ -395,7 +392,7 @@ impl<S: ClosedParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
match otok {
None => match self.assert_accepting() {
Ok(()) => None,
Err(e) => Some(Err(e)),
Err(e) => Some(Err(e.into())),
},
Some(tok) => Some(self.feed_tok(tok)),
@ -689,7 +686,7 @@ pub mod test {
.finalize()
.expect_err("must not finalize with token of lookahead");
assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
}
// Tests the above,
@ -715,7 +712,7 @@ pub mod test {
.finalize()
.expect_err("must not finalize with token of lookahead");
assert_matches!(err, ParseError::Lookahead(span, _) if span == DUMMY_SPAN);
assert_matches!(err, FinalizeError::Lookahead(span, _) if span == DUMMY_SPAN);
// The token of lookahead should still be available to the parser,
// and this should consume it.

View File

@ -222,7 +222,7 @@ where
/// An accepting state represents a valid state to stop parsing.
/// If parsing stops at a state that is _not_ accepting,
/// then the [`TokenStream`] has ended unexpectedly and should produce
/// a [`ParseError::UnexpectedEof`].
/// a [`ParseError::FinalizeError`].
///
/// It makes sense for there to exist multiple accepting states for a
/// parser.

View File

@ -26,7 +26,7 @@ use std::assert_matches::assert_matches;
use super::*;
use crate::convert::ExpectInto;
use crate::parse::{ParseError, Parsed};
use crate::parse::{FinalizeError, ParseError, Parsed};
use crate::span::dummy::*;
use crate::sym::GlobalSymbolIntern;
use crate::xir::test::{
@ -413,7 +413,12 @@ fn not_accepting_state_if_element_open() {
);
// Element was not closed.
assert_matches!(sut.next(), Some(Err(ParseError::UnexpectedEof(..))));
assert_matches!(
sut.next(),
Some(Err(ParseError::FinalizeError(
FinalizeError::UnexpectedEof(..)
)))
);
}
// XML permits comment nodes before and after the document root element.

View File

@ -37,7 +37,9 @@ use std::{assert_matches::assert_matches, error::Error, fmt::Display};
use crate::{
convert::ExpectInto,
diagnose::Diagnostic,
parse::{Object, ParseError, ParseState, Parsed, ParsedResult},
parse::{
FinalizeError, Object, ParseError, ParseState, Parsed, ParsedResult,
},
span::{dummy::*, Span},
sym::SymbolId,
xir::{
@ -2446,7 +2448,10 @@ fn superstate_not_accepting_until_root_close() {
.expect_err("child accepting must not be accepting for superstate");
let err = sut.next().unwrap().unwrap_err();
assert_matches!(err, ParseError::UnexpectedEof(..),);
assert_matches!(
err,
ParseError::FinalizeError(FinalizeError::UnexpectedEof(..))
);
}
// Ensure that we can actually export the generated identifiers