From eafb3b2a1bfc28a5f475b5d9400bdcf95253ab5a Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Wed, 25 May 2022 14:20:10 -0400 Subject: [PATCH] tamer: Add Display impl for each ParseState for generic ParseErrors This is intended to describe, to the user, the state that the parser is in. This will be used to convey additional information for general parser errors, but it should also probably be integrated into parsers' individual errors as well when appropriate. This is something I expected to add at some point, but I wanted to add them because, when dealing with lowering errors, it can be difficult to tell what parser the error originated from. DEV-11864 --- tamer/src/diagnose.rs | 6 +++ tamer/src/obj/xmlo/reader.rs | 82 ++++++++++++++++++++++++++++++++++++ tamer/src/parse.rs | 69 +++++++++++++++++++----------- tamer/src/xir/attr/parse.rs | 13 ++++++ tamer/src/xir/flat.rs | 16 +++++++ tamer/src/xir/flat/test.rs | 28 +++++++----- tamer/src/xir/tree.rs | 21 +++++++++ tamer/src/xir/tree/test.rs | 15 ++++--- 8 files changed, 209 insertions(+), 41 deletions(-) diff --git a/tamer/src/diagnose.rs b/tamer/src/diagnose.rs index a7e21923..7e088dda 100644 --- a/tamer/src/diagnose.rs +++ b/tamer/src/diagnose.rs @@ -163,6 +163,12 @@ impl<'a> From for Label<'a> { } } +impl<'a> From<&'a String> for Label<'a> { + fn from(s: &'a String) -> Self { + Self(Cow::Borrowed(s)) + } +} + impl<'a> From<&'a str> for Label<'a> { fn from(s: &'a str) -> Self { Self(Cow::Borrowed(s)) diff --git a/tamer/src/obj/xmlo/reader.rs b/tamer/src/obj/xmlo/reader.rs index 62b8ce38..364e502f 100644 --- a/tamer/src/obj/xmlo/reader.rs +++ b/tamer/src/obj/xmlo/reader.rs @@ -17,6 +17,8 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . +use std::fmt::Display; + use super::{SymAttrs, XmloError}; use crate::{ num::{Dim, Dtype}, @@ -254,6 +256,26 @@ impl ParseState } } +impl Display + for XmloReader +{ + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use XmloReader::*; + + match self { + Ready => write!(f, "awaiting xmlo input"), + Package => write!(f, "processing package attributes"), + Symtable(_, ss) => Display::fmt(ss, f), + SymDepsExpected => write!(f, "expecting symbol dependency list"), + SymDeps(_, sd) => Display::fmt(sd, f), + FragmentsExpected => write!(f, "expecting start of fragments"), + Fragments(_, sf) => Display::fmt(sf, f), + Eoh => write!(f, "finished parsing header"), + Done => write!(f, "finished parsing xmlo file"), + } + } +} + /// Symbol table parser operating within a delimited context. /// /// This parser expects a parent [`ParseState`] to indicate when symtable @@ -511,6 +533,24 @@ impl SymtableState { } } +impl Display for SymtableState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use SymtableState::*; + + match self { + Ready => write!(f, "expecting symbol declaration"), + Sym(_, Some(sym), _) => write!(f, "parsing symbol `{sym}`"), + Sym(_, None, _) => write!(f, "parsing a symbol"), + SymMapFrom(_, sym, _, _) => { + write!(f, "expecting map name for symbol `{sym}`") + } + SymRef(_, sym, _, _) => { + write!(f, "parsing refs for symbol `{sym}`") + } + } + } +} + impl From<(SymbolId, SymAttrs, Span)> for XmloToken { fn from(tup: (SymbolId, SymAttrs, Span)) -> Self { match tup { @@ -610,6 +650,29 @@ impl ParseState for SymDepsState { } } +impl Display for SymDepsState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use SymDepsState::*; + + match self { + Ready => write!(f, "expecting symbol table entry"), + SymUnnamed(_) => write!(f, "expecting name for symbol table entry"), + Sym(_, sym) => write!( + f, + "expecting dependencies for symbol table entry `{sym}`" + ), + SymRefUnnamed(_, sym, _) => write!( + f, + "expecting dependency name for symbol table entry `{sym}`" + ), + SymRefDone(_, sym, _) => write!( + f, + "expending end of dependency for symbol table entry `{sym}`" + ), + } + } +} + /// Text fragment (compiled code) parser for `preproc:fragments` children. /// /// This parser expects a parent [`ParseState`] to indicate when dependency @@ -698,5 +761,24 @@ impl ParseState for FragmentsState { } } +impl Display for FragmentsState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use FragmentsState::*; + + match self { + Ready => write!(f, "expecting fragment"), + FragmentUnnamed(_) => { + write!(f, "expecting fragment association id") + } + Fragment(_, sym) => { + write!(f, "expecting fragment text for symbol `{sym}`") + } + FragmentDone(_, sym) => { + write!(f, "expecting end of fragment for symbol `{sym}`") + } + } + } +} + #[cfg(test)] mod test; diff --git a/tamer/src/parse.rs b/tamer/src/parse.rs index 43221cb9..9f42026c 100644 --- a/tamer/src/parse.rs +++ b/tamer/src/parse.rs @@ -113,7 +113,7 @@ where /// Whatever the underlying automaton, /// a `(state, token, context)` triple must uniquely determine the next /// parser action. -pub trait ParseState: Default + PartialEq + Eq + Debug { +pub trait ParseState: Default + PartialEq + Eq + Display + Debug { /// Input tokens to the parser. type Token: Token; @@ -642,6 +642,7 @@ impl> Parser { let endpoints = self.last_span.endpoints(); Err(ParseError::UnexpectedEof( endpoints.1.unwrap_or(endpoints.0), + self.state.to_string(), )) } } @@ -672,7 +673,10 @@ impl> Parser { // Nothing handled this dead state, // and we cannot discard a lookahead token, // so we have no choice but to produce an error. - Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken(invalid)), + Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken( + invalid, + self.state.to_string(), + )), Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()), Err(e) => Err(e.into()), @@ -835,7 +839,12 @@ pub enum ParseError { /// If this parser follows another, /// then the combinator ought to substitute a missing span with /// whatever span preceded this invocation. - UnexpectedEof(Span), + /// + /// The string is intended to describe what was expected to have been + /// available based on the current [`ParseState`]. + /// It is a heap-allocated string so that a copy of [`ParseState`] + /// needn't be stored. + UnexpectedEof(Span, String), /// The parser reached an unhandled dead state. /// @@ -844,11 +853,11 @@ pub enum ParseError { /// If that does not occur, /// [`Parser`] produces this error. /// - /// In the future, - /// it may be desirable to be able to query [`ParseState`] for what - /// tokens are acceptable at this point, - /// to provide better error messages. - UnexpectedToken(T), + /// The string is intended to describe what was expected to have been + /// available based on the current [`ParseState`]. + /// It is a heap-allocated string so that a copy of [`ParseState`] + /// needn't be stored. + UnexpectedToken(T, String), /// A parser-specific error associated with an inner /// [`ParseState`]. @@ -864,8 +873,8 @@ impl ParseError { { use ParseError::*; match self { - UnexpectedEof(x) => UnexpectedEof(x), - UnexpectedToken(x) => UnexpectedToken(x), + UnexpectedEof(span, desc) => UnexpectedEof(span, desc), + UnexpectedToken(x, desc) => UnexpectedToken(x, desc), StateError(e) => StateError(e.into()), } } @@ -880,11 +889,11 @@ impl From for ParseError { impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::UnexpectedEof(_) => { - write!(f, "unexpected end of input") + Self::UnexpectedEof(_, desc) => { + write!(f, "unexpected end of input while {desc}") } - Self::UnexpectedToken(_tok) => { - write!(f, "unexpected input") + Self::UnexpectedToken(_, desc) => { + write!(f, "unexpected input while {desc}") } Self::StateError(e) => Display::fmt(e, f), } @@ -907,14 +916,9 @@ impl Diagnostic use ParseError::*; match self { - // TODO: More information from the underlying parser on what was expected. - UnexpectedEof(span) => { - span.error("unexpected end of input here").into() - } + UnexpectedEof(span, desc) => span.error(desc).into(), - UnexpectedToken(tok) => { - tok.span().error("this was unexpected").into() - } + UnexpectedToken(tok, desc) => tok.span().error(desc).into(), // TODO: Is there any additional useful context we can augment // this with? @@ -1121,6 +1125,12 @@ pub mod test { } } + impl Display for EchoState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "::fmt") + } + } + #[derive(Debug, PartialEq, Eq)] enum EchoStateError { InnerError(TestToken), @@ -1183,7 +1193,12 @@ pub mod test { // state, // we must fail when we encounter the end of the stream. assert_eq!( - Some(Err(ParseError::UnexpectedEof(span.endpoints().1.unwrap()))), + Some(Err(ParseError::UnexpectedEof( + span.endpoints().1.unwrap(), + // All the states have the same string + // (at time of writing). + EchoState::default().to_string(), + ))), sut.next() ); } @@ -1238,7 +1253,7 @@ pub mod test { let result = sut.finalize(); assert_matches!( result, - Err((_, ParseError::UnexpectedEof(s))) if s == span.endpoints().1.unwrap() + Err((_, ParseError::UnexpectedEof(s, _))) if s == span.endpoints().1.unwrap() ); // The sut should have been re-returned, @@ -1266,7 +1281,13 @@ pub mod test { // which is unhandled by any parent context // (since we're not composing parsers), // which causes an error due to an unhandled Dead state. - assert_eq!(sut.next(), Some(Err(ParseError::UnexpectedToken(tok))),); + assert_eq!( + sut.next(), + Some(Err(ParseError::UnexpectedToken( + tok, + EchoState::default().to_string() + ))), + ); } // A context can be both retrieved from a finished parser and provided diff --git a/tamer/src/xir/attr/parse.rs b/tamer/src/xir/attr/parse.rs index 1cf4d028..0bf9f5e1 100644 --- a/tamer/src/xir/attr/parse.rs +++ b/tamer/src/xir/attr/parse.rs @@ -87,6 +87,19 @@ impl Default for AttrParseState { } } +impl Display for AttrParseState { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use AttrParseState::*; + + match self { + Empty => write!(f, "expecting an attribute"), + Name(name, _) => { + write!(f, "expecting an attribute value for {name}") + } + } + } +} + /// Attribute parsing error. #[derive(Debug, PartialEq, Eq)] pub enum AttrParseError { diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs index 716c9454..a00c2100 100644 --- a/tamer/src/xir/flat.rs +++ b/tamer/src/xir/flat.rs @@ -255,6 +255,22 @@ where } } +impl Display for State +where + SA: FlatAttrParseState, +{ + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use State::*; + + match self { + PreRoot => write!(f, "expecting document root"), + NodeExpected => write!(f, "expecting a node"), + AttrExpected(sa) => Display::fmt(sa, f), + Done => write!(f, "done parsing"), + } + } +} + impl State where SA: FlatAttrParseState, diff --git a/tamer/src/xir/flat/test.rs b/tamer/src/xir/flat/test.rs index 1c889da3..7918c6ed 100644 --- a/tamer/src/xir/flat/test.rs +++ b/tamer/src/xir/flat/test.rs @@ -22,6 +22,8 @@ //! These tests take place within the context of the XIR parsing framework, //! so they are one layer of abstraction away from unit tests. +use std::assert_matches::assert_matches; + use super::*; use crate::convert::ExpectInto; use crate::parse::{ParseError, Parsed}; @@ -87,9 +89,12 @@ fn extra_closing_tag() { let sut = parse::<1>(toks); - assert_eq!( - Err(ParseError::UnexpectedToken(XirToken::Close(Some(name), S3),)), - sut.collect::>, _>>() + assert_matches!( + sut.collect::>, _>>(), + Err(ParseError::UnexpectedToken( + XirToken::Close(Some(given_name), given_span), + _ + )) if given_name == name && given_span == S3 ); } @@ -109,9 +114,10 @@ fn extra_self_closing_tag() { let sut = parse::<1>(toks); - assert_eq!( - Err(ParseError::UnexpectedToken(XirToken::Close(None, S3),)), - sut.collect::>, _>>() + assert_matches!( + sut.collect::>, _>>(), + Err(ParseError::UnexpectedToken(XirToken::Close(None, given_span), _)) + if given_span == S3, ); } @@ -364,7 +370,7 @@ fn not_accepting_state_if_element_open() { ); // Element was not closed. - assert_eq!(Some(Err(ParseError::UnexpectedEof(S))), sut.next()); + assert_matches!(sut.next(), Some(Err(ParseError::UnexpectedEof(..)))); } // XML permits comment nodes before and after the document root element. @@ -417,11 +423,11 @@ fn content_after_root_close_error() { let sut = parse::<1>(toks); - assert_eq!( + assert_matches!( + sut.collect(), Result::>, _>::Err(ParseError::UnexpectedToken( - XirToken::Open(name, S3) - )), - sut.collect() + XirToken::Open(given_name, given_span), + _)) if given_name == name && given_span == S3 ); } diff --git a/tamer/src/xir/tree.rs b/tamer/src/xir/tree.rs index ae7c6571..4ca0ba12 100644 --- a/tamer/src/xir/tree.rs +++ b/tamer/src/xir/tree.rs @@ -617,6 +617,27 @@ impl ParseState for Stack { } } +impl Display for Stack { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use Stack::*; + + // These are not very descriptive because XIRT is only used in tests + // at the time of writing. + // If this results in user-facing errors in the future, + // we should probably output more information. + match self { + Empty => write!(f, "waiting for a node"), + BuddingElement(_) => write!(f, "parsing an element"), + ClosedElement(_) => { + write!(f, "completing processing of an element") + } + BuddingAttrList(_, _) => write!(f, "parsing attribute list"), + AttrState(_, _, sa) => Display::fmt(sa, f), + Done => write!(f, "done parsing"), + } + } +} + impl Stack { fn begin_attrs( name: QName, diff --git a/tamer/src/xir/tree/test.rs b/tamer/src/xir/tree/test.rs index cebb16a6..ef7c399b 100644 --- a/tamer/src/xir/tree/test.rs +++ b/tamer/src/xir/tree/test.rs @@ -17,6 +17,8 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . +use std::assert_matches::assert_matches; + use super::super::parse::ParseError; use super::*; use crate::convert::ExpectInto; @@ -384,9 +386,10 @@ fn attr_parser_with_non_attr_token() { let mut sut = attr_parser_from(&mut toks); - assert_eq!( + assert_matches!( sut.next(), - Some(Err(ParseError::UnexpectedToken(Token::Open(name, *S)))) + Some(Err(ParseError::UnexpectedToken(Token::Open(given_name, given_span), _))) + if given_name == name && given_span == *S ); } @@ -416,11 +419,11 @@ fn parser_attr_multiple() { // after which some other parser can continue on the same token // stream // (using this token as a lookahead). - assert_eq!( + assert_matches!( sut.next(), Some(Err(ParseError::UnexpectedToken(Token::Text( - "nohit".into(), - *S - )))) + given_name, + given_span, + ), _))) if given_name == "nohit".into() && given_span == *S ); }