From f14ffc87c2727ca9abb8548b39ab2046ee9e85cc Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Tue, 7 Jun 2022 09:21:53 -0400 Subject: [PATCH] tamer: parse::state::ParseState::DeadToken: New associated type Previously, `ParseStatus::Dead` always yielded `ParseState::Token`. However, I'm working on introducing parsers that aggregate (parsing XML attributes into structs), and those parsers do not know that they have completed aggregation until they reach a dead state; given that, I need to yield additional information at that time. I played around with a number of alternative ideas, but this ended up being the cleanest, relative to the effort involved. For example, introducing another parameter to `ParseStatus::Dead` was too burdensome on APIs that ought not concern themselves with the possibility of receiving an object in addition to a lookahead token, since many parsers are not capable of doing so (given that they map M:(N<=M)). Another option that I abandoned fairly quickly was having `is_accepting` (potentially renamed) return an aggregate object, since that's on the side and didn't feel like it was part of the parsing pipeline. The intent is to abstract this some in a new `ParseState` method for delegation + aggregation. DEV-7145 --- tamer/src/obj/xmlo/reader.rs | 9 +++-- tamer/src/parse.rs | 4 +- tamer/src/parse/lower.rs | 14 ++++--- tamer/src/parse/parser.rs | 6 ++- tamer/src/parse/state.rs | 72 ++++++++++++++++++++++++++++++------ tamer/src/xir/flat.rs | 2 +- tamer/src/xir/tree.rs | 9 +++-- 7 files changed, 85 insertions(+), 31 deletions(-) diff --git a/tamer/src/obj/xmlo/reader.rs b/tamer/src/obj/xmlo/reader.rs index f89468f8..2242426c 100644 --- a/tamer/src/obj/xmlo/reader.rs +++ b/tamer/src/obj/xmlo/reader.rs @@ -136,10 +136,11 @@ impl Display for XmloToken { } /// A parser capable of being composed with [`XmloReader`]. -pub trait XmloState = ParseState -where - ::Error: Into, - ::Object: Into; +pub trait XmloState = + ParseState + where + ::Error: Into, + ::Object: Into; #[derive(Debug, Default, PartialEq, Eq)] pub enum XmloReader< diff --git a/tamer/src/parse.rs b/tamer/src/parse.rs index 58f5c339..f594fa18 100644 --- a/tamer/src/parse.rs +++ b/tamer/src/parse.rs @@ -31,8 +31,8 @@ pub use lower::{Lower, LowerIter, ParsedObject}; pub use parser::{Parsed, ParsedResult, Parser}; pub use state::{ context::{Context, Empty as EmptyContext, NoContext}, - ParseResult, ParseState, ParseStatus, Transition, TransitionResult, - Transitionable, + Aggregate, ParseResult, ParseState, ParseStatus, Transition, + TransitionResult, Transitionable, }; use crate::span::{Span, DUMMY_SPAN}; diff --git a/tamer/src/parse/lower.rs b/tamer/src/parse/lower.rs index cb999476..672f7b99 100644 --- a/tamer/src/parse/lower.rs +++ b/tamer/src/parse/lower.rs @@ -52,7 +52,7 @@ where 'b, I, Parsed, - ParseError, + ParseError, >, } @@ -65,7 +65,9 @@ where { /// Consume inner parser and yield its context. #[inline] - fn finalize(self) -> Result> { + fn finalize( + self, + ) -> Result> { self.lower.finalize().map_err(|(_, e)| e) } } @@ -118,8 +120,8 @@ where where Self: Iterator> + Sized, ::Context: Default, - ParseError: Into, - ParseError: Into, + ParseError: Into, + ParseError: Into, { self.while_ok(|toks| { // TODO: This parser is not accessible after error recovery! @@ -144,8 +146,8 @@ where ) -> Result<(U, LS::Context), E> where Self: Iterator> + Sized, - ParseError: Into, - ParseError: Into, + ParseError: Into, + ParseError: Into, { self.while_ok(|toks| { let lower = LS::parse_with_context(iter::empty(), ctx); diff --git a/tamer/src/parse/parser.rs b/tamer/src/parse/parser.rs index 6ecbb065..b67944d3 100644 --- a/tamer/src/parse/parser.rs +++ b/tamer/src/parse/parser.rs @@ -104,7 +104,7 @@ impl> Parser { /// is a decision made by the [`ParseState`]. pub fn finalize( self, - ) -> Result)> { + ) -> Result)> { match self.assert_accepting() { Ok(()) => Ok(self.ctx), Err(err) => Err((self, err)), @@ -115,7 +115,9 @@ impl> Parser { /// otherwise [`Err`] with [`ParseError::UnexpectedEof`]. /// /// See [`finalize`](Self::finalize) for the public-facing method. - fn assert_accepting(&self) -> Result<(), ParseError> { + fn assert_accepting( + &self, + ) -> Result<(), ParseError> { if self.state.is_accepting() { Ok(()) } else { diff --git a/tamer/src/parse/state.rs b/tamer/src/parse/state.rs index fe12cb71..4701e5a3 100644 --- a/tamer/src/parse/state.rs +++ b/tamer/src/parse/state.rs @@ -32,8 +32,10 @@ use context::{Context, NoContext}; /// Result of some non-parsing operation on a [`Parser`], /// with any error having been wrapped in a [`ParseError`]. -pub type ParseResult = - Result::Token, ::Error>>; +pub type ParseResult = Result< + T, + ParseError<::DeadToken, ::Error>, +>; /// Result of a parsing operation. #[derive(Debug, PartialEq, Eq)] @@ -49,11 +51,8 @@ pub enum ParseStatus { /// Parser encountered a dead state relative to the given token. /// - /// A dead state is an empty accepting state that has no state - /// transition for the given token. - /// A state is empty if a [`ParseStatus::Object`] will not be lost if - /// parsing ends at this point - /// (that is---there is no partially-built object). + /// A dead state is an accepting state that has no state transition for + /// the given token. /// This could simply mean that the parser has completed its job and /// that control must be returned to a parent context. /// @@ -68,9 +67,13 @@ pub enum ParseStatus { /// and that the token following it isn't something that can be /// parsed. /// + /// Certain parsers may aggregate data until reaching a dead state, + /// in which case [`Aggregate`] may be of use to yield both a + /// lookahead token and an aggregate [`ParseStatus::Object`]. + /// /// If there is no parent context to handle the token, /// [`Parser`] must yield an error. - Dead(S::Token), + Dead(S::DeadToken), } impl, T: Object> From for ParseStatus { @@ -109,6 +112,16 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug { /// otherwise-immutable [`ParseState`]. type Context: Debug = context::Empty; + /// Token returned when the parser cannot perform a state transition. + /// + /// This is generally the type of the input token itself + /// (and so the same as [`ParseState::Token`]), + /// which can be used as a token of lookahead. + /// Parsers may change this type to provide additional data. + /// For more information and a practical use case of this, + /// see [`Aggregate`]. + type DeadToken: Token = Self::Token; + /// Construct a parser. /// /// Whether this method is helpful or provides any clarity depends on @@ -220,7 +233,8 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug { into: impl FnOnce(Self) -> SP, ) -> TransitionResult where - Self: StitchableParseState, + Self: StitchableParseState + + ParseState::DeadToken>, C: AsMut<::Context>, { use ParseStatus::{Dead, Incomplete, Object as Obj}; @@ -233,7 +247,7 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug { Transition(into(newst)).result(match result { Ok(Incomplete) => Ok(Incomplete), Ok(Obj(obj)) => Ok(Obj(obj.into())), - Ok(Dead(tok)) => Ok(Dead(tok)), + Ok(Dead(tok)) => Ok(Dead(tok.into())), Err(e) => Err(e.into()), }) } @@ -251,7 +265,10 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug { mut context: C, tok: ::Token, into: impl FnOnce(Self) -> SP, - ) -> ControlFlow, (Self, ::Token, C)> + ) -> ControlFlow< + TransitionResult, + (Self, ::DeadToken, C), + > where Self: StitchableParseState, C: AsMut<::Context>, @@ -302,6 +319,37 @@ where ::Object: Into<::Object>, ::Error: Into<::Error>; +/// Indicates that a parser has completed an aggregate operation, +/// marked by having reached a [dead state](ParseStatus::Dead). +/// +/// This struct is compatible with [`ParseState::DeadToken`] and is intended +/// to be used with parsers that continue to aggregate data until they no +/// longer can. +/// For example, +/// an attribute parser may continue to parse element attributes until it +/// reaches the end of the attribute list, +/// which cannot be determined until reading a [`ParseState::Token`] +/// that must result in a [`ParseStatus::Dead`]. +#[derive(Debug, PartialEq, Eq)] +pub struct Aggregate(pub O, pub T); + +impl Token for Aggregate { + fn span(&self) -> crate::span::Span { + let Aggregate(_, tok) = self; + tok.span() + } +} + +impl Object for Aggregate {} + +impl Display for Aggregate { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Aggregate(_obj, tok) => write!(f, "{tok} with associated object"), + } + } +} + mod transition { use super::{ParseState, ParseStateResult, ParseStatus}; use std::{ @@ -380,7 +428,7 @@ mod transition { /// This corresponds to [`ParseStatus::Dead`], /// and a calling parser should use the provided [`Token`] as /// lookahead. - pub fn dead(self, tok: S::Token) -> TransitionResult { + pub fn dead(self, tok: S::DeadToken) -> TransitionResult { TransitionResult(self, Ok(ParseStatus::Dead(tok))) } } diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs index 1a9ab698..835ac906 100644 --- a/tamer/src/xir/flat.rs +++ b/tamer/src/xir/flat.rs @@ -166,7 +166,7 @@ impl From for XirfToken { /// XIRF-compatible attribute parser. pub trait FlatAttrParseState = - ParseState + ParseState where ::Error: Into, StateContext: AsMut<::Context>; diff --git a/tamer/src/xir/tree.rs b/tamer/src/xir/tree.rs index 4ca0ba12..a61483f7 100644 --- a/tamer/src/xir/tree.rs +++ b/tamer/src/xir/tree.rs @@ -503,10 +503,11 @@ where Done, } -pub trait StackAttrParseState = ParseState -where - ::Error: Into, - EmptyContext: AsMut<::Context>; +pub trait StackAttrParseState = + ParseState + where + ::Error: Into, + EmptyContext: AsMut<::Context>; impl Default for Stack { fn default() -> Self {