tamer: parse::state::ParseState::DeadToken: New associated type

Previously, `ParseStatus::Dead` always yielded
`ParseState::Token`.  However, I'm working on introducing parsers that
aggregate (e.g. parsing XML attributes into structs), and those parsers do
not know that they have completed aggregation until they reach a dead state;
given that, I need to yield additional information at that time.

I played around with a number of alternative ideas, but this ended up being
the cleanest, relative to the effort involved.  For example, introducing
another parameter to `ParseStatus::Dead` was too burdensome on APIs that
ought not concern themselves with the possibility of receiving an object in
addition to a lookahead token, since many parsers are not capable of doing
so (given that they map M:(N<=M)).

Another option that I abandoned fairly quickly was having
`is_accepting` (potentially renamed) return an aggregate object, since
that sits off to the side and did not feel like part of the parsing pipeline.

The intent is to abstract some of this away in a new `ParseState` method for
delegation + aggregation.

DEV-7145
main
Mike Gerwitz 2022-06-07 09:21:53 -04:00
parent 495c1438fd
commit f14ffc87c2
7 changed files with 85 additions and 31 deletions

View File

@ -136,10 +136,11 @@ impl Display for XmloToken {
}
/// A parser capable of being composed with [`XmloReader`].
pub trait XmloState = ParseState<Token = Xirf, Context = EmptyContext>
where
<Self as ParseState>::Error: Into<XmloError>,
<Self as ParseState>::Object: Into<XmloToken>;
pub trait XmloState =
ParseState<Token = Xirf, DeadToken = Xirf, Context = EmptyContext>
where
<Self as ParseState>::Error: Into<XmloError>,
<Self as ParseState>::Object: Into<XmloToken>;
#[derive(Debug, Default, PartialEq, Eq)]
pub enum XmloReader<

View File

@ -31,8 +31,8 @@ pub use lower::{Lower, LowerIter, ParsedObject};
pub use parser::{Parsed, ParsedResult, Parser};
pub use state::{
context::{Context, Empty as EmptyContext, NoContext},
ParseResult, ParseState, ParseStatus, Transition, TransitionResult,
Transitionable,
Aggregate, ParseResult, ParseState, ParseStatus, Transition,
TransitionResult, Transitionable,
};
use crate::span::{Span, DUMMY_SPAN};

View File

@ -52,7 +52,7 @@ where
'b,
I,
Parsed<S::Object>,
ParseError<S::Token, S::Error>,
ParseError<S::DeadToken, S::Error>,
>,
}
@ -65,7 +65,9 @@ where
{
/// Consume inner parser and yield its context.
#[inline]
fn finalize(self) -> Result<LS::Context, ParseError<LS::Token, LS::Error>> {
fn finalize(
self,
) -> Result<LS::Context, ParseError<LS::DeadToken, LS::Error>> {
self.lower.finalize().map_err(|(_, e)| e)
}
}
@ -118,8 +120,8 @@ where
where
Self: Iterator<Item = ParsedResult<S>> + Sized,
<LS as ParseState>::Context: Default,
ParseError<S::Token, S::Error>: Into<E>,
ParseError<LS::Token, LS::Error>: Into<E>,
ParseError<S::DeadToken, S::Error>: Into<E>,
ParseError<LS::DeadToken, LS::Error>: Into<E>,
{
self.while_ok(|toks| {
// TODO: This parser is not accessible after error recovery!
@ -144,8 +146,8 @@ where
) -> Result<(U, LS::Context), E>
where
Self: Iterator<Item = ParsedResult<S>> + Sized,
ParseError<S::Token, S::Error>: Into<E>,
ParseError<LS::Token, LS::Error>: Into<E>,
ParseError<S::DeadToken, S::Error>: Into<E>,
ParseError<LS::DeadToken, LS::Error>: Into<E>,
{
self.while_ok(|toks| {
let lower = LS::parse_with_context(iter::empty(), ctx);

View File

@ -104,7 +104,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// is a decision made by the [`ParseState`].
pub fn finalize(
self,
) -> Result<S::Context, (Self, ParseError<S::Token, S::Error>)> {
) -> Result<S::Context, (Self, ParseError<S::DeadToken, S::Error>)> {
match self.assert_accepting() {
Ok(()) => Ok(self.ctx),
Err(err) => Err((self, err)),
@ -115,7 +115,9 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
/// otherwise [`Err`] with [`ParseError::UnexpectedEof`].
///
/// See [`finalize`](Self::finalize) for the public-facing method.
fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
fn assert_accepting(
&self,
) -> Result<(), ParseError<S::DeadToken, S::Error>> {
if self.state.is_accepting() {
Ok(())
} else {

View File

@ -32,8 +32,10 @@ use context::{Context, NoContext};
/// Result of some non-parsing operation on a [`Parser`],
/// with any error having been wrapped in a [`ParseError`].
pub type ParseResult<S, T> =
Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;
pub type ParseResult<S, T> = Result<
T,
ParseError<<S as ParseState>::DeadToken, <S as ParseState>::Error>,
>;
/// Result of a parsing operation.
#[derive(Debug, PartialEq, Eq)]
@ -49,11 +51,8 @@ pub enum ParseStatus<S: ParseState> {
/// Parser encountered a dead state relative to the given token.
///
/// A dead state is an empty accepting state that has no state
/// transition for the given token.
/// A state is empty if a [`ParseStatus::Object`] will not be lost if
/// parsing ends at this point
/// (that is---there is no partially-built object).
/// A dead state is an accepting state that has no state transition for
/// the given token.
/// This could simply mean that the parser has completed its job and
/// that control must be returned to a parent context.
///
@ -68,9 +67,13 @@ pub enum ParseStatus<S: ParseState> {
/// and that the token following it isn't something that can be
/// parsed.
///
/// Certain parsers may aggregate data until reaching a dead state,
/// in which case [`Aggregate`] may be of use to yield both a
/// lookahead token and an aggregate [`ParseStatus::Object`].
///
/// If there is no parent context to handle the token,
/// [`Parser`] must yield an error.
Dead(S::Token),
Dead(S::DeadToken),
}
impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
@ -109,6 +112,16 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
/// otherwise-immutable [`ParseState`].
type Context: Debug = context::Empty;
/// Token returned when the parser cannot perform a state transition.
///
/// This is generally the type of the input token itself
/// (and so the same as [`ParseState::Token`]),
/// which can be used as a token of lookahead.
/// Parsers may change this type to provide additional data.
/// For more information and a practical use case of this,
/// see [`Aggregate`].
type DeadToken: Token = Self::Token;
/// Construct a parser.
///
/// Whether this method is helpful or provides any clarity depends on
@ -220,7 +233,8 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
into: impl FnOnce(Self) -> SP,
) -> TransitionResult<SP>
where
Self: StitchableParseState<SP>,
Self: StitchableParseState<SP>
+ ParseState<DeadToken = <SP as ParseState>::DeadToken>,
C: AsMut<<Self as ParseState>::Context>,
{
use ParseStatus::{Dead, Incomplete, Object as Obj};
@ -233,7 +247,7 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
Transition(into(newst)).result(match result {
Ok(Incomplete) => Ok(Incomplete),
Ok(Obj(obj)) => Ok(Obj(obj.into())),
Ok(Dead(tok)) => Ok(Dead(tok)),
Ok(Dead(tok)) => Ok(Dead(tok.into())),
Err(e) => Err(e.into()),
})
}
@ -251,7 +265,10 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
mut context: C,
tok: <Self as ParseState>::Token,
into: impl FnOnce(Self) -> SP,
) -> ControlFlow<TransitionResult<SP>, (Self, <Self as ParseState>::Token, C)>
) -> ControlFlow<
TransitionResult<SP>,
(Self, <Self as ParseState>::DeadToken, C),
>
where
Self: StitchableParseState<SP>,
C: AsMut<<Self as ParseState>::Context>,
@ -302,6 +319,37 @@ where
<Self as ParseState>::Object: Into<<SP as ParseState>::Object>,
<Self as ParseState>::Error: Into<<SP as ParseState>::Error>;
/// Indicates that a parser has completed an aggregate operation,
/// marked by having reached a [dead state](ParseStatus::Dead).
///
/// This struct is compatible with [`ParseState::DeadToken`] and is intended
/// to be used with parsers that continue to aggregate data until they no
/// longer can.
/// For example,
/// an attribute parser may continue to parse element attributes until it
/// reaches the end of the attribute list,
/// which cannot be determined until reading a [`ParseState::Token`]
/// that must result in a [`ParseStatus::Dead`].
///
/// The fields are positional:
/// the first (`O`) is the aggregated object and the second (`T`) is the
/// token accompanying it,
/// which is the token whose span and display text this type reports.
#[derive(Debug, PartialEq, Eq)]
pub struct Aggregate<O: Object, T: Token>(pub O, pub T);
impl<O: Object, T: Token> Token for Aggregate<O, T> {
    /// Span of the wrapped token.
    ///
    /// The aggregated object in the first field is not consulted;
    /// only the token in the second field determines the span.
    fn span(&self) -> crate::span::Span {
        self.1.span()
    }
}
// Marker impl: no trait items are defined or overridden here.
// NOTE(review): presumably needed so that an `Aggregate` can be used
// wherever an `Object` bound is required; confirm against callers.
impl<O: Object, T: Token> Object for Aggregate<O, T> {}
impl<O: Object, T: Token> Display for Aggregate<O, T> {
    /// Write the wrapped token followed by a fixed note that an object
    /// accompanies it.
    ///
    /// The aggregated object itself is never rendered.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Irrefutable pattern: a tuple struct has exactly one shape.
        let Aggregate(_, tok) = self;

        write!(f, "{tok} with associated object")
    }
}
mod transition {
use super::{ParseState, ParseStateResult, ParseStatus};
use std::{
@ -380,7 +428,7 @@ mod transition {
/// This corresponds to [`ParseStatus::Dead`],
/// and a calling parser should use the provided [`Token`] as
/// lookahead.
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
pub fn dead(self, tok: S::DeadToken) -> TransitionResult<S> {
TransitionResult(self, Ok(ParseStatus::Dead(tok)))
}
}

View File

@ -166,7 +166,7 @@ impl From<Attr> for XirfToken {
/// XIRF-compatible attribute parser.
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
ParseState<Token = XirToken, Object = Attr>
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
where
<Self as ParseState>::Error: Into<XirToXirfError>,
StateContext<MAX_DEPTH>: AsMut<<Self as ParseState>::Context>;

View File

@ -503,10 +503,11 @@ where
Done,
}
pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr>
where
<Self as ParseState>::Error: Into<StackError>,
EmptyContext: AsMut<<Self as ParseState>::Context>;
pub trait StackAttrParseState =
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
where
<Self as ParseState>::Error: Into<StackError>,
EmptyContext: AsMut<<Self as ParseState>::Context>;
impl<SA: StackAttrParseState> Default for Stack<SA> {
fn default() -> Self {