tamer: parse::state::ParseState::DeadToken: New associated type
Previously, `ParseStatus::Dead` always yielded `ParseState::Token`. However, I'm working on introducing parsers that aggregate (parsing XML attributes into structs), and those parsers do not know that they have completed aggregation until they reach a dead state; given that, I need to yield additional information at that time. I played around with a number of alternative ideas, but this ended up being the cleanest, relative to the effort involved. For example, introducing another parameter to `ParseStatus::Dead` was too burdensome on APIs that ought not concern themselves with the possibility of receiving an object in addition to a lookahead token, since many parsers are not capable of yielding such an object (given that they map M:(N<=M)). Another option that I abandoned fairly quickly was having `is_accepting` (potentially renamed) return an aggregate object, since that would sit off to the side and didn't feel like it was part of the parsing pipeline. The intent is to abstract this somewhat in a new `ParseState` method for delegation + aggregation. DEV-7145
main
parent
495c1438fd
commit
f14ffc87c2
|
@ -136,10 +136,11 @@ impl Display for XmloToken {
|
|||
}
|
||||
|
||||
/// A parser capable of being composed with [`XmloReader`].
|
||||
pub trait XmloState = ParseState<Token = Xirf, Context = EmptyContext>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<XmloError>,
|
||||
<Self as ParseState>::Object: Into<XmloToken>;
|
||||
pub trait XmloState =
|
||||
ParseState<Token = Xirf, DeadToken = Xirf, Context = EmptyContext>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<XmloError>,
|
||||
<Self as ParseState>::Object: Into<XmloToken>;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub enum XmloReader<
|
||||
|
|
|
@ -31,8 +31,8 @@ pub use lower::{Lower, LowerIter, ParsedObject};
|
|||
pub use parser::{Parsed, ParsedResult, Parser};
|
||||
pub use state::{
|
||||
context::{Context, Empty as EmptyContext, NoContext},
|
||||
ParseResult, ParseState, ParseStatus, Transition, TransitionResult,
|
||||
Transitionable,
|
||||
Aggregate, ParseResult, ParseState, ParseStatus, Transition,
|
||||
TransitionResult, Transitionable,
|
||||
};
|
||||
|
||||
use crate::span::{Span, DUMMY_SPAN};
|
||||
|
|
|
@ -52,7 +52,7 @@ where
|
|||
'b,
|
||||
I,
|
||||
Parsed<S::Object>,
|
||||
ParseError<S::Token, S::Error>,
|
||||
ParseError<S::DeadToken, S::Error>,
|
||||
>,
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,9 @@ where
|
|||
{
|
||||
/// Consume inner parser and yield its context.
|
||||
#[inline]
|
||||
fn finalize(self) -> Result<LS::Context, ParseError<LS::Token, LS::Error>> {
|
||||
fn finalize(
|
||||
self,
|
||||
) -> Result<LS::Context, ParseError<LS::DeadToken, LS::Error>> {
|
||||
self.lower.finalize().map_err(|(_, e)| e)
|
||||
}
|
||||
}
|
||||
|
@ -118,8 +120,8 @@ where
|
|||
where
|
||||
Self: Iterator<Item = ParsedResult<S>> + Sized,
|
||||
<LS as ParseState>::Context: Default,
|
||||
ParseError<S::Token, S::Error>: Into<E>,
|
||||
ParseError<LS::Token, LS::Error>: Into<E>,
|
||||
ParseError<S::DeadToken, S::Error>: Into<E>,
|
||||
ParseError<LS::DeadToken, LS::Error>: Into<E>,
|
||||
{
|
||||
self.while_ok(|toks| {
|
||||
// TODO: This parser is not accessible after error recovery!
|
||||
|
@ -144,8 +146,8 @@ where
|
|||
) -> Result<(U, LS::Context), E>
|
||||
where
|
||||
Self: Iterator<Item = ParsedResult<S>> + Sized,
|
||||
ParseError<S::Token, S::Error>: Into<E>,
|
||||
ParseError<LS::Token, LS::Error>: Into<E>,
|
||||
ParseError<S::DeadToken, S::Error>: Into<E>,
|
||||
ParseError<LS::DeadToken, LS::Error>: Into<E>,
|
||||
{
|
||||
self.while_ok(|toks| {
|
||||
let lower = LS::parse_with_context(iter::empty(), ctx);
|
||||
|
|
|
@ -104,7 +104,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
/// is a decision made by the [`ParseState`].
|
||||
pub fn finalize(
|
||||
self,
|
||||
) -> Result<S::Context, (Self, ParseError<S::Token, S::Error>)> {
|
||||
) -> Result<S::Context, (Self, ParseError<S::DeadToken, S::Error>)> {
|
||||
match self.assert_accepting() {
|
||||
Ok(()) => Ok(self.ctx),
|
||||
Err(err) => Err((self, err)),
|
||||
|
@ -115,7 +115,9 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
/// otherwise [`Err`] with [`ParseError::UnexpectedEof`].
|
||||
///
|
||||
/// See [`finalize`](Self::finalize) for the public-facing method.
|
||||
fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
|
||||
fn assert_accepting(
|
||||
&self,
|
||||
) -> Result<(), ParseError<S::DeadToken, S::Error>> {
|
||||
if self.state.is_accepting() {
|
||||
Ok(())
|
||||
} else {
|
||||
|
|
|
@ -32,8 +32,10 @@ use context::{Context, NoContext};
|
|||
|
||||
/// Result of some non-parsing operation on a [`Parser`],
|
||||
/// with any error having been wrapped in a [`ParseError`].
|
||||
pub type ParseResult<S, T> =
|
||||
Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;
|
||||
pub type ParseResult<S, T> = Result<
|
||||
T,
|
||||
ParseError<<S as ParseState>::DeadToken, <S as ParseState>::Error>,
|
||||
>;
|
||||
|
||||
/// Result of a parsing operation.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -49,11 +51,8 @@ pub enum ParseStatus<S: ParseState> {
|
|||
|
||||
/// Parser encountered a dead state relative to the given token.
|
||||
///
|
||||
/// A dead state is an empty accepting state that has no state
|
||||
/// transition for the given token.
|
||||
/// A state is empty if a [`ParseStatus::Object`] will not be lost if
|
||||
/// parsing ends at this point
|
||||
/// (that is---there is no partially-built object).
|
||||
/// A dead state is an accepting state that has no state transition for
|
||||
/// the given token.
|
||||
/// This could simply mean that the parser has completed its job and
|
||||
/// that control must be returned to a parent context.
|
||||
///
|
||||
|
@ -68,9 +67,13 @@ pub enum ParseStatus<S: ParseState> {
|
|||
/// and that the token following it isn't something that can be
|
||||
/// parsed.
|
||||
///
|
||||
/// Certain parsers may aggregate data until reaching a dead state,
|
||||
/// in which case [`Aggregate`] may be of use to yield both a
|
||||
/// lookahead token and an aggregate [`ParseStatus::Object`].
|
||||
///
|
||||
/// If there is no parent context to handle the token,
|
||||
/// [`Parser`] must yield an error.
|
||||
Dead(S::Token),
|
||||
Dead(S::DeadToken),
|
||||
}
|
||||
|
||||
impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
|
||||
|
@ -109,6 +112,16 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
/// otherwise-immutable [`ParseState`].
|
||||
type Context: Debug = context::Empty;
|
||||
|
||||
/// Token returned when the parser cannot perform a state transition.
|
||||
///
|
||||
/// This is generally the type of the input token itself
|
||||
/// (and so the same as [`ParseState::Token`]),
|
||||
/// which can be used as a token of lookahead.
|
||||
/// Parsers may change this type to provide additional data.
|
||||
/// For more information and a practical use case of this,
|
||||
/// see [`Aggregate`].
|
||||
type DeadToken: Token = Self::Token;
|
||||
|
||||
/// Construct a parser.
|
||||
///
|
||||
/// Whether this method is helpful or provides any clarity depends on
|
||||
|
@ -220,7 +233,8 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
into: impl FnOnce(Self) -> SP,
|
||||
) -> TransitionResult<SP>
|
||||
where
|
||||
Self: StitchableParseState<SP>,
|
||||
Self: StitchableParseState<SP>
|
||||
+ ParseState<DeadToken = <SP as ParseState>::DeadToken>,
|
||||
C: AsMut<<Self as ParseState>::Context>,
|
||||
{
|
||||
use ParseStatus::{Dead, Incomplete, Object as Obj};
|
||||
|
@ -233,7 +247,7 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
Transition(into(newst)).result(match result {
|
||||
Ok(Incomplete) => Ok(Incomplete),
|
||||
Ok(Obj(obj)) => Ok(Obj(obj.into())),
|
||||
Ok(Dead(tok)) => Ok(Dead(tok)),
|
||||
Ok(Dead(tok)) => Ok(Dead(tok.into())),
|
||||
Err(e) => Err(e.into()),
|
||||
})
|
||||
}
|
||||
|
@ -251,7 +265,10 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
mut context: C,
|
||||
tok: <Self as ParseState>::Token,
|
||||
into: impl FnOnce(Self) -> SP,
|
||||
) -> ControlFlow<TransitionResult<SP>, (Self, <Self as ParseState>::Token, C)>
|
||||
) -> ControlFlow<
|
||||
TransitionResult<SP>,
|
||||
(Self, <Self as ParseState>::DeadToken, C),
|
||||
>
|
||||
where
|
||||
Self: StitchableParseState<SP>,
|
||||
C: AsMut<<Self as ParseState>::Context>,
|
||||
|
@ -302,6 +319,37 @@ where
|
|||
<Self as ParseState>::Object: Into<<SP as ParseState>::Object>,
|
||||
<Self as ParseState>::Error: Into<<SP as ParseState>::Error>;
|
||||
|
||||
/// Indicates that a parser has completed an aggregate operation,
|
||||
/// marked by having reached a [dead state](ParseStatus::Dead).
|
||||
///
|
||||
/// This struct is compatible with [`ParseState::DeadToken`] and is intended
|
||||
/// to be used with parsers that continue to aggregate data until they no
|
||||
/// longer can.
|
||||
/// For example,
|
||||
/// an attribute parser may continue to parse element attributes until it
|
||||
/// reaches the end of the attribute list,
|
||||
/// which cannot be determined until reading a [`ParseState::Token`]
|
||||
/// that must result in a [`ParseStatus::Dead`].
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Aggregate<O: Object, T: Token>(pub O, pub T);
|
||||
|
||||
impl<O: Object, T: Token> Token for Aggregate<O, T> {
|
||||
fn span(&self) -> crate::span::Span {
|
||||
let Aggregate(_, tok) = self;
|
||||
tok.span()
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Object, T: Token> Object for Aggregate<O, T> {}
|
||||
|
||||
impl<O: Object, T: Token> Display for Aggregate<O, T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Aggregate(_obj, tok) => write!(f, "{tok} with associated object"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod transition {
|
||||
use super::{ParseState, ParseStateResult, ParseStatus};
|
||||
use std::{
|
||||
|
@ -380,7 +428,7 @@ mod transition {
|
|||
/// This corresponds to [`ParseStatus::Dead`],
|
||||
/// and a calling parser should use the provided [`Token`] as
|
||||
/// lookahead.
|
||||
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
|
||||
pub fn dead(self, tok: S::DeadToken) -> TransitionResult<S> {
|
||||
TransitionResult(self, Ok(ParseStatus::Dead(tok)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -166,7 +166,7 @@ impl From<Attr> for XirfToken {
|
|||
|
||||
/// XIRF-compatible attribute parser.
|
||||
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
|
||||
ParseState<Token = XirToken, Object = Attr>
|
||||
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<XirToXirfError>,
|
||||
StateContext<MAX_DEPTH>: AsMut<<Self as ParseState>::Context>;
|
||||
|
|
|
@ -503,10 +503,11 @@ where
|
|||
Done,
|
||||
}
|
||||
|
||||
pub trait StackAttrParseState = ParseState<Token = XirToken, Object = Attr>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<StackError>,
|
||||
EmptyContext: AsMut<<Self as ParseState>::Context>;
|
||||
pub trait StackAttrParseState =
|
||||
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
|
||||
where
|
||||
<Self as ParseState>::Error: Into<StackError>,
|
||||
EmptyContext: AsMut<<Self as ParseState>::Context>;
|
||||
|
||||
impl<SA: StackAttrParseState> Default for Stack<SA> {
|
||||
fn default() -> Self {
|
||||
|
|
Loading…
Reference in New Issue