tamer: parse::ParseState: Remove Default trait bound
`ParseState` originally required `Default` for use with `mem::take` in `Parser::feed_tok`. This unfortunately cannot last, since more specialized parsers require context during initialization in order to provide useful diagnostic information. (The other option is to require the caller to augment errors with diagnostic information, but that would have to be duplicated by every caller and complicates parser composition; I'd prefer those diagnostic details remain encapsulated.) Replacing `Default` with `Option` is uglier, but it ends up producing the same assembly as `mem::take` did, at least at the time of writing. Because Rust is able to elide unnecessary moves using this implementation, there is no need for `unwrap_unchecked` or other unsafe methods, which is great, since it shows that this parsing methodology is viable entirely in safe Rust. DEV-7145main
parent
f14ffc87c2
commit
3c227e5a2d
|
@ -139,6 +139,7 @@ impl Display for XmloToken {
|
|||
pub trait XmloState =
|
||||
ParseState<Token = Xirf, DeadToken = Xirf, Context = EmptyContext>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<XmloError>,
|
||||
<Self as ParseState>::Object: Into<XmloToken>;
|
||||
|
||||
|
|
|
@ -73,10 +73,13 @@ where
|
|||
}
|
||||
|
||||
/// Lowering operation from one [`ParseState`] to another.
|
||||
///
|
||||
/// Lowering is intended to be used between standalone [`ParseState`]s that
|
||||
/// implement [`Default`].
|
||||
pub trait Lower<S, LS>
|
||||
where
|
||||
S: ParseState,
|
||||
LS: ParseState<Token = S::Object>,
|
||||
LS: ParseState<Token = S::Object> + Default,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
/// Lower the IR produced by this [`Parser`] into another IR by piping
|
||||
|
@ -163,7 +166,7 @@ impl<S, LS, I> Lower<S, LS> for I
|
|||
where
|
||||
I: Iterator<Item = ParsedResult<S>> + Sized,
|
||||
S: ParseState,
|
||||
LS: ParseState<Token = S::Object>,
|
||||
LS: ParseState<Token = S::Object> + Default,
|
||||
<S as ParseState>::Object: Token,
|
||||
{
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ use super::{
|
|||
TransitionResult,
|
||||
};
|
||||
use crate::span::{Span, UNKNOWN_SPAN};
|
||||
use std::mem::take;
|
||||
|
||||
#[cfg(doc)]
|
||||
use super::Token;
|
||||
|
@ -79,7 +78,30 @@ impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
|
|||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
|
||||
toks: I,
|
||||
state: S,
|
||||
/// Parsing automaton.
|
||||
///
|
||||
/// This [`ParseState`] is stored within an [`Option`] to allow for
|
||||
/// taking ownership for [`ParseState::parse_token`];
|
||||
/// this is where the functional [`ParseState`] is married with
|
||||
/// mutable state.
|
||||
///
|
||||
/// The alternative here is to forego [`Option`] and mandate [`Default`]
|
||||
/// on [`ParseState`],
|
||||
/// which used to be the case in an older implementation.
|
||||
/// However,
|
||||
/// while that requirement was nice to have here,
|
||||
/// it forces [`ParseState`] to be initialized without context,
|
||||
/// which significantly reduces the quality of diagnostic output
|
||||
/// unless augmented by the calling context,
|
||||
/// which puts a much greater burden on the caller.
|
||||
///
|
||||
/// This will only ever contain [`None`] during a the call to
|
||||
/// [`ParseState::parse_token`] in [`Parser::feed_tok`],
|
||||
/// so it is safe to call [`unwrap`] without worrying about panics.
|
||||
///
|
||||
/// For more information,
|
||||
/// see the implementation of [`Parser::feed_tok`].
|
||||
state: Option<S>,
|
||||
last_span: Span,
|
||||
ctx: S::Context,
|
||||
}
|
||||
|
@ -118,13 +140,13 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
fn assert_accepting(
|
||||
&self,
|
||||
) -> Result<(), ParseError<S::DeadToken, S::Error>> {
|
||||
if self.state.is_accepting() {
|
||||
if self.state.as_ref().unwrap().is_accepting() {
|
||||
Ok(())
|
||||
} else {
|
||||
let endpoints = self.last_span.endpoints();
|
||||
Err(ParseError::UnexpectedEof(
|
||||
endpoints.1.unwrap_or(endpoints.0),
|
||||
self.state.to_string(),
|
||||
self.state.as_ref().unwrap().to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
@ -146,9 +168,20 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
// reporting in case we encounter an EOF.
|
||||
self.last_span = tok.span();
|
||||
|
||||
let result;
|
||||
TransitionResult(Transition(self.state), result) =
|
||||
take(&mut self.state).parse_token(tok, &mut self.ctx);
|
||||
// Parse a single token and perform the requested state transition.
|
||||
//
|
||||
// This is where the functional `ParseState` is married with a
|
||||
// mutable abstraction.
|
||||
// Rust will optimize away this take+move+replace under most
|
||||
// circumstances;
|
||||
// if it does not,
|
||||
// then you should utilize `ctx` as necessary.
|
||||
//
|
||||
// Note that this used to use `mem::take`,
|
||||
// and the generated assembly was identical in both cases.
|
||||
let TransitionResult(Transition(state), result) =
|
||||
self.state.take().unwrap().parse_token(tok, &mut self.ctx);
|
||||
self.state.replace(state);
|
||||
|
||||
use ParseStatus::*;
|
||||
match result {
|
||||
|
@ -157,7 +190,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
|||
// so we have no choice but to produce an error.
|
||||
Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken(
|
||||
invalid,
|
||||
self.state.to_string(),
|
||||
self.state.as_ref().unwrap().to_string(),
|
||||
)),
|
||||
|
||||
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
|
||||
|
@ -198,7 +231,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
|
|||
|
||||
impl<S, I> From<I> for Parser<S, I>
|
||||
where
|
||||
S: ParseState,
|
||||
S: ParseState + Default,
|
||||
I: TokenStream<S::Token>,
|
||||
<S as ParseState>::Context: Default,
|
||||
{
|
||||
|
@ -213,7 +246,7 @@ where
|
|||
fn from(toks: I) -> Self {
|
||||
Self {
|
||||
toks,
|
||||
state: Default::default(),
|
||||
state: Some(Default::default()),
|
||||
last_span: UNKNOWN_SPAN,
|
||||
ctx: Default::default(),
|
||||
}
|
||||
|
@ -222,7 +255,7 @@ where
|
|||
|
||||
impl<S, I, C> From<(I, C)> for Parser<S, I>
|
||||
where
|
||||
S: ParseState<Context = C>,
|
||||
S: ParseState<Context = C> + Default,
|
||||
I: TokenStream<S::Token>,
|
||||
{
|
||||
/// Create a new parser with a provided context.
|
||||
|
@ -234,7 +267,7 @@ where
|
|||
fn from((toks, ctx): (I, C)) -> Self {
|
||||
Self {
|
||||
toks,
|
||||
state: Default::default(),
|
||||
state: Some(Default::default()),
|
||||
last_span: UNKNOWN_SPAN,
|
||||
ctx,
|
||||
}
|
||||
|
|
|
@ -95,7 +95,15 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
|
|||
/// Whatever the underlying automaton,
|
||||
/// a `(state, token, context)` triple must uniquely determine the next
|
||||
/// parser action.
|
||||
pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
||||
///
|
||||
/// A [`ParseState`] is not required to implement [`Default`],
|
||||
/// but it is afforded a number of API conveniences if it does not require
|
||||
/// context for initialization.
|
||||
/// This is generally true for standalone parsers,
|
||||
/// but is not necessarily true for smaller, specialized parsers intended
|
||||
/// for use as components of a larger parser
|
||||
/// (in a spirit similar to parser combinators).
|
||||
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
|
||||
/// Input tokens to the parser.
|
||||
type Token: Token;
|
||||
|
||||
|
@ -122,18 +130,20 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
/// see [`Aggregate`].
|
||||
type DeadToken: Token = Self::Token;
|
||||
|
||||
/// Construct a parser.
|
||||
/// Construct a parser with a [`Default`] state.
|
||||
///
|
||||
/// Whether this method is helpful or provides any clarity depends on
|
||||
/// the context and the types that are able to be inferred.
|
||||
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
|
||||
where
|
||||
Self: Default,
|
||||
Self::Context: Default,
|
||||
{
|
||||
Parser::from(toks)
|
||||
}
|
||||
|
||||
/// Construct a parser with a non-default [`ParseState::Context`].
|
||||
/// Construct a parser with a [`Default`] state but a non-default
|
||||
/// [`ParseState::Context`].
|
||||
///
|
||||
/// This is useful in two ways:
|
||||
///
|
||||
|
@ -150,7 +160,10 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
|
|||
fn parse_with_context<I: TokenStream<Self::Token>>(
|
||||
toks: I,
|
||||
ctx: Self::Context,
|
||||
) -> Parser<Self, I> {
|
||||
) -> Parser<Self, I>
|
||||
where
|
||||
Self: Default,
|
||||
{
|
||||
Parser::from((toks, ctx))
|
||||
}
|
||||
|
||||
|
|
|
@ -168,6 +168,7 @@ impl From<Attr> for XirfToken {
|
|||
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
|
||||
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<XirToXirfError>,
|
||||
StateContext<MAX_DEPTH>: AsMut<<Self as ParseState>::Context>;
|
||||
|
||||
|
|
|
@ -506,6 +506,7 @@ where
|
|||
pub trait StackAttrParseState =
|
||||
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
|
||||
where
|
||||
Self: Default,
|
||||
<Self as ParseState>::Error: Into<StackError>,
|
||||
EmptyContext: AsMut<<Self as ParseState>::Context>;
|
||||
|
||||
|
|
Loading…
Reference in New Issue