tamer: parse::ParseState: Remove Default trait bound

`ParseState` originally required `Default` for use with `mem::take` in
`Parser::feed_tok`.  This unfortunately cannot last, since more specialized
parsers require context during initialization in order to provide useful
diagnostic information.  (The other option is to require the caller to
augment errors with diagnostic information, but that would have to be
duplicated by every caller and complicates parser composition; I'd prefer
those diagnostic details remain encapsulated.)

Replacing `Default` with `Option` is uglier, but it ends up producing the
same assembly as `mem::take` did, at least at the time of writing.  Because
Rust is able to elide unnecessary moves using this implementation, there is
no need for `unwrap_unchecked` or other unsafe methods, which is great,
since it shows that this parsing methodology is viable entirely in safe
Rust.

DEV-7145
main
Mike Gerwitz 2022-06-07 15:02:41 -04:00
parent f14ffc87c2
commit 3c227e5a2d
6 changed files with 70 additions and 18 deletions

View File

@ -139,6 +139,7 @@ impl Display for XmloToken {
pub trait XmloState =
ParseState<Token = Xirf, DeadToken = Xirf, Context = EmptyContext>
where
Self: Default,
<Self as ParseState>::Error: Into<XmloError>,
<Self as ParseState>::Object: Into<XmloToken>;

View File

@ -73,10 +73,13 @@ where
}
/// Lowering operation from one [`ParseState`] to another.
///
/// Lowering is intended to be used between standalone [`ParseState`]s that
/// implement [`Default`].
pub trait Lower<S, LS>
where
S: ParseState,
LS: ParseState<Token = S::Object>,
LS: ParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
{
/// Lower the IR produced by this [`Parser`] into another IR by piping
@ -163,7 +166,7 @@ impl<S, LS, I> Lower<S, LS> for I
where
I: Iterator<Item = ParsedResult<S>> + Sized,
S: ParseState,
LS: ParseState<Token = S::Object>,
LS: ParseState<Token = S::Object> + Default,
<S as ParseState>::Object: Token,
{
}

View File

@ -24,7 +24,6 @@ use super::{
TransitionResult,
};
use crate::span::{Span, UNKNOWN_SPAN};
use std::mem::take;
#[cfg(doc)]
use super::Token;
@ -79,7 +78,30 @@ impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
#[derive(Debug, PartialEq, Eq)]
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
toks: I,
state: S,
/// Parsing automaton.
///
/// This [`ParseState`] is stored within an [`Option`] to allow for
/// taking ownership for [`ParseState::parse_token`];
/// this is where the functional [`ParseState`] is married with
/// mutable state.
///
/// The alternative here is to forego [`Option`] and mandate [`Default`]
/// on [`ParseState`],
/// which used to be the case in an older implementation.
/// However,
/// while that requirement was nice to have here,
/// it forces [`ParseState`] to be initialized without context,
/// which significantly reduces the quality of diagnostic output
/// unless augmented by the calling context,
/// which puts a much greater burden on the caller.
///
/// This will only ever contain [`None`] during a the call to
/// [`ParseState::parse_token`] in [`Parser::feed_tok`],
/// so it is safe to call [`unwrap`] without worrying about panics.
///
/// For more information,
/// see the implementation of [`Parser::feed_tok`].
state: Option<S>,
last_span: Span,
ctx: S::Context,
}
@ -118,13 +140,13 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
fn assert_accepting(
&self,
) -> Result<(), ParseError<S::DeadToken, S::Error>> {
if self.state.is_accepting() {
if self.state.as_ref().unwrap().is_accepting() {
Ok(())
} else {
let endpoints = self.last_span.endpoints();
Err(ParseError::UnexpectedEof(
endpoints.1.unwrap_or(endpoints.0),
self.state.to_string(),
self.state.as_ref().unwrap().to_string(),
))
}
}
@ -146,9 +168,20 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
// reporting in case we encounter an EOF.
self.last_span = tok.span();
let result;
TransitionResult(Transition(self.state), result) =
take(&mut self.state).parse_token(tok, &mut self.ctx);
// Parse a single token and perform the requested state transition.
//
// This is where the functional `ParseState` is married with a
// mutable abstraction.
// Rust will optimize away this take+move+replace under most
// circumstances;
// if it does not,
// then you should utilize `ctx` as necessary.
//
// Note that this used to use `mem::take`,
// and the generated assembly was identical in both cases.
let TransitionResult(Transition(state), result) =
self.state.take().unwrap().parse_token(tok, &mut self.ctx);
self.state.replace(state);
use ParseStatus::*;
match result {
@ -157,7 +190,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
// so we have no choice but to produce an error.
Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken(
invalid,
self.state.to_string(),
self.state.as_ref().unwrap().to_string(),
)),
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
@ -198,7 +231,7 @@ impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
impl<S, I> From<I> for Parser<S, I>
where
S: ParseState,
S: ParseState + Default,
I: TokenStream<S::Token>,
<S as ParseState>::Context: Default,
{
@ -213,7 +246,7 @@ where
fn from(toks: I) -> Self {
Self {
toks,
state: Default::default(),
state: Some(Default::default()),
last_span: UNKNOWN_SPAN,
ctx: Default::default(),
}
@ -222,7 +255,7 @@ where
impl<S, I, C> From<(I, C)> for Parser<S, I>
where
S: ParseState<Context = C>,
S: ParseState<Context = C> + Default,
I: TokenStream<S::Token>,
{
/// Create a new parser with a provided context.
@ -234,7 +267,7 @@ where
fn from((toks, ctx): (I, C)) -> Self {
Self {
toks,
state: Default::default(),
state: Some(Default::default()),
last_span: UNKNOWN_SPAN,
ctx,
}

View File

@ -95,7 +95,15 @@ impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
/// Whatever the underlying automaton,
/// a `(state, token, context)` triple must uniquely determine the next
/// parser action.
pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
///
/// A [`ParseState`] is not required to implement [`Default`],
/// but it is afforded a number of API conveniences if it does not require
/// context for initialization.
/// This is generally true for standalone parsers,
/// but is not necessarily true for smaller, specialized parsers intended
/// for use as components of a larger parser
/// (in a spirit similar to parser combinators).
pub trait ParseState: PartialEq + Eq + Display + Debug + Sized {
/// Input tokens to the parser.
type Token: Token;
@ -122,18 +130,20 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
/// see [`Aggregate`].
type DeadToken: Token = Self::Token;
/// Construct a parser.
/// Construct a parser with a [`Default`] state.
///
/// Whether this method is helpful or provides any clarity depends on
/// the context and the types that are able to be inferred.
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
where
Self: Default,
Self::Context: Default,
{
Parser::from(toks)
}
/// Construct a parser with a non-default [`ParseState::Context`].
/// Construct a parser with a [`Default`] state but a non-default
/// [`ParseState::Context`].
///
/// This is useful in two ways:
///
@ -150,7 +160,10 @@ pub trait ParseState: Default + PartialEq + Eq + Display + Debug {
fn parse_with_context<I: TokenStream<Self::Token>>(
toks: I,
ctx: Self::Context,
) -> Parser<Self, I> {
) -> Parser<Self, I>
where
Self: Default,
{
Parser::from((toks, ctx))
}

View File

@ -168,6 +168,7 @@ impl From<Attr> for XirfToken {
pub trait FlatAttrParseState<const MAX_DEPTH: usize> =
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<XirToXirfError>,
StateContext<MAX_DEPTH>: AsMut<<Self as ParseState>::Context>;

View File

@ -506,6 +506,7 @@ where
pub trait StackAttrParseState =
ParseState<Token = XirToken, DeadToken = XirToken, Object = Attr>
where
Self: Default,
<Self as ParseState>::Error: Into<StackError>,
EmptyContext: AsMut<<Self as ParseState>::Context>;