// Basic streaming parsing framework
//
//  Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Basic streaming parser framework for lowering operations.
//!
//! _TODO: Some proper docs and examples!_

use crate::diagnose::{Annotate, AnnotatedSpan, Diagnostic};
|
||
use crate::iter::{TripIter, TrippableIterator};
|
||
use crate::span::{Span, UNKNOWN_SPAN};
|
||
use std::fmt::Debug;
|
||
use std::hint::unreachable_unchecked;
|
||
use std::iter::{self, Empty};
|
||
use std::mem::take;
|
||
use std::ops::{ControlFlow, Deref, DerefMut, FromResidual, Try};
|
||
use std::{convert::Infallible, error::Error, fmt::Display};
|
||
|
||
/// Result of applying a [`Token`] to a [`ParseState`],
|
||
/// with any error having been wrapped in a [`ParseError`].
|
||
pub type ParsedResult<S> = ParseResult<S, Parsed<<S as ParseState>::Object>>;
|
||
|
||
/// Result of some non-parsing operation on a [`Parser`],
|
||
/// with any error having been wrapped in a [`ParseError`].
|
||
pub type ParseResult<S, T> =
|
||
Result<T, ParseError<<S as ParseState>::Token, <S as ParseState>::Error>>;
|
||
|
||
/// A single datum from a streaming IR with an associated [`Span`].
|
||
///
|
||
/// A token may be a lexeme with associated data,
|
||
/// or a more structured object having been lowered from other IRs.
|
||
pub trait Token: Display + Debug + PartialEq {
|
||
/// Retrieve the [`Span`] representing the source location of the token.
|
||
fn span(&self) -> Span;
|
||
}
|
||
|
||
impl<T: Token> From<T> for Span {
|
||
fn from(tok: T) -> Self {
|
||
tok.span()
|
||
}
|
||
}
|
||
|
||
/// The product of lowering one or more [`Token`]s into another IR.
///
/// An [`Object`] is free to implement [`Token`] as well,
///   which is what allows it to be fed into a further [`Parser`] for
///   additional lowering.
///
/// The trait has no methods;
///   it serves as a marker that bounds an otherwise unconstrained type
///   parameter in [`From`] conversions,
///     which the [`Transition`] API relies on for flexibility.
pub trait Object: Debug + PartialEq {}
|
||
|
||
/// An infallible [`Token`] stream.
|
||
///
|
||
/// If the token stream originates from an operation that could potentially
|
||
/// fail and ought to be propagated,
|
||
/// use [`TokenResultStream`].
|
||
///
|
||
/// The name "stream" in place of "iterator" is intended to convey that this
|
||
/// type is expected to be processed in real-time as a stream,
|
||
/// not read into memory.
|
||
pub trait TokenStream<T: Token> = Iterator<Item = T>;
|
||
|
||
/// A [`Token`] stream that may encounter errors during parsing.
|
||
///
|
||
/// If the stream cannot fail,
|
||
/// consider using [`TokenStream`].
|
||
pub trait TokenResultStream<T: Token, E: Error> = Iterator<Item = Result<T, E>>;
|
||
|
||
/// A [`ParseState`] capable of being automatically stitched together with
|
||
/// a parent [`ParseState`] `SP` to create a composite parser.
|
||
///
|
||
/// Conceptually,
|
||
/// this can be visualized as combining the state machines of multiple
|
||
/// parsers into one larger state machine.
|
||
///
|
||
/// The term _state stitching_ refers to a particular pattern able to be
|
||
/// performed automatically by this parsing framework;
|
||
/// it is not necessary for parser composition,
|
||
/// provided that you perform the necessary wiring yourself in absence
|
||
/// of state stitching.
|
||
pub trait StitchableParseState<SP: ParseState> = ParseState
|
||
where
|
||
SP: ParseState<Token = <Self as ParseState>::Token>,
|
||
<Self as ParseState>::Object: Into<<SP as ParseState>::Object>,
|
||
<Self as ParseState>::Error: Into<<SP as ParseState>::Error>;
|
||
|
||
/// A parsing automaton.
|
||
///
|
||
/// These states are utilized by a [`Parser`].
|
||
///
|
||
/// A [`ParseState`] is also responsible for storing data about the
|
||
/// accepted input,
|
||
/// and handling appropriate type conversions into the final type.
|
||
/// That is---an
|
||
/// automaton may store metadata that is subsequently emitted once an
|
||
/// accepting state has been reached.
|
||
/// Whatever the underlying automaton,
|
||
/// a `(state, token, context)` triple must uniquely determine the next
|
||
/// parser action.
|
||
pub trait ParseState: Default + PartialEq + Eq + Debug {
|
||
/// Input tokens to the parser.
|
||
type Token: Token;
|
||
|
||
/// Objects produced by a parser utilizing these states.
|
||
type Object: Object;
|
||
|
||
/// Errors specific to this set of states.
|
||
type Error: Debug + Diagnostic + PartialEq;
|
||
|
||
/// Object provided to parser alongside each token.
|
||
///
|
||
/// This may be used in situations where Rust/LLVM are unable to
|
||
/// optimize away moves of interior data associated with the
|
||
/// otherwise-immutable [`ParseState`].
|
||
type Context: Debug = EmptyContext;
|
||
|
||
/// Construct a parser.
|
||
///
|
||
/// Whether this method is helpful or provides any clarity depends on
|
||
/// the context and the types that are able to be inferred.
|
||
fn parse<I: TokenStream<Self::Token>>(toks: I) -> Parser<Self, I>
|
||
where
|
||
Self::Context: Default,
|
||
{
|
||
Parser::from(toks)
|
||
}
|
||
|
||
/// Construct a parser with a non-default [`ParseState::Context`].
|
||
///
|
||
/// This is useful in two ways:
|
||
///
|
||
/// 1. To allow for parsing using a context that does not implement
|
||
/// [`Default`],
|
||
/// or whose default is not sufficient; and
|
||
/// 2. To re-use a context from a previous [`Parser`].
|
||
///
|
||
/// If neither of these apply to your situation,
|
||
/// consider [`ParseState::parse`] instead.
|
||
///
|
||
/// To retrieve a context from a parser for re-use,
|
||
/// see [`Parser::finalize`].
|
||
fn parse_with_context<I: TokenStream<Self::Token>>(
|
||
toks: I,
|
||
ctx: Self::Context,
|
||
) -> Parser<Self, I> {
|
||
Parser::from((toks, ctx))
|
||
}
|
||
|
||
/// Parse a single [`Token`] and optionally perform a state transition.
|
||
///
|
||
/// The current state is represented by `self`.
|
||
/// The result of a parsing operation is a state transition with
|
||
/// associated [`ParseStatus`] data.
|
||
///
|
||
/// Note that `self` is owned,
|
||
/// for a couple primary reasons:
|
||
///
|
||
/// 1. This forces the parser to explicitly consider and document all
|
||
/// state transitions,
|
||
/// rather than potentially missing unintended behavior through
|
||
/// implicit behavior; and
|
||
/// 2. It allows for more natural functional composition of state,
|
||
/// which in turn makes it easier to compose parsers
|
||
/// (which conceptually involves stitching together state
|
||
/// machines).
|
||
///
|
||
/// Since a [`ParseState`] produces a new version of itself with each
|
||
/// invocation,
|
||
/// it is functionally pure.
|
||
/// Generally,
|
||
/// Rust/LLVM are able to optimize moves into direct assignments.
|
||
/// However,
|
||
/// there are circumstances where this is _not_ the case,
|
||
/// in which case [`Context`] can be used to provide a mutable context
|
||
/// owned by the caller (e.g. [`Parser`]) to store additional
|
||
/// information that is not subject to Rust's move semantics.
|
||
/// If this is not necessary,
|
||
/// see [`NoContext`].
|
||
fn parse_token(
|
||
self,
|
||
tok: Self::Token,
|
||
ctx: &mut Self::Context,
|
||
) -> TransitionResult<Self>;
|
||
|
||
/// Whether the current state represents an accepting state.
|
||
///
|
||
/// An accepting state represents a valid state to stop parsing.
|
||
/// If parsing stops at a state that is _not_ accepting,
|
||
/// then the [`TokenStream`] has ended unexpectedly and should produce
|
||
/// a [`ParseError::UnexpectedEof`].
|
||
///
|
||
/// It makes sense for there to be exist multiple accepting states for a
|
||
/// parser.
|
||
/// For example:
|
||
/// A parser that parses a list of attributes may be used to parse one
|
||
/// or more attributes,
|
||
/// or the entire list of attributes.
|
||
/// It is acceptable to attempt to parse just one of those attributes,
|
||
/// or it is acceptable to parse all the way until the end.
|
||
fn is_accepting(&self) -> bool;
|
||
|
||
/// Delegate parsing from a compatible, stitched [`ParseState`]~`SP`.
|
||
///
|
||
/// This helps to combine two state machines that speak the same input
|
||
/// language
|
||
/// (share the same [`Self::Token`]),
|
||
/// handling the boilerplate of delegating [`Self::Token`] from a
|
||
/// parent state~`SP` to `Self`.
|
||
///
|
||
/// Token delegation happens after [`Self`] has been entered from a
|
||
/// parent [`ParseState`] context~`SP`,
|
||
/// so stitching the start and accepting states must happen elsewhere
|
||
/// (for now).
|
||
///
|
||
/// This assumes that no lookahead token from [`ParseStatus::Dead`] will
|
||
/// need to be handled by the parent state~`SP`.
|
||
/// To handle a token of lookahead,
|
||
/// use [`Self::delegate_lookahead`] instead.
|
||
///
|
||
/// _TODO: More documentation once this is finalized._
|
||
fn delegate<SP, C>(
|
||
self,
|
||
mut context: C,
|
||
tok: <Self as ParseState>::Token,
|
||
into: impl FnOnce(Self) -> SP,
|
||
) -> TransitionResult<SP>
|
||
where
|
||
Self: StitchableParseState<SP>,
|
||
C: AsMut<<Self as ParseState>::Context>,
|
||
{
|
||
use ParseStatus::{Dead, Incomplete, Object as Obj};
|
||
|
||
let (Transition(newst), result) =
|
||
self.parse_token(tok, context.as_mut()).into();
|
||
|
||
// This does not use `delegate_lookahead` so that we can have
|
||
// `into: impl FnOnce` instead of `Fn`.
|
||
Transition(into(newst)).result(match result {
|
||
Ok(Incomplete) => Ok(Incomplete),
|
||
Ok(Obj(obj)) => Ok(Obj(obj.into())),
|
||
Ok(Dead(tok)) => Ok(Dead(tok)),
|
||
Err(e) => Err(e.into()),
|
||
})
|
||
}
|
||
|
||
/// Delegate parsing from a compatible, stitched [`ParseState`]~`SP` with
|
||
/// support for a lookahead token.
|
||
///
|
||
/// This does the same thing as [`Self::delegate`],
|
||
/// but allows for the handling of a lookahead token from [`Self`]
|
||
/// rather than simply proxying [`ParseStatus::Dead`].
|
||
///
|
||
/// _TODO: More documentation once this is finalized._
|
||
fn delegate_lookahead<SP, C>(
|
||
self,
|
||
mut context: C,
|
||
tok: <Self as ParseState>::Token,
|
||
into: impl FnOnce(Self) -> SP,
|
||
) -> ControlFlow<TransitionResult<SP>, (Self, <Self as ParseState>::Token, C)>
|
||
where
|
||
Self: StitchableParseState<SP>,
|
||
C: AsMut<<Self as ParseState>::Context>,
|
||
{
|
||
use ControlFlow::*;
|
||
use ParseStatus::{Dead, Incomplete, Object as Obj};
|
||
|
||
// NB: Rust/LLVM are generally able to elide these moves into direct
|
||
// assignments,
|
||
// but sometimes this does not work
|
||
// (e.g. XIRF's use of `ArrayVec`).
|
||
// If your [`ParseState`] has a lot of `memcpy`s or other
|
||
// performance issues,
|
||
// move heavy objects into `context`.
|
||
let (Transition(newst), result) =
|
||
self.parse_token(tok, context.as_mut()).into();
|
||
|
||
match result {
|
||
Ok(Incomplete) => Break(Transition(into(newst)).incomplete()),
|
||
Ok(Obj(obj)) => Break(Transition(into(newst)).ok(obj.into())),
|
||
Ok(Dead(tok)) => Continue((newst, tok, context)),
|
||
Err(e) => Break(Transition(into(newst)).err(e)),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// A [`Context`] that holds nothing,
///   for [`ParseState`]s implemented in a purely functional manner with
///   no mutable state.
///
/// Choosing this value states that a [`ParseState`] needs no context at
///   all.
/// Every [`Context`] implements [`AsMut<EmptyContext>`](AsMut),
///   which means that a pure [`ParseState`] has a context compatible with
///   that of any other parser it is composed with
///     (assuming the remaining requirements of [`StitchableParseState`]
///       hold).
///
/// Function signatures can make this explicit by naming
///   [`EmptyContext`].
#[derive(Debug, PartialEq, Eq, Default)]
pub struct EmptyContext;
|
||
|
||
/// An [`EmptyContext`] trivially provides itself.
impl AsMut<EmptyContext> for EmptyContext {
    /// Return `self` unchanged.
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
|
||
|
||
/// A [`ParseState`] does not require any mutable [`Context`].
|
||
///
|
||
/// A [`ParseState`] using this context is pure
|
||
/// (has no mutable state),
|
||
/// returning a new version of itself on each state change.
|
||
///
|
||
/// This type is intended to be self-documenting:
|
||
/// `_: EmptyContext` is nicer to readers than `_: &mut EmptyContext`.
|
||
///
|
||
/// See [`EmptyContext`] for more information.
|
||
pub type NoContext<'a> = &'a mut EmptyContext;
|
||
|
||
/// Mutable context for [`ParseState`].
|
||
///
|
||
/// [`ParseState`]s are immutable and pure---they
|
||
/// are invoked via [`ParseState::parse_token`] and return a new version
|
||
/// of themselves representing their new state.
|
||
/// Rust/LLVM are generally able to elide intermediate values and moves,
|
||
/// optimizing these parsers away into assignments.
|
||
///
|
||
/// However,
|
||
/// there are circumstances where moves may not be elided and may retain
|
||
/// their `memcpy` equivalents.
|
||
/// To work around this,
|
||
/// [`ParseState::parse_token`] accepts a mutable [`Context`] reference
|
||
/// which is held by the parent [`Parser`],
|
||
/// which can be mutated in-place without worrying about Rust's move
|
||
/// semantics.
|
||
///
|
||
/// Plainly: you should only use this if you have to.
|
||
/// This was added because certain parsers may be invoked millions of times
|
||
/// for each individual token in systems with many source packages,
|
||
/// which may otherwise result in millions of `memcpy`s.
|
||
///
|
||
/// When composing two [`ParseState`]s `A<B, C>`,
|
||
/// a [`Context<B, C>`](Context) must be contravariant over `B` and~`C`.
|
||
/// Concretely,
|
||
/// this means that [`AsMut<B::Context>`](AsMut) and
|
||
/// [`AsMut<C::Context>`](AsMut) must be implemented for `A::Context`.
|
||
/// This almost certainly means that `A::Context` is a product type.
|
||
/// Consequently,
|
||
/// a single [`Parser`] is able to hold a composite [`Context`] in a
|
||
/// single memory location.
|
||
///
|
||
/// [`Context<T>`](Context) implements [`Deref<T>`](Deref) for convenience.
|
||
///
|
||
/// If your [`ParseState`] does not require a mutable [`Context`],
|
||
/// see [`NoContext`].
|
||
#[derive(Debug, Default)]
|
||
pub struct Context<T: Debug + Default>(T, EmptyContext);
|
||
|
||
impl<T: Debug + Default> AsMut<EmptyContext> for Context<T> {
|
||
fn as_mut(&mut self) -> &mut EmptyContext {
|
||
&mut self.1
|
||
}
|
||
}
|
||
|
||
impl<T: Debug + Default> Deref for Context<T> {
|
||
type Target = T;
|
||
|
||
fn deref(&self) -> &Self::Target {
|
||
&self.0
|
||
}
|
||
}
|
||
|
||
impl<T: Debug + Default> DerefMut for Context<T> {
|
||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||
&mut self.0
|
||
}
|
||
}
|
||
|
||
impl<T: Debug + Default> From<T> for Context<T> {
|
||
fn from(x: T) -> Self {
|
||
Context(x, EmptyContext)
|
||
}
|
||
}
|
||
|
||
/// Result of applying a [`Token`] to a [`ParseState`].
|
||
///
|
||
/// This is used by [`ParseState::parse_token`];
|
||
/// see that function for rationale.
|
||
pub type ParseStateResult<S> = Result<ParseStatus<S>, <S as ParseState>::Error>;
|
||
|
||
/// A state transition with associated data.
|
||
///
|
||
/// Conceptually,
|
||
/// imagine the act of a state transition producing data.
|
||
/// See [`Transition`] for convenience methods for producing this tuple.
|
||
#[derive(Debug, PartialEq)]
|
||
pub struct TransitionResult<S: ParseState>(
|
||
pub Transition<S>,
|
||
pub ParseStateResult<S>,
|
||
);
|
||
|
||
/// Denotes a state transition.
|
||
///
|
||
/// This newtype was created to produce clear, self-documenting code;
|
||
/// parsers can get confusing to read with all of the types involved,
|
||
/// so this provides a mental synchronization point.
|
||
///
|
||
/// This also provides some convenience methods to help remote boilerplate
|
||
/// and further improve code clarity.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
pub struct Transition<S: ParseState>(pub S);
|
||
|
||
impl<S: ParseState> Transition<S> {
|
||
/// A state transition with corresponding data.
|
||
///
|
||
/// This allows [`ParseState::parse_token`] to emit a parsed object and
|
||
/// corresponds to [`ParseStatus::Object`].
|
||
pub fn ok<T>(self, obj: T) -> TransitionResult<S>
|
||
where
|
||
T: Into<ParseStatus<S>>,
|
||
{
|
||
TransitionResult(self, Ok(obj.into()))
|
||
}
|
||
|
||
/// A transition with corresponding error.
|
||
///
|
||
/// This indicates a parsing failure.
|
||
/// The state ought to be suitable for error recovery.
|
||
pub fn err<E: Into<S::Error>>(self, err: E) -> TransitionResult<S> {
|
||
TransitionResult(self, Err(err.into()))
|
||
}
|
||
|
||
/// A state transition with corresponding [`Result`].
|
||
///
|
||
/// This translates the provided [`Result`] in a manner equivalent to
|
||
/// [`Transition::ok`] and [`Transition::err`].
|
||
pub fn result<T, E>(self, result: Result<T, E>) -> TransitionResult<S>
|
||
where
|
||
T: Into<ParseStatus<S>>,
|
||
E: Into<S::Error>,
|
||
{
|
||
TransitionResult(self, result.map(Into::into).map_err(Into::into))
|
||
}
|
||
|
||
/// A state transition indicating that more data is needed before an
|
||
/// object can be emitted.
|
||
///
|
||
/// This corresponds to [`ParseStatus::Incomplete`].
|
||
pub fn incomplete(self) -> TransitionResult<S> {
|
||
TransitionResult(self, Ok(ParseStatus::Incomplete))
|
||
}
|
||
|
||
/// A dead state transition.
|
||
///
|
||
/// This corresponds to [`ParseStatus::Dead`],
|
||
/// and a calling parser should use the provided [`Token`] as
|
||
/// lookahead.
|
||
pub fn dead(self, tok: S::Token) -> TransitionResult<S> {
|
||
TransitionResult(self, Ok(ParseStatus::Dead(tok)))
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState> Into<(Transition<S>, ParseStateResult<S>)>
|
||
for TransitionResult<S>
|
||
{
|
||
fn into(self) -> (Transition<S>, ParseStateResult<S>) {
|
||
(self.0, self.1)
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState> Try for TransitionResult<S> {
|
||
type Output = (Transition<S>, ParseStateResult<S>);
|
||
type Residual = (Transition<S>, ParseStateResult<S>);
|
||
|
||
fn from_output(output: Self::Output) -> Self {
|
||
match output {
|
||
(st, result) => Self(st, result),
|
||
}
|
||
}
|
||
|
||
fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
|
||
match self.into() {
|
||
(st, Ok(x)) => ControlFlow::Continue((st, Ok(x))),
|
||
(st, Err(e)) => ControlFlow::Break((st, Err(e))),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState> FromResidual<(Transition<S>, ParseStateResult<S>)>
|
||
for TransitionResult<S>
|
||
{
|
||
fn from_residual(residual: (Transition<S>, ParseStateResult<S>)) -> Self {
|
||
match residual {
|
||
(st, result) => Self(st, result),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState> FromResidual<Result<Infallible, TransitionResult<S>>>
|
||
for TransitionResult<S>
|
||
{
|
||
fn from_residual(
|
||
residual: Result<Infallible, TransitionResult<S>>,
|
||
) -> Self {
|
||
match residual {
|
||
Err(e) => e,
|
||
// SAFETY: This match arm doesn't seem to be required in
|
||
// core::result::Result's FromResidual implementation,
|
||
// but as of 1.61 nightly it is here.
|
||
// Since this is Infallable,
|
||
// it cannot occur.
|
||
Ok(_) => unsafe { unreachable_unchecked() },
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState> FromResidual<ControlFlow<TransitionResult<S>, Infallible>>
|
||
for TransitionResult<S>
|
||
{
|
||
fn from_residual(
|
||
residual: ControlFlow<TransitionResult<S>, Infallible>,
|
||
) -> Self {
|
||
match residual {
|
||
ControlFlow::Break(result) => result,
|
||
// SAFETY: Infallible, so cannot hit.
|
||
ControlFlow::Continue(_) => unsafe { unreachable_unchecked() },
|
||
}
|
||
}
|
||
}
|
||
|
||
/// An object able to be used as data for a state [`Transition`].
|
||
///
|
||
/// This flips the usual order of things:
|
||
/// rather than using a method of [`Transition`] to provide data,
|
||
/// this starts with the data and produces a transition from it.
|
||
/// This is sometimes necessary to satisfy ownership/borrowing rules.
|
||
///
|
||
/// This trait simply removes boilerplate associated with storing
|
||
/// intermediate values and translating into the resulting type.
|
||
pub trait Transitionable<S: ParseState> {
|
||
/// Perform a state transition to `S` using [`Self`] as the associated
|
||
/// data.
|
||
///
|
||
/// This may be necessary to satisfy ownership/borrowing rules when
|
||
/// state data from `S` is used to compute [`Self`].
|
||
fn transition(self, to: S) -> TransitionResult<S>;
|
||
}
|
||
|
||
impl<S, E> Transitionable<S> for Result<ParseStatus<S>, E>
|
||
where
|
||
S: ParseState,
|
||
<S as ParseState>::Error: From<E>,
|
||
{
|
||
fn transition(self, to: S) -> TransitionResult<S> {
|
||
Transition(to).result(self)
|
||
}
|
||
}
|
||
|
||
impl<S, E> Transitionable<S> for Result<(), E>
|
||
where
|
||
S: ParseState,
|
||
<S as ParseState>::Error: From<E>,
|
||
{
|
||
fn transition(self, to: S) -> TransitionResult<S> {
|
||
Transition(to).result(self.map(|_| ParseStatus::Incomplete))
|
||
}
|
||
}
|
||
|
||
/// A streaming parser defined by a [`ParseState`] with exclusive
|
||
/// mutable access to an underlying [`TokenStream`].
|
||
///
|
||
/// This parser handles operations that are common among all types of
|
||
/// parsers,
|
||
/// such that specialized parsers need only implement logic that is
|
||
/// unique to their operation.
|
||
/// This also simplifies combinators,
|
||
/// since there is more uniformity among distinct parser types.
|
||
///
|
||
/// After you have finished with a parser,
|
||
/// if you have not consumed the entire iterator,
|
||
/// call [`finalize`](Parser::finalize) to ensure that parsing has
|
||
/// completed in an accepting state.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
pub struct Parser<S: ParseState, I: TokenStream<S::Token>> {
|
||
toks: I,
|
||
state: S,
|
||
last_span: Span,
|
||
ctx: S::Context,
|
||
}
|
||
|
||
impl<S: ParseState, I: TokenStream<S::Token>> Parser<S, I> {
|
||
/// Indicate that no further parsing will take place using this parser,
|
||
/// retrieve any final aggregate state (the context),
|
||
/// and [`drop`] it.
|
||
///
|
||
/// Invoking the method is equivalent to stating that the stream has
|
||
/// ended,
|
||
/// since the parser will have no later opportunity to continue
|
||
/// parsing.
|
||
/// Consequently,
|
||
/// the caller should expect [`ParseError::UnexpectedEof`] if the
|
||
/// parser is not in an accepting state.
|
||
///
|
||
/// To re-use the context returned by this method,
|
||
/// see [`ParseState::parse_with_context`].
|
||
/// Note that whether the context is permitted to be reused,
|
||
/// or is useful independently to the caller,
|
||
/// is a decision made by the [`ParseState`].
|
||
pub fn finalize(
|
||
self,
|
||
) -> Result<S::Context, (Self, ParseError<S::Token, S::Error>)> {
|
||
match self.assert_accepting() {
|
||
Ok(()) => Ok(self.ctx),
|
||
Err(err) => Err((self, err)),
|
||
}
|
||
}
|
||
|
||
/// Return [`Ok`] if the parser is in an accepting state,
|
||
/// otherwise [`Err`] with [`ParseError::UnexpectedEof`].
|
||
///
|
||
/// See [`finalize`](Self::finalize) for the public-facing method.
|
||
fn assert_accepting(&self) -> Result<(), ParseError<S::Token, S::Error>> {
|
||
if self.state.is_accepting() {
|
||
Ok(())
|
||
} else {
|
||
let endpoints = self.last_span.endpoints();
|
||
Err(ParseError::UnexpectedEof(
|
||
endpoints.1.unwrap_or(endpoints.0),
|
||
))
|
||
}
|
||
}
|
||
|
||
/// Feed an input token to the parser.
|
||
///
|
||
/// This _pushes_ data into the parser,
|
||
/// rather than the typical pull system used by [`Parser`]'s
|
||
/// [`Iterator`] implementation.
|
||
/// The pull system also uses this method to provided data to the
|
||
/// parser.
|
||
///
|
||
/// This method is intentionally private,
|
||
/// since push parsers are currently supported only internally.
|
||
/// The only thing preventing this being public is formalization and a
|
||
/// commitment to maintain it.
|
||
fn feed_tok(&mut self, tok: S::Token) -> ParsedResult<S> {
|
||
// Store the most recently encountered Span for error
|
||
// reporting in case we encounter an EOF.
|
||
self.last_span = tok.span();
|
||
|
||
let result;
|
||
TransitionResult(Transition(self.state), result) =
|
||
take(&mut self.state).parse_token(tok, &mut self.ctx);
|
||
|
||
use ParseStatus::*;
|
||
match result {
|
||
// Nothing handled this dead state,
|
||
// and we cannot discard a lookahead token,
|
||
// so we have no choice but to produce an error.
|
||
Ok(Dead(invalid)) => Err(ParseError::UnexpectedToken(invalid)),
|
||
|
||
Ok(parsed @ (Incomplete | Object(..))) => Ok(parsed.into()),
|
||
Err(e) => Err(e.into()),
|
||
}
|
||
}
|
||
|
||
/// Lower the IR produced by this [`Parser`] into another IR by piping
|
||
/// the output to a new parser defined by the [`ParseState`] `LS`.
|
||
///
|
||
/// This parser consumes tokens `S::Token` and produces the IR
|
||
/// `S::Output`.
|
||
/// If there is some other [`ParseState`] `LS` such that
|
||
/// `LS::Token == S::Output`
|
||
/// (that is—the output of this parser is the input to another),
|
||
/// then this method will wire the two together into a new iterator
|
||
/// that produces `LS::Output`.
|
||
///
|
||
/// Visually, we have,
|
||
/// within the provided closure `f`,
|
||
/// a [`LowerIter`] that acts as this pipeline:
|
||
///
|
||
/// ```text
|
||
/// (S::Token) -> (S::Output == LS::Token) -> (LS::Output)
|
||
/// ```
|
||
///
|
||
/// The new iterator is a [`LowerIter`],
|
||
/// and scoped to the provided closure `f`.
|
||
/// The outer [`Result`] of `Self`'s [`ParsedResult`] is stripped by
|
||
/// a [`TripIter`] before being provided as input to a new push
|
||
/// [`Parser`] utilizing `LS`.
|
||
/// A push parser,
|
||
/// rather than pulling tokens from a [`TokenStream`],
|
||
/// has tokens pushed into it;
|
||
/// this parser is created automatically for you.
|
||
///
|
||
/// _TODO_: There's no way to access the inner parser for error recovery
|
||
/// after tripping the [`TripIter`].
|
||
/// Consequently,
|
||
/// this API (likely the return type) will change.
|
||
#[inline]
|
||
pub fn lower_while_ok<LS, U>(
|
||
&mut self,
|
||
f: impl FnOnce(&mut LowerIter<S, I, LS>) -> U,
|
||
) -> Result<U, ParseError<S::Token, S::Error>>
|
||
where
|
||
LS: ParseState<Token = S::Object>,
|
||
<S as ParseState>::Object: Token,
|
||
<LS as ParseState>::Context: Default,
|
||
{
|
||
self.while_ok(|toks| {
|
||
// TODO: This parser is not accessible after error recovery!
|
||
let lower = LS::parse(iter::empty());
|
||
f(&mut LowerIter { lower, toks })
|
||
})
|
||
}
|
||
}
|
||
|
||
/// An IR lowering operation that pipes the output of one [`Parser`] to the
|
||
/// input of another.
|
||
///
|
||
/// This is produced by [`Parser::lower_while_ok`].
|
||
pub struct LowerIter<'a, 'b, S, I, LS>
|
||
where
|
||
S: ParseState,
|
||
I: TokenStream<S::Token>,
|
||
LS: ParseState<Token = S::Object>,
|
||
<S as ParseState>::Object: Token,
|
||
{
|
||
/// A push [`Parser`].
|
||
lower: Parser<LS, Empty<LS::Token>>,
|
||
|
||
/// Source tokens from higher-level [`Parser`],
|
||
/// with the outer [`Result`] having been stripped by a [`TripIter`].
|
||
toks: &'a mut TripIter<
|
||
'b,
|
||
Parser<S, I>,
|
||
Parsed<S::Object>,
|
||
ParseError<S::Token, S::Error>,
|
||
>,
|
||
}
|
||
|
||
impl<'a, 'b, S, I, LS> Iterator for LowerIter<'a, 'b, S, I, LS>
|
||
where
|
||
S: ParseState,
|
||
I: TokenStream<S::Token>,
|
||
LS: ParseState<Token = S::Object>,
|
||
<S as ParseState>::Object: Token,
|
||
{
|
||
type Item = ParsedResult<LS>;
|
||
|
||
/// Pull a token through the higher-level [`Parser`],
|
||
/// push it to the lowering parser,
|
||
/// and yield the resulting [`ParseResult`].
|
||
#[inline]
|
||
fn next(&mut self) -> Option<Self::Item> {
|
||
match self.toks.next() {
|
||
None => None,
|
||
Some(Parsed::Incomplete) => Some(Ok(Parsed::Incomplete)),
|
||
Some(Parsed::Object(obj)) => Some(self.lower.feed_tok(obj)),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S: ParseState, I: TokenStream<S::Token>> Iterator for Parser<S, I> {
|
||
type Item = ParsedResult<S>;
|
||
|
||
/// Parse a single [`Token`] according to the current
|
||
/// [`ParseState`],
|
||
/// if available.
|
||
///
|
||
/// If the underlying [`TokenStream`] yields [`None`],
|
||
/// then the [`ParseState`] must be in an accepting state;
|
||
/// otherwise, [`ParseError::UnexpectedEof`] will occur.
|
||
///
|
||
/// This is intended to be invoked by [`Iterator::next`].
|
||
/// Accepting a token rather than the [`TokenStream`] allows the caller
|
||
/// to inspect the token first
|
||
/// (e.g. to store a copy of the [`Span`][crate::span::Span]).
|
||
#[inline]
|
||
fn next(&mut self) -> Option<Self::Item> {
|
||
let otok = self.toks.next();
|
||
|
||
match otok {
|
||
None => match self.assert_accepting() {
|
||
Ok(()) => None,
|
||
Err(e) => Some(Err(e)),
|
||
},
|
||
|
||
Some(tok) => Some(self.feed_tok(tok)),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Common parsing errors produced by [`Parser`].
|
||
///
|
||
/// These errors are common enough that they are handled in a common way,
|
||
/// such that individual parsers needn't check for these situations
|
||
/// themselves.
|
||
///
|
||
/// Having a common type also allows combinators to handle error types in a
|
||
/// consistent way when composing parsers.
|
||
///
|
||
/// Parsers may return their own unique errors via the
|
||
/// [`StateError`][ParseError::StateError] variant.
|
||
#[derive(Debug, PartialEq)]
|
||
pub enum ParseError<T: Token, E: Diagnostic + PartialEq> {
|
||
/// Token stream ended unexpectedly.
|
||
///
|
||
/// This error means that the parser was expecting more input before
|
||
/// reaching an accepting state.
|
||
/// This could represent a truncated file,
|
||
/// a malformed stream,
|
||
/// or maybe just a user that's not done typing yet
|
||
/// (e.g. in the case of an LSP implementation).
|
||
///
|
||
/// If no span is available,
|
||
/// then parsing has not even had the chance to begin.
|
||
/// If this parser follows another,
|
||
/// then the combinator ought to substitute a missing span with
|
||
/// whatever span preceded this invocation.
|
||
UnexpectedEof(Span),
|
||
|
||
/// The parser reached an unhandled dead state.
|
||
///
|
||
/// Once a parser returns [`ParseStatus::Dead`],
|
||
/// a parent context must use that provided token as a lookahead.
|
||
/// If that does not occur,
|
||
/// [`Parser`] produces this error.
|
||
///
|
||
/// In the future,
|
||
/// it may be desirable to be able to query [`ParseState`] for what
|
||
/// tokens are acceptable at this point,
|
||
/// to provide better error messages.
|
||
UnexpectedToken(T),
|
||
|
||
/// A parser-specific error associated with an inner
|
||
/// [`ParseState`].
|
||
StateError(E),
|
||
}
|
||
|
||
impl<T: Token, EA: Diagnostic + PartialEq> ParseError<T, EA> {
|
||
pub fn inner_into<EB: Diagnostic + PartialEq + Eq>(
|
||
self,
|
||
) -> ParseError<T, EB>
|
||
where
|
||
EA: Into<EB>,
|
||
{
|
||
use ParseError::*;
|
||
match self {
|
||
UnexpectedEof(x) => UnexpectedEof(x),
|
||
UnexpectedToken(x) => UnexpectedToken(x),
|
||
StateError(e) => StateError(e.into()),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<T: Token, E: Diagnostic + PartialEq> From<E> for ParseError<T, E> {
|
||
fn from(e: E) -> Self {
|
||
Self::StateError(e)
|
||
}
|
||
}
|
||
|
||
impl<T: Token, E: Diagnostic + PartialEq> Display for ParseError<T, E> {
    /// Write a short, span-free summary of the error.
    ///
    /// The two common variants produce fixed messages;
    /// a [`ParseError::StateError`] defers to the [`Display`]
    /// implementation of the inner error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::StateError(inner) => Display::fmt(inner, f),
            Self::UnexpectedEof(_) => f.write_str("unexpected end of input"),
            // The offending token is not included in the message.
            Self::UnexpectedToken(_) => f.write_str("unexpected input"),
        }
    }
}
|
||
|
||
impl<T: Token, E: Diagnostic + PartialEq + 'static> Error for ParseError<T, E> {
    /// Expose the inner parser-specific error as the source,
    /// if there is one.
    ///
    /// Only [`ParseError::StateError`] wraps another error;
    /// the remaining variants have no source.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            Self::UnexpectedEof(_) | Self::UnexpectedToken(_) => None,
            Self::StateError(inner) => Some(inner),
        }
    }
}
|
||
|
||
impl<T: Token, E: Diagnostic + PartialEq + 'static> Diagnostic
|
||
for ParseError<T, E>
|
||
{
|
||
fn describe(&self) -> Vec<AnnotatedSpan> {
|
||
use ParseError::*;
|
||
|
||
match self {
|
||
// TODO: More information from the underlying parser on what was expected.
|
||
UnexpectedEof(span) => {
|
||
span.error("unexpected end of input here").into()
|
||
}
|
||
|
||
UnexpectedToken(tok) => {
|
||
tok.span().error("this was unexpected").into()
|
||
}
|
||
|
||
// TODO: Is there any additional useful context we can augment
|
||
// this with?
|
||
StateError(e) => e.describe(),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S, I> From<I> for Parser<S, I>
|
||
where
|
||
S: ParseState,
|
||
I: TokenStream<S::Token>,
|
||
<S as ParseState>::Context: Default,
|
||
{
|
||
/// Create a new parser with a default context.
|
||
///
|
||
/// This can only be used if the associated [`ParseState::Context`] does
|
||
/// not implement [`Default`];
|
||
/// otherwise,
|
||
/// consider instantiating from a `(TokenStream, Context)` pair.
|
||
/// See also [`ParseState::parse`] and
|
||
/// [`ParseState::parse_with_context`].
|
||
fn from(toks: I) -> Self {
|
||
Self {
|
||
toks,
|
||
state: Default::default(),
|
||
last_span: UNKNOWN_SPAN,
|
||
ctx: Default::default(),
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<S, I, C> From<(I, C)> for Parser<S, I>
|
||
where
|
||
S: ParseState<Context = C>,
|
||
I: TokenStream<S::Token>,
|
||
{
|
||
/// Create a new parser with a provided context.
|
||
///
|
||
/// For more information,
|
||
/// see [`ParseState::parse_with_context`].
|
||
///
|
||
/// See also [`ParseState::parse`].
|
||
fn from((toks, ctx): (I, C)) -> Self {
|
||
Self {
|
||
toks,
|
||
state: Default::default(),
|
||
last_span: UNKNOWN_SPAN,
|
||
ctx,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Result of a parsing operation.
|
||
#[derive(Debug, PartialEq, Eq)]
|
||
pub enum ParseStatus<S: ParseState> {
|
||
/// Additional tokens are needed to complete parsing of the next object.
|
||
Incomplete,
|
||
|
||
/// Parsing of an object is complete.
|
||
///
|
||
/// This does not indicate that the parser is complete,
|
||
/// as more objects may be able to be emitted.
|
||
Object(S::Object),
|
||
|
||
/// Parser encountered a dead state relative to the given token.
|
||
///
|
||
/// A dead state is an empty accepting state that has no state
|
||
/// transition for the given token.
|
||
/// A state is empty if a [`ParseStatus::Object`] will not be lost if
|
||
/// parsing ends at this point
|
||
/// (that is---there is no partially-built object).
|
||
/// This could simply mean that the parser has completed its job and
|
||
/// that control must be returned to a parent context.
|
||
///
|
||
/// If a parser is _not_ in an accepting state,
|
||
/// then an error ought to occur rather than a dead state;
|
||
/// the difference between the two is that the token associated with
|
||
/// a dead state can be used as a lookahead token in order to
|
||
/// produce a state transition at a higher level,
|
||
/// whereas an error indicates that parsing has failed.
|
||
/// Intuitively,
|
||
/// this means that a [`ParseStatus::Object`] had just been emitted
|
||
/// and that the token following it isn't something that can be
|
||
/// parsed.
|
||
///
|
||
/// If there is no parent context to handle the token,
|
||
/// [`Parser`] must yield an error.
|
||
Dead(S::Token),
|
||
}
|
||
|
||
impl<S: ParseState<Object = T>, T: Object> From<T> for ParseStatus<S> {
|
||
fn from(obj: T) -> Self {
|
||
Self::Object(obj)
|
||
}
|
||
}
|
||
|
||
/// Public-facing outcome of a parsing operation.
///
/// [`ParseStatus`] is what a [`ParseState`] uses to influence the
/// operation of the parser,
/// whereas this type is what [`Parser`] exposes to its callers.
#[derive(Debug, PartialEq, Eq)]
pub enum Parsed<O> {
    /// More tokens are required before the next object can be produced.
    Incomplete,

    /// An object has been fully parsed.
    ///
    /// The parser as a whole is not necessarily finished,
    /// since further objects may still be emitted.
    Object(O),
}
|
||
|
||
impl<S: ParseState> From<ParseStatus<S>> for Parsed<S::Object> {
|
||
fn from(status: ParseStatus<S>) -> Self {
|
||
match status {
|
||
ParseStatus::Incomplete => Parsed::Incomplete,
|
||
ParseStatus::Object(x) => Parsed::Object(x),
|
||
ParseStatus::Dead(_) => {
|
||
unreachable!("Dead status must be filtered by Parser")
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
pub mod test {
    use std::{assert_matches::assert_matches, iter::once};

    use super::*;
    use crate::{span::DUMMY_SPAN as DS, sym::GlobalSymbolIntern};

    /// Tokens used to drive [`EchoState`] in the tests below.
    #[derive(Debug, PartialEq, Eq, Clone)]
    enum TestToken {
        /// Causes [`EchoState`] to produce
        /// [`EchoStateError::InnerError`].
        Close(Span),
        /// Moves [`EchoState`] into its accepting state and is echoed
        /// back as the parsed object.
        MarkDone(Span),
        /// Causes [`EchoState`] to yield a dead state.
        Text(Span),
        /// Stores the given value in the [`StubContext`];
        /// this token carries no span.
        SetCtxVal(u8),
    }

    impl Display for TestToken {
        fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            unimplemented!("fmt::Display")
        }
    }

    impl Token for TestToken {
        fn span(&self) -> Span {
            use TestToken::*;
            match self {
                Close(span) | MarkDone(span) | Text(span) => *span,
                // Listed explicitly (rather than `_`) so that adding a
                // new variant forces a decision about its span.
                SetCtxVal(_) => UNKNOWN_SPAN,
            }
        }
    }

    impl Object for TestToken {}

    /// Minimal parser state that echoes [`TestToken::MarkDone`] back to
    /// the caller.
    #[derive(Debug, PartialEq, Eq)]
    enum EchoState {
        /// Initial state; not accepting.
        Empty,
        /// Accepting state.
        Done,
    }

    impl Default for EchoState {
        fn default() -> Self {
            Self::Empty
        }
    }

    /// Context whose single value can be set via
    /// [`TestToken::SetCtxVal`].
    #[derive(Debug, PartialEq, Default)]
    struct StubContext {
        val: u8,
    }

    impl ParseState for EchoState {
        type Token = TestToken;
        type Object = TestToken;
        type Error = EchoStateError;

        type Context = StubContext;

        fn parse_token(
            self,
            tok: TestToken,
            ctx: &mut StubContext,
        ) -> TransitionResult<Self> {
            match tok {
                TestToken::MarkDone(..) => Transition(Self::Done).ok(tok),
                TestToken::Close(..) => {
                    Transition(self).err(EchoStateError::InnerError(tok))
                }
                TestToken::Text(..) => Transition(self).dead(tok),
                TestToken::SetCtxVal(val) => {
                    ctx.val = val;
                    Transition(Self::Done).incomplete()
                }
            }
        }

        fn is_accepting(&self) -> bool {
            *self == Self::Done
        }
    }

    /// Error produced by [`EchoState`] for [`TestToken::Close`].
    #[derive(Debug, PartialEq, Eq)]
    enum EchoStateError {
        InnerError(TestToken),
    }

    impl Display for EchoStateError {
        fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            unimplemented!()
        }
    }

    impl Error for EchoStateError {
        fn source(&self) -> Option<&(dyn Error + 'static)> {
            None
        }
    }

    impl Diagnostic for EchoStateError {
        fn describe(&self) -> Vec<AnnotatedSpan> {
            unimplemented!()
        }
    }

    type Sut<I> = Parser<EchoState, I>;

    #[test]
    fn successful_parse_in_accepting_state_with_spans() {
        // EchoState is placed into a Done state given MarkDone.
        let tok = TestToken::MarkDone(DS);
        let mut toks = once(tok.clone());

        let mut sut = Sut::from(&mut toks);

        // The first token should be processed normally.
        // EchoState proxies the token back.
        assert_eq!(Some(Ok(Parsed::Object(tok))), sut.next());

        // This is now the end of the token stream,
        // which should be okay provided that the first token put us into
        // a proper accepting state.
        assert_eq!(None, sut.next());

        // Further, finalizing should work in this state.
        assert!(sut.finalize().is_ok());
    }

    #[test]
    fn fails_on_end_of_stream_when_not_in_accepting_state() {
        let span = Span::new(10, 20, "ctx".intern());
        let mut toks = [TestToken::Close(span)].into_iter();

        let mut sut = Sut::from(&mut toks);

        // EchoState reports an error for Close and stays in its current
        // (non-accepting) state;
        // the result is ignored here because the token is consumed
        // only so that the parser acquires a most recent span.
        sut.next();

        // Given that we have no tokens,
        // and that EchoState::default does not start in an accepting
        // state,
        // we must fail when we encounter the end of the stream.
        assert_eq!(
            Some(Err(ParseError::UnexpectedEof(span.endpoints().1.unwrap()))),
            sut.next()
        );
    }

    #[test]
    fn returns_state_specific_error() {
        // TestToken::Close causes EchoState to produce an error.
        let errtok = TestToken::Close(DS);
        let mut toks = [errtok.clone()].into_iter();

        let mut sut = Sut::from(&mut toks);

        assert_eq!(
            Some(Err(ParseError::StateError(EchoStateError::InnerError(
                errtok
            )))),
            sut.next()
        );

        // The token must have been consumed.
        // It is up to a recovery process to either bail out or provide
        // recovery tokens;
        // continuing without recovery is unlikely to make sense.
        assert_eq!(0, toks.len());
    }

    #[test]
    fn fails_when_parser_is_finalized_in_non_accepting_state() {
        let span = Span::new(10, 10, "ctx".intern());

        // Set up so that we have a single token that we can use for
        // recovery as part of the same iterator.
        let recovery = TestToken::MarkDone(DS);
        let mut toks = [
            // Used purely to populate a Span.
            TestToken::Close(span),
            // Recovery token here:
            recovery.clone(),
        ]
        .into_iter();

        let mut sut = Sut::from(&mut toks);

        // Populate our most recently seen token's span.
        sut.next();

        // Attempting to finalize now in a non-accepting state should fail
        // in the same way that encountering an end-of-stream does,
        // since we're effectively saying "we're done with the stream"
        // and the parser will have no further opportunity to reach an
        // accepting state.
        let result = sut.finalize();
        assert_matches!(
            result,
            Err((_, ParseError::UnexpectedEof(s))) if s == span.endpoints().1.unwrap()
        );

        // The sut should have been re-returned,
        // allowing for attempted error recovery if the caller can manage
        // to produce a sequence of tokens that will be considered valid.
        // `toks` above is set up already for this,
        // which allows us to assert that we received back the same `sut`.
        let mut sut = result.unwrap_err().0;
        assert_eq!(Some(Ok(Parsed::Object(recovery))), sut.next());

        // And so we should now be in an accepting state,
        // able to finalize.
        assert!(sut.finalize().is_ok());
    }

    #[test]
    fn unhandled_dead_state_results_in_error() {
        // A Text will cause our parser to return Dead.
        let tok = TestToken::Text(DS);
        let mut toks = once(tok.clone());

        let mut sut = Sut::from(&mut toks);

        // Our parser returns a Dead status,
        // which is unhandled by any parent context
        // (since we're not composing parsers),
        // which causes an error due to an unhandled Dead state.
        assert_eq!(Some(Err(ParseError::UnexpectedToken(tok))), sut.next());
    }

    // A context can be both retrieved from a finished parser and provided
    // to a new one.
    #[test]
    fn provide_and_retrieve_context() {
        // First, verify that it's initialized to a default context.
        let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
        let mut sut = Sut::from(&mut toks);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, Default::default());

        // Next, verify that the context that is manipulated is the context
        // that is returned to us.
        let val = 5;
        let mut toks = vec![TestToken::SetCtxVal(val)].into_iter();
        let mut sut = Sut::from(&mut toks);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, StubContext { val });

        // Finally, verify that the context provided is the context that is
        // used.
        let val = 10;
        let given_ctx = StubContext { val };
        let mut toks = vec![TestToken::MarkDone(DS)].into_iter();
        let mut sut = EchoState::parse_with_context(&mut toks, given_ctx);
        sut.next().unwrap().unwrap();
        let ctx = sut.finalize().unwrap();
        assert_eq!(ctx, StubContext { val });
    }
}
|