// Parsing automaton // // Copyright (C) 2014-2022 Ryan Specialty Group, LLC. // // This file is part of TAME. // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . //! State transitions for parser automata. use super::{ParseState, ParseStateResult, ParseStatus, Token}; use std::{ convert::Infallible, hint::unreachable_unchecked, ops::{ControlFlow, FromResidual}, }; #[cfg(doc)] use super::Parser; /// A state transition with associated data. /// /// Conceptually, /// imagine the act of a state transition producing data. /// See [`Transition`] for convenience methods for producing this tuple. /// /// Sometimes a parser is not able to complete the operation requested /// based on the provided input token. /// Since TAMER uses a streaming parsing framework that places strict /// limits on control flow, /// a single token can be returned as lookahead to indicate that the /// token could not be parsed yet and should be provided once again /// in place of the next token from the input stream. /// This allows, /// for example, /// for multiple data to be emitted in response to a single token. /// /// This struct is opaque to ensure that critical invariants involving /// transitions and lookahead are properly upheld; /// callers must use the appropriate parsing APIs. #[derive(Debug, PartialEq)] pub struct TransitionResult( /// New parser state. pub(in super::super) Transition, /// Result of the parsing operation. pub(in super::super) TransitionData, ); impl TransitionResult { /// Indicate that this transition include a single token of lookahead, /// which should be provided back to the parser in place of the /// next token from the input stream. pub fn with_lookahead(self, lookahead: S::Token) -> Self { match self { Self(transition, TransitionData::Result(result, None)) => Self( transition, TransitionData::Result(result, Some(Lookahead(lookahead))), ), // This represents a problem with the parser; // we should never specify a lookahead token more than once. // This could be enforced statically with the type system if // ever such a thing is deemed to be worth doing. Self( .., TransitionData::Result(_, Some(prev)) | TransitionData::Dead(prev), ) => { panic!("internal error: lookahead token overwrite: {prev:?}") } } } } /// Token to use as a lookahead token in place of the next token from the /// input stream. #[derive(Debug, PartialEq)] pub struct Lookahead(pub(in super::super) T); /// Information about the state transition. /// /// Note: Ideally a state wouldn't even be required for /// [`Dead`](TransitionData::Dead), /// but [`ParseState`] does not implement [`Default`] and [`Parser`] /// requires _some_ state exist. #[derive(Debug, PartialEq)] pub(in super::super) enum TransitionData { /// State transition was successful or not attempted, /// with an optional token of [`Lookahead`]. /// /// Note that a successful state transition _does not_ imply a /// successful [`ParseStateResult`]---the /// parser may choose to successfully transition into an error /// recovery state to accommodate future tokens. Result(ParseStateResult, Option>), /// No valid state transition exists from the current state for the /// given input token, /// which is returned as a token of [`Lookahead`]. /// /// A dead state is an accepting state that has no state transition for /// the given token. /// This could simply mean that the parser has completed its job and /// that control must be returned to a parent context. /// Note that this differs from an error state, /// where a parser is unable to reach an accepting state because it /// received unexpected input. /// /// Note that the parser may still choose to perform a state transition /// for the sake of error recovery, /// but note that the dead state is generally interpreted to mean /// "I have no further work that I am able to perform" /// and may lead to finalization of the parser. /// If a parser intends to do additional work, /// it should return an error instead via [`TransitionData::Result`]. Dead(Lookahead), } /// A verb denoting a state transition. /// /// This is typically instantiated directly by a [`ParseState`] to perform a /// state transition in [`ParseState::parse_token`]. /// /// This newtype was created to produce clear, self-documenting code; /// parsers can get confusing to read with all of the types involved, /// so this provides a mental synchronization point. /// /// This also provides some convenience methods to help remove boilerplate /// and further improve code clarity. #[derive(Debug, PartialEq, Eq)] pub struct Transition(pub S); impl Transition { /// A state transition with corresponding data. /// /// This allows [`ParseState::parse_token`] to emit a parsed object and /// corresponds to [`ParseStatus::Object`]. pub fn ok(self, obj: T) -> TransitionResult where T: Into>, { TransitionResult(self, TransitionData::Result(Ok(obj.into()), None)) } /// A transition with corresponding error. /// /// This indicates a parsing failure. /// The state ought to be suitable for error recovery. pub fn err>(self, err: E) -> TransitionResult { TransitionResult(self, TransitionData::Result(Err(err.into()), None)) } /// A state transition with corresponding [`Result`]. /// /// This translates the provided [`Result`] in a manner equivalent to /// [`Transition::ok`] and [`Transition::err`]. pub fn result(self, result: Result) -> TransitionResult where T: Into>, E: Into, { TransitionResult( self, TransitionData::Result( result.map(Into::into).map_err(Into::into), None, ), ) } /// A state transition indicating that more data is needed before an /// object can be emitted. /// /// This corresponds to [`ParseStatus::Incomplete`]. pub fn incomplete(self) -> TransitionResult { TransitionResult( self, TransitionData::Result(Ok(ParseStatus::Incomplete), None), ) } /// A state transition could not be performed and parsing will not /// continue. /// /// A dead state represents an _accepting state_ that has no edge to /// another state for the given `tok`. /// Rather than throw an error, /// a parser uses this status to indicate that it has completed /// parsing and that the token should be utilized elsewhere; /// the provided token will be used as a token of [`Lookahead`]. /// /// If a parser is not prepared to be finalized and needs to yield an /// object first, /// use [`Transition::result`] or other methods along with a token /// of [`Lookahead`]. pub fn dead(self, tok: S::Token) -> TransitionResult { TransitionResult(self, TransitionData::Dead(Lookahead(tok))) } } impl FromResidual<(Transition, ParseStateResult)> for TransitionResult { fn from_residual(residual: (Transition, ParseStateResult)) -> Self { match residual { (st, result) => Self(st, TransitionData::Result(result, None)), } } } impl FromResidual>> for TransitionResult { fn from_residual( residual: Result>, ) -> Self { match residual { Err(e) => e, // SAFETY: This match arm doesn't seem to be required in // core::result::Result's FromResidual implementation, // but as of 1.61 nightly it is here. // Since this is Infallable, // it cannot occur. Ok(_) => unsafe { unreachable_unchecked() }, } } } impl FromResidual, Infallible>> for TransitionResult { fn from_residual( residual: ControlFlow, Infallible>, ) -> Self { match residual { ControlFlow::Break(result) => result, // SAFETY: Infallible, so cannot hit. ControlFlow::Continue(_) => unsafe { unreachable_unchecked() }, } } } /// An object able to be used as data for a state [`Transition`]. /// /// This flips the usual order of things: /// rather than using a method of [`Transition`] to provide data, /// this starts with the data and produces a transition from it. /// This is sometimes necessary to satisfy ownership/borrowing rules. /// /// This trait simply removes boilerplate associated with storing /// intermediate values and translating into the resulting type. pub trait Transitionable { /// Perform a state transition to `S` using [`Self`] as the associated /// data. /// /// This may be necessary to satisfy ownership/borrowing rules when /// state data from `S` is used to compute [`Self`]. fn transition(self, to: S) -> TransitionResult; } impl Transitionable for Result, E> where S: ParseState, ::Error: From, { fn transition(self, to: S) -> TransitionResult { Transition(to).result(self) } } impl Transitionable for Result<(), E> where S: ParseState, ::Error: From, { fn transition(self, to: S) -> TransitionResult { Transition(to).result(self.map(|_| ParseStatus::Incomplete)) } }