tamer: parse::util: Introduce StitchableExpansionState

This parser really just allows me to continue developing the NIR
interpolation system using `Expansion` terminology, and avoid having to use
dead states in tests.  This allows for the appropriate level of abstraction
to be used in isolation, and then only be stripped when stitching is
necessary.

Future commits will show how this is actually integrated and may introduce
additional abstraction to help.

DEV-13156
main
Mike Gerwitz 2022-11-14 16:15:52 -05:00
parent 4117efc50c
commit 03cf652c41
4 changed files with 323 additions and 4 deletions

View File

@ -50,7 +50,8 @@ use std::{
/// parser.
pub mod prelude {
pub use super::{
Context, Object, ParseState, Token, Transition, TransitionResult,
ClosedParseState, Context, Object, ParseError, ParseState, ParseStatus,
Parsed, Token, Transition, TransitionResult,
};
}

View File

@ -204,6 +204,48 @@ impl<S: ParseState> TransitionData<S> {
_ => None,
}
}
/// Transform this [`TransitionData`] only when it carries a
///   [`ParseStatus::Object`].
///
/// If `self` holds an object,
///   then `f` is invoked with that object and the optional token of
///   [`Lookahead`] that accompanied it,
///   and whatever `f` produces becomes the result.
/// Every other case
///   (an incomplete parse, an error, or a dead state)
///   is rebuilt as-is for the target state `SB`,
///   keeping its lookahead intact.
/// This lets an object influence parser operations more broadly.
///
/// _Visibility is deliberately restricted:
///   the caller must take care never to discard the token of lookahead
///   handed to `f`._
/// Because that token may be stored and emitted later,
///   there is presently no reliable automated way to enforce this
///   invariant;
///     doing so was well beyond the scope of the work at the time of
///     writing.
pub(in super::super) fn map_when_obj<SB: ParseState>(
    self,
    f: impl FnOnce(S::Object, Option<Lookahead<S::Token>>) -> TransitionData<SB>,
) -> TransitionData<SB>
where
    SB: ParseState<Token = S::Token, Error = S::Error>,
{
    // A dead state has no status to inspect,
    //   so it is passed through immediately.
    let (status, la) = match self {
        TransitionData::Dead(la) => return TransitionData::Dead(la),
        TransitionData::Result(status, la) => (status, la),
    };

    // Only an object is handed to `f`;
    //   anything else is re-wrapped so that it is typed for `SB`.
    match status {
        Ok(ParseStatus::Object(obj)) => f(obj, la),
        Ok(ParseStatus::Incomplete) => {
            TransitionData::Result(Ok(ParseStatus::Incomplete), la)
        }
        Err(e) => TransitionData::Result(Err(e), la),
    }
}
}
/// A verb denoting a state transition.

View File

@ -25,10 +25,13 @@
//! they provide wrappers around core functionality that make it easier
//! to use outside of the domain of the parsing system itself.
use crate::{span::Span, sym::SymbolId};
use crate::{diagnose::Annotate, diagnostic_panic, span::Span, sym::SymbolId};
use super::{Object, ParseState, Token};
use std::fmt::Display;
use super::{
prelude::*,
state::{Lookahead, TransitionData},
};
use std::{fmt::Display, marker::PhantomData};
/// Alias for a [`ParseState`] that consumes tokens of type `T` and yields
/// [`Expansion<T, O>`](Expansion) objects.
pub trait ExpandingParseState<T: Token, O: Object> =
ParseState<Token = T, Object = Expansion<T, O>>;
@ -49,6 +52,133 @@ pub enum Expansion<T, O: Object> {
// `Expansion` is itself an `Object` so that parsers are able to yield it.
impl<T: Token, O: Object> Object for Expansion<T, O> {}
/// Convert a [`ClosedParseState`] yielding an [`Expansion<T,O>`](Expansion)
/// object into a parser yielding `O` with a dead state yielding `T`.
///
/// It is more convenient and clear to write parsers using [`Expansion`],
/// since those variants not only state directly what the intent of the
/// operations are,
/// but also avoid having to work with dead states.
/// However,
/// their wrapping in [`Expansion`] makes them difficult to delegate to
/// (compose with)
/// other parsers using [`ParseState`]'s `delegate_*` family of
/// functions.
///
/// This parser handles this translation by stripping away the
/// [`Expansion`] abstraction and producing a [`ParseState`] that looks
/// and acts like what would have been implemented in the absence of such
/// an abstraction.
#[derive(Debug, PartialEq, Eq)]
pub struct StitchableExpansionState<S: ClosedParseState, O: Object> {
/// The wrapped parser state to which every token is forwarded.
st: S,
/// Ties the otherwise-unused object type `O` to this struct;
/// `O` is the object type yielded once [`Expansion`] is stripped.
_phantom: PhantomData<O>,
}
// We implement Default if the parser `S` that we're wrapping does.
impl<S: ClosedParseState, O: Object> Default for StitchableExpansionState<S, O>
where
S: Default,
{
fn default() -> Self {
Self {
st: Default::default(),
_phantom: Default::default(),
}
}
}
impl<S: ClosedParseState, O: Object> ParseState
for StitchableExpansionState<S, O>
where
O: Token + Eq,
S: ParseState<Object = Expansion<<S as ParseState>::Token, O>>,
{
type Token = S::Token;
type Object = O;
type Error = S::Error;
type Context = S::Context;
#[inline]
fn parse_token(
self,
tok: Self::Token,
ctx: &mut Self::Context,
) -> TransitionResult<Self::Super> {
use Expansion::*;
match self {
Self { st, _phantom } => {
let TransitionResult(Transition(st_new), data) =
st.parse_token(tok, ctx);
let data_new = data.map_when_obj(|obj, la| match (obj, la) {
(Expanded(obj), la) => {
TransitionData::Result(Ok(ParseStatus::Object(obj)), la)
}
// A parser must never throw away lookahead tokens.
// Since we are converting the `DoneExpanding` variant
// into a lookahead token,
// we would have nothing to do with a token of
// lookahead if one were provided to us.
// Ideally this would be prevented using types,
// but such a change is too much effort at the time of
// writing.
(DoneExpanding(tok), Some(Lookahead(la_tok))) => {
let desc = vec![
tok.span().note(
"while processing this \
Expansion::DoneExpanding token",
),
la_tok.span().internal_error(
"encountered this unexpected lookahead token",
),
];
diagnostic_panic!(
desc,
"cannot provide lookahead token with \
Expansion::DoneExpanding",
)
}
(DoneExpanding(tok), None) => {
TransitionData::Dead(Lookahead(tok))
}
});
TransitionResult(
Transition(Self {
st: st_new,
_phantom,
}),
data_new,
)
}
}
}
fn is_accepting(&self, ctx: &Self::Context) -> bool {
self.st.is_accepting(ctx)
}
}
impl<S: ClosedParseState, O: Object> Display
    for StitchableExpansionState<S, O>
{
    /// Describe the wrapped parser,
    ///   noting that its [`Expansion`] is being stripped.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let parser = &self.st;
        write!(f, "{parser}, with Expansion stripped")
    }
}
/// A [`SymbolId`] with a corresponding [`Span`].
///
/// This newtype is required because foreign traits
@ -90,3 +220,6 @@ impl Into<(SymbolId, Span)> for SPair {
}
}
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,143 @@
// Tests for TAMER parsing framework utilities
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::*;
use crate::{span::dummy::*, sym::st::raw};
use std::{assert_matches::assert_matches, convert::Infallible};
/// Object used by these tests,
/// wrapping a single [`SPair`].
#[derive(Debug, PartialEq, Eq)]
struct TestObject(SPair);
impl Token for TestObject {
    /// Name of this test IR.
    fn ir_name() -> &'static str {
        "TestObject"
    }

    /// Span of the wrapped [`SPair`].
    fn span(&self) -> Span {
        let Self(SPair(_, span)) = self;
        *span
    }
}
impl Display for TestObject {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self(spair) => Display::fmt(spair, f),
}
}
}
// `TestObject` is used both as a `Token` (above) and as an `Object`.
impl Object for TestObject {}
/// Just some parser to wrap for our tests.
///
/// It echoes each token back as an [`Expansion::Expanded`] [`TestObject`],
/// except for the `STOP` and `DEAD_SYM` symbols,
/// which yield [`Expansion::DoneExpanding`].
///
/// Eventually we'll be able to more easily create these on-demand without
/// so much boilerplate,
/// but that hasn't evolved yet.
#[derive(Debug, PartialEq, Eq, Default)]
struct TestParseState;
impl ParseState for TestParseState {
    type Token = SPair;
    type Object = Expansion<Self::Token, TestObject>;
    type Error = Infallible;

    /// Echo `tok` back as an expanded [`TestObject`],
    ///   unless its symbol is `STOP` or `DEAD_SYM`,
    ///   in which case expansion is declared done.
    ///
    /// `DEAD_SYM` additionally attaches a token of lookahead.
    fn parse_token(
        self,
        tok: Self::Token,
        _ctx: &mut Self::Context,
    ) -> TransitionResult<Self::Super> {
        match tok {
            tok @ SPair(sym @ (STOP | DEAD_SYM), span) => {
                // It doesn't matter what this token is for our tests.
                let lookahead =
                    (sym == DEAD_SYM).then(|| Lookahead(SPair(sym, span)));

                Transition(self)
                    .ok(Expansion::DoneExpanding(tok))
                    .maybe_with_lookahead(lookahead)
            }

            _ => Transition(self).ok(Expansion::Expanded(TestObject(tok))),
        }
    }

    /// This parser accepts in every state.
    fn is_accepting(&self, _ctx: &Self::Context) -> bool {
        true
    }
}
impl Display for TestParseState {
    /// A fixed description;
    ///   this parser has only one state.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str("doing its thing") // well, it is
    }
}
// Symbol that makes `TestParseState` yield `Expansion::DoneExpanding`
// _without_ a token of lookahead.
const STOP: SymbolId = raw::L_YIELD;
// Symbol that makes `TestParseState` yield `Expansion::DoneExpanding`
// _with_ a token of lookahead.
const DEAD_SYM: SymbolId = raw::L_WARNING;
// The system under test:
// `TestParseState` wrapped so that `Expansion` is stripped.
type ExpansionSut = StitchableExpansionState<TestParseState, TestObject>;
#[test]
fn expansion_can_be_stripped_for_stitching() {
    let sym_foo = "foo".into();
    let sym_bar = "bar".into();

    // This wraps the above TestParseState to strip Expansion.
    let mut sut = ExpansionSut::parse(
        vec![SPair(sym_foo, S1), SPair(sym_bar, S2), SPair(STOP, S3)]
            .into_iter(),
    );

    // Until `STOP` is reached,
    //   the test parser echoes each token back wrapped in an "expanded"
    //   `TestObject`.
    // The first two tokens are therefore expanded,
    //   and the SUT strips that expansion from each.
    for (sym, span) in [(sym_foo, S1), (sym_bar, S2)] {
        assert_eq!(
            sut.next(),
            Some(Ok(Parsed::Object(TestObject(SPair(sym, span))))),
        );
    }

    // The final `Expansion::DoneExpanding` is converted into a dead state
    //   transition.
    // That surfaces here as an `UnexpectedToken` error because nothing
    //   within our parser handled it,
    //     but this is expected to be stitched via delegation,
    //     which _would_ handle this case.
    assert_matches!(
        sut.next(),
        Some(Err(ParseError::UnexpectedToken(dead_tok, _)))
            if dead_tok == SPair(STOP, S3)
    );
}
// Lookahead tokens must never be lost;
//   see SUT for more information.
#[should_panic]
#[test]
fn expansion_stripping_panics_if_lookahead() {
    // `DEAD_SYM` makes the test parser emit lookahead alongside
    //   `DoneExpanding`,
    //     so the panic is triggered on the very first call.
    let mut sut = ExpansionSut::parse(vec![SPair(DEAD_SYM, S1)].into_iter());

    let _ = sut.next();
}