tamer: nir::desugar::interp: Generalize without NIR symbol types

This is a shift in approach.

My original idea was to try to keep NIR parsing the way it was, since it's
already hard enough to reason about with the `ele_parse!` parser-generator
macro mess.  The idea was to produce an IR that would explicitly be denoted
as "maybe sugared", and have a desugaring operation as part of the lowering
pipeline that would perform interpolation and lower the symbol into a plain
version.

The problems with that are:

  1. The use of the type was going to introduce a lot of mapping for all the
     NIR token variants there are going to be; and
  2. _The types weren't even utilized for interpolation._

Instead, if we interpolated _as attributes are encountered_ while parsing
NIR, then we'd be able to expand directly into that NIR token stream and
handle _all_ symbols in a generic way, without any mapping beyond the
definition of NIR's grammar using `ele_parse!`.

This is a step in that direction---it removes `NirSymbolTy` and introduces a
generic abstraction for the concept of expansion, which will be utilized
soon by the attribute parser to allow replacing `TryFrom` with something
akin to `ParseFrom`, or something like that, which is able to produce a
token stream before finally yielding the value of the attribute (which will
be either the original symbol or the replacement metavariable, in the case
of interpolation).

(Note that interpolation isn't yet finished---errors still need to be
implemented.  But I want a working vertical slice first.)

DEV-13156
main
Mike Gerwitz 2022-11-10 12:14:56 -05:00
parent 8a430a52bc
commit 4117efc50c
4 changed files with 135 additions and 102 deletions

View File

@ -39,7 +39,7 @@
//! The string `foo{@bar@}baz` is the interpolation specification.
//! This ends up desugaring into the [`PlainNir`] equivalent of this:
//!
//! ```xm
//! ```xml
//! <param name="@___dsgr_01@"
//! desc="Generated from interpolated string `foo{@bar@}baz`">
//! <text>foo</text>
@ -69,12 +69,6 @@
//! then it is interpreted as a literal within the context of the template
//! system and is echoed back unchanged.
//!
//! NB: All attributes are reasoned about as string literals until they
//! contain no metavariables,
//! which may require expansion via the template system;
//! the [`NirSymbolTy`] represents the type that the literal will
//! _ultimately_ be parsed as once that time comes.
//!
//! Desugared Spans
//! ---------------
//! [`Span`]s for the generated tokens are derived from the specification
@ -105,11 +99,15 @@
use memchr::memchr;
use super::super::{NirSymbolTy, PlainNir, PlainNirSymbol, SugaredNirSymbol};
use super::super::{PlainNir, PlainNirSymbol};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
fmt::{DisplayWrapper, TtQuote},
parse::{prelude::*, NoContext},
parse::{
prelude::*,
util::{Expansion, SPair},
NoContext,
},
span::Span,
sym::{
st::quick_contains_byte, GlobalSymbolIntern, GlobalSymbolResolve,
@ -119,60 +117,9 @@ use crate::{
use std::{error::Error, fmt::Display};
// Expose variants for enums defined in this module to reduce verbosity.
use InterpObject::*;
use Expansion::*;
use InterpState::*;
/// Object resulting from interpolation.
///
/// The provided [`SugaredNirSymbol`] is interpreted as a specification for
/// interpolation.
/// This specification is expanded into a sequence of [`PlainNir`] tokens
/// via the [`Expanded`](Self::Expanded) variant,
/// representing the definition of a template parameter whose default
/// value will yield the equivalent of the specification.
///
/// After expansion,
/// the original [`SugaredNirSymbol`] is expected to be replaced with a
/// [`PlainNirSymbol`] via the [`ReplaceSym`](Self::ReplaceSym) variant,
/// containing the name of the newly-generated metavariable.
#[derive(Debug, PartialEq, Eq)]
pub enum InterpObject<const TY: NirSymbolTy> {
/// A token generated as part of interpolation which is to be merged
/// into the NIR token stream.
Expanded(PlainNir),
/// Interpolation has resulted in the creation of a new metavariable
/// which should take place of the original NIR symbol containing the
/// interpolation specification.
ReplaceSym(PlainNirSymbol<TY>),
}
impl<const TY: NirSymbolTy> Token for InterpObject<TY> {
fn ir_name() -> &'static str {
"Interpolation"
}
fn span(&self) -> Span {
match self {
Self::Expanded(nir) => nir.span(),
Self::ReplaceSym(nir_sym) => nir_sym.span(),
}
}
}
impl<const TY: NirSymbolTy> Object for InterpObject<TY> {}
impl<const TY: NirSymbolTy> Display for InterpObject<TY> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
InterpObject::Expanded(nir) => write!(f, "interpolated {nir}"),
InterpObject::ReplaceSym(nir_sym) => {
write!(f, "interpolation specification replacement {nir_sym}")
}
}
}
}
/// A generated identifier.
#[derive(Debug, PartialEq, Eq)]
pub struct GenIdentSymbolId(SymbolId);
@ -203,7 +150,7 @@ type SpecOffset = usize;
/// For more information,
/// see the [parent module](super).
#[derive(Debug, PartialEq, Eq, Default)]
pub enum InterpState<const TY: NirSymbolTy> {
pub enum InterpState {
/// The next token will be inspected to determine whether it requires
/// interpolation.
#[default]
@ -224,7 +171,7 @@ pub enum InterpState<const TY: NirSymbolTy> {
FinishSym(SpecSlice, GenIdentSymbolId),
}
impl<const TY: NirSymbolTy> Display for InterpState<TY> {
impl Display for InterpState {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use InterpState::*;
@ -260,9 +207,9 @@ impl<const TY: NirSymbolTy> Display for InterpState<TY> {
}
}
impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
type Token = SugaredNirSymbol<TY>;
type Object = InterpObject<TY>;
impl ParseState for InterpState {
type Token = SPair;
type Object = Expansion<SPair, PlainNir>;
type Error = InterpError;
fn parse_token(
@ -270,7 +217,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
tok: Self::Token,
_: NoContext,
) -> TransitionResult<Self> {
match (self, tok) {
match (self, tok.into()) {
// When receiving a new symbol,
// we must make a quick determination as to whether it
// requires desugaring.
@ -280,7 +227,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// filter out non-interpolated strings quickly,
// before we start to parse.
// Symbols that require no interpolation are simply echoed back.
(Ready, SugaredNirSymbol(sym, span)) => {
(Ready, (sym, span)) => {
if needs_interpolation(sym) {
Self::begin_expansion(sym, span)
} else {
@ -292,10 +239,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// The outermost parsing context is that of the literal,
// where a sequence of characters up to `{` stand for
// themselves.
(
ParseLiteralAt(s, gen_param, offset),
SugaredNirSymbol(sym, span),
) => {
(ParseLiteralAt(s, gen_param, offset), (sym, span)) => {
if offset == s.len() {
// We've reached the end of the specification string.
// Since we're in the outermost (literal) context,
@ -312,7 +256,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
Some(0) => {
Transition(ParseInterpAt(s, gen_param, offset + 1))
.incomplete()
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
// Everything from the offset until the curly brace is a
@ -329,7 +273,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
Transition(ParseInterpAt(s, gen_param, end + 1))
.ok(Expanded(text))
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
// The remainder of the specification is a literal.
@ -345,7 +289,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// we'll complete parsing next pass.
Transition(ParseLiteralAt(s, gen_param, s.len()))
.ok(Expanded(text))
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
}
}
@ -355,10 +299,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// This is an inner context that cannot complete without being
// explicitly closed,
// and cannot be nested.
(
ParseInterpAt(s, gen_param, offset),
SugaredNirSymbol(sym, span),
) => {
(ParseInterpAt(s, gen_param, offset), (sym, span)) => {
// TODO: Make sure offset exists, avoid panic
// TODO: Prevent nested `{`.
@ -385,7 +326,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// back in a literal context.
Transition(ParseLiteralAt(s, gen_param, end + 1))
.ok(Expanded(param_value))
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
None => todo!("missing closing '}}'"),
@ -397,10 +338,9 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
// (the interpolation specification)
// with a metavariable referencing the parameter that we just
// generated.
(
FinishSym(_, GenIdentSymbolId(gen_param)),
SugaredNirSymbol(_, span),
) => Self::yield_symbol(gen_param, span),
(FinishSym(_, GenIdentSymbolId(gen_param)), (_, span)) => {
Self::yield_symbol(gen_param, span)
}
}
}
@ -409,7 +349,7 @@ impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
}
}
impl<const TY: NirSymbolTy> InterpState<TY> {
impl InterpState {
/// Yield the final result of this operation in place of the original
/// specification string,
/// which may or may not have required interpolation.
@ -423,7 +363,7 @@ impl<const TY: NirSymbolTy> InterpState<TY> {
/// This transitions back to [`Ready`] and finally releases the
/// lookahead symbol.
fn yield_symbol(sym: SymbolId, span: Span) -> TransitionResult<Self> {
Transition(Ready).ok(ReplaceSym(PlainNirSymbol::Todo(sym, span)))
Transition(Ready).ok(DoneExpanding((sym, span).into()))
}
/// Begin expansion of an interpolation specification by generating a
@ -457,7 +397,7 @@ impl<const TY: NirSymbolTy> InterpState<TY> {
// prefixes.
Transition(ParseLiteralAt(sym.lookup_str(), gen_param, 0))
.ok(Expanded(open))
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
/// Complete expansion of an interpolation specification string.
@ -478,7 +418,7 @@ impl<const TY: NirSymbolTy> InterpState<TY> {
// (the specification string).
Transition(FinishSym(s, gen_param))
.ok(Expanded(close))
.with_lookahead(SugaredNirSymbol(sym, span))
.with_lookahead((sym, span).into())
}
}

View File

@ -17,18 +17,17 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::{super::super::NirSymbolTy::*, *};
use super::*;
use crate::{
nir::{PlainNirSymbol, SugaredNirSymbol},
nir::PlainNirSymbol,
parse::Parsed,
span::dummy::{DUMMY_CONTEXT as DC, *},
sym::GlobalSymbolResolve,
};
use std::assert_matches::assert_matches;
use Parsed::*;
type Sut<const TY: NirSymbolTy> = InterpState<TY>;
type Sut = InterpState;
// While it'd be semantically valid to desugar a literal into a template
// param,
@ -41,10 +40,10 @@ fn does_not_desugar_literal_only() {
// but it's also a literal because it's not enclosed in braces.
for literal in ["foo", "@bar@"] {
let sym = literal.into();
let toks = vec![SugaredNirSymbol::<{ StringLiteral }>(sym, S1)];
let toks = vec![SPair(sym, S1)];
assert_eq!(
Ok(vec![Object(ReplaceSym(PlainNirSymbol::Todo(sym, S1)))]),
Ok(vec![Object(DoneExpanding(SPair(sym, S1)))]),
Sut::parse(toks.into_iter()).collect(),
"literal `{literal}` must not desugar",
);
@ -70,7 +69,7 @@ fn desugars_literal_with_ending_var() {
let b = DC.span(10, 3);
let c = DC.span(14, 5);
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
let given_sym = SPair(given_val.into(), a);
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -124,7 +123,7 @@ fn desugars_literal_with_ending_var() {
// with a metavariable reference to the generated parameter.
assert_matches!(
sut.next(),
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -149,7 +148,7 @@ fn desugars_var_with_ending_literal() {
let b = DC.span(21, 5);
let c = DC.span(27, 3);
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
let given_sym = SPair(given_val.into(), a);
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -190,7 +189,7 @@ fn desugars_var_with_ending_literal() {
assert_matches!(
sut.next(),
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -216,7 +215,7 @@ fn desugars_many_vars_and_literals() {
let d = DC.span(40, 3);
let e = DC.span(44, 6);
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
let given_sym = SPair(given_val.into(), a);
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -264,7 +263,7 @@ fn desugars_many_vars_and_literals() {
assert_matches!(
sut.next(),
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -287,7 +286,7 @@ fn desugars_adjacent_interpolated_vars() {
let c = DC.span(48, 5);
let d = DC.span(55, 5);
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
let given_sym = SPair(given_val.into(), a);
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -329,7 +328,7 @@ fn desugars_adjacent_interpolated_vars() {
assert_matches!(
sut.next(),
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);

View File

@ -27,6 +27,8 @@ mod parser;
mod state;
mod trace;
pub mod util;
pub use error::{FinalizeError, ParseError};
pub use lower::{Lower, LowerIter, ParsedObject};
pub use parser::{FinalizedParser, Parsed, ParsedResult, Parser};

View File

@ -0,0 +1,92 @@
// TAMER parsing framework utilities
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Utilities that make parsing practical and convenient in different
//! contexts.
//!
//! The utilities presented here do not introduce any new capabilities into
//! the system;
//! they provide wrappers around core functionality that make it easier
//! to use outside of the domain of the parsing system itself.
use crate::{span::Span, sym::SymbolId};
use super::{Object, ParseState, Token};
use std::fmt::Display;
/// Alias for a [`ParseState`] that expands source tokens of type `T`.
///
/// A parser satisfying this alias accepts `T` as its token type and
/// yields [`Expansion<T, O>`] as its object type,
///   where `O` is the type of the objects produced by expansion.
///
/// NB: This uses trait alias syntax,
///   which is an unstable Rust feature at the time of writing.
pub trait ExpandingParseState<T: Token, O: Object> =
ParseState<Token = T, Object = Expansion<T, O>>;
/// Represents an expansion operation on some source token of type `T`.
///
/// `T` is the type of the source token being expanded and `O` is the
/// type of the objects that are generated from it.
///
/// See variants and [`ExpandingParseState`] for more information.
#[derive(Debug, PartialEq, Eq)]
pub enum Expansion<T, O: Object> {
/// A token of type `O` has been derived from the source token and
/// should be merged into the target token stream.
Expanded(O),
/// Expansion is complete and the source token should be replaced with
/// the inner `T`.
DoneExpanding(T),
}
// Marker impl:
//   allows `Expansion` to serve as the `Object` type of a `ParseState`,
//   which is what `ExpandingParseState` requires.
impl<T: Token, O: Object> Object for Expansion<T, O> {}
/// A [`SymbolId`] with a corresponding [`Span`].
///
/// This newtype is required because foreign traits
/// (such as [`Display`])
/// cannot be implemented on tuples at the time of writing.
///
/// Field `0` holds the symbol and field `1` holds its span.
/// Conversions to and from `(SymbolId, Span)` are available so that the
/// pair can still be handled as a plain tuple where convenient.
#[derive(Debug, PartialEq, Eq)]
pub struct SPair(pub SymbolId, pub Span);
impl Token for SPair {
    /// Name used to identify this kind of token.
    fn ir_name() -> &'static str {
        "Generic Symbol"
    }

    /// The span paired with the symbol
    ///   (the second field of the pair).
    fn span(&self) -> Span {
        self.1
    }
}
impl Display for SPair {
    /// Render the pair by delegating to the symbol's own [`Display`]
    ///   implementation;
    ///     the span does not contribute to the output.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self(symbol, _) = self;
        Display::fmt(symbol, f)
    }
}
impl From<(SymbolId, Span)> for SPair {
    /// Wrap a plain `(symbol, span)` tuple in an [`SPair`],
    ///   preserving the order of its components.
    fn from(pair: (SymbolId, Span)) -> Self {
        Self(pair.0, pair.1)
    }
}
impl Into<(SymbolId, Span)> for SPair {
fn into(self) -> (SymbolId, Span) {
match self {
Self(sym, span) => (sym, span),
}
}
}