tamer: nir::desugar: Initial interpolation desugaring
This demonstrates how desugaring of interpolated strings will work, testing one of the happy paths. The remaining work to be done is largely refactoring; handling some other cases; and errors. Each of those items is marked with `todo!`s. I'm pleased with how this is turning out, and I'm excited to see diagnostic reporting within the specification string using the derived spans once I get a bit further along; this robust system is going to be much more helpful to developers than the existing system in XSLT. This also eliminates the ~50% performance degradation mentioned in a recent commit by eliminating the SugaredNirSymbol enum and replacing it with a newtype; this is a much better approach, though it doesn't change that I do need to eventually address the excessive `memcpy`s on hot code paths. DEV-13156main
parent
66f09fa4c9
commit
4a7fe887d5
167
tamer/src/nir.rs
167
tamer/src/nir.rs
|
@ -57,14 +57,13 @@ use crate::{
|
|||
fmt::{DisplayWrapper, TtQuote},
|
||||
parse::{Object, Token},
|
||||
span::{Span, UNKNOWN_SPAN},
|
||||
sym::{st::quick_contains_byte, GlobalSymbolResolve, SymbolId},
|
||||
sym::SymbolId,
|
||||
xir::{
|
||||
attr::{Attr, AttrSpan},
|
||||
fmt::TtXmlAttr,
|
||||
QName,
|
||||
},
|
||||
};
|
||||
use memchr::memchr;
|
||||
use std::{
|
||||
convert::Infallible,
|
||||
error::Error,
|
||||
|
@ -76,6 +75,8 @@ pub use parse::{
|
|||
NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
|
||||
};
|
||||
|
||||
use NirSymbolTy::*;
|
||||
|
||||
/// IR that is "near" the source code,
|
||||
/// without its syntactic sugar.
|
||||
///
|
||||
|
@ -89,18 +90,34 @@ pub use parse::{
|
|||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum PlainNir {
|
||||
Todo,
|
||||
|
||||
TplParamOpen(Plain<{ TplParamIdent }>, Plain<{ DescLiteral }>),
|
||||
TplParamClose(Span),
|
||||
TplParamText(Plain<{ StringLiteral }>),
|
||||
TplParamValue(Plain<{ TplParamIdent }>),
|
||||
}
|
||||
|
||||
type Plain<const TY: NirSymbolTy> = PlainNirSymbol<TY>;
|
||||
|
||||
impl Token for PlainNir {
|
||||
fn ir_name() -> &'static str {
|
||||
"Plain NIR"
|
||||
}
|
||||
|
||||
/// Identifying span of a token.
|
||||
///
|
||||
/// An _identifying span_ is a selection of one of the (potentially
|
||||
/// many) spans associated with a token that is most likely to be
|
||||
/// associated with the identity of that token.
|
||||
fn span(&self) -> Span {
|
||||
use PlainNir::*;
|
||||
|
||||
match self {
|
||||
Todo => UNKNOWN_SPAN,
|
||||
TplParamOpen(dfn, _) => dfn.span(),
|
||||
TplParamClose(span) => *span,
|
||||
TplParamText(text) => text.span(),
|
||||
TplParamValue(ident) => ident.span(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -113,6 +130,16 @@ impl Display for PlainNir {
|
|||
|
||||
match self {
|
||||
Todo => write!(f, "TODO"),
|
||||
TplParamOpen(dfn, desc) => {
|
||||
write!(f, "open template param {dfn} ({desc})")
|
||||
}
|
||||
TplParamClose(_span) => write!(f, "close template param"),
|
||||
TplParamText(text) => {
|
||||
write!(f, "open template param default text {text}")
|
||||
}
|
||||
TplParamValue(ident) => {
|
||||
write!(f, "value of template param {ident}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -247,12 +274,9 @@ impl Display for NirSymbolTy {
|
|||
}
|
||||
}
|
||||
|
||||
/// A ([`SymbolId`], [`Span`]) pair in an attribute value context that may
|
||||
/// require desugaring and interpretation within the context of a template
|
||||
/// application.
|
||||
///
|
||||
/// Interpolated values require desugaring;
|
||||
/// see [`DesugarNir`] for more information.
|
||||
/// A plain (desugared) ([`SymbolId`], [`Span`]) pair representing an
|
||||
/// attribute value that may need to be interpreted within the context of
|
||||
/// a template application.
|
||||
///
|
||||
/// _This object must be kept small_,
|
||||
/// since it is used in objects that aggregate portions of the token
|
||||
|
@ -261,26 +285,79 @@ impl Display for NirSymbolTy {
|
|||
/// and therefore cannot be optimized away as other portions of the IR.
|
||||
/// As such,
|
||||
/// this does not nest enums.
|
||||
///
|
||||
/// For the sugared form that the user may have entered themselves,
|
||||
/// see [`SugaredNirSymbol`].
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum SugaredNirSymbol<const TY: NirSymbolTy> {
|
||||
/// The symbol contains an expression representing the concatenation of
|
||||
/// any number of literals and metavariables
|
||||
/// (referred to as "string interpolation" in many languages).
|
||||
Interpolate(SymbolId, Span),
|
||||
|
||||
/// It's not ripe yet.
|
||||
///
|
||||
/// No parsing has been performed.
|
||||
pub enum PlainNirSymbol<const TY: NirSymbolTy> {
|
||||
Todo(SymbolId, Span),
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> PlainNirSymbol<TY> {
|
||||
pub fn span(&self) -> Span {
|
||||
match self {
|
||||
Self::Todo(_, span) => *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> Display for PlainNirSymbol<TY> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Todo(sym, _) => write!(
|
||||
f,
|
||||
"TODO plain {TY} {fmt_sym}",
|
||||
fmt_sym = TtQuote::wrap(sym),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A ([`SymbolId`], [`Span`]) pair in an attribute value context that may
|
||||
/// require desugaring.
|
||||
///
|
||||
/// For more information on desugaring,
|
||||
/// see [`DesugarNir`].
|
||||
///
|
||||
/// _This object must be kept small_,
|
||||
/// since it is used in objects that aggregate portions of the token
|
||||
/// stream,
|
||||
/// which must persist in memory for a short period of time,
|
||||
/// and therefore cannot be optimized away as other portions of the IR.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct SugaredNirSymbol<const TY: NirSymbolTy>(SymbolId, Span);
|
||||
|
||||
impl<const TY: NirSymbolTy> Token for SugaredNirSymbol<TY> {
|
||||
fn ir_name() -> &'static str {
|
||||
// TODO: Include type?
|
||||
"Sugared NIR Symbol"
|
||||
}
|
||||
|
||||
fn span(&self) -> Span {
|
||||
match self {
|
||||
Self(_, span) => *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> Display for SugaredNirSymbol<TY> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self(sym, _span) => write!(
|
||||
f,
|
||||
"possibly-sugared {TY} {fmt_sym}",
|
||||
fmt_sym = TtQuote::wrap(sym),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Force developer to be conscious of any changes in size;
|
||||
// see `SugaredNirSymbol` docs for more information.
|
||||
assert_eq_size!(SugaredNirSymbol<{ NirSymbolTy::AnyIdent }>, u128);
|
||||
|
||||
/// Character whose presence in a string indicates that interpolation
|
||||
/// parsing must occur.
|
||||
pub const INTERPOLATE_CHAR: u8 = b'{';
|
||||
assert_eq_size!(
|
||||
SugaredNirSymbol<{ NirSymbolTy::AnyIdent }>,
|
||||
(SymbolId, Span)
|
||||
);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum PkgType {
|
||||
|
@ -293,47 +370,16 @@ pub enum PkgType {
|
|||
Mod,
|
||||
}
|
||||
|
||||
/// Whether a value represented by the provided [`SymbolId`] requires
|
||||
/// interpolation.
|
||||
///
|
||||
/// _NB: This dereferences the provided [`SymbolId`] if it is dynamically
|
||||
/// allocated._
|
||||
///
|
||||
/// The provided value requires interpolation if it contains,
|
||||
/// anywhere in the string,
|
||||
/// the character [`INTERPOLATE_CHAR`].
|
||||
/// This does not know if the string will parse correctly;
|
||||
/// that job is left for desugaring,
|
||||
/// and so this will flag syntactically invalid interpolated strings
|
||||
/// (which is expected).
|
||||
#[inline]
|
||||
fn needs_interpolation(val: SymbolId) -> bool {
|
||||
// We can skip pre-interned symbols that we know cannot include the
|
||||
// interpolation character.
|
||||
// TODO: Abstract into `sym::symbol` module.
|
||||
let ch = INTERPOLATE_CHAR;
|
||||
quick_contains_byte(val, ch)
|
||||
.or_else(|| memchr(ch, val.lookup_str().as_bytes()).map(|_| true))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> TryFrom<(SymbolId, Span)> for SugaredNirSymbol<TY> {
|
||||
type Error = NirAttrParseError;
|
||||
|
||||
fn try_from((val, span): (SymbolId, Span)) -> Result<Self, Self::Error> {
|
||||
match needs_interpolation(val) {
|
||||
true => Ok(SugaredNirSymbol::Interpolate(val, span)),
|
||||
false => Ok(SugaredNirSymbol::Todo(val, span)),
|
||||
}
|
||||
impl<const TY: NirSymbolTy> From<(SymbolId, Span)> for SugaredNirSymbol<TY> {
|
||||
fn from((val, span): (SymbolId, Span)) -> Self {
|
||||
Self(val, span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> TryFrom<Attr> for SugaredNirSymbol<TY> {
|
||||
type Error = NirAttrParseError;
|
||||
|
||||
fn try_from(attr: Attr) -> Result<Self, Self::Error> {
|
||||
impl<const TY: NirSymbolTy> From<Attr> for SugaredNirSymbol<TY> {
|
||||
fn from(attr: Attr) -> Self {
|
||||
match attr {
|
||||
Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).try_into(),
|
||||
Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -394,6 +440,3 @@ impl Diagnostic for NirAttrParseError {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
|
|
@ -22,14 +22,14 @@
|
|||
//! For more information on the flavors of NIR,
|
||||
//! see [the parent module](super).
|
||||
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
use crate::{
|
||||
diagnose::{AnnotatedSpan, Diagnostic},
|
||||
parse::{NoContext, ParseState, Transition, TransitionResult},
|
||||
};
|
||||
mod interp;
|
||||
|
||||
use super::{PlainNir, SugaredNir};
|
||||
use crate::{
|
||||
diagnose::{AnnotatedSpan, Diagnostic},
|
||||
parse::{prelude::*, NoContext},
|
||||
};
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Default)]
|
||||
pub enum DesugarNir {
|
||||
|
|
|
@ -0,0 +1,541 @@
|
|||
// Interpolation parser for desugaring NIR
|
||||
//
|
||||
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Interpolation parser for desugaring NIR.
|
||||
//!
|
||||
//! String interpolation occurs for attributes containing curly braces
|
||||
//! (`{` and `}`)
|
||||
//! during TAME's parsing phase,
|
||||
//! before template expansion.
|
||||
//! An attribute containing curly braces is referred to in TAME as an
|
||||
//! _interpolation specification_.
|
||||
//!
|
||||
//! Interpolation is used as a form of short-hand syntactic sugar for
|
||||
//! concatenation of string literals and template metavariables,
|
||||
//! whose result is then processed by the template system.
|
||||
//! For example,
|
||||
//! consider the following code:
|
||||
//!
|
||||
//! ```xml
|
||||
//! <c:value-of name="foo{@bar@}baz" />
|
||||
//! ```
|
||||
//!
|
||||
//! The string `foo{@bar@}baz` is the interpolation specification.
|
||||
//! This ends up desugaring into the [`PlainNir`] equivalent of this:
|
||||
//!
|
||||
//! ```xml
|
||||
//! <param name="@___dsgr_01@"
|
||||
//! desc="Generated from interpolated string `foo{@bar@}baz`">
|
||||
//! <text>foo</text>
|
||||
//! <param-value name="@bar@" />
|
||||
//! <text>baz</text>
|
||||
//! </param>
|
||||
//!
|
||||
//! <c:value-of name="@___dsgr_01@" />
|
||||
//! <!-- ^^^^^^^^^^^^
|
||||
//! replacement -->
|
||||
//! ```
|
||||
//!
|
||||
//! Since interpolation currently supports only string literals and template
|
||||
//! metavariables within specifications,
|
||||
//! they are only semantically valid within the context of a template
|
||||
//! definition.
|
||||
//! This desugaring process does not check for this context;
|
||||
//! errors would occur later on in the lowering pipeline.
|
||||
//!
|
||||
//! Since interpolation desugars into [`PlainNir`],
|
||||
//! and not source XML,
|
||||
//! generated `param`s will automatically be interpreted downstream in
|
||||
//! the lowering pipeline as if they were hoisted to the template
|
||||
//! definition header.
|
||||
//!
|
||||
//! If a string does not require interpolation,
|
||||
//! then it is interpreted as a literal within the context of the template
|
||||
//! system and is echoed back unchanged.
|
||||
//!
|
||||
//! NB: All attributes are reasoned about as string literals until they
|
||||
//! contain no metavariables,
|
||||
//! which may require expansion via the template system;
|
||||
//! the [`NirSymbolTy`] represents the type that the literal will
|
||||
//! _ultimately_ be parsed as once that time comes.
|
||||
//!
|
||||
//! Desugared Spans
|
||||
//! ---------------
|
||||
//! [`Span`]s for the generated tokens are derived from the specification
|
||||
//! string.
|
||||
//! In the above example,
|
||||
//! we have:
|
||||
//!
|
||||
//! ```xml
|
||||
//! <!--
|
||||
//! foo{@bar@}baz
|
||||
//! [-] [---] [-]
|
||||
//! A B C
|
||||
//! -->
|
||||
//!
|
||||
//! <text>foo</text>
|
||||
//! <!-- A -->
|
||||
//!
|
||||
//! <param-value name="@bar@" />
|
||||
//! <!-- B -->
|
||||
//!
|
||||
//! <text>baz</text>
|
||||
//! <!-- C -->
|
||||
//! ```
|
||||
//!
|
||||
//! This means that any errors that subsequently occur due to contextual
|
||||
//! issues will be mapped back to a source location that makes sense to
|
||||
//! the user with a high level of granularity.
|
||||
|
||||
use memchr::memchr;
|
||||
|
||||
use super::super::{NirSymbolTy, PlainNir, PlainNirSymbol, SugaredNirSymbol};
|
||||
use crate::{
|
||||
diagnose::{AnnotatedSpan, Diagnostic},
|
||||
fmt::{DisplayWrapper, TtQuote},
|
||||
parse::{prelude::*, NoContext},
|
||||
span::Span,
|
||||
sym::{
|
||||
st::quick_contains_byte, GlobalSymbolIntern, GlobalSymbolResolve,
|
||||
SymbolId,
|
||||
},
|
||||
};
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
// Expose variants for enums defined in this module to reduce verbosity.
|
||||
use InterpObject::*;
|
||||
use InterpState::*;
|
||||
|
||||
/// Object resulting from interpolation.
|
||||
///
|
||||
/// The provided [`SugaredNirSymbol`] is interpreted as a specification for
|
||||
/// interpolation.
|
||||
/// This specification is expanded into a sequence of [`PlainNir`] tokens
|
||||
/// via the [`Expanded`](Self::Expanded) variant,
|
||||
/// representing the definition of a template parameter whose default
|
||||
/// value will yield the equivalent of the specification.
|
||||
///
|
||||
/// After expansion,
|
||||
/// the original [`SugaredNirSymbol`] is expected to be replaced with a
|
||||
/// [`PlainNirSymbol`] via the [`ReplaceSym`](Self::ReplaceSym) variant,
|
||||
/// containing the name of the newly-generated metavariable.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum InterpObject<const TY: NirSymbolTy> {
|
||||
/// A token generated as part of interpolation which is to be merged
|
||||
/// into the NIR token stream.
|
||||
Expanded(PlainNir),
|
||||
|
||||
/// Interpolation has resulted in the creation of a new metavariable
|
||||
/// which should take place of the original NIR symbol containing the
|
||||
/// interpolation specification.
|
||||
ReplaceSym(PlainNirSymbol<TY>),
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> Token for InterpObject<TY> {
|
||||
fn ir_name() -> &'static str {
|
||||
"Interpolation"
|
||||
}
|
||||
|
||||
fn span(&self) -> Span {
|
||||
match self {
|
||||
Self::Expanded(nir) => nir.span(),
|
||||
Self::ReplaceSym(nir_sym) => nir_sym.span(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> Object for InterpObject<TY> {}
|
||||
|
||||
impl<const TY: NirSymbolTy> Display for InterpObject<TY> {
    /// Describe the object,
    ///   distinguishing tokens produced by expansion from the symbol that
    ///   replaces the original specification.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            Self::Expanded(ref nir) => write!(f, "interpolated {nir}"),

            Self::ReplaceSym(ref nir_sym) => {
                write!(f, "interpolation specification replacement {nir_sym}")
            }
        }
    }
}
|
||||
|
||||
/// A generated identifier.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct GenIdentSymbolId(SymbolId);
|
||||
|
||||
/// A dereferenced [`SymbolId`] representing an interpolation specification.
|
||||
///
|
||||
/// This saves us from having to continuously dereference the symbol for
|
||||
/// each state change.
|
||||
type SpecSlice = &'static str;
|
||||
|
||||
/// Offset within a [`SpecSlice`] to begin parsing at for the current
|
||||
/// [`InterpState`].
|
||||
type SpecOffset = usize;
|
||||
|
||||
/// Interpolation desugaring operation.
|
||||
///
|
||||
/// This parser continuously yields the provided interpolation specification
|
||||
/// token as lookahead until it has completed its parsing,
|
||||
/// allowing it to stream without buffering expansion tokens.
|
||||
///
|
||||
/// The parser has two primary contexts:
|
||||
///
|
||||
/// 1. The outer literal context represented by [`ParseLiteralAt`]; and
|
||||
/// 2. The inner interpolation context
|
||||
/// (conceptually between curly braces)
|
||||
/// represented by [`ParseInterpAt`].
|
||||
///
|
||||
/// For more information,
|
||||
/// see the [parent module](super).
|
||||
#[derive(Debug, PartialEq, Eq, Default)]
|
||||
pub enum InterpState<const TY: NirSymbolTy> {
|
||||
/// The next token will be inspected to determine whether it requires
|
||||
/// interpolation.
|
||||
#[default]
|
||||
Ready,
|
||||
|
||||
/// Interpolation will continue in a literal context at the provided
|
||||
/// offset relative to the start of the specification string.
|
||||
ParseLiteralAt(SpecSlice, GenIdentSymbolId, SpecOffset),
|
||||
|
||||
/// Like [`ParseLiteralAt`],
|
||||
/// except in the context of an interpolated value
|
||||
/// (after having encountered a curly brace).
|
||||
ParseInterpAt(SpecSlice, GenIdentSymbolId, SpecOffset),
|
||||
|
||||
/// Expansion has completed;
|
||||
/// the final step is to replace the provided specification string
|
||||
/// with a reference to the generated template param.
|
||||
FinishSym(SpecSlice, GenIdentSymbolId),
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> Display for InterpState<TY> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
use InterpState::*;
|
||||
|
||||
match self {
|
||||
Ready => write!(
|
||||
f,
|
||||
"expecting a new symbol to determine whether \
|
||||
interpolation is necessary"
|
||||
),
|
||||
|
||||
ParseLiteralAt(spec, _, x) => write!(
|
||||
f,
|
||||
"parsing specification {fmt_spec} at offset {x} \
|
||||
in a literal context",
|
||||
fmt_spec = TtQuote::wrap(spec),
|
||||
),
|
||||
|
||||
ParseInterpAt(spec, _, x) => write!(
|
||||
f,
|
||||
"parsing specification {fmt_spec} at offset {x} \
|
||||
in an interpolated value context",
|
||||
fmt_spec = TtQuote::wrap(spec),
|
||||
),
|
||||
|
||||
FinishSym(spec, GenIdentSymbolId(gen)) => write!(
|
||||
f,
|
||||
"ready to replace specification {fmt_spec} \
|
||||
with expanded metavariable reference {fmt_gen}",
|
||||
fmt_spec = TtQuote::wrap(spec),
|
||||
fmt_gen = TtQuote::wrap(gen),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
|
||||
type Token = SugaredNirSymbol<TY>;
|
||||
type Object = InterpObject<TY>;
|
||||
type Error = InterpError;
|
||||
|
||||
fn parse_token(
|
||||
self,
|
||||
tok: Self::Token,
|
||||
_: NoContext,
|
||||
) -> TransitionResult<Self> {
|
||||
match (self, tok) {
|
||||
// When receiving a new symbol,
|
||||
// we must make a quick determination as to whether it
|
||||
// requires desugaring.
|
||||
// Since the vast majority of symbols we encounter will require
|
||||
// no interpolation,
|
||||
// we first perform a separate check that is designed to
|
||||
// filter out non-interpolated strings quickly,
|
||||
// before we start to parse.
|
||||
// Symbols that require no interpoolation are simply echoed back.
|
||||
(Ready, SugaredNirSymbol(sym, span)) => {
|
||||
if needs_interpolation(sym) {
|
||||
Self::begin_expansion(sym, span)
|
||||
} else {
|
||||
// No desugaring is needed.
|
||||
Self::yield_symbol(sym, span)
|
||||
}
|
||||
}
|
||||
|
||||
// The outermost parsing context is that of the literal,
|
||||
// where a sequence of characters up to `{` stand for
|
||||
// themselves.
|
||||
(
|
||||
ParseLiteralAt(s, gen_param, offset),
|
||||
SugaredNirSymbol(sym, span),
|
||||
) => {
|
||||
if offset == s.len() {
|
||||
// We've reached the end of the specification string.
|
||||
// Since we're in the outermost (literal) context,
|
||||
// we're safe to complete.
|
||||
return Self::end_expansion(s, gen_param, sym, span);
|
||||
}
|
||||
|
||||
// Note that this is the position _relative to the offset_,
|
||||
// not the beginning of the string.
|
||||
match s[offset..].chars().position(|ch| ch == '{') {
|
||||
Some(0) => todo!("no literal prefix"),
|
||||
|
||||
// Everything from the offset until the curly brace is a
|
||||
// literal.
|
||||
Some(pos) => {
|
||||
let literal = s[offset..pos].intern();
|
||||
let span_text =
|
||||
span.context().span_or_zz(offset, pos - offset);
|
||||
|
||||
let text = PlainNir::TplParamText(
|
||||
PlainNirSymbol::Todo(literal, span_text),
|
||||
);
|
||||
|
||||
Transition(ParseInterpAt(s, gen_param, pos + 1))
|
||||
.ok(Expanded(text))
|
||||
.with_lookahead(SugaredNirSymbol(sym, span))
|
||||
}
|
||||
|
||||
None => todo!("remaining literal"),
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing is continuing after having encountered an
|
||||
// interpolation delimiter `{`.
|
||||
// This is an inner context that cannot complete without being
|
||||
// explicitly closed,
|
||||
// and cannot not be nested.
|
||||
(
|
||||
ParseInterpAt(s, gen_param, offset),
|
||||
SugaredNirSymbol(sym, span),
|
||||
) => {
|
||||
// TODO: Make sure offset exists, avoid panic
|
||||
// TODO: Prevent nested `{`.
|
||||
|
||||
// Note that this is the position _relative to the offset_,
|
||||
// not the beginning of the string.
|
||||
match s[offset..].chars().position(|ch| ch == '}') {
|
||||
Some(0) => todo!("empty interp"),
|
||||
|
||||
Some(rel_pos) => {
|
||||
let end = offset + rel_pos;
|
||||
|
||||
// The value `@foo` in `{@foo@}`.
|
||||
let value = s[offset..end].intern();
|
||||
|
||||
// Since rel_pos is 0-indexed,
|
||||
// it is also the length of the value string.
|
||||
let span_value =
|
||||
span.context().span_or_zz(offset, rel_pos);
|
||||
|
||||
let param_value = PlainNir::TplParamValue(
|
||||
PlainNirSymbol::Todo(value, span_value),
|
||||
);
|
||||
|
||||
// Continue parsing one character past the '}',
|
||||
// back in a literal context.
|
||||
Transition(ParseLiteralAt(s, gen_param, end + 1))
|
||||
.ok(Expanded(param_value))
|
||||
.with_lookahead(SugaredNirSymbol(sym, span))
|
||||
}
|
||||
|
||||
None => todo!("missing closing '}}'"),
|
||||
}
|
||||
}
|
||||
|
||||
// Interpolation has completed,
|
||||
// and we're ready to replace the provided symbol
|
||||
// (the interpolation specification)
|
||||
// with a metavariable referencing the parameter that we just
|
||||
// generated.
|
||||
(
|
||||
FinishSym(_, GenIdentSymbolId(gen_param)),
|
||||
SugaredNirSymbol(_, span),
|
||||
) => Self::yield_symbol(gen_param, span),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_accepting(&self, _: &Self::Context) -> bool {
|
||||
self == &Self::Ready
|
||||
}
|
||||
}
|
||||
|
||||
impl<const TY: NirSymbolTy> InterpState<TY> {
|
||||
/// Yield the final result of this operation in place of the original
|
||||
/// specification string,
|
||||
/// which may or may not have required interpolation.
|
||||
///
|
||||
/// If no interpolation was required,
|
||||
/// `sym` will be the original string;
|
||||
/// otherwise,
|
||||
/// `sym` ought to be a metavariable referencing the generated
|
||||
/// template param.
|
||||
///
|
||||
/// This transitions back to [`Ready`] and finally releases the
|
||||
/// lookahead symbol.
|
||||
fn yield_symbol(sym: SymbolId, span: Span) -> TransitionResult<Self> {
|
||||
Transition(Ready).ok(ReplaceSym(PlainNirSymbol::Todo(sym, span)))
|
||||
}
|
||||
|
||||
/// Begin expansion of an interpolation specification by generating a
|
||||
/// new template parameter that will hold the interpolated body.
|
||||
///
|
||||
/// For more information on identifier generation,
|
||||
/// see [`gen_tpl_param_ident_at_offset`].
|
||||
fn begin_expansion(sym: SymbolId, span: Span) -> TransitionResult<Self> {
|
||||
let gen_param = gen_tpl_param_ident_at_offset(span);
|
||||
|
||||
// Description is not interned since there's no use in
|
||||
// wasting time hashing something that will not be
|
||||
// referenced
|
||||
// (it's just informative for a human).
|
||||
// Note that this means that tests cannot compare SymbolId.
|
||||
let gen_desc = format!(
|
||||
"Generated from interpolated string {}",
|
||||
TtQuote::wrap(sym)
|
||||
)
|
||||
.clone_uninterned();
|
||||
|
||||
let GenIdentSymbolId(gen_param_sym) = gen_param;
|
||||
|
||||
let open = PlainNir::TplParamOpen(
|
||||
PlainNirSymbol::Todo(gen_param_sym, span),
|
||||
PlainNirSymbol::Todo(gen_desc, span),
|
||||
);
|
||||
|
||||
Transition(ParseLiteralAt(sym.lookup_str(), gen_param, 0))
|
||||
.ok(Expanded(open))
|
||||
.with_lookahead(SugaredNirSymbol(sym, span))
|
||||
}
|
||||
|
||||
/// Complete expansion of an interpolation specification string.
|
||||
///
|
||||
/// This closes the newly generated template param `gen_param`,
|
||||
/// and then transitions to [`FinishSym`].
|
||||
fn end_expansion(
|
||||
s: SpecSlice,
|
||||
gen_param: GenIdentSymbolId,
|
||||
sym: SymbolId,
|
||||
span: Span,
|
||||
) -> TransitionResult<Self> {
|
||||
let close = PlainNir::TplParamClose(span);
|
||||
|
||||
// We have one last thing to do before we're complete,
|
||||
// which is to perform the final replacement of the original
|
||||
// symbol that we've been fed
|
||||
// (the specification string).
|
||||
Transition(FinishSym(s, gen_param))
|
||||
.ok(Expanded(close))
|
||||
.with_lookahead(SugaredNirSymbol(sym, span))
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a value represented by the provided [`SymbolId`] requires
|
||||
/// interpolation.
|
||||
///
|
||||
/// _NB: This dereferences the provided [`SymbolId`] if it is dynamically
|
||||
/// allocated._
|
||||
///
|
||||
/// The provided value requires interpolation if it contains,
|
||||
/// anywhere in the string,
|
||||
/// the character [`}`].
|
||||
/// This uses [`memchr()`] on the raw byte representation of the symbol to
|
||||
/// quickly determine whether a string is only a literal and does not
|
||||
/// require any interpolation,
|
||||
/// which will be the case the vast majority of the time.
|
||||
///
|
||||
/// Since this operates on raw bytes,
|
||||
/// but we later operate on the symbol as a [`str`],
|
||||
/// it is not useful to return the located byte offset if an opening brace
|
||||
/// is found;
|
||||
/// that can be re-located quickly enough.
|
||||
#[inline]
|
||||
fn needs_interpolation(val: SymbolId) -> bool {
|
||||
let ch = b'{';
|
||||
|
||||
// We can skip pre-interned symbols that we know cannot include the
|
||||
// interpolation character.
|
||||
// TODO: Abstract into `sym::symbol` module.
|
||||
quick_contains_byte(val, ch)
|
||||
.or_else(|| memchr(ch, val.lookup_str().as_bytes()).map(|_| true))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Generate a deterministic template param identifier name that is unique
|
||||
/// relative to the offset in the source context (file) of the given
|
||||
/// [`Span`].
|
||||
///
|
||||
/// Since template params are local to the containing template,
|
||||
/// this is always safe.
|
||||
/// We are able to simply use the offset of the provided span since we will
|
||||
/// never generate more than one unique identifier at the exact same offset.
|
||||
///
|
||||
/// The identifier will include `"___dsgr"`,
|
||||
/// meaning "desugar",
|
||||
/// and serves as a unique string that can be used to track down this code
|
||||
/// that generates it.
|
||||
///
|
||||
/// Hygiene is not a concern since identifiers cannot be redeclared,
|
||||
/// so conflicts with manually-created identifiers will result in a
|
||||
/// compilation error
|
||||
/// (albeit a cryptic one);
|
||||
/// the hope is that the informally-compiler-reserved `___` convention
|
||||
/// mitigates that unlikely occurrence.
|
||||
/// Consequently,
|
||||
/// we _must_ intern to ensure that error can occur
|
||||
/// (we cannot use [`GlobalSymbolIntern::clone_uninterned`]).
|
||||
#[inline]
|
||||
fn gen_tpl_param_ident_at_offset(span: Span) -> GenIdentSymbolId {
|
||||
GenIdentSymbolId(format!("@___dsgr_{:x}@", span.offset()).intern())
|
||||
}
|
||||
|
||||
/// Error while desugaring an interpolation specification.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum InterpError {}
|
||||
|
||||
impl Display for InterpError {
|
||||
fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
// No errors yet.
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for InterpError {}
|
||||
|
||||
impl Diagnostic for InterpError {
|
||||
fn describe(&self) -> Vec<AnnotatedSpan> {
|
||||
// No errors yet.
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
|
@ -0,0 +1,125 @@
|
|||
// Interpolation parser for desugaring NIR
|
||||
//
|
||||
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::{super::super::NirSymbolTy::*, *};
|
||||
use crate::{
|
||||
nir::{PlainNirSymbol, SugaredNirSymbol},
|
||||
parse::Parsed,
|
||||
span::dummy::{DUMMY_CONTEXT as DC, *},
|
||||
sym::GlobalSymbolResolve,
|
||||
};
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use Parsed::*;
|
||||
|
||||
type Sut<const TY: NirSymbolTy> = InterpState<TY>;
|
||||
|
||||
// While it'd be semantically valid to desugar a literal into a template
|
||||
// param,
|
||||
// it'd certainly be wasteful
|
||||
// (and would only be optimized away by a future lowering operation).
|
||||
// Best to just leave it be.
|
||||
#[test]
fn does_not_desugar_literal_only() {
    let sym = "foo".into();

    // A single symbol containing no `{`.
    let toks = vec![SugaredNirSymbol::<{ StringLiteral }>(sym, S1)];

    // The very same symbol and span are expected back,
    //   with no expansion tokens preceding it.
    let expected = Ok(vec![Object(ReplaceSym(PlainNirSymbol::Todo(sym, S1)))]);
    let given = Sut::parse(toks.into_iter()).collect();

    assert_eq!(expected, given);
}
|
||||
|
||||
// When ending with an interpolated variable,
|
||||
// the parser should recognize that we've returned to the outer literal
|
||||
// context and permit successful termination of the specification string.
|
||||
//
// The objects asserted below are expected in this order:
//   `TplParamOpen`, `TplParamText`, `TplParamValue`, `TplParamClose`,
//   and finally `ReplaceSym`,
//     after which the stream must end.
#[test]
|
||||
fn desugars_literal_with_ending_var() {
|
||||
let given_val = "foo{@bar@}";
|
||||
//               [-] [---]|
|
||||
//               0 2 4   8|
|
||||
//               |B    C  |
|
||||
//               [--------]
|
||||
//               0        9
|
||||
//                   A
|
||||
|
||||
// Spans matching the diagram above:
//   `a` covers the whole specification string,
//   `b` the literal prefix `foo`,
//   and `c` the metavariable `@bar@`.
// Going by the diagram,
//   the arguments read as (offset, length).
let a = DC.span(0, 10);
|
||||
let b = DC.span(0, 3);
|
||||
let c = DC.span(4, 5);
|
||||
|
||||
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
|
||||
let toks = vec![given_sym];
|
||||
|
||||
// The name expected for the generated template param is computed from
//   span `a`
//     (presumably via the same helper that the parser itself uses).
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
|
||||
let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
|
||||
let expect_text = PlainNirSymbol::Todo("foo".into(), b);
|
||||
let expect_param = PlainNirSymbol::Todo("@bar@".into(), c);
|
||||
|
||||
let mut sut = Sut::parse(toks.into_iter());
|
||||
|
||||
// This is the template param generated from the interpolated string.
|
||||
// The generated string is not interned,
|
||||
// so we cannot match on its symbol,
|
||||
// but that's okay since we don't entirely care what it says beyond
|
||||
// containing the original string that it was derived from to provide
|
||||
// helpful information to a human reader.
|
||||
assert_matches!(
|
||||
sut.next(),
|
||||
Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
|
||||
dfn,
|
||||
PlainNirSymbol::Todo(desc_str, desc_span)
|
||||
))))) if dfn == expect_dfn
|
||||
&& desc_str.lookup_str().contains(given_val)
|
||||
&& desc_span == a
|
||||
);
|
||||
|
||||
// Note how the span associated with this is `B`,
|
||||
// which is derived from the relevant portion of the original
|
||||
// specification string.
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Object(Expanded(PlainNir::TplParamText(expect_text)))))
|
||||
);
|
||||
|
||||
// This is the actual metavariable reference,
|
||||
// pulled out of the interpolated portion of the given value.
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Object(Expanded(PlainNir::TplParamValue(expect_param))))),
|
||||
);
|
||||
|
||||
// This is an object generated from user input,
|
||||
// so the closing span has to identify what it was generated from.
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
|
||||
);
|
||||
|
||||
// Finally,
|
||||
// we replace the original provided attribute
|
||||
// (the interpolation specification)
|
||||
// with a metavariable reference to the generated parameter.
|
||||
assert_matches!(
|
||||
sut.next(),
|
||||
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
|
||||
if given_replace == expect_name && given_span == a
|
||||
);
|
||||
|
||||
// Nothing further may be emitted after the replacement.
assert_eq!(sut.next(), None);
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
// Base tests for NIR
|
||||
//
|
||||
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::NirSymbolTy::*;
|
||||
use crate::{
|
||||
nir::{NirSymbolTy, SugaredNirSymbol},
|
||||
span::dummy::*,
|
||||
};
|
||||
|
||||
/// A type for testing that can hold any [`SymbolId`] without worry of type
|
||||
/// validations
|
||||
/// (so that [`NirSymbolTy`] can be ignored to test other logic).
|
||||
///
/// This fixes the const parameter of [`SugaredNirSymbol`] to
///   `StringLiteral`.
type AnySugaredNirSymbol = SugaredNirSymbol<{ StringLiteral }>;
|
||||
|
||||
/// Sugared NIR should recognize when there will be no need for desugaring
|
||||
/// (by consequence of it detecting when there _is_ such a need).
|
||||
#[test]
|
||||
fn from_pair_plain_string() {
|
||||
// No sugar added.
|
||||
let sym = "foo".into();
|
||||
|
||||
// Converting the `(symbol, span)` pair is expected to yield the `Todo`
//   variant with both the symbol and the span carried through unchanged.
assert_eq!(Ok(AnySugaredNirSymbol::Todo(sym, S1)), (sym, S1).try_into(),);
|
||||
}
|
||||
|
||||
/// Strings requiring interpolation should be detected,
|
||||
/// but not yet parsed.
|
||||
/// This means that we detect strings that contain the interpolation
|
||||
/// character `{` and mark them for further processing _even if it is not
|
||||
/// balanced_.
|
||||
///
|
||||
/// A separate test checks whether type parsing is deferred.
|
||||
#[test]
|
||||
fn from_pair_interpolation_string() {
|
||||
// Every entry contains at least one `{`.
let tests = [
|
||||
// This is the form that we'd expect.
|
||||
"foo{@bar@}baz",
|
||||
// This doesn't make sense,
|
||||
// but we don't know that yet;
|
||||
// it still requires interpolation to parse.
|
||||
"foo{bar}baz",
|
||||
// This is not even valid syntax,
|
||||
// but hey,
|
||||
// we still have to mark it so that we can find that out when we
|
||||
// go to interpolate during desugaring.
|
||||
"foo{",
|
||||
// This will be a trivial replacement,
|
||||
// but it's still interpolation.
|
||||
"{@foo@}",
|
||||
// Absolute nonsense,
|
||||
// but you get the drill.
|
||||
"{",
|
||||
];
|
||||
|
||||
// Each string is converted into a symbol via `Into::into` and then
//   paired with `S1`;
//     the conversion is expected to produce the `Interpolate` variant
//     holding that same symbol and span.
// The third argument is the message shown if the assertion fails.
tests.into_iter().map(Into::into).for_each(|sym| {
|
||||
assert_eq!(
|
||||
Ok(AnySugaredNirSymbol::Interpolate(sym, S1)),
|
||||
(sym, S1).try_into(),
|
||||
"must recognize `{sym}` as needing interpolation",
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// We cannot possibly validate whether a string can be parsed into its
|
||||
// target type until we've interpolated it.
|
||||
#[test]
|
||||
fn from_pair_interpolation_delays_type_validation() {
|
||||
// This is the type we're hoping to parse into,
|
||||
const DEST_TY: NirSymbolTy = NumLiteral;
|
||||
// but we cannot know yet because interpolation is needed.
|
||||
let sym = "{@maybe_a_number@}".into();
|
||||
|
||||
// The conversion is expected to succeed with the `Interpolate` variant
//   for the `NumLiteral` destination type,
//     rather than producing an error at this point.
assert_eq!(
|
||||
Ok(SugaredNirSymbol::<{ DEST_TY }>::Interpolate(sym, S2)),
|
||||
(sym, S2).try_into(),
|
||||
);
|
||||
}
|
Loading…
Reference in New Issue