tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items is
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degradation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156
main
Mike Gerwitz 2022-11-05 00:08:50 -04:00
parent 66f09fa4c9
commit 4a7fe887d5
5 changed files with 777 additions and 160 deletions

View File

@ -57,14 +57,13 @@ use crate::{
fmt::{DisplayWrapper, TtQuote},
parse::{Object, Token},
span::{Span, UNKNOWN_SPAN},
sym::{st::quick_contains_byte, GlobalSymbolResolve, SymbolId},
sym::SymbolId,
xir::{
attr::{Attr, AttrSpan},
fmt::TtXmlAttr,
QName,
},
};
use memchr::memchr;
use std::{
convert::Infallible,
error::Error,
@ -76,6 +75,8 @@ pub use parse::{
NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
};
use NirSymbolTy::*;
/// IR that is "near" the source code,
/// without its syntactic sugar.
///
@ -89,18 +90,34 @@ pub use parse::{
#[derive(Debug, PartialEq, Eq)]
pub enum PlainNir {
/// Placeholder token that carries no data;
///   its span is reported as unknown.
Todo,
/// Opens a template param definition,
///   carrying the param identifier and its description.
TplParamOpen(Plain<{ TplParamIdent }>, Plain<{ DescLiteral }>),
/// Closes a template param definition;
///   the [`Span`] is used as the identifying span of this token.
TplParamClose(Span),
/// Literal text within a template param.
TplParamText(Plain<{ StringLiteral }>),
/// Reference to the value of another template param,
///   identified by name.
TplParamValue(Plain<{ TplParamIdent }>),
}
/// Shorthand for a [`PlainNirSymbol`] of the given [`NirSymbolTy`].
type Plain<const TY: NirSymbolTy> = PlainNirSymbol<TY>;
impl Token for PlainNir {
fn ir_name() -> &'static str {
"Plain NIR"
}
/// Identifying span of a token.
///
/// An _identifying span_ is a selection of one of the (potentially
/// many) spans associated with a token that is most likely to be
/// associated with the identity of that token.
fn span(&self) -> Span {
use PlainNir::*;
match self {
Todo => UNKNOWN_SPAN,
TplParamOpen(dfn, _) => dfn.span(),
TplParamClose(span) => *span,
TplParamText(text) => text.span(),
TplParamValue(ident) => ident.span(),
}
}
}
@ -113,6 +130,16 @@ impl Display for PlainNir {
match self {
Todo => write!(f, "TODO"),
TplParamOpen(dfn, desc) => {
write!(f, "open template param {dfn} ({desc})")
}
TplParamClose(_span) => write!(f, "close template param"),
TplParamText(text) => {
write!(f, "open template param default text {text}")
}
TplParamValue(ident) => {
write!(f, "value of template param {ident}")
}
}
}
}
@ -247,12 +274,9 @@ impl Display for NirSymbolTy {
}
}
/// A ([`SymbolId`], [`Span`]) pair in an attribute value context that may
/// require desugaring and interpretation within the context of a template
/// application.
///
/// Interpolated values require desugaring;
/// see [`DesugarNir`] for more information.
/// A plain (desugared) ([`SymbolId`], [`Span`]) pair representing an
/// attribute value that may need to be interpreted within the context of
/// a template application.
///
/// _This object must be kept small_,
/// since it is used in objects that aggregate portions of the token
@ -261,26 +285,79 @@ impl Display for NirSymbolTy {
/// and therefore cannot be optimized away as other portions of the IR.
/// As such,
/// this does not nest enums.
///
/// For the sugared form that the user may have entered themselves,
/// see [`SugaredNirSymbol`].
#[derive(Debug, PartialEq, Eq)]
pub enum SugaredNirSymbol<const TY: NirSymbolTy> {
/// The symbol contains an expression representing the concatenation of
/// any number of literals and metavariables
/// (referred to as "string interpolation" in many languages).
Interpolate(SymbolId, Span),
/// It's not ripe yet.
///
/// No parsing has been performed.
pub enum PlainNirSymbol<const TY: NirSymbolTy> {
Todo(SymbolId, Span),
}
impl<const TY: NirSymbolTy> PlainNirSymbol<TY> {
    /// Span associated with this symbol.
    pub fn span(&self) -> Span {
        // There is only a single variant at present,
        //   so the pattern is irrefutable.
        let Self::Todo(_, sym_span) = self;

        *sym_span
    }
}
impl<const TY: NirSymbolTy> Display for PlainNirSymbol<TY> {
    /// Describe the symbol by its target type and quoted value.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // There is only a single variant at present,
        //   so the pattern is irrefutable.
        let Self::Todo(sym, _) = self;
        let fmt_sym = TtQuote::wrap(sym);

        write!(f, "TODO plain {TY} {fmt_sym}")
    }
}
/// A ([`SymbolId`], [`Span`]) pair in an attribute value context that may
/// require desugaring.
///
/// This is a newtype over the pair;
///   constructing it performs no inspection of the symbol,
///   and so a value of this type has not yet been determined to need
///   (or not need)
///   desugaring.
///
/// For more information on desugaring,
/// see [`DesugarNir`].
///
/// _This object must be kept small_,
/// since it is used in objects that aggregate portions of the token
/// stream,
/// which must persist in memory for a short period of time,
/// and therefore cannot be optimized away as other portions of the IR.
#[derive(Debug, PartialEq, Eq)]
pub struct SugaredNirSymbol<const TY: NirSymbolTy>(SymbolId, Span);
impl<const TY: NirSymbolTy> Token for SugaredNirSymbol<TY> {
    /// Name of this IR.
    fn ir_name() -> &'static str {
        // TODO: Include type?
        "Sugared NIR Symbol"
    }

    /// Span of the (possibly sugared) symbol.
    fn span(&self) -> Span {
        let Self(_, sym_span) = self;

        *sym_span
    }
}
impl<const TY: NirSymbolTy> Display for SugaredNirSymbol<TY> {
    /// Describe the symbol by its target type and quoted value,
    ///   noting that it may still contain sugar.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self(sym, _span) = self;
        let fmt_sym = TtQuote::wrap(sym);

        write!(f, "possibly-sugared {TY} {fmt_sym}")
    }
}
// Force developer to be conscious of any changes in size;
// see `SugaredNirSymbol` docs for more information.
assert_eq_size!(SugaredNirSymbol<{ NirSymbolTy::AnyIdent }>, u128);
/// Character whose presence in a string indicates that interpolation
/// parsing must occur.
pub const INTERPOLATE_CHAR: u8 = b'{';
assert_eq_size!(
SugaredNirSymbol<{ NirSymbolTy::AnyIdent }>,
(SymbolId, Span)
);
#[derive(Debug, PartialEq, Eq)]
pub enum PkgType {
@ -293,47 +370,16 @@ pub enum PkgType {
Mod,
}
/// Whether a value represented by the provided [`SymbolId`] requires
/// interpolation.
///
/// _NB: This dereferences the provided [`SymbolId`] if it is dynamically
/// allocated._
///
/// The provided value requires interpolation if it contains,
/// anywhere in the string,
/// the character [`INTERPOLATE_CHAR`].
/// This does not know if the string will parse correctly;
/// that job is left for desugaring,
/// and so this will flag syntactically invalid interpolated strings
/// (which is expected).
#[inline]
fn needs_interpolation(val: SymbolId) -> bool {
// We can skip pre-interned symbols that we know cannot include the
// interpolation character.
// TODO: Abstract into `sym::symbol` module.
let ch = INTERPOLATE_CHAR;
quick_contains_byte(val, ch)
.or_else(|| memchr(ch, val.lookup_str().as_bytes()).map(|_| true))
.unwrap_or(false)
}
impl<const TY: NirSymbolTy> TryFrom<(SymbolId, Span)> for SugaredNirSymbol<TY> {
type Error = NirAttrParseError;
fn try_from((val, span): (SymbolId, Span)) -> Result<Self, Self::Error> {
match needs_interpolation(val) {
true => Ok(SugaredNirSymbol::Interpolate(val, span)),
false => Ok(SugaredNirSymbol::Todo(val, span)),
}
impl<const TY: NirSymbolTy> From<(SymbolId, Span)> for SugaredNirSymbol<TY> {
fn from((val, span): (SymbolId, Span)) -> Self {
Self(val, span)
}
}
impl<const TY: NirSymbolTy> TryFrom<Attr> for SugaredNirSymbol<TY> {
type Error = NirAttrParseError;
fn try_from(attr: Attr) -> Result<Self, Self::Error> {
impl<const TY: NirSymbolTy> From<Attr> for SugaredNirSymbol<TY> {
fn from(attr: Attr) -> Self {
match attr {
Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).try_into(),
Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).into(),
}
}
}
@ -394,6 +440,3 @@ impl Diagnostic for NirAttrParseError {
}
}
}
#[cfg(test)]
mod test;

View File

@ -22,14 +22,14 @@
//! For more information on the flavors of NIR,
//! see [the parent module](super).
use std::{error::Error, fmt::Display};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
parse::{NoContext, ParseState, Transition, TransitionResult},
};
mod interp;
use super::{PlainNir, SugaredNir};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
parse::{prelude::*, NoContext},
};
use std::{error::Error, fmt::Display};
#[derive(Debug, PartialEq, Eq, Default)]
pub enum DesugarNir {

View File

@ -0,0 +1,541 @@
// Interpolation parser for desugaring NIR
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Interpolation parser for desugaring NIR.
//!
//! String interpolation occurs for attributes containing curly braces
//! (`{` and `}`)
//! during TAME's parsing phase,
//! before template expansion.
//! An attribute containing curly braces is referred to in TAME as an
//! _interpolation specification_.
//!
//! Interpolation is used as a form of short-hand syntactic sugar for
//! concatenation of string literals and template metavariables,
//! whose result is then processed by the template system.
//! For example,
//! consider the following code:
//!
//! ```xml
//! <c:value-of name="foo{@bar@}baz" />
//! ```
//!
//! The string `foo{@bar@}baz` is the interpolation specification.
//! This ends up desugaring into the [`PlainNir`] equivalent of this:
//!
//! ```xml
//! <param name="@___dsgr_01@"
//! desc="Generated from interpolated string `foo{@bar@}baz`">
//! <text>foo</text>
//! <param-value name="@bar@" />
//! <text>baz</text>
//! </param>
//!
//! <c:value-of name="@___dsgr_01@" />
//! <!-- ^^^^^^^^^^^^
//! replacement -->
//! ```
//!
//! Since interpolation currently supports only string literals and template
//! metavariables within specifications,
//! they are only semantically valid within the context of a template
//! definition.
//! This desugaring process does not check for this context;
//! errors would occur later on in the lowering pipeline.
//!
//! Since interpolation desugars into [`PlainNir`],
//! and not source XML,
//! generated `param`s will automatically be interpreted downstream in
//! the lowering pipeline as if they were hoisted to the template
//! definition header.
//!
//! If a string does not require interpolation,
//! then it is interpreted as a literal within the context of the template
//! system and is echoed back unchanged.
//!
//! NB: All attributes are reasoned about as string literals until they
//! contain no metavariables,
//! which may require expansion via the template system;
//! the [`NirSymbolTy`] represents the type that the literal will
//! _ultimately_ be parsed as once that time comes.
//!
//! Desugared Spans
//! ---------------
//! [`Span`]s for the generated tokens are derived from the specification
//! string.
//! In the above example,
//! we have:
//!
//! ```xml
//! <!--
//! foo{@bar@}baz
//! [-] [---] [-]
//! A B C
//! -->
//!
//! <text>foo</text>
//! <!-- A -->
//!
//! <param-value name="@bar@">
//! <!-- B -->
//!
//! <text>baz</text>
//! <!-- C -->
//! ```
//!
//! This means that any errors that subsequently occur due to contextual
//! issues will be mapped back to a source location that makes sense to
//! the user with a high level of granularity.
use memchr::memchr;
use super::super::{NirSymbolTy, PlainNir, PlainNirSymbol, SugaredNirSymbol};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
fmt::{DisplayWrapper, TtQuote},
parse::{prelude::*, NoContext},
span::Span,
sym::{
st::quick_contains_byte, GlobalSymbolIntern, GlobalSymbolResolve,
SymbolId,
},
};
use std::{error::Error, fmt::Display};
// Expose variants for enums defined in this module to reduce verbosity.
use InterpObject::*;
use InterpState::*;
/// Object resulting from interpolation.
///
/// The provided [`SugaredNirSymbol`] is interpreted as a specification for
/// interpolation.
/// This specification is expanded into a sequence of [`PlainNir`] tokens
/// via the [`Expanded`](Self::Expanded) variant,
/// representing the definition of a template parameter whose default
/// value will yield the equivalent of the specification.
///
/// After expansion,
/// the original [`SugaredNirSymbol`] is expected to be replaced with a
/// [`PlainNirSymbol`] via the [`ReplaceSym`](Self::ReplaceSym) variant,
/// containing the name of the newly-generated metavariable.
#[derive(Debug, PartialEq, Eq)]
pub enum InterpObject<const TY: NirSymbolTy> {
/// A token generated as part of interpolation which is to be merged
/// into the NIR token stream.
Expanded(PlainNir),
/// Interpolation has resulted in the creation of a new metavariable
/// which should take place of the original NIR symbol containing the
/// interpolation specification.
///
/// When the provided symbol did not require interpolation,
/// this variant is also used to echo back the original symbol
/// unchanged.
ReplaceSym(PlainNirSymbol<TY>),
}
impl<const TY: NirSymbolTy> Token for InterpObject<TY> {
    /// Name of this IR.
    fn ir_name() -> &'static str {
        "Interpolation"
    }

    /// Span of the wrapped token or symbol.
    fn span(&self) -> Span {
        match self {
            Self::ReplaceSym(replacement) => replacement.span(),
            Self::Expanded(generated) => generated.span(),
        }
    }
}

impl<const TY: NirSymbolTy> Object for InterpObject<TY> {}
impl<const TY: NirSymbolTy> Display for InterpObject<TY> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
InterpObject::Expanded(nir) => write!(f, "interpolated {nir}"),
InterpObject::ReplaceSym(nir_sym) => {
write!(f, "interpolation specification replacement {nir_sym}")
}
}
}
}
/// A generated identifier.
///
/// This wraps the [`SymbolId`] of an identifier that was generated during
/// desugaring,
/// such as the one produced by [`gen_tpl_param_ident_at_offset`].
#[derive(Debug, PartialEq, Eq)]
pub struct GenIdentSymbolId(SymbolId);
/// A dereferenced [`SymbolId`] representing an interpolation specification.
///
/// This saves us from having to continuously dereference the symbol for
/// each state change.
type SpecSlice = &'static str;
/// Offset within a [`SpecSlice`] to begin parsing at for the current
/// [`InterpState`].
///
/// This value is used directly as the start of a slice of the
/// [`SpecSlice`] when searching for the next delimiter.
type SpecOffset = usize;
/// Interpolation desugaring operation.
///
/// This parser continuously yields the provided interpolation specification
/// token as lookahead until it has completed its parsing,
/// allowing it to stream without buffering expansion tokens.
///
/// The parser has two primary contexts:
///
/// 1. The outer literal context represented by [`ParseLiteralAt`]; and
/// 2. The inner interpolation context
/// (conceptually between curly braces)
/// represented by [`ParseInterpAt`].
///
/// For more information,
/// see the [parent module](super).
#[derive(Debug, PartialEq, Eq, Default)]
pub enum InterpState<const TY: NirSymbolTy> {
/// The next token will be inspected to determine whether it requires
/// interpolation.
///
/// This is the default state and the only accepting state.
#[default]
Ready,
/// Interpolation will continue in a literal context at the provided
/// offset relative to the start of the specification string.
///
/// The fields are the specification string,
/// the identifier of the generated template param,
/// and the offset at which to resume parsing.
ParseLiteralAt(SpecSlice, GenIdentSymbolId, SpecOffset),
/// Like [`ParseLiteralAt`],
/// except in the context of an interpolated value
/// (after having encountered a curly brace).
ParseInterpAt(SpecSlice, GenIdentSymbolId, SpecOffset),
/// Expansion has completed;
/// the final step is to replace the provided specification string
/// with a reference to the generated template param.
FinishSym(SpecSlice, GenIdentSymbolId),
}
impl<const TY: NirSymbolTy> Display for InterpState<TY> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use InterpState::*;
match self {
Ready => write!(
f,
"expecting a new symbol to determine whether \
interpolation is necessary"
),
ParseLiteralAt(spec, _, x) => write!(
f,
"parsing specification {fmt_spec} at offset {x} \
in a literal context",
fmt_spec = TtQuote::wrap(spec),
),
ParseInterpAt(spec, _, x) => write!(
f,
"parsing specification {fmt_spec} at offset {x} \
in an interpolated value context",
fmt_spec = TtQuote::wrap(spec),
),
FinishSym(spec, GenIdentSymbolId(gen)) => write!(
f,
"ready to replace specification {fmt_spec} \
with expanded metavariable reference {fmt_gen}",
fmt_spec = TtQuote::wrap(spec),
fmt_gen = TtQuote::wrap(gen),
),
}
}
}
impl<const TY: NirSymbolTy> ParseState for InterpState<TY> {
    type Token = SugaredNirSymbol<TY>;
    type Object = InterpObject<TY>;
    type Error = InterpError;

    /// Advance desugaring of an interpolation specification by one step.
    ///
    /// The specification symbol `tok` is re-yielded as lookahead after
    ///   every step until the replacement symbol is finally emitted,
    ///     so each call sees the same token and emits at most one object.
    ///
    /// All offsets into the specification string are _byte_ offsets,
    ///   since they are used to slice the string.
    ///
    /// # Panics
    /// A number of cases are not yet implemented and will panic via
    ///   `todo!`:
    ///     a specification with no literal prefix before `{`;
    ///     a trailing literal after the final `}`;
    ///     an empty interpolation `{}`;
    ///     and a missing closing `}`.
    fn parse_token(
        self,
        tok: Self::Token,
        _: NoContext,
    ) -> TransitionResult<Self> {
        match (self, tok) {
            // When receiving a new symbol,
            //   we must make a quick determination as to whether it
            //   requires desugaring.
            // Since the vast majority of symbols we encounter will require
            //   no interpolation,
            //     we first perform a separate check that is designed to
            //     filter out non-interpolated strings quickly,
            //       before we start to parse.
            // Symbols that require no interpolation are simply echoed back.
            (Ready, SugaredNirSymbol(sym, span)) => {
                if needs_interpolation(sym) {
                    Self::begin_expansion(sym, span)
                } else {
                    // No desugaring is needed.
                    Self::yield_symbol(sym, span)
                }
            }

            // The outermost parsing context is that of the literal,
            //   where a sequence of characters up to `{` stand for
            //   themselves.
            (
                ParseLiteralAt(s, gen_param, offset),
                SugaredNirSymbol(sym, span),
            ) => {
                if offset == s.len() {
                    // We've reached the end of the specification string.
                    // Since we're in the outermost (literal) context,
                    //   we're safe to complete.
                    return Self::end_expansion(s, gen_param, sym, span);
                }

                // Note that this is the byte position
                //   _relative to the offset_,
                //     not the beginning of the string.
                // `find` is used rather than counting `char`s so that the
                //   result remains a valid slice index when the
                //   specification contains multi-byte characters.
                match s[offset..].find('{') {
                    Some(0) => todo!("no literal prefix"),

                    // Everything from the offset until the curly brace is a
                    //   literal.
                    Some(rel_pos) => {
                        // Absolute position of the `{` within `s`.
                        let end = offset + rel_pos;

                        let literal = s[offset..end].intern();

                        // Since `rel_pos` is 0-indexed relative to the
                        //   offset,
                        //     it is also the length of the literal.
                        // NOTE(review): the offset provided here is
                        //   relative to the start of the specification
                        //   string and does not include the offset of
                        //   `span` itself — confirm whether the base
                        //   offset of `span` ought to be added.
                        let span_text =
                            span.context().span_or_zz(offset, rel_pos);

                        let text = PlainNir::TplParamText(
                            PlainNirSymbol::Todo(literal, span_text),
                        );

                        // Continue parsing one character past the '{',
                        //   in an interpolation context.
                        Transition(ParseInterpAt(s, gen_param, end + 1))
                            .ok(Expanded(text))
                            .with_lookahead(SugaredNirSymbol(sym, span))
                    }

                    None => todo!("remaining literal"),
                }
            }

            // Parsing is continuing after having encountered an
            //   interpolation delimiter `{`.
            // This is an inner context that cannot complete without being
            //   explicitly closed,
            //     and cannot be nested.
            (
                ParseInterpAt(s, gen_param, offset),
                SugaredNirSymbol(sym, span),
            ) => {
                // TODO: Make sure offset exists, avoid panic
                // TODO: Prevent nested `{`.

                // Note that this is the byte position
                //   _relative to the offset_,
                //     not the beginning of the string.
                match s[offset..].find('}') {
                    Some(0) => todo!("empty interp"),

                    Some(rel_pos) => {
                        // Absolute position of the `}` within `s`.
                        let end = offset + rel_pos;

                        // The value `@foo@` in `{@foo@}`.
                        let value = s[offset..end].intern();

                        // Since rel_pos is 0-indexed,
                        //   it is also the length of the value string.
                        // NOTE(review): as with the literal context,
                        //   confirm whether the base offset of `span`
                        //   ought to be added.
                        let span_value =
                            span.context().span_or_zz(offset, rel_pos);

                        let param_value = PlainNir::TplParamValue(
                            PlainNirSymbol::Todo(value, span_value),
                        );

                        // Continue parsing one character past the '}',
                        //   back in a literal context.
                        Transition(ParseLiteralAt(s, gen_param, end + 1))
                            .ok(Expanded(param_value))
                            .with_lookahead(SugaredNirSymbol(sym, span))
                    }

                    None => todo!("missing closing '}}'"),
                }
            }

            // Interpolation has completed,
            //   and we're ready to replace the provided symbol
            //     (the interpolation specification)
            //     with a metavariable referencing the parameter that we just
            //     generated.
            (
                FinishSym(_, GenIdentSymbolId(gen_param)),
                SugaredNirSymbol(_, span),
            ) => Self::yield_symbol(gen_param, span),
        }
    }

    /// Parsing may end only when no specification is partway through
    ///   expansion.
    fn is_accepting(&self, _: &Self::Context) -> bool {
        self == &Self::Ready
    }
}
impl<const TY: NirSymbolTy> InterpState<TY> {
    /// Emit the symbol that is to take the place of the original
    ///   specification string,
    ///     whether or not interpolation was required.
    ///
    /// When no interpolation was required,
    ///   `sym` is the original string;
    ///     otherwise,
    ///       `sym` ought to be a metavariable referencing the generated
    ///       template param.
    ///
    /// This transitions back to [`Ready`] and,
    ///   by providing no lookahead,
    ///   finally releases the lookahead symbol.
    fn yield_symbol(sym: SymbolId, span: Span) -> TransitionResult<Self> {
        let replacement = PlainNirSymbol::Todo(sym, span);

        Transition(Ready).ok(ReplaceSym(replacement))
    }

    /// Open a newly generated template param that will hold the
    ///   interpolated body of the specification `sym`.
    ///
    /// Parsing then proceeds in a literal context from the beginning of
    ///   the specification string.
    ///
    /// For more information on identifier generation,
    ///   see [`gen_tpl_param_ident_at_offset`].
    fn begin_expansion(sym: SymbolId, span: Span) -> TransitionResult<Self> {
        let gen_param = gen_tpl_param_ident_at_offset(span);

        // Description is not interned since there's no use in
        //   wasting time hashing something that will not be
        //   referenced
        //     (it's just informative for a human).
        // Note that this means that tests cannot compare SymbolId.
        let desc_text = format!(
            "Generated from interpolated string {}",
            TtQuote::wrap(sym)
        );
        let gen_desc = desc_text.clone_uninterned();

        // Both the identifier and the description are associated with the
        //   span of the specification that they were derived from.
        let open = PlainNir::TplParamOpen(
            PlainNirSymbol::Todo(gen_param.0, span),
            PlainNirSymbol::Todo(gen_desc, span),
        );

        let spec = sym.lookup_str();

        Transition(ParseLiteralAt(spec, gen_param, 0))
            .ok(Expanded(open))
            .with_lookahead(SugaredNirSymbol(sym, span))
    }

    /// Close the generated template param `gen_param`,
    ///   completing expansion of the specification string.
    ///
    /// This transitions to [`FinishSym`],
    ///   which is responsible for the final replacement of the original
    ///   symbol that we've been fed
    ///     (the specification string).
    fn end_expansion(
        s: SpecSlice,
        gen_param: GenIdentSymbolId,
        sym: SymbolId,
        span: Span,
    ) -> TransitionResult<Self> {
        let lookahead = SugaredNirSymbol(sym, span);

        Transition(FinishSym(s, gen_param))
            .ok(Expanded(PlainNir::TplParamClose(span)))
            .with_lookahead(lookahead)
    }
}
/// Determine whether the value represented by the provided [`SymbolId`]
///   requires interpolation.
///
/// _NB: This dereferences the provided [`SymbolId`] if it is dynamically
///   allocated._
///
/// A value requires interpolation if it contains,
///   anywhere in the string,
///   the opening delimiter `{`.
/// No attempt is made to determine whether the string will parse
///   correctly.
///
/// This uses [`memchr()`] on the raw byte representation of the symbol to
///   quickly determine whether a string is only a literal and does not
///   require any interpolation,
///     which will be the case the vast majority of the time.
///
/// Since this operates on raw bytes,
///   but we later operate on the symbol as a [`str`],
///   it is not useful to return the located byte offset if an opening brace
///   is found;
///     that can be re-located quickly enough.
#[inline]
fn needs_interpolation(val: SymbolId) -> bool {
    const OPEN_DELIM: u8 = b'{';

    // We can skip pre-interned symbols that we know cannot include the
    //   interpolation character;
    //     only when no quick answer is available is the symbol
    //     dereferenced and scanned.
    // TODO: Abstract into `sym::symbol` module.
    match quick_contains_byte(val, OPEN_DELIM) {
        Some(answer) => answer,
        None => memchr(OPEN_DELIM, val.lookup_str().as_bytes()).is_some(),
    }
}
/// Generate a deterministic template param identifier name that is unique
///   relative to the offset in the source context (file) of the given
///   [`Span`].
///
/// The name has the form `@___dsgr_N@`,
///   where `N` is the offset of `span` in lowercase hexadecimal.
///
/// Since template params are local to the containing template,
///   this is always safe.
/// We are able to simply use the offset of the provided span since we will
///   never generate more than one unique identifier at the exact same
///   offset.
///
/// The string `"___dsgr"` means "desugar",
///   and serves as a unique string that can be used to track down this code
///   that generates it.
///
/// Hygiene is not a concern since identifiers cannot be redeclared,
///   so conflicts with manually-created identifiers will result in a
///   compilation error
///     (albeit a cryptic one);
///       the hope is that the informally-compiler-reserved `___` convention
///       mitigates that unlikely occurrence.
/// Consequently,
///   we _must_ intern to ensure that error can occur
///     (we cannot use [`GlobalSymbolIntern::clone_uninterned`]).
#[inline]
fn gen_tpl_param_ident_at_offset(span: Span) -> GenIdentSymbolId {
    let offset = span.offset();
    let name = format!("@___dsgr_{offset:x}@");

    GenIdentSymbolId(name.intern())
}
/// Error while desugaring an interpolation specification.
///
/// This enum currently has no variants,
/// and so no value of this type can be constructed.
#[derive(Debug, PartialEq)]
pub enum InterpError {}
impl Display for InterpError {
    fn fmt(&self, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // No errors yet:
        //   this type has no variants,
        //     and so there is no value that this could be invoked on.
        match *self {}
    }
}
impl Error for InterpError {}

impl Diagnostic for InterpError {
    fn describe(&self) -> Vec<AnnotatedSpan> {
        // No errors yet:
        //   this type has no variants,
        //     and so there is no value that this could be invoked on.
        match *self {}
    }
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,125 @@
// Interpolation parser for desugaring NIR
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::{super::super::NirSymbolTy::*, *};
use crate::{
nir::{PlainNirSymbol, SugaredNirSymbol},
parse::Parsed,
span::dummy::{DUMMY_CONTEXT as DC, *},
sym::GlobalSymbolResolve,
};
use std::assert_matches::assert_matches;
use Parsed::*;
type Sut<const TY: NirSymbolTy> = InterpState<TY>;
// A string containing no interpolation delimiter is echoed back as-is.
// Desugaring a literal into a template param would be semantically valid,
//   but it would be wasteful
//     (and would only be optimized away by a future lowering operation),
//     so it is best left alone.
#[test]
fn does_not_desugar_literal_only() {
    let sym = "foo".into();

    let given = SugaredNirSymbol::<{ StringLiteral }>(sym, S1);
    let expected = vec![Object(ReplaceSym(PlainNirSymbol::Todo(sym, S1)))];

    assert_eq!(
        Ok(expected),
        Sut::parse(vec![given].into_iter()).collect(),
    );
}
// When ending with an interpolated variable,
// the parser should recognize that we've returned to the outer literal
// context and permit successful termination of the specification string.
#[test]
fn desugars_literal_with_ending_var() {
let given_val = "foo{@bar@}";
//               [-] [---]|
//               0 2 4   8|
//               |B    C  |
//               [--------]
//               0        9
//                    A
//
// Spans are constructed from an offset and a length:
//   A covers the entire specification;
//   B covers the literal `foo`; and
//   C covers the metavariable `@bar@`.
let a = DC.span(0, 10);
let b = DC.span(0, 3);
let c = DC.span(4, 5);
let given_sym = SugaredNirSymbol::<{ StringLiteral }>(given_val.into(), a);
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
let expect_dfn = PlainNirSymbol::Todo(expect_name.into(), a);
let expect_text = PlainNirSymbol::Todo("foo".into(), b);
let expect_param = PlainNirSymbol::Todo("@bar@".into(), c);
let mut sut = Sut::parse(toks.into_iter());
// This is the template param generated from the interpolated string.
// The generated description is not interned,
// so we cannot match on its symbol,
// but that's okay since we don't entirely care what it says beyond
// containing the original string that it was derived from to provide
// helpful information to a human reader.
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(PlainNir::TplParamOpen(
dfn,
PlainNirSymbol::Todo(desc_str, desc_span)
))))) if dfn == expect_dfn
&& desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
// Note how the span associated with this is `B`,
// which is derived from the relevant portion of the original
// specification string.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(PlainNir::TplParamText(expect_text)))))
);
// This is the actual metavariable reference,
// pulled out of the interpolated portion of the given value;
// its span is `C`.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(PlainNir::TplParamValue(expect_param))))),
);
// This is an object generated from user input,
// so the closing span has to identify what it was generated from
// (the whole specification, `A`).
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(PlainNir::TplParamClose(a)))))
);
// Finally,
// we replace the original provided attribute
// (the interpolation specification)
// with a metavariable reference to the generated parameter.
assert_matches!(
sut.next(),
Some(Ok(Object(ReplaceSym(PlainNirSymbol::Todo(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
// The lookahead token has been released and there is no further input.
assert_eq!(sut.next(), None);
}

View File

@ -1,92 +0,0 @@
// Base tests for NIR
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::NirSymbolTy::*;
use crate::{
nir::{NirSymbolTy, SugaredNirSymbol},
span::dummy::*,
};
/// A type for testing that can hold any [`SymbolId`] without worry of type
/// validations
/// (so that [`NirSymbolTy`] can be ignored to test other logic).
type AnySugaredNirSymbol = SugaredNirSymbol<{ StringLiteral }>;
/// Sugared NIR should recognize when there will be no need for desugaring
/// (by consequence of it detecting when there _is_ such a need).
#[test]
fn from_pair_plain_string() {
// No sugar added.
let sym = "foo".into();
assert_eq!(Ok(AnySugaredNirSymbol::Todo(sym, S1)), (sym, S1).try_into(),);
}
/// Strings requiring interpolation should be detected,
/// but not yet parsed.
/// This means that we detect strings that contain the interpolation
/// character `{` and mark them for further processing _even if it is not
/// balanced_.
///
/// A separate test checks whether type parsing is deferred.
#[test]
fn from_pair_interpolation_string() {
let tests = [
// This is the form that we'd expect.
"foo{@bar@}baz",
// This doesn't make sense,
// but we don't know that yet;
// it still requires interpolation to parse.
"foo{bar}baz",
// This is not even valid syntax,
// but hey,
// we still have to mark it so that we can find that out when we
// go to interpolate during desugaring.
"foo{",
// This will be a trivial replacement,
// but it's still interpolation.
"{@foo@}",
// Absolute nonsense,
// but you get the drill.
"{",
];
tests.into_iter().map(Into::into).for_each(|sym| {
assert_eq!(
Ok(AnySugaredNirSymbol::Interpolate(sym, S1)),
(sym, S1).try_into(),
"must recognize `{sym}` as needing interpolation",
);
});
}
// We cannot possibly validate whether a string can be parsed into its
// target type until we've interpolated it.
#[test]
fn from_pair_interpolation_delays_type_validation() {
// This is the type we're hoping to parse into,
const DEST_TY: NirSymbolTy = NumLiteral;
// but we cannot know yet because interpolation is needed.
let sym = "{@maybe_a_number@}".into();
assert_eq!(
Ok(SugaredNirSymbol::<{ DEST_TY }>::Interpolate(sym, S2)),
(sym, S2).try_into(),
);
}