tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of
backward compatibility (BC).

DEV-13156
main
Mike Gerwitz 2022-12-02 00:15:31 -05:00
parent 07dff3ba4e
commit 3050566062
3 changed files with 209 additions and 173 deletions

View File

@ -74,8 +74,6 @@ pub use parse::{
NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
};
use NirSymbolTy::*;
/// IR that is "near" the source code.
///
/// This represents the language of TAME after it has been extracted from
@ -86,13 +84,51 @@ pub enum Nir {
Todo,
TodoAttr(SPair),
TplParamOpen(Plain<{ TplParamIdent }>, Plain<{ DescLiteral }>),
TplParamClose(Span),
TplParamText(Plain<{ StringLiteral }>),
TplParamValue(Plain<{ TplParamIdent }>),
/// Begin the definition of some [`NirEntity`] and place it atop of the
/// stack.
Open(NirEntity, Span),
/// Finish definition of a [`NirEntity`] atop of the stack and pop it.
Close(Span),
/// Bind the given name as an identifier for the entity atop of the
/// stack.
BindIdent(SPair),
/// Reference the value of the given identifier.
///
/// Permissible identifiers and values depend on the context in which
/// this appears.
Ref(SPair),
/// Describe the [`NirEntity`] atop of the stack.
Desc(SPair),
/// A string literal.
///
/// The meaning of this string depends on context.
/// For example,
/// it may represent literate documentation or a literal in a
/// metavariable definition.
Text(SPair),
}
type Plain<const TY: NirSymbolTy> = NirSymbol<TY>;
/// The kind of object that [`Nir`] tokens operate upon.
///
/// An entity is the subject of the surrounding tokens,
///   such as those that bind an identifier to it or describe it.
#[derive(Debug, PartialEq, Eq)]
pub enum NirEntity {
    /// A template parameter,
    ///   also referred to as a metavariable.
    TplParam,
}

impl Display for NirEntity {
    /// Write a short, human-readable name for the entity.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Each variant maps to a fixed label,
        //   so the text is written out directly.
        let label = match self {
            Self::TplParam => "template param (metavariable)",
        };

        f.write_str(label)
    }
}
impl Token for Nir {
fn ir_name() -> &'static str {
@ -109,11 +145,14 @@ impl Token for Nir {
match self {
Todo => UNKNOWN_SPAN,
TodoAttr(SPair(_, span)) => *span,
TplParamOpen(dfn, _) => dfn.span(),
TplParamClose(span) => *span,
TplParamText(text) => text.span(),
TplParamValue(ident) => ident.span(),
TodoAttr(spair) => spair.span(),
Open(_, span) => *span,
Close(span) => *span,
BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
spair.span()
}
}
}
}
@ -126,17 +165,22 @@ impl Display for Nir {
match self {
Todo => write!(f, "TODO"),
TodoAttr(SPair(sym, _)) => write!(f, "TODO Attr {sym}"),
TplParamOpen(dfn, desc) => {
write!(f, "open template param {dfn} ({desc})")
}
TplParamClose(_span) => write!(f, "close template param"),
TplParamText(text) => {
write!(f, "open template param default text {text}")
}
TplParamValue(ident) => {
write!(f, "value of template param {ident}")
TodoAttr(spair) => write!(f, "TODO Attr {spair}"),
Open(entity, _) => write!(f, "open {entity} entity"),
Close(_) => write!(f, "close entity"),
BindIdent(spair) => {
write!(f, "bind identifier {}", TtQuote::wrap(spair))
}
Ref(spair) => write!(f, "ref {}", TtQuote::wrap(spair)),
// TODO: TtQuote doesn't yet escape quotes at the time of writing!
Desc(spair) => write!(f, "description {}", TtQuote::wrap(spair)),
// TODO: Not yet safe to output arbitrary text;
// need to determine how to handle newlines and other types of
// output.
Text(_) => write!(f, "text"),
}
}
}

View File

@ -99,7 +99,7 @@
use memchr::memchr;
use super::{Nir, NirSymbol};
use super::{Nir, NirEntity};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
fmt::{DisplayWrapper, TtQuote},
@ -156,6 +156,13 @@ pub enum InterpState {
#[default]
Ready,
/// Generate an identifier for the expansion template parameter.
GenIdent,
/// Generate a description for the expansion template parameter that is
/// intended as a human-readable debugging string.
GenDesc(GenIdentSymbolId),
/// Interpolation will continue in a literal context at the provided
/// offset relative to the start of the specification string.
ParseLiteralAt(SpecSlice, GenIdentSymbolId, SpecOffset),
@ -182,6 +189,17 @@ impl Display for InterpState {
interpolation is necessary"
),
GenIdent => {
write!(f, "ready to generate template param identifier")
}
GenDesc(GenIdentSymbolId(sym)) => write!(
f,
"ready to generate debug description for generated \
template param {}",
TtQuote::wrap(sym),
),
ParseLiteralAt(spec, _, x) => write!(
f,
"parsing specification {fmt_spec} at offset {x} \
@ -229,13 +247,46 @@ impl ParseState for InterpState {
// Symbols that require no interpolation are simply echoed back.
(Ready, (sym, span)) => {
if needs_interpolation(sym) {
Self::begin_expansion(sym, span)
Transition(GenIdent)
.ok(Expanded(Nir::Open(NirEntity::TplParam, span)))
.with_lookahead((sym, span).into())
} else {
// No desugaring is needed.
Self::yield_symbol(sym, span)
Transition(Ready).ok(DoneExpanding((sym, span).into()))
}
}
(GenIdent, (sym, span)) => {
let gen_ident = gen_tpl_param_ident_at_offset(span);
let GenIdentSymbolId(ident_sym) = gen_ident;
Transition(GenDesc(gen_ident))
.ok(Expanded(Nir::BindIdent(SPair(ident_sym, span))))
.with_lookahead((sym, span).into())
}
(GenDesc(gen_ident), (sym, span)) => {
let s = sym.lookup_str();
// Description is not interned since there's no use in
// wasting time hashing something that will not be
// referenced
// (it's just informative for a human).
// Note that this means that tests cannot compare SymbolId.
let gen_desc = format!(
"Generated from interpolated string {}",
TtQuote::wrap(s)
)
.clone_uninterned();
// Begin parsing in a _literal_ context,
// since interpolation is most commonly utilized with literal
// prefixes.
Transition(ParseLiteralAt(s, gen_ident, 0))
.ok(Expanded(Nir::Desc(SPair(gen_desc, span))))
.with_lookahead((sym, span).into())
}
// The outermost parsing context is that of the literal,
// where a sequence of characters up to `{` stand for
// themselves.
@ -244,7 +295,15 @@ impl ParseState for InterpState {
// We've reached the end of the specification string.
// Since we're in the outermost (literal) context,
// we're safe to complete.
return Self::end_expansion(s, gen_param, sym, span);
return {
// We have one last thing to do before we're complete,
// which is to perform the final replacement of the original
// symbol that we've been fed
// (the specification string).
Transition(FinishSym(s, gen_param))
.ok(Expanded(Nir::Close(span)))
.with_lookahead((sym, span).into())
};
}
// Note that this is the position _relative to the offset_,
@ -267,8 +326,7 @@ impl ParseState for InterpState {
let literal = s[offset..end].intern();
let span_text = span.slice(offset, rel_pos);
let text =
Nir::TplParamText(NirSymbol(literal, span_text));
let text = Nir::Text(SPair(literal, span_text));
Transition(ParseInterpAt(s, gen_param, end + 1))
.ok(Expanded(text))
@ -280,8 +338,7 @@ impl ParseState for InterpState {
let literal = s[offset..].intern();
let span_text = span.slice(offset, s.len() - offset);
let text =
Nir::TplParamText(NirSymbol(literal, span_text));
let text = Nir::Text(SPair(literal, span_text));
// Keep in the current state but update the offset;
// we'll complete parsing next pass.
@ -316,8 +373,7 @@ impl ParseState for InterpState {
// it is also the length of the value string.
let span_value = span.slice(offset, rel_pos);
let param_value =
Nir::TplParamValue(NirSymbol(value, span_value));
let param_value = Nir::Ref(SPair(value, span_value));
// Continue parsing one character past the '}',
// back in a literal context.
@ -335,8 +391,9 @@ impl ParseState for InterpState {
// (the interpolation specification)
// with a metavariable referencing the parameter that we just
// generated.
// We finally release the lookahead symbol.
(FinishSym(_, GenIdentSymbolId(gen_param)), (_, span)) => {
Self::yield_symbol(gen_param, span)
Transition(Ready).ok(DoneExpanding((gen_param, span).into()))
}
}
}
@ -346,79 +403,6 @@ impl ParseState for InterpState {
}
}
// Helper constructors for the transitions that start and finish the
// expansion of an interpolation specification string.
impl InterpState {
/// Yield the final result of this operation in place of the original
/// specification string,
/// which may or may not have required interpolation.
///
/// If no interpolation was required,
/// `sym` will be the original string;
/// otherwise,
/// `sym` ought to be a metavariable referencing the generated
/// template param.
///
/// This transitions back to [`Ready`] and finally releases the
/// lookahead symbol.
fn yield_symbol(sym: SymbolId, span: Span) -> TransitionResult<Self> {
Transition(Ready).ok(DoneExpanding((sym, span).into()))
}
/// Begin expansion of an interpolation specification by generating a
/// new template parameter that will hold the interpolated body.
///
/// The emitted token opens the generated parameter and carries both the
/// generated identifier and a human-readable description,
/// each associated with `span`.
///
/// For more information on identifier generation,
/// see [`gen_tpl_param_ident_at_offset`].
fn begin_expansion(sym: SymbolId, span: Span) -> TransitionResult<Self> {
let gen_param = gen_tpl_param_ident_at_offset(span);
// Description is not interned since there's no use in
// wasting time hashing something that will not be
// referenced
// (it's just informative for a human).
// Note that this means that tests cannot compare SymbolId.
let gen_desc = format!(
"Generated from interpolated string {}",
TtQuote::wrap(sym)
)
.clone_uninterned();
// Unwrap the newtype to get at the symbol of the generated identifier.
let GenIdentSymbolId(gen_param_sym) = gen_param;
// Both the identifier and its description share the span of the
// specification string that they were derived from.
let open = Nir::TplParamOpen(
NirSymbol(gen_param_sym, span),
NirSymbol(gen_desc, span),
);
// Begin parsing in a _literal_ context,
// since interpolation is most commonly utilized with literal
// prefixes.
// NOTE(review): the original symbol is handed back as lookahead,
// presumably so that it is provided again as input to the next
// state; confirm against `with_lookahead`.
Transition(ParseLiteralAt(sym.lookup_str(), gen_param, 0))
.ok(Expanded(open))
.with_lookahead((sym, span).into())
}
/// Complete expansion of an interpolation specification string.
///
/// This closes the newly generated template param `gen_param`,
/// and then transitions to [`FinishSym`].
///
/// The closing token is associated with `span`,
/// and `(sym, span)` is passed along as lookahead.
fn end_expansion(
s: SpecSlice,
gen_param: GenIdentSymbolId,
sym: SymbolId,
span: Span,
) -> TransitionResult<Self> {
let close = Nir::TplParamClose(span);
// We have one last thing to do before we're complete,
// which is to perform the final replacement of the original
// symbol that we've been fed
// (the specification string).
Transition(FinishSym(s, gen_param))
.ok(Expanded(close))
.with_lookahead((sym, span).into())
}
}
/// Whether a value represented by the provided [`SymbolId`] requires
/// interpolation.
///

View File

@ -19,7 +19,7 @@
use super::*;
use crate::{
nir::NirSymbol,
nir::NirEntity,
parse::Parsed,
span::dummy::{DUMMY_CONTEXT as DC, *},
sym::GlobalSymbolResolve,
@ -73,9 +73,9 @@ fn desugars_literal_with_ending_var() {
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
let expect_dfn = NirSymbol(expect_name.into(), a);
let expect_text = NirSymbol("foo".into(), b);
let expect_param = NirSymbol("@bar@".into(), c);
let expect_dfn = SPair(expect_name.into(), a);
let expect_text = SPair("foo".into(), b);
let expect_param = SPair("@bar@".into(), c);
let mut sut = Sut::parse(toks.into_iter());
@ -85,14 +85,19 @@ fn desugars_literal_with_ending_var() {
// but that's okay since we don't entirely care what it says beyond
// containing the original string that it was derived from to provide
// helpful information to a human reader.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamOpen(
dfn,
NirSymbol(desc_str, desc_span)
))))) if dfn == expect_dfn
&& desc_str.lookup_str().contains(given_val)
&& desc_span == a
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
// Note how the span associated with this is `B`,
@ -100,22 +105,19 @@ fn desugars_literal_with_ending_var() {
// specification string.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamText(expect_text)))))
Some(Ok(Object(Expanded(Nir::Text(expect_text)))))
);
// This is the actual metavariable reference,
// pulled out of the interpolated portion of the given value.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamValue(expect_param))))),
Some(Ok(Object(Expanded(Nir::Ref(expect_param))))),
);
// This is an object generated from user input,
// so the closing span has to identify what it was generated from.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamClose(a)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
// Finally,
// we replace the original provided attribute
@ -152,9 +154,9 @@ fn desugars_var_with_ending_literal() {
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
let expect_dfn = NirSymbol(expect_name.into(), a);
let expect_param = NirSymbol("@foo@".into(), b);
let expect_text = NirSymbol("bar".into(), c);
let expect_dfn = SPair(expect_name.into(), a);
let expect_param = SPair("@foo@".into(), b);
let expect_text = SPair("bar".into(), c);
let mut sut = Sut::parse(toks.into_iter());
@ -162,30 +164,32 @@ fn desugars_var_with_ending_literal() {
// See above test for explanations that are not repeated here.
//
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamOpen(
dfn,
NirSymbol(desc_str, desc_span)
))))) if dfn == expect_dfn
&& desc_str.lookup_str().contains(given_val)
&& desc_span == a
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamValue(expect_param))))),
Some(Ok(Object(Expanded(Nir::Ref(expect_param))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamText(expect_text)))))
Some(Ok(Object(Expanded(Nir::Text(expect_text)))))
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamClose(a)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_matches!(
sut.next(),
@ -219,11 +223,11 @@ fn desugars_many_vars_and_literals() {
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
let expect_dfn = NirSymbol(expect_name.into(), a);
let expect_text1 = NirSymbol("foo".into(), b);
let expect_param1 = NirSymbol("@bar@".into(), c);
let expect_text2 = NirSymbol("baz".into(), d);
let expect_param2 = NirSymbol("@quux@".into(), e);
let expect_dfn = SPair(expect_name.into(), a);
let expect_text1 = SPair("foo".into(), b);
let expect_param1 = SPair("@bar@".into(), c);
let expect_text2 = SPair("baz".into(), d);
let expect_param2 = SPair("@quux@".into(), e);
let mut sut = Sut::parse(toks.into_iter());
@ -231,35 +235,37 @@ fn desugars_many_vars_and_literals() {
// See above tests for explanations that are not repeated here.
//
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamOpen(
dfn,
NirSymbol(desc_str, desc_span)
))))) if dfn == expect_dfn
&& desc_str.lookup_str().contains(given_val)
&& desc_span == a
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
assert_eq!(
Ok(vec![
// These two are the same as in previous tests.
Object(Expanded(Nir::TplParamText(expect_text1))),
Object(Expanded(Nir::TplParamValue(expect_param1))),
Object(Expanded(Nir::Text(expect_text1))),
Object(Expanded(Nir::Ref(expect_param1))),
// This pair repeats literals and vars further into the pattern
// to ensure that the parser is able to handle returning to
// previous states and is able to handle inputs at different
// offsets.
Object(Expanded(Nir::TplParamText(expect_text2))),
Object(Expanded(Nir::TplParamValue(expect_param2))),
Object(Expanded(Nir::Text(expect_text2))),
Object(Expanded(Nir::Ref(expect_param2))),
]),
sut.by_ref().take(4).collect(),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamClose(a)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_matches!(
sut.next(),
@ -290,10 +296,10 @@ fn desugars_adjacent_interpolated_vars() {
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
let expect_dfn = NirSymbol(expect_name.into(), a);
let expect_param1 = NirSymbol("@foo@".into(), b);
let expect_param2 = NirSymbol("@bar@".into(), c);
let expect_param3 = NirSymbol("@baz@".into(), d);
let expect_dfn = SPair(expect_name.into(), a);
let expect_param1 = SPair("@foo@".into(), b);
let expect_param2 = SPair("@bar@".into(), c);
let expect_param3 = SPair("@baz@".into(), d);
let mut sut = Sut::parse(toks.into_iter());
@ -301,30 +307,32 @@ fn desugars_adjacent_interpolated_vars() {
// See above tests for explanations that are not repeated here.
//
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamOpen(
dfn,
NirSymbol(desc_str, desc_span)
))))) if dfn == expect_dfn
&& desc_str.lookup_str().contains(given_val)
&& desc_span == a
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
// These are the three adjacent vars.
assert_eq!(
Ok(vec![
Object(Expanded(Nir::TplParamValue(expect_param1))),
Object(Expanded(Nir::TplParamValue(expect_param2))),
Object(Expanded(Nir::TplParamValue(expect_param3))),
Object(Expanded(Nir::Ref(expect_param1))),
Object(Expanded(Nir::Ref(expect_param2))),
Object(Expanded(Nir::Ref(expect_param3))),
]),
sut.by_ref().take(3).collect(),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::TplParamClose(a)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_matches!(
sut.next(),