tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156
main
Mike Gerwitz 2022-12-05 16:32:00 -05:00
parent 3050566062
commit 8d2d273932
5 changed files with 203 additions and 114 deletions

View File

@ -37,7 +37,7 @@ use tamer::{
diagnose::{
AnnotatedSpan, Diagnostic, FsSpanResolver, Reporter, VisualReporter,
},
nir::{XirfToNir, XirfToNirError},
nir::{InterpError, InterpolateNir, Nir, XirfToNir, XirfToNirError},
parse::{
Lower, ParseError, Parsed, ParsedObject, ParsedResult, UnknownToken,
},
@ -136,12 +136,14 @@ fn compile<R: Reporter>(
_,
>::lower::<_, UnrecoverableError>(src, |toks| {
Lower::<XirToXirf<64, RefinedText>, XirfToNir, _>::lower(toks, |nir| {
nir.fold(Ok(()), |x, result| match result {
Ok(_) => x,
Err(e) => {
report_err(&e, reporter, &mut ebuf)?;
x
}
Lower::<XirfToNir, InterpolateNir, _>::lower(nir, |nir| {
nir.fold(Ok(()), |x, result| match result {
Ok(_) => x,
Err(e) => {
report_err(&e, reporter, &mut ebuf)?;
x
}
})
})
})
})?;
@ -295,6 +297,7 @@ pub enum RecoverableError {
XirParseError(ParseError<UnknownToken, xir::Error>),
XirfParseError(ParseError<XirToken, XirToXirfError>),
NirParseError(ParseError<XirfToken<RefinedText>, XirfToNirError>),
InterpError(ParseError<Nir, InterpError>),
}
impl From<io::Error> for UnrecoverableError {
@ -335,6 +338,12 @@ impl From<ParseError<XirfToken<RefinedText>, XirfToNirError>>
}
}
impl From<ParseError<Nir, InterpError>> for RecoverableError {
fn from(e: ParseError<Nir, InterpError>) -> Self {
Self::InterpError(e)
}
}
impl Display for UnrecoverableError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -356,6 +365,7 @@ impl Display for RecoverableError {
Self::XirParseError(e) => Display::fmt(e, f),
Self::XirfParseError(e) => Display::fmt(e, f),
Self::NirParseError(e) => Display::fmt(e, f),
Self::InterpError(e) => Display::fmt(e, f),
}
}
}
@ -377,6 +387,7 @@ impl Error for RecoverableError {
Self::XirParseError(e) => Some(e),
Self::XirfParseError(e) => Some(e),
Self::NirParseError(e) => Some(e),
Self::InterpError(e) => Some(e),
}
}
}
@ -396,6 +407,7 @@ impl Diagnostic for RecoverableError {
Self::XirParseError(e) => e.describe(),
Self::XirfParseError(e) => e.describe(),
Self::NirParseError(e) => e.describe(),
Self::InterpError(e) => e.describe(),
}
}
}

View File

@ -70,6 +70,7 @@ use std::{
fmt::{Debug, Display},
};
pub use interp::{InterpError, InterpState as InterpolateNir};
pub use parse::{
NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
};
@ -113,6 +114,61 @@ pub enum Nir {
Text(SPair),
}
impl Nir {
/// Retrieve inner [`SymbolId`] that this token represents,
/// if any.
///
/// Not all NIR tokens contain associated symbols;
/// a token's [`SymbolId`] is retained only if it provides additional
/// information over the token itself.
///
/// See also [`Nir::map`] if you wish to change the symbol.
pub fn symbol(&self) -> Option<SymbolId> {
use Nir::*;
match self {
Todo => None,
TodoAttr(spair) => Some(spair.symbol()),
Open(_, _) | Close(_) => None,
BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
Some(spair.symbol())
}
}
}
/// Map over a token's [`SymbolId`].
///
/// This allows modifying a token's [`SymbolId`] while retaining the
/// associated [`Span`].
/// This is the desired behavior when modifying the source code the user
/// entered,
/// since diagnostic messages will reference the original source
/// location that the modification was derived from.
///
/// If a token does not contain a symbol,
/// this returns the token unchanged.
///
/// See also [`Nir::symbol`] if you only wish to retrieve the symbol
/// rather than map over it.
pub fn map(self, f: impl FnOnce(SymbolId) -> SymbolId) -> Self {
use Nir::*;
match self {
Todo => self,
TodoAttr(spair) => TodoAttr(spair.map(f)),
Open(_, _) | Close(_) => self,
BindIdent(spair) => BindIdent(spair.map(f)),
Ref(spair) => Ref(spair.map(f)),
Desc(spair) => Desc(spair.map(f)),
Text(spair) => Text(spair.map(f)),
}
}
}
/// An object upon which other [`Nir`] tokens act.
#[derive(Debug, PartialEq, Eq)]
pub enum NirEntity {

View File

@ -103,11 +103,7 @@ use super::{Nir, NirEntity};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
fmt::{DisplayWrapper, TtQuote},
parse::{
prelude::*,
util::{expand::Expansion, SPair},
NoContext,
},
parse::{prelude::*, util::SPair, NoContext},
span::Span,
sym::{
st::quick_contains_byte, GlobalSymbolIntern, GlobalSymbolResolve,
@ -117,7 +113,6 @@ use crate::{
use std::{error::Error, fmt::Display};
// Expose variants for enums defined in this module to reduce verbosity.
use Expansion::*;
use InterpState::*;
/// A generated identifier.
@ -156,12 +151,13 @@ pub enum InterpState {
#[default]
Ready,
/// Generate an identifier for the expansion template parameter.
GenIdent,
/// Generate an identifier for the expansion template parameter to be
/// generated from the provided interpolation string.
GenIdent(SymbolId),
/// Generate a description for the expansion template parameter that is
/// intended as a human-readable debugging string.
GenDesc(GenIdentSymbolId),
GenDesc(SymbolId, GenIdentSymbolId),
/// Interpolation will continue in a literal context at the provided
/// offset relative to the start of the specification string.
@ -189,15 +185,21 @@ impl Display for InterpState {
interpolation is necessary"
),
GenIdent => {
write!(f, "ready to generate template param identifier")
GenIdent(sym) => {
write!(
f,
"ready to generate template param identifier \
for specification {}",
TtQuote::wrap(sym),
)
}
GenDesc(GenIdentSymbolId(sym)) => write!(
GenDesc(sym, GenIdentSymbolId(gen_sym)) => write!(
f,
"ready to generate debug description for generated \
template param {}",
TtQuote::wrap(sym),
template param {param} from specification {spec}",
param = TtQuote::wrap(gen_sym),
spec = TtQuote::wrap(sym),
),
ParseLiteralAt(spec, _, x) => write!(
@ -226,8 +228,8 @@ impl Display for InterpState {
}
impl ParseState for InterpState {
type Token = SPair;
type Object = Expansion<SPair, Nir>;
type Token = Nir;
type Object = Nir;
type Error = InterpError;
fn parse_token(
@ -235,7 +237,13 @@ impl ParseState for InterpState {
tok: Self::Token,
_: NoContext,
) -> TransitionResult<Self> {
match (self, tok.into()) {
// Every expansion token that we emit must be derived from the span
// of the source token,
// ensuring that diagnostics reference the source code that can
// actually be acted upon by the user.
let span = tok.span();
match self {
// When receiving a new symbol,
// we must make a quick determination as to whether it
// requires desugaring.
@ -245,27 +253,27 @@ impl ParseState for InterpState {
// filter out non-interpolated strings quickly,
// before we start to parse.
// Symbols that require no interpolation are simply echoed back.
(Ready, (sym, span)) => {
if needs_interpolation(sym) {
Transition(GenIdent)
.ok(Expanded(Nir::Open(NirEntity::TplParam, span)))
.with_lookahead((sym, span).into())
} else {
// No desugaring is needed.
Transition(Ready).ok(DoneExpanding((sym, span).into()))
Ready => match tok.symbol() {
Some(sym) if needs_interpolation(sym) => {
Transition(GenIdent(sym))
.ok(Nir::Open(NirEntity::TplParam, span))
.with_lookahead(tok)
}
}
(GenIdent, (sym, span)) => {
// No desugaring is needed.
_ => Transition(Ready).ok(tok),
},
GenIdent(sym) => {
let gen_ident = gen_tpl_param_ident_at_offset(span);
let GenIdentSymbolId(ident_sym) = gen_ident;
Transition(GenDesc(gen_ident))
.ok(Expanded(Nir::BindIdent(SPair(ident_sym, span))))
.with_lookahead((sym, span).into())
Transition(GenDesc(sym, gen_ident))
.ok(Nir::BindIdent(SPair(ident_sym, span)))
.with_lookahead(tok)
}
(GenDesc(gen_ident), (sym, span)) => {
GenDesc(sym, gen_ident) => {
let s = sym.lookup_str();
// Description is not interned since there's no use in
@ -283,14 +291,14 @@ impl ParseState for InterpState {
// since interpolation is most commonly utilized with literal
// prefixes.
Transition(ParseLiteralAt(s, gen_ident, 0))
.ok(Expanded(Nir::Desc(SPair(gen_desc, span))))
.with_lookahead((sym, span).into())
.ok(Nir::Desc(SPair(gen_desc, span)))
.with_lookahead(tok)
}
// The outermost parsing context is that of the literal,
// where a sequence of characters up to `{` stand for
// themselves.
(ParseLiteralAt(s, gen_param, offset), (sym, span)) => {
ParseLiteralAt(s, gen_param, offset) => {
if offset == s.len() {
// We've reached the end of the specification string.
// Since we're in the outermost (literal) context,
@ -301,8 +309,8 @@ impl ParseState for InterpState {
// symbol that we've been fed
// (the specification string).
Transition(FinishSym(s, gen_param))
.ok(Expanded(Nir::Close(span)))
.with_lookahead((sym, span).into())
.ok(Nir::Close(span))
.with_lookahead(tok)
};
}
@ -315,7 +323,7 @@ impl ParseState for InterpState {
Some(0) => {
Transition(ParseInterpAt(s, gen_param, offset + 1))
.incomplete()
.with_lookahead((sym, span).into())
.with_lookahead(tok)
}
// Everything from the offset until the curly brace is a
@ -329,8 +337,8 @@ impl ParseState for InterpState {
let text = Nir::Text(SPair(literal, span_text));
Transition(ParseInterpAt(s, gen_param, end + 1))
.ok(Expanded(text))
.with_lookahead((sym, span).into())
.ok(text)
.with_lookahead(tok)
}
// The remainder of the specification is a literal.
@ -343,8 +351,8 @@ impl ParseState for InterpState {
// Keep in the current state but update the offset;
// we'll complete parsing next pass.
Transition(ParseLiteralAt(s, gen_param, s.len()))
.ok(Expanded(text))
.with_lookahead((sym, span).into())
.ok(text)
.with_lookahead(tok)
}
}
}
@ -354,7 +362,7 @@ impl ParseState for InterpState {
// This is an inner context that cannot complete without being
// explicitly closed,
// and cannot be nested.
(ParseInterpAt(s, gen_param, offset), (sym, span)) => {
ParseInterpAt(s, gen_param, offset) => {
// TODO: Make sure offset exists, avoid panic
// TODO: Prevent nested `{`.
@ -378,8 +386,8 @@ impl ParseState for InterpState {
// Continue parsing one character past the '}',
// back in a literal context.
Transition(ParseLiteralAt(s, gen_param, end + 1))
.ok(Expanded(param_value))
.with_lookahead((sym, span).into())
.ok(param_value)
.with_lookahead(tok)
}
None => todo!("missing closing '}}'"),
@ -392,8 +400,8 @@ impl ParseState for InterpState {
// with a metavariable referencing the parameter that we just
// generated.
// We finally release the lookahead symbol.
(FinishSym(_, GenIdentSymbolId(gen_param)), (_, span)) => {
Transition(Ready).ok(DoneExpanding((gen_param, span).into()))
FinishSym(_, GenIdentSymbolId(gen_param)) => {
Transition(Ready).ok(tok.map(|_| gen_param))
}
}
}

View File

@ -40,16 +40,29 @@ fn does_not_desugar_literal_only() {
// but it's also a literal because it's not enclosed in braces.
for literal in ["foo", "@bar@"] {
let sym = literal.into();
let toks = vec![SPair(sym, S1)];
// Arbitrary token type that supports symbols
let toks = vec![Nir::Ref(SPair(sym, S1))];
assert_eq!(
Ok(vec![Object(DoneExpanding(SPair(sym, S1)))]),
Ok(vec![Object(Nir::Ref(SPair(sym, S1)))]),
Sut::parse(toks.into_iter()).collect(),
"literal `{literal}` must not desugar",
);
}
}
// ...not that it could.
#[test]
fn does_not_desugar_tokens_without_symbols() {
    // A lone `Close` token is fed to the parser;
    //   we expect to get the very same token back as the only object.
    let input = vec![Nir::Close(S1)];

    let expected = Ok(vec![Object(Nir::Close(S1))]);
    let actual = Sut::parse(input.into_iter()).collect();

    assert_eq!(expected, actual);
}
// When ending with an interpolated variable,
// the parser should recognize that we've returned to the outer literal
// context and permit successful termination of the specification string.
@ -69,7 +82,7 @@ fn desugars_literal_with_ending_var() {
let b = DC.span(10, 3);
let c = DC.span(14, 5);
let given_sym = SPair(given_val.into(), a);
let given_sym = Nir::Ref(SPair(given_val.into(), a));
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -87,15 +100,12 @@ fn desugars_literal_with_ending_var() {
// helpful information to a human reader.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
Some(Ok(Object(Nir::Open(NirEntity::TplParam, a)))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::BindIdent(expect_dfn)))),);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
Some(Ok(Object(Nir::Desc(SPair(desc_str, desc_span)))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
@ -103,21 +113,15 @@ fn desugars_literal_with_ending_var() {
// Note how the span associated with this is `B`,
// which is derived from the relevant portion of the original
// specification string.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Text(expect_text)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::Text(expect_text)))));
// This is the actual metavariable reference,
// pulled out of the interpolated portion of the given value.
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Ref(expect_param))))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::Ref(expect_param)))),);
// This is an object generated from user input,
// so the closing span has to identify what it was generated from.
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_eq!(sut.next(), Some(Ok(Object(Nir::Close(a)))));
// Finally,
// we replace the original provided attribute
@ -125,7 +129,7 @@ fn desugars_literal_with_ending_var() {
// with a metavariable reference to the generated parameter.
assert_matches!(
sut.next(),
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
Some(Ok(Object(Nir::Ref(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -150,7 +154,7 @@ fn desugars_var_with_ending_literal() {
let b = DC.span(21, 5);
let c = DC.span(27, 3);
let given_sym = SPair(given_val.into(), a);
let given_sym = Nir::Ref(SPair(given_val.into(), a));
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -166,34 +170,25 @@ fn desugars_var_with_ending_literal() {
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
Some(Ok(Object(Nir::Open(NirEntity::TplParam, a)))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::BindIdent(expect_dfn)))),);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
Some(Ok(Object(Nir::Desc(SPair(desc_str, desc_span)))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Ref(expect_param))))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::Ref(expect_param)))),);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Text(expect_text)))))
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::Text(expect_text)))));
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_eq!(sut.next(), Some(Ok(Object(Nir::Close(a)))));
assert_matches!(
sut.next(),
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
Some(Ok(Object(Nir::Ref(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -219,7 +214,7 @@ fn desugars_many_vars_and_literals() {
let d = DC.span(40, 3);
let e = DC.span(44, 6);
let given_sym = SPair(given_val.into(), a);
let given_sym = Nir::Ref(SPair(given_val.into(), a));
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -237,15 +232,12 @@ fn desugars_many_vars_and_literals() {
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
Some(Ok(Object(Nir::Open(NirEntity::TplParam, a)))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::BindIdent(expect_dfn)))),);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
Some(Ok(Object(Nir::Desc(SPair(desc_str, desc_span)))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
@ -253,23 +245,23 @@ fn desugars_many_vars_and_literals() {
assert_eq!(
Ok(vec![
// These two are the same as in previous tests.
Object(Expanded(Nir::Text(expect_text1))),
Object(Expanded(Nir::Ref(expect_param1))),
Object(Nir::Text(expect_text1)),
Object(Nir::Ref(expect_param1)),
// This pair repeats literals and vars further into the pattern
// to ensure that the parser is able to handle returning to
// previous states and is able to handle inputs at different
// offsets.
Object(Expanded(Nir::Text(expect_text2))),
Object(Expanded(Nir::Ref(expect_param2))),
Object(Nir::Text(expect_text2)),
Object(Nir::Ref(expect_param2)),
]),
sut.by_ref().take(4).collect(),
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_eq!(sut.next(), Some(Ok(Object(Nir::Close(a)))));
assert_matches!(
sut.next(),
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
Some(Ok(Object(Nir::Ref(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);
@ -292,7 +284,7 @@ fn desugars_adjacent_interpolated_vars() {
let c = DC.span(48, 5);
let d = DC.span(55, 5);
let given_sym = SPair(given_val.into(), a);
let given_sym = Nir::Ref(SPair(given_val.into(), a));
let toks = vec![given_sym];
let GenIdentSymbolId(expect_name) = gen_tpl_param_ident_at_offset(a);
@ -309,15 +301,12 @@ fn desugars_adjacent_interpolated_vars() {
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Open(NirEntity::TplParam, a))))),
);
assert_eq!(
sut.next(),
Some(Ok(Object(Expanded(Nir::BindIdent(expect_dfn))))),
Some(Ok(Object(Nir::Open(NirEntity::TplParam, a)))),
);
assert_eq!(sut.next(), Some(Ok(Object(Nir::BindIdent(expect_dfn)))),);
assert_matches!(
sut.next(),
Some(Ok(Object(Expanded(Nir::Desc(SPair(desc_str, desc_span))))))
Some(Ok(Object(Nir::Desc(SPair(desc_str, desc_span)))))
if desc_str.lookup_str().contains(given_val)
&& desc_span == a
);
@ -325,18 +314,18 @@ fn desugars_adjacent_interpolated_vars() {
// These are the three adjacent vars.
assert_eq!(
Ok(vec![
Object(Expanded(Nir::Ref(expect_param1))),
Object(Expanded(Nir::Ref(expect_param2))),
Object(Expanded(Nir::Ref(expect_param3))),
Object(Nir::Ref(expect_param1)),
Object(Nir::Ref(expect_param2)),
Object(Nir::Ref(expect_param3)),
]),
sut.by_ref().take(3).collect(),
);
assert_eq!(sut.next(), Some(Ok(Object(Expanded(Nir::Close(a))))));
assert_eq!(sut.next(), Some(Ok(Object(Nir::Close(a)))));
assert_matches!(
sut.next(),
Some(Ok(Object(DoneExpanding(SPair(given_replace, given_span)))))
Some(Ok(Object(Nir::Ref(SPair(given_replace, given_span)))))
if given_replace == expect_name && given_span == a
);

View File

@ -39,6 +39,30 @@ use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)]
pub struct SPair(pub SymbolId, pub Span);
impl SPair {
    /// Yield the [`SymbolId`] half of this pair.
    ///
    /// This can be used in place of destructuring the pair with a
    ///   pattern.
    pub fn symbol(&self) -> SymbolId {
        self.0
    }

    /// Apply `f` to the [`SymbolId`] of this pair,
    ///   keeping the associated [`Span`] exactly as it was.
    ///
    /// Holding on to the span is what we want when rewriting source code
    ///   entered by the user,
    ///     because diagnostic messages will then point at the original
    ///     source location from which the modification was derived.
    pub fn map(self, f: impl FnOnce(SymbolId) -> SymbolId) -> Self {
        let Self(sym, span) = self;

        Self(f(sym), span)
    }
}
impl Token for SPair {
fn ir_name() -> &'static str {
"Generic Symbol"