tamer: nir: Interpolate concrete binds into abstract binds

This introduces the notion of an abstract identifier, where the previous
identifiers are concrete.  This serves as a compromise to either introducing
a new object type (another `Ident`), or having every `Ident` name be defined
by a `Meta` edge, which would bloat the graph significantly.

This change causes interpolation within a bind context to desugar into a new
`BindIdentAbstract` token, but AIR will throw an error if it encounters it
for now; that implementation will come soon.

This does not yet handle non-interpolation cases,
e.g. `<classify as="@foo@">`.  This is a well-established shorthand for
`as="{@foo@}"`, but is unfortunately ambiguous in the context of
metavariable definitions (template parameters).  This language ambiguity
will have to be handled here, and will have to fall back to today's behavior
of assuming concrete in that `param/@name` context but abstract everywhere else,
unless of course interpolation is triggered using `{}` to disambiguate (as
in `<param name="{@foo@}"`).

I was going to handle the short-hand meta binding case as part of
interpolation, but I decided it may be appropriate for its own lowering
operation, since it is intended to work regardless of whether interpolation
takes place; it's a _translation_ of a binding into an abstract one, and it
can clearly delineate the awkward syntactic rules that we have to inherit,
as mentioned above.

DEV-13163
main
Mike Gerwitz 2023-06-27 12:37:44 -04:00
parent 828d8918a3
commit 15071a1824
10 changed files with 402 additions and 28 deletions

View File

@ -34,6 +34,7 @@ use crate::{
graph::object::{ObjectIndexRelTo, ObjectIndexTo},
ObjectKind,
},
diagnose::Annotate,
f::Functor,
parse::prelude::*,
};
@ -138,6 +139,19 @@ impl ParseState for AirExprAggregate {
}
}
(BuildingExpr(_, oi), AirBind(BindIdentAbstract(meta_name))) => {
diagnostic_todo!(
vec![
oi.note("for this expression"),
meta_name.note(
"attempting to bind an abstract identifier with \
this metavariable"
),
],
"attempt to bind abstract identifier to expression",
)
}
(BuildingExpr(es, oi), AirBind(RefIdent(name))) => {
let oi_ident = ctx.lookup_lexical_or_missing(name);
Transition(BuildingExpr(

View File

@ -542,6 +542,10 @@ sum_ir! {
/// Assign an identifier to the active object.
///
/// The "active" object depends on the current parsing state.
///
/// See also [`Self::BindIdentAbstract`] if the name of the
/// identifier cannot be known until future expansion based on
/// the value of a metavariable.
BindIdent(id: SPair) => {
span: id,
display: |f| write!(
@ -551,6 +555,26 @@ sum_ir! {
),
},
/// Assign an abstract identifier to the active object.
///
/// This differs from [`Self::BindIdent`] in that the name of
/// the identifier will not be known until expansion time.
/// The identifier is bound to a metavariable of the
/// name `meta`,
/// from which its name will eventually be derived.
///
/// If the name is known,
/// use [`Self::BindIdent`] to bind a concrete identifier.
BindIdentAbstract(meta: SPair) => {
span: meta,
display: |f| write!(
f,
"identify active object by the value of the \
metavariable {} during future expansion",
TtQuote::wrap(meta),
),
},
/// Reference another object identified by the given [`SPair`].
///
/// Objects can be referenced before they are declared or defined,
@ -747,6 +771,18 @@ sum_ir! {
},
}
/// Metasyntactic objects.
///
/// TAME's metalanguage supports the generation of lexemes using
/// metavariables.
/// Those generated lexemes are utilized by the template system
/// (via [`AirTpl`])
/// during expansion,
/// yielding objects as if the user had entered the lexemes
/// statically.
///
/// [`AirBind`] is able to utilize metasyntactic variables for
/// dynamically generated bindings.
enum AirMeta {
/// Begin a metavariable definition.
///
@ -857,17 +893,6 @@ sum_ir! {
pub sum enum AirBindableMeta = AirMeta | AirBind | AirDoc;
}
impl AirBind {
/// Name of the identifier described by this token.
pub fn name(&self) -> SPair {
use AirBind::*;
match self {
BindIdent(name) | RefIdent(name) => *name,
}
}
}
impl AirIdent {
/// Name of the identifier described by this token.
pub fn name(&self) -> SPair {

View File

@ -91,6 +91,19 @@ impl ParseState for AirMetaAggregate {
.map(|_| ())
.transition(TplMeta(oi_meta)),
(TplMeta(oi_meta), AirBind(BindIdentAbstract(meta_name))) => {
diagnostic_todo!(
vec![
oi_meta.note("for this metavariable"),
meta_name.note(
"attempting to bind an abstract identifier with \
this metavariable"
),
],
"attempt to bind abstract identifier to metavariable",
)
}
(TplMeta(oi_meta), AirDoc(DocIndepClause(clause))) => {
oi_meta.desc_short(ctx.asg_mut(), clause);
Transition(TplMeta(oi_meta)).incomplete()

View File

@ -27,6 +27,7 @@ use super::{
AirAggregate, AirAggregateCtx,
};
use crate::{
diagnose::Annotate,
fmt::{DisplayWrapper, TtQuote},
parse::prelude::*,
span::Span,
@ -186,6 +187,19 @@ impl ParseState for AirTplAggregate {
.map(|_| ())
.transition(Toplevel(tpl.identify(id))),
(Toplevel(tpl), AirBind(BindIdentAbstract(meta_name))) => {
diagnostic_todo!(
vec![
tpl.oi().note("for this template"),
meta_name.note(
"attempting to bind an abstract identifier with \
this metavariable"
),
],
"attempt to bind abstract identifier to template",
)
}
(Toplevel(tpl), AirBind(RefIdent(name))) => {
let tpl_oi = tpl.oi();
let ref_oi = ctx.lookup_lexical_or_missing(name);

View File

@ -143,6 +143,20 @@ pub enum Ident {
/// itself;
/// this is safe since identifiers in TAME are immutable.
Transparent(SPair),
/// The name of the identifier is not yet known and will be determined
/// by the lexical value of a metavariable.
///
/// This is intended for use by identifiers that will be generated as a
/// result of template expansion---
/// it represents the abstract _idea_ of an identifier,
/// to be made concrete at a later time,
/// and is not valid outside of a metasyntactic context.
///
/// The associated span represents the location that the identifier
/// was defined,
/// e.g. within a template body.
Abstract(Span),
}
impl Display for Ident {
@ -165,6 +179,9 @@ impl Display for Ident {
Transparent(id) => {
write!(f, "transparent identifier {}", TtQuote::wrap(id))
}
Abstract(_) => {
write!(f, "pending identifier (to be named during expansion)")
}
}
}
}
@ -182,6 +199,8 @@ impl Ident {
| Extern(name, ..)
| IdentFragment(name, ..)
| Transparent(name) => Some(*name),
Abstract(_) => None,
}
}
@ -192,6 +211,8 @@ impl Ident {
| Extern(name, ..)
| IdentFragment(name, ..)
| Transparent(name) => name.span(),
Abstract(span) => *span,
}
}
@ -202,7 +223,7 @@ impl Ident {
/// [`None`] is returned.
pub fn kind(&self) -> Option<&IdentKind> {
match self {
Missing(_) | Transparent(_) => None,
Missing(_) | Transparent(_) | Abstract(_) => None,
Opaque(_, kind, _)
| Extern(_, kind, _)
@ -217,7 +238,7 @@ impl Ident {
/// [`None`] is returned.
pub fn src(&self) -> Option<&Source> {
match self {
Missing(_) | Extern(_, _, _) | Transparent(_) => None,
Missing(_) | Extern(_, _, _) | Transparent(_) | Abstract(_) => None,
Opaque(_, _, src) | IdentFragment(_, _, src, _) => Some(src),
}
@ -229,9 +250,11 @@ impl Ident {
/// [`None`] is returned.
pub fn fragment(&self) -> Option<FragmentText> {
match self {
Missing(_) | Opaque(_, _, _) | Extern(_, _, _) | Transparent(_) => {
None
}
Missing(_)
| Opaque(_, _, _)
| Extern(_, _, _)
| Transparent(_)
| Abstract(_) => None,
IdentFragment(_, _, _, text) => Some(*text),
}
@ -368,6 +391,17 @@ impl Ident {
let err = TransitionError::Redeclare(name, span);
Err((self, err))
}
// This really should never happen at the time of writing,
// since to resolve an identifier it first needs to be located
// on the graph,
// and abstract identifiers do not have an indexed name.
// Does the system now discover identifiers through other means,
// e.g. by trying to pre-draw edges within template bodies?
Abstract(abstract_span) => Err((
self,
TransitionError::ResolveAbstract(abstract_span, span),
)),
}
}
@ -394,6 +428,8 @@ impl Ident {
Err(UnresolvedError::Extern(*name, kind.clone()))
}
Abstract(span) => Err(UnresolvedError::Abstract(*span)),
Opaque(name, ..)
| IdentFragment(name, ..)
| Transparent(name, ..) => Ok((self, *name)),
@ -442,6 +478,12 @@ impl Ident {
Ok(self)
}
}
// See notes on `resolve()` for this arm.
Abstract(abstract_span) => Err((
self,
TransitionError::ResolveAbstract(abstract_span, span),
)),
}
}
@ -453,6 +495,11 @@ impl Ident {
/// Note, however, that an identifier's fragment may be cleared under
/// certain circumstances (such as symbol overrides),
/// making way for a new fragment to be set.
///
/// Fragments cannot be attached to abstract identifiers,
/// nor does it make sense to,
/// since fragment code generation only takes place on expanded
/// objects.
pub fn set_fragment(self, text: FragmentText) -> TransitionResult<Ident> {
match self {
Opaque(sym, kind, src) => Ok(IdentFragment(sym, kind, src, text)),
@ -487,6 +534,10 @@ impl Ident {
| Transparent(name) => {
Err((self, TransitionError::BadFragmentDest(name)))
}
Abstract(span) => {
Err((self, TransitionError::AbstractFragmentDest(span)))
}
}
}
}
@ -520,6 +571,16 @@ pub enum TransitionError {
///
/// See [`Ident::set_fragment`].
BadFragmentDest(SPair),
/// Attempted to resolve an abstract identifier.
///
/// An abstract identifier must be made to be concrete before any
/// resolution can occur.
ResolveAbstract(Span, Span),
/// Like [`Self::BadFragmentDest`] but for abstract identifiers without
/// a name.
AbstractFragmentDest(Span),
}
impl std::fmt::Display for TransitionError {
@ -557,7 +618,15 @@ impl std::fmt::Display for TransitionError {
BadFragmentDest(name) => {
write!(fmt, "bad fragment destination: {}", TtQuote::wrap(name))
},
ResolveAbstract(_, _) => {
write!(fmt, "cannot resolve abstract identifier")
}
AbstractFragmentDest(_) => {
write!(fmt, "cannot attach fragment to abstract identifier")
},
}
}
}
@ -627,6 +696,27 @@ impl Diagnostic for TransitionError {
),
name.help(" object file; this error should never occur."),
],
ResolveAbstract(span, resolve_span) => vec![
span.note("for this abstract identifier"),
resolve_span.internal_error(
"attempted to resolve abstract identifier here",
),
resolve_span.help(
"this is a suspicious error that may represent \
a compiler bug",
),
],
AbstractFragmentDest(span) => vec![
span.internal_error(
"this abstract identifier cannot be assigned a text fragment",
),
span.help(
"the term 'text fragment' refers to compiled code from an \
object file; this error should never occur."
),
],
}
}
}
@ -646,6 +736,13 @@ pub enum UnresolvedError {
/// Expected identifier has not yet been resolved with a concrete
/// definition.
Extern(SPair, IdentKind),
/// The identifier at the given location is pending expansion and is not
/// yet a concrete identifier.
///
/// These identifiers represent a template for the creation of a future
/// identifier during template expansion.
Abstract(Span),
}
impl std::fmt::Display for UnresolvedError {
@ -663,6 +760,8 @@ impl std::fmt::Display for UnresolvedError {
TtQuote::wrap(name),
TtQuote::wrap(kind),
),
Abstract(_) => write!(fmt, "abstract (unexpanded) identifier"),
}
}
}
@ -701,6 +800,16 @@ impl Diagnostic for UnresolvedError {
" later provide a concrete definition for it."
)
],
// This should not occur under normal circumstances;
// the user is likely to hit a more helpful and
// context-specific error before this.
Abstract(span) => vec![
span.error("this identifier has not been expanded"),
span.help(
"are you using a metavariable outside of a template body?",
),
],
}
}
}
@ -1115,6 +1224,12 @@ impl ObjectIndex<Ident> {
)
}
// e.g. in a template body
Abstract(span) => diagnostic_todo!(
vec![span.note("abstract defintion bind here")],
"bind definition to abstract identifier",
),
// We are okay to proceed to add an edge to the `definition`.
// Discard the original span
// (which is the location of the first reference _to_ this

View File

@ -96,12 +96,25 @@ pub enum Nir {
/// Finish definition of a [`NirEntity`] atop of the stack and pop it.
Close(NirEntity, Span),
/// Bind the given name as an identifier for the entity atop of the
/// stack.
/// Bind the given name as a concrete identifier for the entity atop of
/// the stack.
///
/// [`Self::Ref`] references identifiers created using this token.
///
/// See also [`Self::BindIdentAbstract`].
BindIdent(SPair),
/// Bind entity atop of the stack to an abstract identifier whose name
/// will eventually be derived from the metavariable identifier by the
/// given [`SPair`].
///
/// The identifier is intended to become concrete when a lexical value
/// for the metavariable becomes available during expansion,
/// which is outside of the scope of NIR.
///
/// See also [`Self::BindIdent`] for a concrete identifier.
BindIdentAbstract(SPair),
/// Reference the value of the given identifier as the subject of the
/// current expression.
///
@ -163,15 +176,23 @@ pub enum Nir {
}
impl Nir {
/// Retrieve inner [`SymbolId`] that this token represents,
/// Retrieve a _concrete_ inner [`SymbolId`] that this token represents,
/// if any.
///
/// Not all NIR tokens contain associated symbols;
/// a token's [`SymbolId`] is retained only if it provides additional
/// information over the token itself.
///
/// See also [`Nir::map`] if you wish to change the symbol.
pub fn symbol(&self) -> Option<SymbolId> {
/// An abstract identifier will yield [`None`],
/// since its concrete symbol has yet to be defined;
/// the available symbol instead represents the name of the
/// metavariable from which the concrete symbol will eventually
/// have its value derived.
///
/// See also [`Nir::map`] if you wish to change the symbol,
/// noting however that it does not distinguish between notions of
/// concrete and abstract as this method does.
pub fn concrete_symbol(&self) -> Option<SymbolId> {
use Nir::*;
match self {
@ -183,6 +204,13 @@ impl Nir {
BindIdent(spair) | RefSubject(spair) | Ref(spair) | Desc(spair)
| Text(spair) | Import(spair) => Some(spair.symbol()),
// An abstract identifier does not yet have a concrete symbol
// assigned;
// the available symbol represents the metavariable from
// which a symbol will eventually be derived during
// expansion.
BindIdentAbstract(_) => None,
Noop(_) => None,
}
}
@ -201,8 +229,7 @@ impl Functor<SymbolId> for Nir {
/// If a token does not contain a symbol,
/// this returns the token unchanged.
///
/// See also [`Nir::symbol`] if you only wish to retrieve the symbol
/// rather than map over it.
/// See also [`Nir::concrete_symbol`].
fn map(self, f: impl FnOnce(SymbolId) -> SymbolId) -> Self {
use Nir::*;
@ -213,6 +240,7 @@ impl Functor<SymbolId> for Nir {
Open(_, _) | Close(_, _) => self,
BindIdent(spair) => BindIdent(spair.map(f)),
BindIdentAbstract(spair) => BindIdentAbstract(spair.map(f)),
RefSubject(spair) => RefSubject(spair.map(f)),
Ref(spair) => Ref(spair.map(f)),
Desc(spair) => Desc(spair.map(f)),
@ -339,8 +367,13 @@ impl Token for Nir {
Open(_, span) => *span,
Close(_, span) => *span,
BindIdent(spair) | RefSubject(spair) | Ref(spair) | Desc(spair)
| Text(spair) | Import(spair) => spair.span(),
BindIdent(spair)
| BindIdentAbstract(spair)
| RefSubject(spair)
| Ref(spair)
| Desc(spair)
| Text(spair)
| Import(spair) => spair.span(),
// A no-op is discarding user input,
// so we still want to know where that is so that we can
@ -363,7 +396,19 @@ impl Display for Nir {
Open(entity, _) => write!(f, "open {entity} entity"),
Close(entity, _) => write!(f, "close {entity} entity"),
BindIdent(spair) => {
write!(f, "bind identifier {}", TtQuote::wrap(spair))
write!(
f,
"bind to concrete identifier {}",
TtQuote::wrap(spair)
)
}
BindIdentAbstract(spair) => {
write!(
f,
"bind to abstract identifier with future value of \
metavariable {}",
TtQuote::wrap(spair)
)
}
RefSubject(spair) => {
write!(f, "subject ref {}", TtQuote::wrap(spair))

View File

@ -280,6 +280,9 @@ impl ParseState for NirToAir {
(st @ (Ready | Meta(_)), BindIdent(spair)) => {
Transition(st).ok(Air::BindIdent(spair))
}
(st @ (Ready | Meta(_)), BindIdentAbstract(spair)) => {
Transition(st).ok(Air::BindIdentAbstract(spair))
}
(Ready, Ref(spair) | RefSubject(spair)) => {
Transition(Ready).ok(Air::RefIdent(spair))
}

View File

@ -453,3 +453,49 @@ fn text_as_arbitrary_doc() {
sut_parse(toks.into_iter()).collect(),
);
}
// NIR's concept of abstract identifiers exists for the sake of
// disambiguation for AIR.
// While NIR's grammar does not explicitly utilize it,
// interpolation via `nir::interp` will desugar into it.
//
// This test verifies that each NIR `BindIdentAbstract` token is lowered
// into an AIR `BindIdentAbstract` token carrying the same name and span,
// both within an expression and within a template parameter.
#[test]
fn abstract_idents_lowered_to_air_equivalent() {
// Metavariable names paired with the spans that they are expected to
// retain in the lowered AIR tokens.
let meta_id = SPair("@foo@".into(), S2);
let meta_meta_id = SPair("@bar@".into(), S5);
#[rustfmt::skip]
let toks = vec![
Open(Rate, S1),
// NIR does not know or care that this metavariable does not
// exist.
BindIdentAbstract(meta_id),
Close(Rate, S3),
// The XSLT-based TAME had a grammatical ambiguity that disallowed
// this type of construction,
// but there's no reason we can't allow for abstract
// metavariables
// (which would make `meta_meta_id` a meta-metavariable).
// (See `nir::interp` for more information on the handling of
// `TplParam` and abstract identifiers.)
Open(TplParam, S4),
// NIR does not know or care that this metavariable does not
// exist.
BindIdentAbstract(meta_meta_id),
Close(TplParam, S6),
];
// The expected AIR mirrors the NIR input token-for-token:
// the `Rate` entity becomes a sum expression and the `TplParam`
// entity becomes a metavariable definition,
// each with its abstract binding passed through unchanged.
assert_eq!(
#[rustfmt::skip]
Ok(vec![
O(Air::ExprStart(ExprOp::Sum, S1)),
O(Air::BindIdentAbstract(meta_id)),
O(Air::ExprEnd(S3)),
O(Air::MetaStart(S4)),
O(Air::BindIdentAbstract(meta_meta_id)),
O(Air::MetaEnd(S6)),
]),
sut_parse(toks.into_iter()).collect(),
);
}

View File

@ -266,7 +266,7 @@ impl ParseState for InterpState {
// filter out non-interpolated strings quickly,
// before we start to parse.
// Symbols that require no interpolation are simply echoed back.
Ready => match tok.symbol() {
Ready => match tok.concrete_symbol() {
Some(sym) if needs_interpolation(sym) => {
Transition(GenIdent(sym))
.ok(Nir::Open(NirEntity::TplParam, span))
@ -458,7 +458,19 @@ impl ParseState for InterpState {
// generated.
// We finally release the lookahead symbol.
FinishSym(_, GenIdentSymbolId(gen_param)) => {
Transition(Ready).ok(tok.map(|_| gen_param))
let replacement = match tok.map(|_| gen_param) {
// `BindIdent` represents a concrete identifier.
// Our interpolation has generated a metavariable,
// meaning that this identifier has become abstract
// since its name will not be known until expansion-time.
Nir::BindIdent(x) => Nir::BindIdentAbstract(x),
// All other tokens only have their symbols replaced by
// the above.
x => x,
};
Transition(Ready).ok(replacement)
}
}
}

View File

@ -111,6 +111,93 @@ fn expect_expanded_header(
expect_name_sym
}
// This allows for unambiguously requesting desugaring in situations where
// the default is to treat the name as concrete.
//
// The specification here consists solely of a single interpolated
// metavariable reference with no surrounding literal text.
#[test]
fn desugars_spec_with_only_var() {
let given_val = "{@foo@}";
// |[---]|
// |1 5|
// | B |
// [-----]
// 0 6
// A
// Non-zero span offset ensures that derived spans properly consider
// parent offset.
// `a` is intended to cover the whole specification and `b` only the
// metavariable name between the braces
// (assuming `span` takes an offset followed by a length).
let a = DC.span(10, 7);
let b = DC.span(11, 5);
let given_sym = Nir::Ref(SPair(given_val.into(), a));
let toks = vec![given_sym];
let mut sut = Sut::parse(toks.into_iter());
// NOTE(review): `expect_expanded_header` is defined outside of this
// view;
// it presumably consumes and asserts the tokens that open the
// generated template parameter and yields its generated name.
let expect_name = expect_expanded_header(&mut sut, given_val, a);
assert_eq!(
Ok(vec![
// This is the actual metavariable reference, pulled out of the
// interpolated portion of the given value.
Object(Nir::Ref(SPair("@foo@".into(), b))),
// This is an object generated from user input, so the closing
// span has to identify what it was generated from.
Object(Nir::Close(NirEntity::TplParam, a)),
// Finally,
// we replace the original provided attribute
// (the interpolation specification)
// with a metavariable reference to the generated parameter.
Object(Nir::Ref(SPair(expect_name, a))),
]),
sut.collect(),
);
}
// This is like the above test,
// but with a `BindIdent` instead of a `Ref`,
// which desugars into `BindIdentAbstract`.
// We could handle that translation in a later lowering operation,
// but re-parsing the symbol would be wasteful.
#[test]
fn concrete_bind_ident_desugars_into_abstract_bind_after_interpolation() {
let given_val = "{@bindme@}";
// |[------]|
// |1 8|
// | B |
// [--------]
// 0 9
// A
// Non-zero span offset ensures that derived spans properly consider
// parent offset.
// `a` is intended to cover the whole specification and `b` only the
// metavariable name between the braces
// (assuming `span` takes an offset followed by a length).
let a = DC.span(10, 10);
let b = DC.span(11, 8);
// This is a bind,
// unlike above.
let given_sym = Nir::BindIdent(SPair(given_val.into(), a));
let toks = vec![given_sym];
let mut sut = Sut::parse(toks.into_iter());
// NOTE(review): `expect_expanded_header` is defined outside of this
// view;
// it presumably consumes and asserts the tokens that open the
// generated template parameter and yields its generated name.
let expect_name = expect_expanded_header(&mut sut, given_val, a);
assert_eq!(
Ok(vec![
// The interpolation occurs the same as above.
Object(Nir::Ref(SPair("@bindme@".into(), b))),
Object(Nir::Close(NirEntity::TplParam, a)),
// But at the end,
// instead of keeping the original `BindIdent` token,
// we translate to `BindIdentAbstract`,
// indicating that the name of this identifier depends on the
// value of the metavariable during expansion.
Object(Nir::BindIdentAbstract(SPair(expect_name, a))),
]),
sut.collect(),
);
}
// When ending with an interpolated variable,
// the parser should recognize that we've returned to the outer literal
// context and permit successful termination of the specification string.