tamer: asg::air: Support Meta::ConcatList with lexemes and refs

This handles the common cases for meta, which includes what interpolation
desugars into.  Most of this work was in testing and reasoning about the
issue; `asg::graph::visit:ontree::test` has a good summary of the structure
of the graph that results.

The last remaining steps to make this work end-to-end is for NIR->AIR to
lower `Nir::Ref` into `Air::BindIdent`, and then for `asg::graph::xmli` to
reconstruct concatenation lists.  I'll then be able to commit the xmli test
case I've been sitting on, whose errors have been guiding my development.

DEV-13163
main
Mike Gerwitz 2023-07-12 15:49:58 -04:00
parent d2d29d8957
commit b4b85a5e85
6 changed files with 600 additions and 13 deletions

View File

@ -46,6 +46,9 @@ use std::{
fmt::{Debug, Display},
};
#[cfg(test)]
use super::graph::object::ObjectRelTo;
#[macro_use]
mod ir;
use fxhash::FxHashMap;
@ -967,6 +970,26 @@ impl AirAggregateCtx {
.map(EnvScopeKind::into_inner)
}
/// Resolve an identifier at the scope of the provided environment and
/// retrieve its definition.
///
/// If the identifier is not in scope or does not have a definition,
/// [`None`] will be returned;
/// the caller cannot distinguish between the two using this method;
/// see [`Self::env_scope_lookup`] if that distinction is important.
#[cfg(test)]
fn env_scope_lookup_ident_dfn<O: ObjectRelatable>(
&self,
env: impl ObjectIndexRelTo<Ident>,
name: SPair,
) -> Option<ObjectIndex<O>>
where
Ident: ObjectRelTo<O>,
{
self.env_scope_lookup::<Ident>(env, name)
.and_then(|oi| oi.definition(self.asg_ref()))
}
/// Attempt to retrieve an identifier from the graph by name relative to
/// the immediate environment `imm_env`.
///

View File

@ -76,7 +76,7 @@ impl ParseState for AirMetaAggregate {
}
(TplMeta(oi_meta), AirMeta(MetaLexeme(lexeme))) => Transition(
TplMeta(oi_meta.assign_lexeme(ctx.asg_mut(), lexeme)),
TplMeta(oi_meta.append_lexeme(ctx.asg_mut(), lexeme)),
)
.incomplete(),
@ -106,6 +106,8 @@ impl ParseState for AirMetaAggregate {
Transition(TplMeta(oi_meta)).incomplete()
}
// TODO: The user _probably_ meant to use `<text>` in XML NIR,
// so maybe we should have an error to that effect.
(TplMeta(..), tok @ AirDoc(DocText(..))) => {
diagnostic_todo!(
vec![tok.note("this token")],
@ -114,11 +116,13 @@ impl ParseState for AirMetaAggregate {
)
}
(TplMeta(..), tok @ AirBind(RefIdent(..))) => {
diagnostic_todo!(
vec![tok.note("this token")],
"AirBind in metavar context (param-value)"
)
// Reference to another metavariable,
// e.g. using `<param-value>` in XML NIR.
(TplMeta(oi_meta), AirBind(RefIdent(name))) => {
let oi_ref = ctx.lookup_lexical_or_missing(name);
Transition(TplMeta(oi_meta.concat_ref(ctx.asg_mut(), oi_ref)))
.incomplete()
}
(TplMeta(..), tok @ AirMeta(MetaStart(..))) => {
@ -159,3 +163,6 @@ impl AirMetaAggregate {
Self::Ready
}
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,369 @@
// Tests for ASG IR metavariable parsing
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/// Metavariable parsing tests.
///
/// Note that some metavariable-related tests are in
/// [`super::super::tpl::test`],
/// where the focus is on how they are integrated as template
/// parameters.
/// The tests here focus instead on the particulars of metavariables
/// themselves,
/// with templates being only incidental.
use super::*;
use crate::{
asg::{
air::{
test::{parse_as_pkg_body, pkg_lookup},
Air::{self, *},
},
graph::object::{meta::MetaRel, Meta, Tpl},
},
parse::{util::spair, Parser},
span::{dummy::*, Span},
sym::SymbolId,
};
type Sut = AirAggregate;
// Metavariables can reference the lexical value of other metavariables.
// This does not actually check that the target reference is a metavariable,
// at least not at the time of writing.
// TODO: But the system ought to,
// since if we defer that to expansion-time,
// then we don't have confidence that the template definition is actually
// valid as a part of this compilation unit.
#[test]
fn metavar_ref_only() {
let name_meta = "@foo@";
let name_other = "@other@";
#[rustfmt::skip]
assert_concat_list(
[
MetaStart(S1),
BindIdent(spair(name_meta, S2)),
// Reference to another metavariable,
// effectively creating an alias.
RefIdent(spair(name_other, S3)), // --.
MetaEnd(S4), // |
// |
MetaStart(S5), // |
BindIdent(spair(name_other, S6)), // <-'
MetaEnd(S7),
],
name_meta,
S1.merge(S4),
[
&Meta::Required(S5.merge(S7).unwrap()),
],
);
}
// Similar to above,
// but multiple references.
#[test]
fn metavar_ref_multiple() {
let name_meta = "@foo@";
let name_other_a = "@other-a@";
let name_other_b = "@other-b@";
#[rustfmt::skip]
assert_concat_list(
// Def/ref is commutative,
// so this defines with both orderings to demonstrate that.
[
MetaStart(S1),
BindIdent(spair(name_other_a, S2)), // <-.
MetaEnd(S3), // |
// |
MetaStart(S3), // |
BindIdent(spair(name_meta, S4)), // |
// |
RefIdent(spair(name_other_a, S5)), // --'
RefIdent(spair(name_other_b, S6)), // --.
MetaEnd(S7), // |
// |
MetaStart(S8), // |
BindIdent(spair(name_other_b, S9)), // <-'
MetaEnd(S10),
],
name_meta,
S3.merge(S7),
[
&Meta::Required(S1.merge(S3).unwrap()),
&Meta::Required(S8.merge(S10).unwrap()),
],
);
}
// If a metavariable already has a concrete lexical value,
// then appending a reference to it should convert it into a list.
#[test]
fn metavar_ref_after_lexeme() {
let name_meta = "@foo@";
let value = "foo value";
let name_other = "@other@";
#[rustfmt::skip]
assert_concat_list(
[
MetaStart(S1),
BindIdent(spair(name_meta, S2)),
// At this point,
// we have only a concrete lexeme,
// and so we are not a list.
MetaLexeme(spair(value, S3)),
// Upon encountering this token,
// we should convert into a list.
RefIdent(spair(name_other, S4)), // --.
MetaEnd(S5), // |
// |
MetaStart(S6), // |
BindIdent(spair(name_other, S7)), // <-'
MetaEnd(S8),
],
name_meta,
S1.merge(S5),
// Having been converted into a list,
// we should have a reference to _two_ metavariables:
// one holding the original lexeme and the reference.
[
&Meta::Lexeme(S3, spair(value, S3)),
&Meta::Required(S6.merge(S8).unwrap()),
],
);
}
// If we have a reference,
// then that means we already have a `ConcatList`,
// and therefore should just have to append to it.
// This is the same as the above test,
// with operations reversed.
// It is not commutative,
// though,
// since concatenation is ordered.
#[test]
fn lexeme_after_metavar_ref() {
let name_meta = "@foo@";
let value = "foo value";
let name_other = "@other@";
#[rustfmt::skip]
assert_concat_list(
[
MetaStart(S1),
BindIdent(spair(name_meta, S2)),
// We produce a list here...
RefIdent(spair(name_other, S3)), // --.
// |
// ...and should just append a // |
// `Lexeme` here. // |
MetaLexeme(spair(value, S4)), // |
MetaEnd(S5), // |
// |
MetaStart(S6), // |
BindIdent(spair(name_other, S7)), // <-'
MetaEnd(S8),
],
name_meta,
S1.merge(S5),
// Same as the previous test,
// but concatenation order is reversed.
[
&Meta::Required(S6.merge(S8).unwrap()),
// Note the first span here is not that of the parent
// metavariable,
// since we do not want diagnostics to suggest that this
// object represents the entirety of the parent.
&Meta::Lexeme(S4, spair(value, S4)),
],
);
}
// Multiple lexemes should _also_ produce a list.
// While this could have been replaced by a single lexeme,
// there are still uses:
// maybe this was the result of template expansion,
// or simply a multi-line formatting choice in the provided sources.
#[test]
fn lexeme_after_lexeme() {
let name_meta = "@foo@";
let value_a = "foo value a";
let value_b = "foo value b";
#[rustfmt::skip]
assert_concat_list(
[
MetaStart(S1),
BindIdent(spair(name_meta, S2)),
MetaLexeme(spair(value_a, S3)),
MetaLexeme(spair(value_b, S4)),
MetaEnd(S5),
],
name_meta,
S1.merge(S5),
[
// Since these are Meta objects derived from the original,
// our spans
// (the first value in the tuple)
// are not that of the containing metavariable;
// we don't want diagnostics implying that this represents
// the entirety of that the parent metavariable.
&Meta::Lexeme(S3, spair(value_a, S3)),
&Meta::Lexeme(S4, spair(value_b, S4)),
],
);
}
/////// ///////
/////// Tests above; plumbing begins below ///////
/////// ///////
fn assert_concat_list<'a, IT, IE: 'a>(
toks: IT,
meta_name: impl Into<SymbolId>,
expect_span: Option<Span>,
expect: IE,
) where
IT: IntoIterator<Item = Air>,
IT::IntoIter: Debug,
IE: IntoIterator<Item = &'a Meta>,
IE::IntoIter: Debug + DoubleEndedIterator,
{
let (ctx, oi_tpl) = air_ctx_from_tpl_body_toks(toks);
let asg = ctx.asg_ref();
let oi_meta = ctx
.env_scope_lookup_ident_dfn::<Meta>(
oi_tpl,
spair(meta_name.into(), DUMMY_SPAN),
)
.unwrap();
// Meta references are only supported through lexical concatenation.
assert_eq!(
&Meta::ConcatList(expect_span.expect("expected metavar span is None")),
oi_meta.resolve(asg),
"expected metavariable to be a ConcatList with the provided span",
);
// We `collect()` rather than using `Iterator::eq` so that the failure
// message includes the data.
// If we do this too often,
// we can consider a crate like `itertools` or write our own
// comparison,
// but it's not worth doing for these tests where the cost of
// collection is so low and insignificant.
assert_eq!(
// TODO: We need to modify edge methods to return in the proper
// order (not reversed) without a performance hit,
// which will involve investigating Petgraph further.
expect.into_iter().rev().collect::<Vec<_>>(),
// Each reference is to an Ident whose definition is the other
// metavariable.
oi_meta
.edges(asg)
.filter_map(|rel| match rel {
MetaRel::Meta(oi) => Some(oi),
MetaRel::Ident(oi) => oi.definition::<Meta>(asg),
MetaRel::Doc(_) => None,
})
.map(ObjectIndex::cresolve(asg))
.collect::<Vec<_>>(),
"note: expected tokens are in reverse order in this error message",
);
}
const STUB_TPL_NAME: &str = "__incidental-tpl__";
/// Parse the provided tokens within the context of a template body.
///
/// This allows tests to focus on testing of metavariables instead of
/// mudding tests with setup.
///
/// This creates a package and template that are purely incidental and serve
/// only as scaffolding to put the parser into the necessary state and
/// context.
/// This is an alternative to providing methods to force the parse into a
/// certain context,
/// since we're acting as users of the SUT's public API and are
/// therefore testing real-world situations.
fn parse_as_tpl_body<I: IntoIterator<Item = Air>>(
toks: I,
) -> Parser<Sut, impl Iterator<Item = Air> + Debug>
where
<I as IntoIterator>::IntoIter: Debug,
{
#[rustfmt::skip]
let head = [
TplStart(S1),
BindIdent(spair(STUB_TPL_NAME, S1)),
];
#[rustfmt::skip]
let tail = [
TplEnd(S1),
];
#[rustfmt::skip]
parse_as_pkg_body(
head.into_iter()
.chain(toks.into_iter())
.chain(tail.into_iter())
)
}
fn air_ctx_from_tpl_body_toks<I: IntoIterator<Item = Air>>(
toks: I,
) -> (<Sut as ParseState>::Context, ObjectIndex<Tpl>)
where
I::IntoIter: Debug,
{
let mut sut = parse_as_tpl_body(toks);
assert!(sut.all(|x| x.is_ok()));
let ctx = sut.finalize().unwrap().into_private_context();
let oi_tpl = pkg_lookup(&ctx, spair(STUB_TPL_NAME, S1))
.expect(
"could not locate stub template (did you call \
air_ctx_from_tpl_body_toks without parse_as_tpl_body?)",
)
.definition(ctx.asg_ref())
.expect("missing stub template definition (test setup bug?)");
(ctx, oi_tpl)
}

View File

@ -692,7 +692,7 @@ where
use std::iter;
Sut::parse(
iter::once(PkgStart(S1, spair("/pkg", S1)))
iter::once(PkgStart(S1, spair("/incidental-pkg", S1)))
.chain(toks.into_iter())
.chain(iter::once(PkgEnd(S1))),
)

View File

@ -30,13 +30,15 @@
//! but that term generally has a different meaning in programming:
//! <https://en.wikipedia.org/wiki/Metasyntactic_variable>.
use arrayvec::ArrayVec;
use super::{prelude::*, Doc, Ident};
use crate::{
diagnose::Annotate,
diagnostic_todo,
f::Functor,
fmt::{DisplayWrapper, TtQuote},
parse::util::SPair,
parse::{util::SPair, Token},
span::Span,
};
use std::fmt::Display;
@ -55,9 +57,26 @@ use std::fmt::Display;
/// the symbol representing that identifier then acts as a metavariable.
#[derive(Debug, PartialEq, Eq)]
pub enum Meta {
/// Metavariable represents a parameter without a value.
///
/// A value must be provided at or before expansion,
/// generally via template application arguments.
Required(Span),
ConcatList(Span),
/// Metavariable has a concrete lexical value.
///
/// This metavariable represents a literal and requires no further
/// reduction or processing.
Lexeme(Span, SPair),
/// Metavariable whose value is to be the concatenation of all
/// referenced metavariables.
///
/// This object has no value on its own;
/// it must contain edges to other metavariables,
/// and the order of those edges on the ASG represents concatenation
/// order.
ConcatList(Span),
}
impl Meta {
@ -137,17 +156,79 @@ object_rel! {
/// Metavariables contain lexical data and references to other
/// metavariables.
Meta -> {
tree Meta, // TODO: do we need tree?
// References to other metavariables
// (e.g. `<param-value>` in XML-based sources).
cross Ident,
// Owned lexical values.
//
// These differ from the above references because they represent
// inline lexemes that have no identifier.
tree Meta,
// e.g. template paramater description.
tree Doc,
}
}
impl ObjectIndex<Meta> {
pub fn assign_lexeme(self, asg: &mut Asg, lexeme: SPair) -> Self {
self.map_obj(asg, |meta| meta.assign_lexeme(lexeme))
/// Append a lexeme to this metavariable.
///
/// If `self` is [`Meta::Required`],
/// this provides a value and reuses the object already allocated.
///
/// If `self` is a single [`Meta::Lexeme`],
/// it is re-allocated to a separate [`Meta`] object along with the
/// provided `lexeme`,
/// and edges are added to both,
/// indicating concatenation.
///
/// Metavariables with multiple values already represents concatenation
/// and a new edge will be added without changing `self`.
pub fn append_lexeme(self, asg: &mut Asg, lexeme: SPair) -> Self {
use Meta::*;
let mut rels = ArrayVec::<SPair, 2>::new();
// We don't have access to `asg` within this closure because of
// `map_obj`;
// the above variable will be mutated by it to return extra
// information to do those operations afterward.
// If we do this often,
// then let's create a `map_obj` that is able to return
// supplemental information or create additional relationships
// (so, a map over a subgraph rather than an object).
self.map_obj(asg, |meta| match meta {
// Storage is already allocated for this lexeme.
Required(span) => Lexeme(span, lexeme),
// We could technically allocate a new symbol and combine the
// lexeme now,
// but let's wait so that we can avoid allocating
// intermediate symbols.
Lexeme(span, first_lexeme) => {
// We're converting from a single lexeme stored on `self` to
// a `Meta` with edges to both individual lexemes.
rels.push(first_lexeme);
rels.push(lexeme);
ConcatList(span)
}
// We're already representing concatenation so we need only add
// an edge to the new lexeme.
ConcatList(span) => {
rels.push(lexeme);
ConcatList(span)
}
});
for rel_lexeme in rels {
let oi = asg.create(Meta::Lexeme(rel_lexeme.span(), rel_lexeme));
self.add_edge_to(asg, oi, None);
}
self
}
pub fn close(self, asg: &mut Asg, close_span: Span) -> Self {
@ -157,4 +238,49 @@ impl ObjectIndex<Meta> {
})
})
}
// Append a reference to a metavariable identified by `oi_ref`.
//
// The value of the metavariable will not be known until expansion time,
// at which point its lexical value will be concatenated with those of
// any other references,
// in the order that they were added.
//
// It is expected that the value of `oi_ref` was produced via a lookup
// from the reference location and therefore contains the reference
// [`Span`];
// this is used to provide accurate diagnostic information.
pub fn concat_ref(self, asg: &mut Asg, oi_ref: ObjectIndex<Ident>) -> Self {
use Meta::*;
// We cannot mutate the ASG within `map_obj` below because of the
// held reference to `asg`,
// so this will be used to store data for later mutation.
let mut pre = None;
// References are only valid for a [`Self::ConcatList`].
self.map_obj(asg, |meta| match meta {
Required(span) | ConcatList(span) => ConcatList(span),
Lexeme(span, lex) => {
// We will move the lexeme into a _new_ object,
// and store a reference to it.
pre.replace(Meta::Lexeme(lex.span(), lex));
ConcatList(span)
}
});
// This represents a lexeme that was extracted into a new `Meta`;
// we must add the edge before appending the ref since
// concatenation will occur during expansion in edge order.
if let Some(orig) = pre {
asg.create(orig).add_edge_from(asg, self, None);
}
// Having been guaranteed a `ConcatList` above,
// we now only need to append an edge that references what to
// concatenate.
self.add_edge_to(asg, oi_ref, Some(oi_ref.span()))
}
}

View File

@ -25,7 +25,10 @@ use crate::{
ExprOp,
},
f::Functor,
parse::{util::SPair, ParseState},
parse::{
util::{spair, SPair},
ParseState,
},
span::{dummy::*, Span},
};
use std::fmt::Debug;
@ -328,3 +331,62 @@ fn traverses_ontological_tree_tpl_within_template() {
tree_reconstruction_report(toks),
);
}
// Metavariables are used to represent template parameters,
// and are used to perform various lexical manipulations.
// The most fundamental of them is concatenation,
// and in the special case of concatenating a single value,
// assignment.
//
// This asserts that concatenation results in the expected graph and that
// the traversal respects concatenation order.
#[test]
fn traverses_ontological_tree_complex_tpl_meta() {
#[rustfmt::skip]
let toks = vec![
PkgStart(S1, spair("/pkg", S1)),
TplStart(S2),
BindIdent(spair("_tpl_", S3)),
// -- Above this line was setup -- //
MetaStart(S4),
BindIdent(spair("@param@", S5)),
// It will be important to observe that ordering
// is respected during traversal,
// otherwise concatenation order will be wrong.
MetaLexeme(spair("foo", S6)),
RefIdent(spair("@other@", S7)), // --.
MetaLexeme(spair("bar", S8)), // |
MetaEnd(S9), // |
// |
MetaStart(S10), // |
BindIdent(spair("@other@", S11)), // <-'
MetaEnd(S12),
TplEnd(S13),
PkgEnd(S14),
];
// We need more concise expressions for the below table of values.
let d = DynObjectRel::new;
let m = |a: Span, b: Span| a.merge(b).unwrap();
#[rustfmt::skip]
assert_eq!(
// A -|-> B | A span -|-> B span | espan | depth
vec![//-----|-------|-----------|------------|--------|-----------------
(d(Root, Pkg, SU, m(S1, S14), None ), Depth(1)),
(d(Pkg, Ident, m(S1, S14), S3, None ), Depth(2)),
(d(Ident, Tpl, S3, m(S2, S13), None ), Depth(3)),
(d(Tpl, Ident, m(S2, S13), S5, None ), Depth(4)),
(d(Ident, Meta, S5, m(S4, S9), None ), Depth(5)),
(d(Meta, Meta, m(S4, S9), S6, None ), Depth(6)),
/*cross*/ (d(Meta, Ident, m(S4, S9), S11, Some(S7)), Depth(6)),
(d(Meta, Meta, m(S4, S9), S8, None, ), Depth(6)),
(d(Tpl, Ident, m(S2, S13), S11, None ), Depth(4)),
(d(Ident, Meta, S11, m(S10, S12), None ), Depth(5)),
],
tree_reconstruction_report(toks),
);
}