From b4b85a5e85b0b6de885bc4731f0f78f4af40efaf Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Wed, 12 Jul 2023 15:49:58 -0400 Subject: [PATCH] tamer: asg::air: Support Meta::ConcatList with lexemes and refs This handles the common cases for meta, which includes what interpolation desugars into. Most of this work was in testing and reasoning about the issue; `asg::graph::visit:ontree::test` has a good summary of the structure of the graph that results. The last remaining steps to make this work end-to-end is for NIR->AIR to lower `Nir::Ref` into `Air::BindIdent`, and then for `asg::graph::xmli` to reconstruct concatenation lists. I'll then be able to commit the xmli test case I've been sitting on, whose errors have been guiding my development. DEV-13163 --- tamer/src/asg/air.rs | 23 ++ tamer/src/asg/air/meta.rs | 19 +- tamer/src/asg/air/meta/test.rs | 369 +++++++++++++++++++++++ tamer/src/asg/air/test.rs | 2 +- tamer/src/asg/graph/object/meta.rs | 136 ++++++++- tamer/src/asg/graph/visit/ontree/test.rs | 64 +++- 6 files changed, 600 insertions(+), 13 deletions(-) create mode 100644 tamer/src/asg/air/meta/test.rs diff --git a/tamer/src/asg/air.rs b/tamer/src/asg/air.rs index 70b96529..ccf0f37c 100644 --- a/tamer/src/asg/air.rs +++ b/tamer/src/asg/air.rs @@ -46,6 +46,9 @@ use std::{ fmt::{Debug, Display}, }; +#[cfg(test)] +use super::graph::object::ObjectRelTo; + #[macro_use] mod ir; use fxhash::FxHashMap; @@ -967,6 +970,26 @@ impl AirAggregateCtx { .map(EnvScopeKind::into_inner) } + /// Resolve an identifier at the scope of the provided environment and + /// retrieve its definition. + /// + /// If the identifier is not in scope or does not have a definition, + /// [`None`] will be returned; + /// the caller cannot distinguish between the two using this method; + /// see [`Self::env_scope_lookup`] if that distinction is important. + #[cfg(test)] + fn env_scope_lookup_ident_dfn( + &self, + env: impl ObjectIndexRelTo, + name: SPair, + ) -> Option> + where + Ident: ObjectRelTo, + { + self.env_scope_lookup::(env, name) + .and_then(|oi| oi.definition(self.asg_ref())) + } + /// Attempt to retrieve an identifier from the graph by name relative to /// the immediate environment `imm_env`. /// diff --git a/tamer/src/asg/air/meta.rs b/tamer/src/asg/air/meta.rs index c808895f..f32fa321 100644 --- a/tamer/src/asg/air/meta.rs +++ b/tamer/src/asg/air/meta.rs @@ -76,7 +76,7 @@ impl ParseState for AirMetaAggregate { } (TplMeta(oi_meta), AirMeta(MetaLexeme(lexeme))) => Transition( - TplMeta(oi_meta.assign_lexeme(ctx.asg_mut(), lexeme)), + TplMeta(oi_meta.append_lexeme(ctx.asg_mut(), lexeme)), ) .incomplete(), @@ -106,6 +106,8 @@ impl ParseState for AirMetaAggregate { Transition(TplMeta(oi_meta)).incomplete() } + // TODO: The user _probably_ meant to use `` in XML NIR, + // so maybe we should have an error to that effect. (TplMeta(..), tok @ AirDoc(DocText(..))) => { diagnostic_todo!( vec![tok.note("this token")], @@ -114,11 +116,13 @@ impl ParseState for AirMetaAggregate { ) } - (TplMeta(..), tok @ AirBind(RefIdent(..))) => { - diagnostic_todo!( - vec![tok.note("this token")], - "AirBind in metavar context (param-value)" - ) + // Reference to another metavariable, + // e.g. using `` in XML NIR. + (TplMeta(oi_meta), AirBind(RefIdent(name))) => { + let oi_ref = ctx.lookup_lexical_or_missing(name); + + Transition(TplMeta(oi_meta.concat_ref(ctx.asg_mut(), oi_ref))) + .incomplete() } (TplMeta(..), tok @ AirMeta(MetaStart(..))) => { @@ -159,3 +163,6 @@ impl AirMetaAggregate { Self::Ready } } + +#[cfg(test)] +mod test; diff --git a/tamer/src/asg/air/meta/test.rs b/tamer/src/asg/air/meta/test.rs new file mode 100644 index 00000000..5b857b25 --- /dev/null +++ b/tamer/src/asg/air/meta/test.rs @@ -0,0 +1,369 @@ +// Tests for ASG IR metavariable parsing +// +// Copyright (C) 2014-2023 Ryan Specialty, LLC. +// +// This file is part of TAME. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +/// Metavariable parsing tests. +/// +/// Note that some metavariable-related tests are in +/// [`super::super::tpl::test`], +/// where the focus is on how they are integrated as template +/// parameters. +/// The tests here focus instead on the particulars of metavariables +/// themselves, +/// with templates being only incidental. +use super::*; +use crate::{ + asg::{ + air::{ + test::{parse_as_pkg_body, pkg_lookup}, + Air::{self, *}, + }, + graph::object::{meta::MetaRel, Meta, Tpl}, + }, + parse::{util::spair, Parser}, + span::{dummy::*, Span}, + sym::SymbolId, +}; + +type Sut = AirAggregate; + +// Metavariables can reference the lexical value of other metavariables. +// This does not actually check that the target reference is a metavariable, +// at least not at the time of writing. +// TODO: But the system ought to, +// since if we defer that to expansion-time, +// then we don't have confidence that the template definition is actually +// valid as a part of this compilation unit. +#[test] +fn metavar_ref_only() { + let name_meta = "@foo@"; + let name_other = "@other@"; + + #[rustfmt::skip] + assert_concat_list( + [ + MetaStart(S1), + BindIdent(spair(name_meta, S2)), + + // Reference to another metavariable, + // effectively creating an alias. + RefIdent(spair(name_other, S3)), // --. + MetaEnd(S4), // | + // | + MetaStart(S5), // | + BindIdent(spair(name_other, S6)), // <-' + MetaEnd(S7), + ], + + name_meta, + S1.merge(S4), + + [ + &Meta::Required(S5.merge(S7).unwrap()), + ], + ); +} + +// Similar to above, +// but multiple references. +#[test] +fn metavar_ref_multiple() { + let name_meta = "@foo@"; + let name_other_a = "@other-a@"; + let name_other_b = "@other-b@"; + + #[rustfmt::skip] + assert_concat_list( + // Def/ref is commutative, + // so this defines with both orderings to demonstrate that. + [ + MetaStart(S1), + BindIdent(spair(name_other_a, S2)), // <-. + MetaEnd(S3), // | + // | + MetaStart(S3), // | + BindIdent(spair(name_meta, S4)), // | + // | + RefIdent(spair(name_other_a, S5)), // --' + RefIdent(spair(name_other_b, S6)), // --. + MetaEnd(S7), // | + // | + MetaStart(S8), // | + BindIdent(spair(name_other_b, S9)), // <-' + MetaEnd(S10), + ], + + name_meta, + S3.merge(S7), + + [ + &Meta::Required(S1.merge(S3).unwrap()), + &Meta::Required(S8.merge(S10).unwrap()), + ], + ); +} + +// If a metavariable already has a concrete lexical value, +// then appending a reference to it should convert it into a list. +#[test] +fn metavar_ref_after_lexeme() { + let name_meta = "@foo@"; + let value = "foo value"; + let name_other = "@other@"; + + #[rustfmt::skip] + assert_concat_list( + [ + MetaStart(S1), + BindIdent(spair(name_meta, S2)), + + // At this point, + // we have only a concrete lexeme, + // and so we are not a list. + MetaLexeme(spair(value, S3)), + + // Upon encountering this token, + // we should convert into a list. + RefIdent(spair(name_other, S4)), // --. + MetaEnd(S5), // | + // | + MetaStart(S6), // | + BindIdent(spair(name_other, S7)), // <-' + MetaEnd(S8), + ], + + name_meta, + S1.merge(S5), + + // Having been converted into a list, + // we should have a reference to _two_ metavariables: + // one holding the original lexeme and the reference. + [ + &Meta::Lexeme(S3, spair(value, S3)), + &Meta::Required(S6.merge(S8).unwrap()), + ], + ); +} + +// If we have a reference, +// then that means we already have a `ConcatList`, +// and therefore should just have to append to it. +// This is the same as the above test, +// with operations reversed. +// It is not commutative, +// though, +// since concatenation is ordered. +#[test] +fn lexeme_after_metavar_ref() { + let name_meta = "@foo@"; + let value = "foo value"; + let name_other = "@other@"; + + #[rustfmt::skip] + assert_concat_list( + [ + MetaStart(S1), + BindIdent(spair(name_meta, S2)), + + // We produce a list here... + RefIdent(spair(name_other, S3)), // --. + // | + // ...and should just append a // | + // `Lexeme` here. // | + MetaLexeme(spair(value, S4)), // | + MetaEnd(S5), // | + // | + MetaStart(S6), // | + BindIdent(spair(name_other, S7)), // <-' + MetaEnd(S8), + ], + + name_meta, + S1.merge(S5), + + // Same as the previous test, + // but concatenation order is reversed. + [ + &Meta::Required(S6.merge(S8).unwrap()), + + // Note the first span here is not that of the parent + // metavariable, + // since we do not want diagnostics to suggest that this + // object represents the entirety of the parent. + &Meta::Lexeme(S4, spair(value, S4)), + ], + ); +} + +// Multiple lexemes should _also_ produce a list. +// While this could have been replaced by a single lexeme, +// there are still uses: +// maybe this was the result of template expansion, +// or simply a multi-line formatting choice in the provided sources. +#[test] +fn lexeme_after_lexeme() { + let name_meta = "@foo@"; + let value_a = "foo value a"; + let value_b = "foo value b"; + + #[rustfmt::skip] + assert_concat_list( + [ + MetaStart(S1), + BindIdent(spair(name_meta, S2)), + + MetaLexeme(spair(value_a, S3)), + MetaLexeme(spair(value_b, S4)), + MetaEnd(S5), + ], + + name_meta, + S1.merge(S5), + + [ + // Since these are Meta objects derived from the original, + // our spans + // (the first value in the tuple) + // are not that of the containing metavariable; + // we don't want diagnostics implying that this represents + // the entirety of that the parent metavariable. + &Meta::Lexeme(S3, spair(value_a, S3)), + &Meta::Lexeme(S4, spair(value_b, S4)), + ], + ); +} + +/////// /////// +/////// Tests above; plumbing begins below /////// +/////// /////// + +fn assert_concat_list<'a, IT, IE: 'a>( + toks: IT, + meta_name: impl Into, + expect_span: Option, + expect: IE, +) where + IT: IntoIterator, + IT::IntoIter: Debug, + IE: IntoIterator, + IE::IntoIter: Debug + DoubleEndedIterator, +{ + let (ctx, oi_tpl) = air_ctx_from_tpl_body_toks(toks); + let asg = ctx.asg_ref(); + + let oi_meta = ctx + .env_scope_lookup_ident_dfn::( + oi_tpl, + spair(meta_name.into(), DUMMY_SPAN), + ) + .unwrap(); + + // Meta references are only supported through lexical concatenation. + assert_eq!( + &Meta::ConcatList(expect_span.expect("expected metavar span is None")), + oi_meta.resolve(asg), + "expected metavariable to be a ConcatList with the provided span", + ); + + // We `collect()` rather than using `Iterator::eq` so that the failure + // message includes the data. + // If we do this too often, + // we can consider a crate like `itertools` or write our own + // comparison, + // but it's not worth doing for these tests where the cost of + // collection is so low and insignificant. + assert_eq!( + // TODO: We need to modify edge methods to return in the proper + // order (not reversed) without a performance hit, + // which will involve investigating Petgraph further. + expect.into_iter().rev().collect::>(), + // Each reference is to an Ident whose definition is the other + // metavariable. + oi_meta + .edges(asg) + .filter_map(|rel| match rel { + MetaRel::Meta(oi) => Some(oi), + MetaRel::Ident(oi) => oi.definition::(asg), + MetaRel::Doc(_) => None, + }) + .map(ObjectIndex::cresolve(asg)) + .collect::>(), + "note: expected tokens are in reverse order in this error message", + ); +} + +const STUB_TPL_NAME: &str = "__incidental-tpl__"; + +/// Parse the provided tokens within the context of a template body. +/// +/// This allows tests to focus on testing of metavariables instead of +/// mudding tests with setup. +/// +/// This creates a package and template that are purely incidental and serve +/// only as scaffolding to put the parser into the necessary state and +/// context. +/// This is an alternative to providing methods to force the parse into a +/// certain context, +/// since we're acting as users of the SUT's public API and are +/// therefore testing real-world situations. +fn parse_as_tpl_body>( + toks: I, +) -> Parser + Debug> +where + ::IntoIter: Debug, +{ + #[rustfmt::skip] + let head = [ + TplStart(S1), + BindIdent(spair(STUB_TPL_NAME, S1)), + ]; + + #[rustfmt::skip] + let tail = [ + TplEnd(S1), + ]; + + #[rustfmt::skip] + parse_as_pkg_body( + head.into_iter() + .chain(toks.into_iter()) + .chain(tail.into_iter()) + ) +} + +fn air_ctx_from_tpl_body_toks>( + toks: I, +) -> (::Context, ObjectIndex) +where + I::IntoIter: Debug, +{ + let mut sut = parse_as_tpl_body(toks); + assert!(sut.all(|x| x.is_ok())); + + let ctx = sut.finalize().unwrap().into_private_context(); + + let oi_tpl = pkg_lookup(&ctx, spair(STUB_TPL_NAME, S1)) + .expect( + "could not locate stub template (did you call \ + air_ctx_from_tpl_body_toks without parse_as_tpl_body?)", + ) + .definition(ctx.asg_ref()) + .expect("missing stub template definition (test setup bug?)"); + + (ctx, oi_tpl) +} diff --git a/tamer/src/asg/air/test.rs b/tamer/src/asg/air/test.rs index 58ec94b8..48ccdf98 100644 --- a/tamer/src/asg/air/test.rs +++ b/tamer/src/asg/air/test.rs @@ -692,7 +692,7 @@ where use std::iter; Sut::parse( - iter::once(PkgStart(S1, spair("/pkg", S1))) + iter::once(PkgStart(S1, spair("/incidental-pkg", S1))) .chain(toks.into_iter()) .chain(iter::once(PkgEnd(S1))), ) diff --git a/tamer/src/asg/graph/object/meta.rs b/tamer/src/asg/graph/object/meta.rs index 4934b72c..39bf4159 100644 --- a/tamer/src/asg/graph/object/meta.rs +++ b/tamer/src/asg/graph/object/meta.rs @@ -30,13 +30,15 @@ //! but that term generally has a different meaning in programming: //! . +use arrayvec::ArrayVec; + use super::{prelude::*, Doc, Ident}; use crate::{ diagnose::Annotate, diagnostic_todo, f::Functor, fmt::{DisplayWrapper, TtQuote}, - parse::util::SPair, + parse::{util::SPair, Token}, span::Span, }; use std::fmt::Display; @@ -55,9 +57,26 @@ use std::fmt::Display; /// the symbol representing that identifier then acts as a metavariable. #[derive(Debug, PartialEq, Eq)] pub enum Meta { + /// Metavariable represents a parameter without a value. + /// + /// A value must be provided at or before expansion, + /// generally via template application arguments. Required(Span), - ConcatList(Span), + + /// Metavariable has a concrete lexical value. + /// + /// This metavariable represents a literal and requires no further + /// reduction or processing. Lexeme(Span, SPair), + + /// Metavariable whose value is to be the concatenation of all + /// referenced metavariables. + /// + /// This object has no value on its own; + /// it must contain edges to other metavariables, + /// and the order of those edges on the ASG represents concatenation + /// order. + ConcatList(Span), } impl Meta { @@ -137,17 +156,79 @@ object_rel! { /// Metavariables contain lexical data and references to other /// metavariables. Meta -> { - tree Meta, // TODO: do we need tree? + // References to other metavariables + // (e.g. `` in XML-based sources). cross Ident, + // Owned lexical values. + // + // These differ from the above references because they represent + // inline lexemes that have no identifier. + tree Meta, + // e.g. template paramater description. tree Doc, } } impl ObjectIndex { - pub fn assign_lexeme(self, asg: &mut Asg, lexeme: SPair) -> Self { - self.map_obj(asg, |meta| meta.assign_lexeme(lexeme)) + /// Append a lexeme to this metavariable. + /// + /// If `self` is [`Meta::Required`], + /// this provides a value and reuses the object already allocated. + /// + /// If `self` is a single [`Meta::Lexeme`], + /// it is re-allocated to a separate [`Meta`] object along with the + /// provided `lexeme`, + /// and edges are added to both, + /// indicating concatenation. + /// + /// Metavariables with multiple values already represents concatenation + /// and a new edge will be added without changing `self`. + pub fn append_lexeme(self, asg: &mut Asg, lexeme: SPair) -> Self { + use Meta::*; + + let mut rels = ArrayVec::::new(); + + // We don't have access to `asg` within this closure because of + // `map_obj`; + // the above variable will be mutated by it to return extra + // information to do those operations afterward. + // If we do this often, + // then let's create a `map_obj` that is able to return + // supplemental information or create additional relationships + // (so, a map over a subgraph rather than an object). + self.map_obj(asg, |meta| match meta { + // Storage is already allocated for this lexeme. + Required(span) => Lexeme(span, lexeme), + + // We could technically allocate a new symbol and combine the + // lexeme now, + // but let's wait so that we can avoid allocating + // intermediate symbols. + Lexeme(span, first_lexeme) => { + // We're converting from a single lexeme stored on `self` to + // a `Meta` with edges to both individual lexemes. + rels.push(first_lexeme); + rels.push(lexeme); + + ConcatList(span) + } + + // We're already representing concatenation so we need only add + // an edge to the new lexeme. + ConcatList(span) => { + rels.push(lexeme); + ConcatList(span) + } + }); + + for rel_lexeme in rels { + let oi = asg.create(Meta::Lexeme(rel_lexeme.span(), rel_lexeme)); + self.add_edge_to(asg, oi, None); + } + + self } pub fn close(self, asg: &mut Asg, close_span: Span) -> Self { @@ -157,4 +238,49 @@ impl ObjectIndex { }) }) } + + // Append a reference to a metavariable identified by `oi_ref`. + // + // The value of the metavariable will not be known until expansion time, + // at which point its lexical value will be concatenated with those of + // any other references, + // in the order that they were added. + // + // It is expected that the value of `oi_ref` was produced via a lookup + // from the reference location and therefore contains the reference + // [`Span`]; + // this is used to provide accurate diagnostic information. + pub fn concat_ref(self, asg: &mut Asg, oi_ref: ObjectIndex) -> Self { + use Meta::*; + + // We cannot mutate the ASG within `map_obj` below because of the + // held reference to `asg`, + // so this will be used to store data for later mutation. + let mut pre = None; + + // References are only valid for a [`Self::ConcatList`]. + self.map_obj(asg, |meta| match meta { + Required(span) | ConcatList(span) => ConcatList(span), + + Lexeme(span, lex) => { + // We will move the lexeme into a _new_ object, + // and store a reference to it. + pre.replace(Meta::Lexeme(lex.span(), lex)); + + ConcatList(span) + } + }); + + // This represents a lexeme that was extracted into a new `Meta`; + // we must add the edge before appending the ref since + // concatenation will occur during expansion in edge order. + if let Some(orig) = pre { + asg.create(orig).add_edge_from(asg, self, None); + } + + // Having been guaranteed a `ConcatList` above, + // we now only need to append an edge that references what to + // concatenate. + self.add_edge_to(asg, oi_ref, Some(oi_ref.span())) + } } diff --git a/tamer/src/asg/graph/visit/ontree/test.rs b/tamer/src/asg/graph/visit/ontree/test.rs index f905fee0..d6c585fd 100644 --- a/tamer/src/asg/graph/visit/ontree/test.rs +++ b/tamer/src/asg/graph/visit/ontree/test.rs @@ -25,7 +25,10 @@ use crate::{ ExprOp, }, f::Functor, - parse::{util::SPair, ParseState}, + parse::{ + util::{spair, SPair}, + ParseState, + }, span::{dummy::*, Span}, }; use std::fmt::Debug; @@ -328,3 +331,62 @@ fn traverses_ontological_tree_tpl_within_template() { tree_reconstruction_report(toks), ); } + +// Metavariables are used to represent template parameters, +// and are used to perform various lexical manipulations. +// The most fundamental of them is concatenation, +// and in the special case of concatenating a single value, +// assignment. +// +// This asserts that concatenation results in the expected graph and that +// the traversal respects concatenation order. +#[test] +fn traverses_ontological_tree_complex_tpl_meta() { + #[rustfmt::skip] + let toks = vec![ + PkgStart(S1, spair("/pkg", S1)), + TplStart(S2), + BindIdent(spair("_tpl_", S3)), + + // -- Above this line was setup -- // + + MetaStart(S4), + BindIdent(spair("@param@", S5)), + + // It will be important to observe that ordering + // is respected during traversal, + // otherwise concatenation order will be wrong. + MetaLexeme(spair("foo", S6)), + RefIdent(spair("@other@", S7)), // --. + MetaLexeme(spair("bar", S8)), // | + MetaEnd(S9), // | + // | + MetaStart(S10), // | + BindIdent(spair("@other@", S11)), // <-' + MetaEnd(S12), + TplEnd(S13), + PkgEnd(S14), + ]; + + // We need more concise expressions for the below table of values. + let d = DynObjectRel::new; + let m = |a: Span, b: Span| a.merge(b).unwrap(); + + #[rustfmt::skip] + assert_eq!( + // A -|-> B | A span -|-> B span | espan | depth + vec![//-----|-------|-----------|------------|--------|----------------- + (d(Root, Pkg, SU, m(S1, S14), None ), Depth(1)), + (d(Pkg, Ident, m(S1, S14), S3, None ), Depth(2)), + (d(Ident, Tpl, S3, m(S2, S13), None ), Depth(3)), + (d(Tpl, Ident, m(S2, S13), S5, None ), Depth(4)), + (d(Ident, Meta, S5, m(S4, S9), None ), Depth(5)), + (d(Meta, Meta, m(S4, S9), S6, None ), Depth(6)), + /*cross*/ (d(Meta, Ident, m(S4, S9), S11, Some(S7)), Depth(6)), + (d(Meta, Meta, m(S4, S9), S8, None, ), Depth(6)), + (d(Tpl, Ident, m(S2, S13), S11, None ), Depth(4)), + (d(Ident, Meta, S11, m(S10, S12), None ), Depth(5)), + ], + tree_reconstruction_report(toks), + ); +}