tamer: asg::graph::object::tpl::TplShape: Introduce template "shapes"

This change is the first to utilize matching on edges to determine the state of the template (to begin to derive its shape). But this is notable for my finally caving on `min_specialization`. The commit contains a bunch of rationale for why I introduced it. I've been sitting on trying it for _years_. I had hoped for further progress in determining a stabilization path, but that doesn't seem to be happening. The reason I caved is because _not_ using it is a significant barrier to utilizing robust types in various scenarios. I've been having to work around that with significant efforts to write boilerplate code to match on types and branch to various static paths accordingly. It makes it really expensive to make certain types of changes, and it makes the code really difficult to understand once you start to peel back abstractions that try to hide it. I'll see how this goes and, if it goes well, begin to replace old methods with specialization. See the next commit for some cleanup. I purposefully left this a bit of a mess (at the bottom of `asg::graph::object::tpl`) to emphasize what I'm doing and why I introduced it. DEV-13163
2023-07-25 12:27:17 -04:00 · 2023-07-25 12:27:17 -04:00 · 37c962a7ee
parent 4168c579fd
commit 37c962a7ee
3 changed files with 297 additions and 22 deletions
--- a/tamer/src/asg/air/tpl/test.rs
+++ b/tamer/src/asg/air/tpl/test.rs
@ -30,7 +30,7 @@ use crate::{
            },
            Air::*,
        },
-        graph::object::{Doc, Meta, ObjectRel},
+        graph::object::{tpl::TplShape, Doc, Meta, ObjectRel},
        Expr, ExprOp, Ident,
    },
    parse::util::spair,
@ -57,6 +57,7 @@ fn tpl_defining_pkg() {

    let tpl = pkg_expect_ident_obj::<Tpl>(&ctx, id_tpl);
    assert_eq!(S2.merge(S4).unwrap(), tpl.span());
+    assert_eq!(TplShape::Empty, tpl.shape());

    let oi_id_tpl = pkg_lookup(&ctx, id_tpl).unwrap();
    assert_eq!(
@ -90,6 +91,7 @@ fn tpl_after_expr() {

    let tpl = pkg_expect_ident_obj::<Tpl>(&ctx, id_tpl);
    assert_eq!(S5.merge(S7).unwrap(), tpl.span());
+    assert_eq!(TplShape::Empty, tpl.shape());
 }

 // Templates within expressions are permitted by NIR at the time of writing
@ -138,6 +140,7 @@ fn tpl_within_expr() {
    // The inner template.
    let tpl = pkg_expect_ident_obj::<Tpl>(&ctx, id_tpl);
    assert_eq!(S6.merge(S8).unwrap(), tpl.span());
+    assert_eq!(TplShape::Empty, tpl.shape());

    // The expression that was produced on the graph ought to be equivalent
    //   to the expression without the template being present at all
@ -188,9 +191,9 @@ fn tpl_apply_within_expr() {
    let ctx = air_ctx_from_pkg_body_toks(toks);
    let asg = ctx.asg_ref();

-    // The inner template.
    let tpl = pkg_expect_ident_obj::<Tpl>(&ctx, id_tpl);
    assert_eq!(S4.merge(S6).unwrap(), tpl.span());
+    assert_eq!(TplShape::Empty, tpl.shape());

    // The expression that was produced on the graph ought to be equivalent
    //   to the expression without the template being present at all,
@ -268,6 +271,13 @@ fn tpl_with_reachable_expression() {
    let tpl = oi_tpl.resolve(&asg);
    assert_eq!(S1.merge(S9).unwrap(), tpl.span());

+    // Because the above expressions were bound to identifiers,
+    //   they will not be inlined into the application site
+    //     (they'll be hoisted to the nearest container,
+    //        which might be the same as the application site,
+    //        but it's still not inlining an expression).
+    assert_eq!(TplShape::Empty, tpl.shape());
+
    // The inner expressions are reachable,
    //   but the intent is to expand them into the template's eventual
    //   application site.
@ -322,11 +332,16 @@ fn tpl_holds_dangling_expressions() {
        TplStart(S1),
          BindIdent(id_tpl),

-          // Dangling
+          // Dangling expression.
+          // This would be inlined at an application site,
+          //   and so this changes the shape of the template.
          ExprStart(ExprOp::Sum, S3),
          ExprEnd(S4),

          // Dangling
+          //   (TODO: This won't be valid;
+          //      extract into separate test case to check for a new
+          //      AsgError variant.)
          ExprStart(ExprOp::Sum, S5),
          ExprEnd(S6),
        TplEnd(S7),
@ -336,6 +351,11 @@ fn tpl_holds_dangling_expressions() {
    let asg = ctx.asg_ref();

    let oi_tpl = pkg_expect_ident_oi::<Tpl>(&ctx, id_tpl);
+    let tpl = oi_tpl.resolve(&asg);
+
+    // TODO: Until the above is invalid,
+    //   the second is overwriting the first.
+    assert_eq!(TplShape::Expr(S5.merge(S6).unwrap()), tpl.shape());

    assert_eq!(
        vec![S5.merge(S6).unwrap(), S3.merge(S4).unwrap(),],
@ -491,6 +511,11 @@ fn tpl_with_param() {
    let asg = ctx.asg_ref();

    let oi_tpl = pkg_expect_ident_oi::<Tpl>(&ctx, id_tpl);
+    let tpl = oi_tpl.resolve(&asg);
+
+    // The template contains no body
+    //   (only metavariables / params).
+    assert_eq!(TplShape::Empty, tpl.shape());

    // The template should have an edge to each identifier for each
    //   metavariable.
@ -558,13 +583,23 @@ fn tpl_nested() {

    // The identifier for the inner template should be local to the outer
    //   template.
-    let oi_tpl_inner = oi_tpl_outer.lookup_local_linear(&asg, id_tpl_inner);
-    assert_eq!(
-        S3.merge(S5),
-        oi_tpl_inner
-            .and_then(|oi| oi.definition::<Tpl>(&asg))
-            .map(|oi| oi.resolve(&asg).span())
-    );
+    let oi_tpl_inner_ident =
+        oi_tpl_outer.lookup_local_linear(&asg, id_tpl_inner);
+    let tpl_inner = oi_tpl_inner_ident
+        .and_then(|oi| oi.definition::<Tpl>(&asg))
+        .map(ObjectIndex::cresolve(&asg));
+
+    assert_eq!(S3.merge(S5), tpl_inner.map(Tpl::span));
+
+    let tpl_outer = oi_tpl_outer.resolve(&asg);
+
+    // The inner template has no body and so is empty.
+    assert_eq!(TplShape::Empty, tpl_inner.unwrap().shape());
+
+    // The outer template defines an inner template but has nothing to
+    //   inline,
+    //     and so its shape is also empty.
+    assert_eq!(TplShape::Empty, tpl_outer.shape());
 }

 // A template application within another template can be interpreted as
@ -603,7 +638,12 @@ fn tpl_apply_nested() {
        .edges_filtered::<Tpl>(&asg)
        .map(|oi| oi.resolve(&asg).span());

-    assert_eq!(vec![S3.merge(S4).unwrap()], inners.collect::<Vec<_>>(),);
+    assert_eq!(vec![S3.merge(S4).unwrap()], inners.collect::<Vec<_>>());
+
+    // Since the inner template is empty,
+    //   so too should the outer.
+    let tpl_outer = oi_tpl_outer.resolve(&asg);
+    assert_eq!(TplShape::Empty, tpl_outer.shape());
 }

 // Template application should resolve all the same regardless of order of
@ -646,6 +686,12 @@ fn tpl_apply_nested_missing() {
    let oi_tpl_outer = pkg_expect_ident_oi::<Tpl>(&ctx, id_tpl_outer);
    assert_eq!(S1.merge(S12).unwrap(), oi_tpl_outer.resolve(&asg).span());

+    // We apply two templates,
+    //   both of which are empty,
+    //   and so the outer shape is still empty.
+    let tpl_outer = oi_tpl_outer.resolve(&asg);
+    assert_eq!(TplShape::Empty, tpl_outer.shape());
+
    // The inner template should be contained within the outer and so not
    //   globally resolvable.
    assert!(pkg_lookup(&ctx, id_tpl_inner).is_none());
@ -696,8 +742,8 @@ fn tpl_doc_short_desc() {
    let ctx = air_ctx_from_pkg_body_toks(toks);
    let asg = ctx.asg_ref();

-    let oi_expr = pkg_expect_ident_oi::<Tpl>(&ctx, id_tpl);
-    let oi_docs = oi_expr
+    let oi_tpl = pkg_expect_ident_oi::<Tpl>(&ctx, id_tpl);
+    let oi_docs = oi_tpl
        .edges_filtered::<Doc>(&asg)
        .map(ObjectIndex::cresolve(&asg));

@ -705,6 +751,10 @@ fn tpl_doc_short_desc() {
        vec![&Doc::new_indep_clause(clause)],
        oi_docs.collect::<Vec<_>>(),
    );
+
+    // The documentation does not contribute to expansion and therefore does
+    //   not influence the shape of the template.
+    assert_eq!(TplShape::Empty, oi_tpl.resolve(&asg).shape());
 }

 // While NIR does not accept metavariables (params) within expressions that
@ -734,7 +784,8 @@ fn metavars_within_exprs_hoisted_to_parent_tpl() {
          BindIdent(id_tpl_outer),

          // This expression begins the body of the template.
-          // NIR would not allow params past this point.
+          // NIR would not allow params past this point,
+          //   but desugaring may produce this.
          ExprStart(ExprOp::Sum, S3),
            // Expresions are not containers and so this metavariable should
            //   be hoisted to the parent container context.
@ -788,6 +839,13 @@ fn metavars_within_exprs_hoisted_to_parent_tpl() {
        .span();

    assert_eq!(S11.merge(S13).unwrap(), span_inner);
+
+    // The template would expand into an expression,
+    //   since it is otherwise dangling.
+    assert_eq!(
+        TplShape::Expr(S3.merge(S7).unwrap()),
+        oi_outer.resolve(&asg).shape(),
+    );
 }

 #[test]
@ -855,4 +913,12 @@ fn expr_abstract_bind_produces_cross_edge_from_ident_to_meta() {
        "Tpl must not have an edge directly to Expr \
           (is it considered dangling?)",
    );
+
+    // Because the expression _will be_ bound to an identifier during
+    //   instantiation,
+    //     it'll be hoisted upon expansion,
+    //     and so our shape is still empty.
+    // This is the same result as if we had a concrete identifier;
+    //   it all ends up expanding into the same thing in the end.
+    assert_eq!(TplShape::Empty, oi_tpl.resolve(&asg).shape());
 }
--- a/tamer/src/asg/graph/object/tpl.rs
+++ b/tamer/src/asg/graph/object/tpl.rs
@ -26,31 +26,138 @@ use crate::{f::Functor, parse::util::SPair, span::Span};

 /// Template with associated name.
 #[derive(Debug, PartialEq, Eq)]
-pub struct Tpl(Span);
+pub struct Tpl(Span, TplShape);

 impl Tpl {
+    pub fn new(span: Span) -> Self {
+        Self(span, TplShape::default())
+    }
+
    pub fn span(&self) -> Span {
        match self {
-            Self(span) => *span,
+            Self(span, _) => *span,
        }
    }

-    pub fn new(span: Span) -> Self {
-        Self(span)
+    pub fn shape(&self) -> TplShape {
+        match self {
+            Self(_, shape) => *shape,
+        }
    }
 }

 impl Functor<Span> for Tpl {
    fn map(self, f: impl FnOnce(Span) -> Span) -> Self::Target {
        match self {
-            Self(span) => Self(f(span)),
+            Self(span, shape) => Self(f(span), shape),
+        }
+    }
+}
+
+impl Functor<TplShape> for Tpl {
+    fn map(self, f: impl FnOnce(TplShape) -> TplShape) -> Self::Target {
+        match self {
+            Self(span, shape) => Self(span, f(shape)),
        }
    }
 }

 impl Display for Tpl {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "template")
+        let Self(_, shape) = self;
+        write!(f, "template with {shape}")
+    }
+}
+
+/// The "shape" of a template when expanded into an expression context.
+///
+/// The shape of a template can be thought of like a puzzle piece.
+/// Each application context permits a particular type of puzzle piece,
+///   and a compatible template must be expanded into it,
+///   or otherwise be made to be compatible.
+///
+/// Template shapes must be known statically by the time the definition has
+///   completed.
+/// A definition is not complete until all missing identifier references
+///   have been defined.
+/// A corollary of this is that templates applied _within_ templates will
+///   be able to determine their shape because the shape of the applied
+///   template will be known,
+///     allowing them to compose without compromising this property.
+///
+/// Objects that would typically be hoisted out of an expression context do
+///   not contribute to the shape of a template.
+/// That is---
+///   if an object would not typically be parented to the expansion context
+///   if manually written at that source location,
+///     then it will not be parented by a template expansion,
+///     and so will not contribute to its shape.
+///
+/// Dynamic Inner Template Application
+/// ==================================
+/// Sometimes the shape of inner applications cannot be known because their
+///   application depends on values of metavariables that are provided by
+///   the caller.
+/// One such example is that the body of the template is conditional
+///   depending on what values are provided to the template.
+///
+/// In this case,
+///   it may be necessary for the body of the template to _coerce_ into a
+///   statically known shape by wrapping the dynamic application in a known
+///   object.
+/// For example,
+///   if a template's body can conditionally expand into one of a set of
+///   [`TplShape::Expr`] templates,
+///     then that condition can be wrapped in an [`Expr`] object so that,
+///       no matter what the expansion,
+///     we'll always have a shape of [`TplShape::Expr`].
+///
+/// Expansion Ordering
+/// ==================
+/// By requiring a shape to be available by the time the definition of a
+///   template is completed,
+///     a system like [`AIR`](crate::asg::air) is able to pre-allocate an
+///     [`Object`] at the application site.
+/// This ensures that we are able to generate a graph with the proper edge
+///   ordering,
+///     which is important for non-commutative objects.
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
+pub enum TplShape {
+    /// The template will not inline any objects.
+    #[default]
+    Empty,
+
+    /// The template is non-[`Empty`](Self::Empty),
+    ///   but its shape cannot yet be determined.
+    ///
+    /// A template's shape must be known by the time its definition has been
+    ///   completed.
+    /// Note that a definition is not complete until all missing identifiers
+    ///   have been defined.
+    Unknown,
+
+    /// The template can be expanded inline into a single [`Expr`].
+    ///
+    /// This allows a template to be expanded into an expression context and
+    ///   provides assurances that it will not take the place of more than a
+    ///   single expression.
+    ///
+    /// The associated span provides rationale for this shape assertion.
+    /// The [`ObjectIndex`] is not cached here to avoid having to keep them
+    ///   in sync if the graph changes,
+    ///     in which case this rationale may represent the _original_
+    ///     rationale before any graph rewriting.
+    Expr(Span),
+}
+
+impl Display for TplShape {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // phrase as "template with ..."
+        match self {
+            TplShape::Unknown => write!(f, "unknown shape"),
+            TplShape::Empty => write!(f, "empty shape"),
+            TplShape::Expr(_) => write!(f, "shape of a single expression"),
+        }
    }
 }

@ -82,7 +189,7 @@ impl ObjectIndex<Tpl> {
    ///   definition.
    pub fn close(self, asg: &mut Asg, close_span: Span) -> Self {
        self.map_obj(asg, |tpl| {
-            tpl.map(|open_span| {
+            tpl.map(|open_span: Span| {
                open_span.merge(close_span).unwrap_or(open_span)
            })
        })
@ -132,4 +239,39 @@ impl ObjectIndex<Tpl> {
    }
 }

-impl<OB: ObjectRelatable> AsgRelMut<OB> for Tpl {}
+impl AsgRelMut<Expr> for Tpl {
+    fn pre_add_edge(
+        asg: &mut Asg,
+        from_oi: ObjectIndex<Self>,
+        to_oi: ObjectIndex<Expr>,
+        _ctx_span: Option<Span>,
+        commit: impl FnOnce(&mut Asg),
+    ) -> Result<(), AsgError> {
+        let span = to_oi.resolve(asg).span();
+        from_oi.map_obj(asg, |tpl| tpl.overwrite(TplShape::Expr(span)));
+
+        Ok(commit(asg))
+    }
+}
+
+// TODO: Merge this into the macro above
+impl AsgRelMut<Ident> for Tpl {}
+impl AsgRelMut<Tpl> for Tpl {}
+impl AsgRelMut<Doc> for Tpl {}
+
+// This uses `min_specialization` to satisfy trait bounds for
+//   `<ObjectIndexTo as ObjectIndexRelTo>::add_edge`.
+// This will be better integrated in future commits.
+// See message of the commit that introduced this comment for more
+//   information.
+impl<OB: ObjectRelatable> AsgRelMut<OB> for Tpl {
+    default fn pre_add_edge(
+        asg: &mut Asg,
+        _from_oi: ObjectIndex<Self>,
+        _to_oi: ObjectIndex<OB>,
+        _ctx_span: Option<Span>,
+        commit: impl FnOnce(&mut Asg),
+    ) -> Result<(), AsgError> {
+        Ok(commit(asg))
+    }
+}
--- a/tamer/src/lib.rs
+++ b/tamer/src/lib.rs
@ -180,6 +180,73 @@
 //       which can be inscrutable if you are not very familiar with Rust's
 //       borrow checker.
 #![allow(clippy::needless_lifetimes)]
+// Uh oh.  Trait specialization, you say?
+// This deserves its own section.
+//
+// Rust has two trait specialization feature flags:
+//   - min_specialization; and
+//   - specialization.
+//
+// Both are unstable,
+//   but _the latter has soundness holes when it comes to lifetimes_.
+// A viable subset of `specialization` was introduced for use in the Rust
+//   compiler itself,
+//     dubbed `min_specialization`.
+// That hopefully-not-unsound subset is what has been adopted here.
+//
+// Here's the problem:
+//   TAMER makes _heavy_ use of the type system for various guarantees,
+//     operating as proofs.
+// This static information means that we're able to determine a lot of
+//   behavior statically.
+// However,
+//   we also have to support various operations dynamically,
+//     and marry the two together.
+// The best example of this at the time of writing is AIR,
+//   which uses static types for graph construction and manipulation
+//   whenever it can,
+//     but sometimes has to rely on runtime information to determine which
+//     types are applicable.
+// In that case,
+//   we have to match on runtime type information and branch into various
+//   static paths based on that information.
+//
+// Furthermore,
+//   this type information often exhibits specialized behavior for certain
+//   cases,
+//     and fallback behavior for all others.
+//
+// This conversion back and forth in various directions results in either a
+//   maintenance burden
+//     (e.g. any time new types or variants are introduced,
+//       branching code has to be manually updated),
+//     or complex macros that attempt to generate that code.
+// It's all boilerplate,
+//   and it's messy.
+//
+// Trait specialization allows for a simple and declarative approach to
+//   solving these problems without all of the boilerplate;
+//     the type system can be used to match on relevant types and will fall
+//     back to specialization in situations where we are not concerned with
+//     other types.
+// In situations where we _do_ want to comprehensively match all types,
+//   we still have that option in the traditional way.
+//
+// TAMER will begin to slowly and carefully utilize `min_specialization` in
+//   isolated areas to experiment with the stability and soundness of the
+//   system.
+// You can search for its uses by searching for `default fn`.
+//
+// If it is decided to _not_ utilize this feature in the future,
+//   then specialization must be replaced with burdensome branching code as
+//   mentioned above.
+// It is doable without sacrificing type safety,
+//   but it makes many changes very time-consuming and therefore very
+//   expensive.
+//
+// (At the time of writing,
+//    there is no clear path to stabilization of this feature.)
+#![feature(min_specialization)]

 pub mod global;