From b2c6b7f0736d2936ceabbd6204647d9a50c934ae Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Wed, 22 Mar 2023 12:19:40 -0400 Subject: [PATCH] tamer: asg::air::expr: Do not cache (globally) identifiers created with StoreDangling I'm not happy with this implementation. The linear search is undesirable, but not too bad (and maybe wouldn't even be worth caching, if this were the whole story), but we _also_ need to prevent duplicate identifiers. We are not going to want to perform a linear search of a linked list (effectively) every time we add an identifier to check for uniqueness, so I think the caching is going to have to be generalized very shortly anyway. As it stands now, a duplicate identifier would cause an error at expansion time. That's not what we want, but it's not terrible, because you can have that same problem in normal circumstances without local conflicts. But this'll be used for metavariables as well, where we absolutely _do_ want to fail at template definition time. DEV-13708 --- tamer/src/asg/air/expr.rs | 19 +++++++++--- tamer/src/asg/air/tpl/test.rs | 24 +++++++++++++-- tamer/src/asg/graph.rs | 7 ++++- tamer/src/asg/graph/object.rs | 55 +++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 8 deletions(-) diff --git a/tamer/src/asg/air/expr.rs b/tamer/src/asg/air/expr.rs index b1d5cf28..b0106c18 100644 --- a/tamer/src/asg/air/expr.rs +++ b/tamer/src/asg/air/expr.rs @@ -417,6 +417,11 @@ mod root { /// the sub-expression will be rooted in [`Self`], /// but the [`Dangling`] root expression will still be rejected. /// + /// This expects identifiers to be rooted in the global environment, + /// which is the package representing the active compilation unit. + /// This may be relaxed once identifier caching is generalized; + /// at the time of writing it is too coupled to the graph root. + /// /// See [`RootStrategy`] for more information. #[derive(Debug, PartialEq)] pub struct ReachableOnly(ObjectIndex) @@ -456,6 +461,10 @@ mod root { /// Sub-expressions can be thought of as utilizing this strategy with an /// implicit parent [`ObjectIndex`](ObjectIndex). /// + /// Unlike [`ReachableOnly`], + /// this does _not_ cache identifiers in the global environment. + /// See there for more information. + /// /// See [`RootStrategy`] for more information. #[derive(Debug, PartialEq)] pub struct StoreDangling(ObjectIndex) @@ -470,11 +479,13 @@ mod root { Self(oi) } - fn defines(&self, asg: &mut Asg, id: SPair) -> ObjectIndex { - // We are a superset of `ReachableOnly`'s behavior, - // so delegate to avoid duplication. + fn defines(&self, asg: &mut Asg, name: SPair) -> ObjectIndex { + // This cannot simply call [`ReachableOnly`]'s `defines` because + // we cannot cache in the global environment. + // This can be realized once caching is generalized; + // see the commit that introduced this comment. match self { - Self(oi_root) => ReachableOnly(*oi_root).defines(asg, id), + Self(oi_root) => oi_root.declare_local(asg, name), } } diff --git a/tamer/src/asg/air/tpl/test.rs b/tamer/src/asg/air/tpl/test.rs index 08e14a03..b1d236d4 100644 --- a/tamer/src/asg/air/tpl/test.rs +++ b/tamer/src/asg/air/tpl/test.rs @@ -197,10 +197,12 @@ fn tpl_with_reachable_expression() { Air::BindIdent(id_tpl), Air::ExprStart(ExprOp::Sum, S3), + // Must not be cached in the global env. Air::BindIdent(id_expr_a), Air::ExprEnd(S5), Air::ExprStart(ExprOp::Sum, S6), + // Must not be cached in the global env. Air::BindIdent(id_expr_b), Air::ExprEnd(S8), Air::TplEnd(S9), @@ -215,14 +217,25 @@ fn tpl_with_reachable_expression() { // The inner expressions are reachable, // but the intent is to expand them into the template's eventual // application site. + // They have identifiers, + // but those identifiers _must not_ be cached in the global + // environment; + // such a determination will be made at expansion-time. // Given that, // they should be defined by the template... assert_eq!( vec![ - asg.lookup_global(id_expr_b).unwrap(), - asg.lookup_global(id_expr_a).unwrap(), + // At the time of writing, + // this is implemented using the same `edges_filtered`, + // but the point is that we want to ensure that the + // identifiers bound to this template are only these. + oi_tpl.lookup_local_linear(&asg, id_expr_b), + oi_tpl.lookup_local_linear(&asg, id_expr_a), ], - oi_tpl.edges_filtered::(&asg).collect::>() + oi_tpl + .edges_filtered::(&asg) + .map(Some) + .collect::>() ); // ...but not by the package containing the template. @@ -234,6 +247,11 @@ fn tpl_with_reachable_expression() { ], oi_pkg.edges_filtered::(&asg).collect::>() ); + + // Verify the above claim that these are not cached in the global + // environment. + assert_eq!(None, asg.lookup_global(id_expr_a)); + assert_eq!(None, asg.lookup_global(id_expr_b)); } // Templates can expand into many contexts, diff --git a/tamer/src/asg/graph.rs b/tamer/src/asg/graph.rs index 6b51709e..e3f682b4 100644 --- a/tamer/src/asg/graph.rs +++ b/tamer/src/asg/graph.rs @@ -719,12 +719,17 @@ impl Asg { self.get(index) } - /// Attempt to retrieve an identifier from the graph by name. + /// Attempt to retrieve an identifier from the graph by name utilizing + /// the global environment. /// /// Since only identifiers carry a name, /// this method cannot be used to retrieve all possible objects on the /// graph---for /// that, see [`Asg::get`]. + /// + /// The global environment is defined as the environment of the current + /// compilation unit, + /// which is a package. #[inline] pub fn lookup_global(&self, id: SPair) -> Option> { let i = id.symbol().as_usize(); diff --git a/tamer/src/asg/graph/object.rs b/tamer/src/asg/graph/object.rs index c023840d..814fc175 100644 --- a/tamer/src/asg/graph/object.rs +++ b/tamer/src/asg/graph/object.rs @@ -118,6 +118,7 @@ use crate::{ diagnose::{panic::DiagnosticPanic, Annotate, AnnotatedSpan}, diagnostic_panic, f::Functor, + parse::util::SPair, span::{Span, UNKNOWN_SPAN}, }; use petgraph::graph::NodeIndex; @@ -713,6 +714,60 @@ impl ObjectIndex { Self(index, span, _pd) => ObjectIndex::new(index, span), } } + + /// Attempt to look up a locally bound [`Ident`] via a linear search of + /// `self`'s edges. + /// + /// Performance + /// =========== + /// _This is a linear (O(1)) search of the edges of the node + /// corresponding to `self`!_ + /// At the time of writing, + /// edges are stored using index references in a manner similar to a + /// linked list (petgraph). + /// And for each such edge, + /// the target object must be resolved so that its + /// [`SymbolId`](crate::sym::SymbolId) may be retrieved and compared + /// against the provided `name`. + /// + /// If the number of edges is small and the objects are fairly localized + /// in memory relative to `self`, + /// then this may not be a concern. + /// However, + /// if you've arrived at this method while investigating unfavorable + /// circumstances during profiling, + /// then you should consider caching like the global environment + /// (see [`Asg::lookup_global`]). + pub fn lookup_local_linear( + &self, + asg: &Asg, + name: SPair, + ) -> Option> + where + O: ObjectRelTo, + { + self.edges_filtered::(asg) + .find(|oi| oi.resolve(asg).name().symbol() == name.symbol()) + } + + /// Declare a local identifier. + /// + /// A local identifier is lexically scoped to `self`. + /// This operation is valid only for [`ObjectKind`]s that can contain + /// edges to [`Ident`]s. + /// + /// TODO: This allows for duplicate local identifiers! + pub fn declare_local( + &self, + asg: &mut Asg, + name: SPair, + ) -> ObjectIndex + where + O: ObjectRelTo, + { + asg.create(Ident::declare(name)) + .add_edge_from(asg, *self, None) + } } impl ObjectIndex {