tamer: asg::graph::visit::ontree: Source ordering of ontological tree

This introduces the ability to specify an edge ordering for the ontological
tree traversal.  `tree_reconstruction` will now use a
`SourceCompatibleTreeEdgeOrder`, which will traverse the graph in an order
that will result in a properly ordered source reconstruction.  This is
needed for template headers, because interpolation causes
metavariables (exposed as template params) to be mixed into the body.

There's a lot of information here, including some TODOs on possible
improvements.  I used the unstable `is_sorted` to output how many template
were already sorted, based on one of our very large packages internally that
uses templates extensively, and found that none of the desugared shorthand
template expansions were already ordered.  If I tweak that a bit, then
nearly all templates will already be ordered, reducing the work that needs
to be done, leaving only template definitions with interpolation to be
concerned about, which is infrequent relative to everything else.

DEV-13163
main
Mike Gerwitz 2023-07-17 16:12:56 -04:00
parent b30018c23b
commit 5a301c1548
4 changed files with 362 additions and 50 deletions

View File

@ -29,7 +29,7 @@
//! This is a [depth-first search][w-depth-first-search]
//! visiting all nodes that are _reachable_ from the graph root
//! (see [`Asg::root`]).
//! [`ObjectIndex`]es are emitted in pre-order during the traversal,
//! [`TreeWalkRel`]s are emitted in pre-order during the traversal,
//! and may be emitted more than once if
//! (a) they are the destination of cross edges or
//! (b) they are shared between trees
@ -103,7 +103,7 @@
//!
//! Depth Tracking
//! ==============
//! Each [`ObjectIndex`] emitted by this traversal is accompanied by a
//! Each [`TreeWalkRel`] emitted by this traversal is accompanied by a
//! [`Depth`] representing the length of the current path relative to the
//! [`Asg`] root.
//! Since the ASG root is never emitted,
@ -135,11 +135,34 @@
//! because the [`Depth`] represents the current _path_,
//! the same [`ObjectIndex`] may be emitted multiple times with different
//! [`Depth`]s.
//!
//!
//! Edge Order
//! ==========
//! The order of edges in the tree is important,
//! since there are a number of non-commutative operations in TAME.
//! Ordering is determined by a [`TreeEdgeOrder`] strategy:
//!
//! 1. [`NaturalTreeEdgeOrder`] will traverse in the same order that edges
//! were added to the graph.
//! This ordering is fine for most internal operations,
//! but is not suitable for [`tree_reconstruction`].
//!
//! 2. [`SourceCompatibleTreeEdgeOrder`] traverses edges in an order that
//! will produce a valid source file for [NIR XML](crate::nir).
//! For example,
//! templates require a header and body section,
//! where [`Asg`] permits mixing them.
//! This maintains natural order in all other cases.
use std::fmt::Display;
use std::{fmt::Display, marker::PhantomData};
use super::super::{object::DynObjectRel, Asg, Object, ObjectIndex};
use super::super::{
object::{self, DynObjectRel},
Asg,
};
use crate::{
asg::graph::object::ObjectTy,
parse::{self, Token},
span::{Span, UNKNOWN_SPAN},
};
@ -149,7 +172,9 @@ use crate::{
pub use crate::xir::flat::Depth;
#[cfg(doc)]
use super::super::object::ObjectRel;
use super::super::object::{ObjectIndex, ObjectRel};
pub use order::*;
/// Produce an iterator suitable for reconstructing a source tree based on
/// the contents of the [`Asg`].
@ -160,7 +185,9 @@ use super::super::object::ObjectRel;
///
/// See the [module-level documentation](super) for important information
/// about this traversal.
pub fn tree_reconstruction(asg: &Asg) -> TreePreOrderDfs {
pub fn tree_reconstruction(
asg: &Asg,
) -> TreePreOrderDfs<SourceCompatibleTreeEdgeOrder> {
TreePreOrderDfs::new(asg)
}
@ -170,11 +197,11 @@ pub fn tree_reconstruction(asg: &Asg) -> TreePreOrderDfs {
/// _it does not track visited nodes_,
/// relying instead on the ontology and recognition of cross edges to
/// produce the intended spanning tree.
/// An [`ObjectIndex`] that is the target of a cross edge will be output
/// An object that is the target of a cross edge will be output
/// more than once.
///
/// See [`tree_reconstruction`] for more information.
pub struct TreePreOrderDfs<'a> {
pub struct TreePreOrderDfs<'a, O: TreeEdgeOrder> {
/// Reference [`Asg`].
///
/// Holding a reference to the [`Asg`] allows us to serve conveniently
@ -189,6 +216,8 @@ pub struct TreePreOrderDfs<'a> {
///
/// The traversal ends once the stack becomes empty.
stack: Vec<(DynObjectRel, Depth)>,
_phantom: PhantomData<O>,
}
/// Initial size of the DFS stack for [`TreePreOrderDfs`].
@ -196,34 +225,40 @@ pub struct TreePreOrderDfs<'a> {
/// TODO: Derive a heuristic from our systems.
const TREE_INITIAL_STACK_SIZE: usize = 8;
impl<'a> TreePreOrderDfs<'a> {
impl<'a, O: TreeEdgeOrder> TreePreOrderDfs<'a, O> {
fn new(asg: &'a Asg) -> Self {
let span = UNKNOWN_SPAN;
let mut dfs = Self {
asg,
stack: Vec::with_capacity(TREE_INITIAL_STACK_SIZE),
_phantom: PhantomData,
};
let root = asg.root(span);
dfs.push_edges_of(root.widen(), Depth::root());
let root_rel = DynObjectRel::new(
root.rel_ty(),
root.rel_ty(),
root.widen(),
root.widen(),
None,
);
dfs.push_edges_of(&root_rel, Depth::root());
dfs
}
fn push_edges_of(&mut self, oi: ObjectIndex<Object>, depth: Depth) {
self.asg
.edges_dyn(oi)
.map(|rel| (rel, depth.child_depth()))
.collect_into(&mut self.stack);
fn push_edges_of(&mut self, rel: &DynObjectRel, depth: Depth) {
O::push_edges_of(self.asg, rel, depth, &mut self.stack)
}
}
impl<'a> Iterator for TreePreOrderDfs<'a> {
impl<'a, O: TreeEdgeOrder> Iterator for TreePreOrderDfs<'a, O> {
type Item = TreeWalkRel;
/// Produce the next [`ObjectIndex`] from the traversal in pre-order.
/// Produce the next [`TreeWalkRel`] from the traversal in pre-order.
///
/// An [`ObjectIndex`] may be emitted more than once;
/// An object may be emitted more than once;
/// see [`tree_reconstruction`] for more information.
///
/// Each item contains a corresponding [`Depth`],
@ -233,12 +268,17 @@ impl<'a> Iterator for TreePreOrderDfs<'a> {
/// This depth is the only way to derive the tree structure from this
/// iterator.
fn next(&mut self) -> Option<Self::Item> {
// Note that we pushed edges in the order that `Asg` provided,
// and now pop them,
// which causes us to visit the edges in reverse.
// Because of implementation details (Petgraph),
// this reversal ends up giving us the correct ordering.
let (rel, depth) = self.stack.pop()?;
// We want to output information about references to other trees,
// but we must not traverse into them.
if !rel.is_cross_edge() {
self.push_edges_of(*rel.target(), depth);
self.push_edges_of(&rel, depth);
}
Some(TreeWalkRel(rel, depth))
@ -283,5 +323,192 @@ impl Token for TreeWalkRel {
impl parse::Object for TreeWalkRel {}
mod order {
use super::*;
/// Emit edges in the same order that they were added to the graph.
///
/// Various parts of the system take care in what order edges are
/// added.
/// This ordering is important for operations that are not commutative.
pub struct NaturalTreeEdgeOrder;
/// Emit edges in as close to [`NaturalTreeEdgeOrder`] as possible,
/// sorting only where object ordering would otherwise be
/// syntactically or grammatically invalid for streaming source
/// generation.
///
/// Unless otherwise mentioned below,
/// ordering for objects will be the same as
/// [`NaturalTreeEdgeOrder`].
///
/// Template Headers
/// ----------------
/// For [NIR XML](crate::nir) sources for TAME,
/// templates require that parameters be placed in a header,
/// before all objects representing the body of the template.
/// This is necessary to disambiguate `<param>` nodes in sources,
/// even though no such ambiguity exists on the [`Asg`].
///
/// All metavariables representing template params will be hoisted into
/// the header,
/// immediately after any template description [`Doc`](object::Doc)
/// node.
/// This is a stable partial ordering:
/// the ordering of parameters relative to one-another will not change,
/// nor will the order of any objects in the body of the template.
/// See [`TplOrder`].
pub struct SourceCompatibleTreeEdgeOrder;
/// Order in which tree edges are emitted.
///
/// TAME is sensitive to edge ordering for non-commutative operations.
/// For source lk
pub trait TreeEdgeOrder {
/// Push the edges of `rel` onto the `target` stack.
///
/// The system will pop edges off of `target` to determine what edge
/// to traverse next.
/// This means that edges will be visited in an order that is the
/// reverse of the elements of `target`.
fn push_edges_of(
asg: &Asg,
rel: &DynObjectRel,
depth: Depth,
target: &mut Vec<(DynObjectRel, Depth)>,
);
}
impl TreeEdgeOrder for NaturalTreeEdgeOrder {
fn push_edges_of(
asg: &Asg,
rel: &DynObjectRel,
depth: Depth,
stack: &mut Vec<(DynObjectRel, Depth)>,
) {
let oi = *rel.target();
asg.edges_dyn(oi)
.map(|rel| (rel, depth.child_depth()))
.collect_into(stack);
}
}
/// Ordering of template children for [`SourceCompatibleTreeEdgeOrder`].
///
/// Since these edges are added to a _stack_,
/// larger values will be `pop`'d _before_ smaller ones,
/// as demonstrated by the variant order.
/// That is:
/// we sort in the reverse order that we will visit them.
///
/// ```text
/// ------> Sort direction
/// [ Body, Body, Param, Param, Desc ]
/// <------ visit direction
/// (pop from stack)
/// ```
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone)]
enum TplOrder {
/// Forcing the template description to come first mimics
/// the expected [`NaturalTreeEdgeOrder`].
///
/// We don't want to re-order things before it,
/// since we want to be able to stream source output,
/// e.g. `template/@desc` in XML.
TplDesc = 2,
/// Template parameters must appear in the template
/// "header",
/// which is all the nodes before the body that is to be
/// expanded on application.
Param = 1,
/// The body of the template is anything that is not part of
/// the header.
///
/// The body represents the contents of the template that
/// will be expanded in place of any template application.
Body = 0,
}
impl TreeEdgeOrder for SourceCompatibleTreeEdgeOrder {
fn push_edges_of(
asg: &Asg,
rel: &DynObjectRel,
depth: Depth,
stack: &mut Vec<(DynObjectRel, Depth)>,
) {
use ObjectTy::*;
// We start by adding edges to the stack in natural order,
// remembering the original stack offset so that we can sort
// just the portion that we added.
let offset = stack.len();
NaturalTreeEdgeOrder::push_edges_of(asg, rel, depth, stack);
match rel.target_ty() {
// Templates require partial ordering into a header and a body.
Tpl => {
// This represents the portion of the stack that we just
// contributed to via [`NaturalTreeEdgeOrder`] above.
let part = &mut stack[offset..];
// TODO: Ideally we'd have metadata on the edge itself
// about what type of object an `Ident` points to,
// so that we can use the faster `sort_by_key`.
// With that said,
// initial profiling on large packages with many
// template applications did not yield a
// significant difference between the two methods on
// system-level tests,
// and given the small number of template
// children,
// this consideration may be a micro-optimization.
// An unstable sort is avoided because we wish to
// retain natural ordering as much as possible.
//
// TODO: In practice,
// most of these are template _applications_ resulting
// from `tplshort` desugaring.
// At the time of writing,
// _all_ need sorting because `Ref` is output before
// the params.
// We could recognize them as template applications at
// some point and leave their order alone,
// but at the time of writing we have no imports,
// and so most refs are `Ident::Missing` in
// practice.
// Once template imports are taken care of,
// then _nearly every single `Tpl` in practice` will
// already be ordered and this will rarely have any
// reordering to do
// (just hoisting for interpolation in template
// definitions,
// which are not all that common relative to
// everything else).
part.sort_by_cached_key(|(child_rel, _)| {
if let Some(ident) =
child_rel.filter_into_target::<object::Ident>()
{
// This is the (comparatively) expensive lookup,
// requiring a small graph traversal.
match ident.definition::<object::Meta>(asg) {
Some(_) => TplOrder::Param,
None => TplOrder::Body,
}
} else if child_rel.target_ty() == Doc {
TplOrder::TplDesc
} else {
TplOrder::Body
}
});
}
// Leave natural (graph) ordering for everything else.
Root | Pkg | Ident | Expr | Meta | Doc => (),
}
}
}
}
#[cfg(test)]
mod test;

View File

@ -207,21 +207,21 @@ fn traverses_ontological_tree_tpl_apply() {
PkgStart(S1, SPair("/pkg".into(), S1)),
// The template that will be applied.
TplStart(S2),
BindIdent(id_tpl),
// This test is light for now,
// until we develop the ASG further.
TplEnd(S4),
// Apply the above template.
TplStart(S5),
RefIdent(ref_tpl),
MetaStart(S7),
BindIdent(id_param),
MetaLexeme(value_param),
MetaEnd(S10),
TplEndRef(S11), // notice the `Ref` at the end
BindIdent(id_tpl), // <-,
// |
// This test is light for now, // |
// until we develop the ASG further. // |
TplEnd(S4), // |
// |
// Apply the above template. // |
TplStart(S5), // |
RefIdent(ref_tpl), // |
// |
MetaStart(S7), // |
BindIdent(id_param), // |
MetaLexeme(value_param), // |
MetaEnd(S10), // |
TplEndRef(S11), // notice the `Ref` at the end --'
PkgEnd(S12),
];
@ -237,9 +237,13 @@ fn traverses_ontological_tree_tpl_apply() {
(d(Pkg, Ident, m(S1, S12), S3, None ), Depth(2)),
(d(Ident, Tpl, S3, m(S2, S4), None ), Depth(3)),
(d(Pkg, Tpl, m(S1, S12), m(S5, S11), None ), Depth(2)),
/*cross*/ (d(Tpl, Ident, m(S5, S11), S3, Some(S6)), Depth(3)),
(d(Tpl, Ident, m(S5, S11), S8, None ), Depth(3)),
(d(Ident, Meta, S8, m(S7, S10), None ), Depth(4)),
/*cross*/ (d(Tpl, Ident, m(S5, S11), S3, Some(S6)), Depth(3)),
// ^
// `- Note that the cross edge was moved to the bottom
// because all template params are moved into the
// template header for `SourceCompatibleTreeEdgeOrder`.
],
tree_reconstruction_report(toks),
);
@ -390,3 +394,88 @@ fn traverses_ontological_tree_complex_tpl_meta() {
tree_reconstruction_report(toks),
);
}
// TAME's grammar expects that template parameters be defined in a header,
// before the template body.
// This is important for disambiguating `<param`> in the sources,
// since they could otherwise refer to other types of parameters.
//
// TAMER generates metavariables during interpolation,
// causing params to be mixed with the body of the template;
// this is reflected in the natural ordering.
// But this would result in a semantically invalid source reconstruction,
// and so we must reorder edges during traversal such that the
// metavariables representing template parameters are visited _before_
// everything else.
#[test]
fn tpl_header_source_order() {
#[rustfmt::skip]
let toks = vec![
PkgStart(S1, spair("/pkg", S1)),
TplStart(S2),
BindIdent(spair("_tpl_", S3)),
// -- Above this line was setup -- //
MetaStart(S4),
BindIdent(spair("@param_before@", S5)),
MetaEnd(S6),
// Dangling (no Ident)
ExprStart(ExprOp::Sum, S7),
ExprEnd(S8),
MetaStart(S9),
BindIdent(spair("@param_after_a@", S10)),
MetaEnd(S11),
MetaStart(S12),
BindIdent(spair("@param_after_b@", S13)),
MetaEnd(S14),
// Reachable (with an Ident)
// (We want to be sure that we're not just hoisting all Idents
// without checking that they're actually Meta
// definitions).
ExprStart(ExprOp::Sum, S15),
BindIdent(spair("sum", S16)),
ExprEnd(S17),
TplEnd(S18),
PkgEnd(S19),
];
// We need more concise expressions for the below table of values.
let d = DynObjectRel::new;
let m = |a: Span, b: Span| a.merge(b).unwrap();
#[rustfmt::skip]
assert_eq!(
// A -|-> B | A span -|-> B span | espan | depth
vec![//-----|-------|-----------|------------|--------|-----------------
(d(Root, Pkg, SU, m(S1, S19), None ), Depth(1)),
(d(Pkg, Ident, m(S1, S19), S3, None ), Depth(2)),
(d(Ident, Tpl, S3, m(S2, S18), None ), Depth(3)),
(d(Tpl, Ident, m(S2, S18), S5, None ), Depth(4)),
(d(Ident, Meta, S5, m(S4, S6), None ), Depth(5)),
// ,-----------------------------------------------------------------------,
(d(Tpl, Ident, m(S2, S18), S10, None ), Depth(4)),
(d(Ident, Meta, S10, m(S9, S11), None ), Depth(5)),
(d(Tpl, Ident, m(S2, S18), S13, None ), Depth(4)),
(d(Ident, Meta, S13, m(S12, S14), None ), Depth(5)),
// '-----------------------------------------------------------------------'
(d(Tpl, Expr, m(S2, S18), m(S7, S8), None ), Depth(4)),
(d(Tpl, Ident, m(S2, S18), S16, None ), Depth(4)),
(d(Ident, Expr, S16, m(S15, S17), None ), Depth(5)),
],
// ^ The enclosed Ident->Meta pairs above have been hoisted out of
// the body and into the header of `Tpl`.
// This is a stable, partial ordering;
// elements do not change poisitions relative to one-another
// with the exception of hoisting.
// This means that all hoisted params retain their order relative
// to other params,
// and all objects in the body retain their positions relative
// to other objects in the body.
tree_reconstruction_report(toks),
);
}

View File

@ -165,6 +165,8 @@ impl ParseState for TplShortDesugar {
let tpl_name =
format!("_{}_", qname.local_name().lookup_str()).intern();
// TODO: This should be emitted _after_ params to save work
// for `crate::asg:graph::visit::ontree::SourceCompatibleTreeEdgeOrder`.
let name = SPair(tpl_name, span);
stack.push(Ref(name));

View File

@ -14,66 +14,60 @@
<template name="_interp-non-bind_"
desc="Interpolation in non-binding position">
<classify as="only" desc="@___dsgr_335@"/>
<param name="@___dsgr_335@"
desc="Generated from interpolated string `{@bar@}`">
<param-value name="@bar@" />
</param>
<classify as="prefixed" desc="@___dsgr_368@"/>
<param name="@___dsgr_368@"
desc="Generated from interpolated string `Prefix {@bar@}`">
<text>Prefix </text>
<param-value name="@bar@" />
</param>
<classify as="suffixed" desc="@___dsgr_3a3@"/>
<param name="@___dsgr_3a3@"
desc="Generated from interpolated string `{@bar@} suffix`">
<param-value name="@bar@" />
<text> suffix</text>
</param>
<classify as="both" desc="@___dsgr_3da@" />
<param name="@___dsgr_3da@"
desc="Generated from interpolated string `Prefix {@bar@} suffix`">
<text>Prefix </text>
<param-value name="@bar@" />
<text> suffix</text>
</param>
<classify as="only" desc="@___dsgr_335@"/>
<classify as="prefixed" desc="@___dsgr_368@"/>
<classify as="suffixed" desc="@___dsgr_3a3@"/>
<classify as="both" desc="@___dsgr_3da@" />
</template>
<template name="_with-abstract-ident_"
desc="Metavariable interpolation in binding position">
<param name="@___dsgr_4a8@"
desc="Generated from interpolated string `{@as@}`">
<param-value name="@as@" />
</param>
<classify as="@___dsgr_4a8@" />
<param name="@___dsgr_4ca@"
desc="Generated from interpolated string `prefix-{@as@}`">
<text>prefix-</text>
<param-value name="@as@" />
</param>
<classify as="@___dsgr_4ca@" />
<param name="@___dsgr_4f4@"
desc="Generated from interpolated string `{@as@}-suffix`">
<param-value name="@as@" />
<text>-suffix</text>
</param>
<classify as="@___dsgr_4f4@" />
<param name="@___dsgr_51e@"
desc="Generated from interpolated string `prefix-{@as@}-suffix`">
<text>prefix-</text>
<param-value name="@as@" />
<text>-suffix</text>
</param>
<classify as="@___dsgr_4a8@" />
<classify as="@___dsgr_4ca@" />
<classify as="@___dsgr_4f4@" />
<classify as="@___dsgr_51e@" />
</template>
</package>