tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708
main
Mike Gerwitz 2023-02-07 14:59:36 -05:00
parent 4afc8c22e6
commit e6f736298b
7 changed files with 421 additions and 11 deletions

View File

@ -42,6 +42,7 @@ use petgraph::{
use std::{fmt::Debug, result::Result};
pub mod object;
pub mod visit;
use object::{ObjectContainer, ObjectRelTo};
@ -492,7 +493,7 @@ impl Asg {
}
/// Create an iterator over the [`ObjectIndex`]es of the outgoing edges
/// of `self`.
/// of `oi`.
///
/// This is a generic method that simply returns an [`ObjectKind`] of
/// [`Object`] for each [`ObjectIndex`];
@ -515,16 +516,12 @@ impl Asg {
&'a self,
oi: ObjectIndex<O>,
) -> impl Iterator<Item = O::Rel> + 'a {
self.graph.edges(oi.into()).map(move |edge| {
O::new_rel_dyn(
edge.weight().1,
ObjectIndex::<Object>::new(edge.target(), oi),
)
.diagnostic_unwrap(|| {
self.edges_dyn(oi.widen()).map(move |(rel_ty, oi_b)| {
O::new_rel_dyn(rel_ty, oi_b).diagnostic_unwrap(|| {
vec![
oi.internal_error(format!(
"encountered invalid outgoing edge type {:?}",
edge.weight()
rel_ty,
)),
oi.help(
"this means that Asg did not enforce edge invariants \
@ -535,6 +532,28 @@ impl Asg {
})
}
/// Iterate over the outgoing edges of `oi` in a dynamic context,
/// yielding each edge's [`ObjectRelTy`] paired with the
/// [`ObjectIndex`] of the edge's target.
///
/// _Use this method only when the types of objects cannot be statically
/// known,_
/// which is generally the case only for code paths that operate on
/// significant portions of
/// (or the entirety of)
/// the graph without distinction.
/// See [`Self::edges`] for more information.
///
/// Each yielded target index is constructed by passing `oi` as the second
/// argument to [`ObjectIndex::new`].
fn edges_dyn<'a>(
    &'a self,
    oi: ObjectIndex<Object>,
) -> impl Iterator<Item = (ObjectRelTy, ObjectIndex<Object>)> + 'a {
    self.graph.edges(oi.into()).map(move |edge| {
        let rel_ty = edge.weight().1;
        let oi_target = ObjectIndex::<Object>::new(edge.target(), oi);

        (rel_ty, oi_target)
    })
}
/// Incoming edges to `oi` filtered by [`ObjectKind`] `OI`.
///
/// The rationale behind the filtering is that objects ought to focus

View File

@ -168,10 +168,57 @@ pub enum ObjectRelTy {
Expr,
}
/// Determine whether an edge from `from_ty` to `to_ty` is a cross edge.
///
/// This function is intended for _dynamic_ edge types,
/// which cannot be determined statically;
/// it should be used only in situations where the potential edge types
/// are unbounded,
/// e.g. on an iterator yielding generalized [`ObjectIndex`]es during
/// a full graph traversal.
/// You should otherwise use [`ObjectRel::is_cross_edge`].
///
/// The [`ObjectIndex`] `oi_to` represents the target object.
/// It is passed to `new_rel_dyn` of the source object type in order to
/// construct the relationship on which
/// [`ObjectRel::is_cross_edge`] is then invoked.
///
/// If `new_rel_dyn` yields [`None`] for the given combination of
/// `from_ty` and `to_ty`,
/// then the edge is reported as _not_ being a cross edge.
///
/// For more information on cross edges,
/// see [`ObjectRel::is_cross_edge`].
pub(super) fn is_dyn_cross_edge(
from_ty: ObjectRelTy,
to_ty: ObjectRelTy,
oi_to: ObjectIndex<Object>,
) -> bool {
/// Generate cross-edge mappings between ObjectRelTy and the associated
/// ObjectRel.
///
/// This is intended to both reduce boilerplate and to eliminate typos.
///
/// Each identifier provided to the macro is used both as the name of an
/// `ObjectRelTy` variant and as the name of the type on which
/// `new_rel_dyn` is invoked.
///
/// This mess will be optimized away,
/// but exists so that cross edge definitions can exist alongside
/// other relationship definitions for each individual object type,
/// rather than having to maintain them in aggregate here.
macro_rules! ty_cross_edge {
($($ty:ident),*) => {
match from_ty {
$(
ObjectRelTy::$ty => {
$ty::new_rel_dyn(to_ty, oi_to).is_some_and(
|rel| rel.is_cross_edge()
)
},
)*
}
}
}
// The generated `match` has no wildcard arm,
// so every `ObjectRelTy` variant must be listed here.
ty_cross_edge!(Root, Pkg, Ident, Expr)
}
impl Display for Object {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Root(_) => write!(f, "root ASG node"),
Self::Root(root) => Display::fmt(root, f),
Self::Pkg(pkg) => Display::fmt(pkg, f),
Self::Ident(ident) => Display::fmt(ident, f),
Self::Expr(expr) => Display::fmt(expr, f),
@ -601,6 +648,18 @@ impl<O: ObjectKind> ObjectIndex<O> {
asg.root(self.span()).add_edge_to(asg, self);
self
}
/// Generalize this index from a specific [`ObjectKind`] `O` into an
/// index of [`Object`].
///
/// The underlying index and span are carried over unchanged;
/// only the type parameter differs.
/// Such generalization is useful in dynamic contexts,
/// but the discarded type information must later be re-checked and
/// verified.
pub fn widen(self) -> ObjectIndex<Object> {
    // The third field carries only the type parameter `O`,
    // which is precisely what is being discarded.
    let Self(index, span, _) = self;

    ObjectIndex::new(index, span)
}
}
impl ObjectIndex<Object> {
@ -717,6 +776,12 @@ pub trait ObjectRelatable: ObjectKind {
) -> Option<Self::Rel>;
}
impl<O> ObjectIndex<O>
where
    O: ObjectKind + ObjectRelatable,
{
    /// The [`ObjectRelTy`] associated with the [`ObjectKind`] `O` of this
    /// index,
    /// as reported by `O::rel_ty`.
    ///
    /// The result depends only on the type `O`;
    /// the index itself is not inspected.
    pub fn rel_ty(&self) -> ObjectRelTy {
        O::rel_ty()
    }
}
/// A relationship to another [`ObjectKind`].
///
/// This trait is intended to be implemented by enums that represent the
@ -750,7 +815,7 @@ pub trait ObjectRelatable: ObjectKind {
/// adhere to the prescribed ontology,
/// provided that invariants are properly upheld by the
/// [`asg`](crate::asg) module.
pub trait ObjectRel<OA: ObjectKind>: Sized {
pub trait ObjectRel<OA: ObjectKind + ObjectRelatable>: Sized {
/// Attempt to narrow into the [`ObjectKind`] `OB`.
///
/// Unlike [`Object`] nodes,

View File

@ -0,0 +1,193 @@
// ASG traversals
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Graph traversals.
//!
//! The traversal [`tree_reconstruction`] should be used if the intent is to
//! reconstruct a source representation of the program from the current
//! state of [`Asg`].
use super::{
object::{is_dyn_cross_edge, ObjectRelTy},
Asg, Object, ObjectIndex,
};
use crate::span::UNKNOWN_SPAN;
#[cfg(doc)]
use super::object::ObjectRel;
/// Produce an iterator suitable for reconstructing a source tree based on
/// the contents of the [`Asg`].
///
/// The implementation of this traversal is exceedingly simple because of
/// its reliance on important graph invariants,
/// but it embodies a number of important and subtle properties.
///
/// Traversal Properties
/// ====================
/// This is a [depth-first search][w-depth-first-search]
/// visiting all nodes that are _reachable_ from the graph root
/// (see [`Asg::root`]).
/// [`ObjectIndex`]es are emitted in pre-order during the traversal,
/// and may be emitted more than once if
/// (a) they are the destination of cross edges or
/// (b) they are shared between trees
/// (most likely due to compiler optimizations).
///
/// The tree is defined by the graph ontology,
/// not an arbitrary graph traversal.
/// This traversal is initialized by pushing each target [`ObjectIndex`] of
/// the ASG root
/// (see [`Asg::root`])
/// onto the stack.
/// Each iteration pops a single node off of the stack and visits it,
/// until no more nodes remain on the stack,
/// after which the traversal completes and the iterator is exhausted.
/// If the node was reached via a tree edge,
/// its edge targets are pushed onto the stack.
/// If the node was instead reached via a cross edge,
/// its edge targets are _not_ added to the stack for later traversal.
///
/// Targets of a cross edge
/// (see [`ObjectRel::is_cross_edge`])
/// will be emitted multiple times:
///
/// 1. The target of a cross edge is emitted each time a cross edge is
/// followed; and
/// 2. When the node is encountered on a tree edge.
///
/// The traversal relies on the ontology to enforce a tree-like structure
/// and to properly define cross edges via [`ObjectRel::is_cross_edge`].
/// A _tree edge_ is an edge that is not a cross edge.
/// Consequently,
/// if a cross edge is replaced by a tree edge,
/// then this traversal interprets that edge as part of _multiple_ trees,
/// effectively inlining it as if the user had entered the exact same
/// code in both locations.
/// You should choose carefully where in the lowering pipeline you wish
/// for this traversal to take place so that the tree reconstruction has
/// the desired properties.
///
/// Because the graph is expected to be a DAG
/// (directed acyclic graph),
/// this traversal _does not track visited nodes_;
/// this ensures that nodes shared by trees due to optimizations like
/// common subexpression elimination will have proper trees
/// reconstructed.
/// If there are exceptional subgraphs where cycles do appear,
/// this traversal's implementation must be modified to take them into
/// account,
/// otherwise it will iterate indefinitely.
///
/// Edges are visited in the same order that they were added to the graph,
/// so the tree reconstruction should match closely the order of the
/// source file.
/// However,
/// note that compiler passes,
/// if present,
/// may modify the graph beyond recognition,
/// though they should retain ordering where it is important.
///
/// For more information,
/// see [`ObjectRel::is_cross_edge`].
///
/// [w-depth-first-search]: https://en.wikipedia.org/wiki/Depth-first_search
pub fn tree_reconstruction(asg: &Asg) -> TreePreOrderDfs {
TreePreOrderDfs::new(asg)
}
/// Pre-order depth-first search (DFS) using the ontological tree.
///
/// This DFS has an interesting property:
/// _it does not track visited nodes_,
/// relying instead on the ontology and recognition of cross edges to
/// produce the intended spanning tree.
/// An [`ObjectIndex`] that is the target of a cross edge will be output
/// more than once.
///
/// See [`tree_reconstruction`] for more information.
pub struct TreePreOrderDfs<'a> {
/// Reference [`Asg`].
///
/// Holding a reference to the [`Asg`] allows us to serve conveniently
/// as an iterator.
asg: &'a Asg,
/// DFS stack.
///
/// The tuple represents the source and target edge [`ObjectRelTy`]s
/// respectively,
/// along with the [`ObjectIndex`] to be visited.
/// As each node is visited,
/// its edges are pushed onto the stack,
/// unless the node was reached via a cross edge.
/// Each iteration pops a tuple off the stack and visits that node.
///
/// The traversal ends once the stack becomes empty.
stack: Vec<(ObjectRelTy, ObjectRelTy, ObjectIndex<Object>)>,
}
/// Initial capacity of the DFS stack for [`TreePreOrderDfs`],
/// measured in stack entries.
///
/// This determines only the size of the initial allocation;
/// the stack is a [`Vec`] and will grow beyond this as needed.
///
/// TODO: Derive a heuristic from our systems.
const TREE_INITIAL_STACK_SIZE: usize = 8;
impl<'a> TreePreOrderDfs<'a> {
    /// Begin a new traversal of `asg`,
    /// seeding the stack with the edges of the graph root.
    ///
    /// The root itself is never placed on the stack,
    /// and so it will not be emitted by the iterator.
    fn new(asg: &'a Asg) -> Self {
        let mut dfs = Self {
            asg,
            stack: Vec::with_capacity(TREE_INITIAL_STACK_SIZE),
        };

        // The root is requested with an unknown span.
        let oi_root = asg.root(UNKNOWN_SPAN);
        dfs.push_edges_of(oi_root.rel_ty(), oi_root.widen());

        dfs
    }

    /// Push each outgoing edge of `oi` onto the stack.
    ///
    /// `from_ty` is recorded alongside each edge as its source
    /// [`ObjectRelTy`] so that the edge can later be classified as a tree
    /// or cross edge when it is popped.
    fn push_edges_of(&mut self, from_ty: ObjectRelTy, oi: ObjectIndex<Object>) {
        let edges = self
            .asg
            .edges_dyn(oi)
            .map(|(to_ty, oi_to)| (from_ty, to_ty, oi_to));

        self.stack.extend(edges);
    }
}
impl<'a> Iterator for TreePreOrderDfs<'a> {
    type Item = ObjectIndex<Object>;

    /// Yield the next [`ObjectIndex`] of the pre-order traversal,
    /// or [`None`] once the stack has been exhausted.
    ///
    /// The same [`ObjectIndex`] may be yielded more than once;
    /// see [`tree_reconstruction`] for more information.
    fn next(&mut self) -> Option<Self::Item> {
        self.stack.pop().map(|(from_ty, to_ty, oi_to)| {
            // References to other trees are still reported to the caller,
            // but only tree edges are descended into.
            let crosses = is_dyn_cross_edge(from_ty, to_ty, oi_to);
            if !crosses {
                self.push_edges_of(to_ty, oi_to);
            }

            oi_to
        })
    }
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,103 @@
// ASG IR
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::*;
use crate::{
asg::{
air::{Air, AirAggregate},
ExprOp,
},
parse::{util::SPair, ParseState},
span::dummy::*,
};
use std::fmt::Debug;
use Air::*;
/// Construct an [`Asg`] by parsing the AIR tokens `toks` using
/// [`AirAggregate`].
///
/// # Panics
/// Panics if any token fails to parse or if the parser cannot be
/// finalized.
fn asg_from_toks<I>(toks: I) -> Asg
where
    I: IntoIterator<Item = Air>,
    I::IntoIter: Debug,
{
    let mut parser = AirAggregate::parse(toks.into_iter());

    // Drive the parser over the tokens;
    // every token must be accepted.
    assert!(parser.all(|x| x.is_ok()));

    parser.finalize().unwrap().into_context()
}
// Note that this is an integration test beginning at AIR.
//
// We will construct a test ASG using the same subsystem as the user;
// we want to be sure that the traversal works as we expect it to in
// practice,
// since the system is fairly complex and failures are more likely
// to occur at integration points.
#[test]
fn traverses_ontological_tree() {
let id_a = SPair("expr_a".into(), S3);
let id_b = SPair("expr_b".into(), S9);
let toks = vec![
// <package>
PkgOpen(S1),
// <expr>
ExprOpen(ExprOp::Sum, S2),
ExprIdent(id_a),
// <expr>
ExprOpen(ExprOp::Sum, S4),
ExprClose(S5),
// </expr>
ExprRef(SPair(id_b.symbol(), S6)),
ExprClose(S7),
// </expr>
// <expr>
ExprOpen(ExprOp::Sum, S8),
ExprIdent(id_b),
ExprClose(S10),
// </expr>
// </package>
PkgClose(S11),
];
let asg = asg_from_toks(toks);
// From the above graph,
// we're going to traverse in such a way as to reconstruct the source
// tree.
let sut = tree_reconstruction(&asg);
// The expected spans are listed in the order in which the traversal is
// expected to emit the corresponding objects.
assert_eq!(
vec![
S1.merge(S11).unwrap(), // Pkg
S3, // Ident (id_a)
S2.merge(S7).unwrap(), // Expr
S4.merge(S5).unwrap(), // Expr
S9, // Ident (ExprRef)¹
S9, // Ident (id_b)
S8.merge(S10).unwrap(), // Expr
],
sut.map(ObjectIndex::cresolve(&asg))
.map(Object::span)
.collect::<Vec<_>>()
);
// ¹ We have lost the reference context (S6),
// which is probably the more appropriate one to be output here,
// given that this is a source reconstruction and ought to be mapped
// back to what the user entered at the equivalent point in the tree.
// TODO: Figure out how to best expose this,
// which probably also involves the introduction of edge spans.
}

View File

@ -78,7 +78,7 @@ pub use graph::{
},
Object, ObjectIndex, ObjectKind,
},
Asg, AsgResult, IndexType,
visit, Asg, AsgResult, IndexType,
};
/// Default concrete ASG implementation.

View File

@ -64,6 +64,11 @@
#![feature(nonzero_ops)]
// Enabled for qualified paths in `matches!`.
#![feature(more_qualified_paths)]
// Collecting iterators into existing objects.
// Can be done manually in a more verbose way.
#![feature(iter_collect_into)]
// Convenience; can be done more verbosely.
#![feature(is_some_and)]
// Used for const params like `&'static str` in `crate::fmt`.
// If this is not stabilized,
// then we can do without by changing the abstraction;
@ -143,6 +148,19 @@
// significantly larger footprint than this form,
// so this lint does _not_ suggest a suitable replacement.
#![allow(clippy::obfuscated_if_else)]
// Sometimes being explicit about lifetimes,
// even if it's unnecessary,
// can help a human to understand what bounds are in play,
// which are hidden when they're elided.
// Sometimes doing such a thing is a bad idea and introduces complexity.
// We need to use our judgment.
// Further,
// Clippy sometimes recommends eliding named bounds which does not
// compile,
// but then accepts introducing an anonymous lifetime bound (`'_`),
// which can be inscrutable if you are not very familiar with Rust's
// borrow checker.
#![allow(clippy::needless_lifetimes)]
pub mod global;

View File

@ -227,6 +227,8 @@ pub struct Span {
ctx: Context,
}
assert_eq_size!(Span, Option<Span>);
impl Span {
/// Create a new span from its constituent parts.
pub fn new<C: Into<Context>>(
@ -716,6 +718,16 @@ pub mod dummy {
pub const S8: Span = S0.offset_add(8).unwrap();
pub const S9: Span = S0.offset_add(9).unwrap();
pub const S10: Span = S0.offset_add(10).unwrap();
pub const S11: Span = S0.offset_add(11).unwrap();
pub const S12: Span = S0.offset_add(12).unwrap();
pub const S13: Span = S0.offset_add(13).unwrap();
pub const S14: Span = S0.offset_add(14).unwrap();
pub const S15: Span = S0.offset_add(15).unwrap();
pub const S16: Span = S0.offset_add(16).unwrap();
pub const S17: Span = S0.offset_add(17).unwrap();
pub const S18: Span = S0.offset_add(18).unwrap();
pub const S19: Span = S0.offset_add(19).unwrap();
pub const S20: Span = S0.offset_add(20).unwrap();
}
#[cfg(test)]