tamer: asg::graph::visit::topo: Cycle detection

This introduces cycle detection, but it does not yet filter ontologically
permitted cycles, which will be needed prior to utilizing this in `tameld`.

There's a considerable amount of documentation here.  While the
implementation is fairly simple, there are important algorithmic decisions,
both in the DFS construction and the derivation of the cycle path from data
that already exists.

This also supports recovery (by ignoring cycles), which can then be utilized
to find more cycles and other errors in the system.

DEV-13162
main
Mike Gerwitz 2023-04-27 13:01:54 -04:00
parent e3094e0bad
commit c2c1434afe
6 changed files with 519 additions and 34 deletions

View File

@ -31,7 +31,7 @@ use crate::{
span::Span,
};
use super::TransitionError;
use super::{visit::Cycle, TransitionError};
/// An error from an ASG operation.
///
@ -129,6 +129,13 @@ pub enum AsgError {
/// The spans represent the expression and the documentation text
/// respectively.
InvalidDocContextExpr(Span, Span),
/// A circular dependency was found where it is not permitted.
///
/// A cycle almost always means that computing the value of an object
/// depends on first having computed itself,
/// which is not possible.
UnsupportedCycle(Cycle),
}
impl Display for AsgError {
@ -171,6 +178,9 @@ impl Display for AsgError {
InvalidDocContextExpr(_, _) => {
write!(f, "document text is not permitted within expressions")
}
UnsupportedCycle(cycle) => {
write!(f, "circular dependency: {cycle}")
}
}
}
}
@ -290,6 +300,34 @@ impl Diagnostic for AsgError {
likely be lifted in the future",
),
],
UnsupportedCycle(cycle) => {
// The cycle description clearly describes the cycle,
// but in neutral terms,
// since cycles may not necessarily be errors.
let mut desc = cycle.describe();
// (this will always be non-empty)
if let Some(obj) = cycle.path_rev().last() {
// But in this context,
// this _is_ a problem,
// so make clear why we're pointing this out.
// TODO: Include an identifier name,
// once `Cycle` supports it.
desc.extend(
[
obj.help(
"the value cannot be computed because its",
),
obj.help(
" definition requires first computing itself.",
),
]
.into_iter(),
);
}
desc
}
}
}
}

View File

@ -492,6 +492,9 @@ where
/// this is intended to be used to provide diagnostic information in the
/// event that the object somehow becomes unavailable for later
/// operations.
/// [`Self::resolve_span`] may be used to replace the inner [`Span`] with
/// that of the object that [`Self`] represents,
/// which is useful in a different diagnostic context.
///
/// _The span is not accounted for in [`PartialEq`] or [`Hash`]_,
/// since it represents the context in which the [`ObjectIndex`] was
@ -625,6 +628,17 @@ impl<O: ObjectKind> ObjectIndex<O> {
move |oi| oi.resolve(asg)
}
/// Replace our associated [`Span`] with the span of the resolved
/// [`Object`].
///
/// This is useful to utilize [`ObjectIndex`] in a diagnostic context
/// without having to allocate space for additional metadata.
pub fn resolve_span(self, asg: &Asg) -> ObjectIndexResolvedSpan<O> {
ObjectIndexResolvedSpan(
self.overwrite(self.widen().resolve(asg).span()),
)
}
/// Resolve the identifier and map over the resulting [`Object`]
/// narrowed to [`ObjectKind`] `O`,
/// replacing the object on the given [`Asg`].
@ -875,6 +889,47 @@ impl<O: ObjectKind> From<ObjectIndex<O>> for Span {
}
}
/// An [`ObjectKind`] whose associated [`Span`] has been resolved.
///
/// This exists simply to provide proof via the type system that resolution
/// occurred.
///
/// See [`ObjectIndex::resolve_span`].
#[derive(Debug, PartialEq, Eq)]
pub struct ObjectIndexResolvedSpan<O: ObjectKind>(ObjectIndex<O>);
impl<O: ObjectKind> ObjectIndexResolvedSpan<O> {
pub fn oi(&self) -> ObjectIndex<O> {
(*self).into()
}
pub fn span(&self) -> Span {
self.oi().span()
}
}
impl<O: ObjectKind> From<ObjectIndexResolvedSpan<O>> for ObjectIndex<O> {
fn from(value: ObjectIndexResolvedSpan<O>) -> Self {
match value {
ObjectIndexResolvedSpan(oi) => oi,
}
}
}
impl<O: ObjectKind> From<ObjectIndexResolvedSpan<O>> for Span {
fn from(value: ObjectIndexResolvedSpan<O>) -> Self {
ObjectIndex::from(value).span()
}
}
impl<O: ObjectKind> Clone for ObjectIndexResolvedSpan<O> {
fn clone(&self) -> Self {
Self(self.0)
}
}
impl<O: ObjectKind> Copy for ObjectIndexResolvedSpan<O> {}
/// A container for an [`Object`] allowing for owned borrowing of data.
///
/// The purpose of allowing this owned borrowing is to permit a functional

View File

@ -27,4 +27,4 @@ mod ontree;
mod topo;
pub use ontree::{tree_reconstruction, Depth, TreePreOrderDfs, TreeWalkRel};
pub use topo::{topo_sort, TopoPostOrderDfs};
pub use topo::{topo_sort, Cycle, TopoPostOrderDfs};

View File

@ -42,8 +42,15 @@
//! to a particular object and cannot be used in that way.
use super::super::{Asg, ObjectIndex};
use crate::asg::{graph::object::DynObjectRel, AsgError, Object};
use crate::{
asg::{graph::object::DynObjectRel, Object, ObjectIndexResolvedSpan},
diagnose::{Annotate, AnnotatedSpan, Diagnostic},
};
use fixedbitset::FixedBitSet;
use std::{error::Error, fmt::Display, iter::once};
#[cfg(doc)]
use crate::span::Span;
pub fn topo_sort(
asg: &Asg,
@ -54,7 +61,7 @@ pub fn topo_sort(
/// Topological sort implemented as a post-order depth-first search (DFS).
///
/// See the [module-level documentation](super) for important information
/// See the [module-level documentation](self) for important information
/// about this traversal.
pub struct TopoPostOrderDfs<'a> {
/// Reference [`Asg`].
@ -69,10 +76,22 @@ pub struct TopoPostOrderDfs<'a> {
/// its relationships (edge targets) are pushed onto the stack.
/// Each iterator pops a relationship off the stack and visits it.
///
/// The inner [`Result`] serves as a cycle flag set by
/// [`Self::flag_if_cycle`].
/// Computing the proper [`Cycle`] error before placing it on the stack
/// would not only bloat the size of each element of this stack,
/// but also use unnecessary memory on the heap.
/// The proper [`Cycle`] error will be computed when this element is
/// retrieved by [`Self::next_oi`].
///
/// _This may contain duplicate [`ObjectIndex`]es even if the graph
/// contains no cycles;_
/// see [`Self::push_neighbors`] for an explanation.
///
/// The traversal ends once the stack becomes empty.
/// It is expected the stack is initialized with at least one initial
/// object prior to beginning the traversal.
stack: Vec<ObjectIndex<Object>>,
stack: Vec<Result<ObjectIndex<Object>, ObjectIndex<Object>>>,
/// Objects that have already been added to [`Self::stack`].
///
@ -118,7 +137,7 @@ impl<'a> TopoPostOrderDfs<'a> {
let set_cap = asg.object_count();
let mut stack = Vec::with_capacity(INIT_STACK_CAP);
init.collect_into(&mut stack);
init.map(Ok).collect_into(&mut stack);
Self {
asg,
@ -127,43 +146,339 @@ impl<'a> TopoPostOrderDfs<'a> {
finished: FixedBitSet::with_capacity(set_cap),
}
}
/// Push the neighbors of the given [`ObjectIndex`] onto [`Self::stack`]
/// for later processing.
///
/// Placing neighbors on the stack allows us to yield elements from the
/// iterator without having to keep track of where we are on the graph
/// for each node in the path.
///
/// When visiting a node for the first time,
/// its neighbors
/// (objects to which `src_oi` has an edge)
/// are pushed onto [`Self::stack`].
/// It is expected that `src_oi` is left on the stack,
/// ensuring that its neighbors are processed before `src_oi` is,
/// leading to a post-order traversal.
///
/// Objects that have already been emitted will _not_ be pushed onto the
/// stack;
/// this determination is made by consulting [`Self::finished`].
///
/// Each object that is pushed onto the stack will be checked by
/// [`Self::flag_if_cycle`];
/// see that function for more information.
/// It is important that each cycle be flagged individually,
/// rather than returning an error from this function,
/// otherwise only one cycle per object would be found.
///
/// Duplicate Stack Entries Without Cycles
/// ======================================
/// [`Self::stack`] may contain duplicate [`ObjectIndex`]es even if
/// there is no cycle.
///
/// The reason for this is that a cycle only occurs when an
/// [`ObjectIndex`] is part of the path currently being visited.
/// But [`Self::stack`] contains objects that have _not yet been visited_;
/// they've been placed onto the stack by this method to be visited at
/// a future point.
///
/// Consider this graph:
///
/// ```text
/// (A) -> (B) -> (D)
/// '---> (C) <---'
/// ```
///
/// A traversal might yield this stack if `C` is visited before `B`:
///
/// ```text
/// [A] // root
/// [A, C, B] // self.push_neighbors(A)
/// [A, C, B, D] // self.push_neighbors(B)
/// [A, C, B, D, C] // self.push_neighbors(D)
/// ```
///
/// Since `C` does not contain an edge _to_ any previous object,
/// there is no cycle.
///
/// For this reason,
/// it is important for the implementation to check [`Self::finished`]
/// when removing objects from the stack to ensure that they have not
/// already been emitted.
fn push_neighbors(&mut self, src_oi: ObjectIndex<Object>) {
self.asg
.edges_dyn(src_oi)
.map(|dyn_oi| *dyn_oi.target())
.filter(|&oi| !self.finished.contains(oi.into()))
.map(|oi| Self::flag_if_cycle(&self.visited, oi))
.collect_into(&mut self.stack);
}
/// Determine if the provided [`ObjectIndex`] would introduce a cycle if
/// appended to the current path and flag it if so.
///
/// This should be called only after having checked [`Self::finished`],
/// which means that a node is _not_ in the path because it has
/// already been emitted.
///
/// With [`Self::finished`] having been ruled out,
/// this uses [`Self::visited`] to determine if a node must be part of
/// the active path of the DFS.
/// If so,
/// then introducing it again would produce a cycle.
///
/// We use [`Result`] where `E` is [`ObjectIndex`] to simply flag the
/// object as containing a cycle;
/// this allows us to defer computation of the cycle and allocation
/// of memory for that path until we actually visit the node on
/// [`Self::stack`].
/// This allows the element size of [`Self::stack`] to remain small.
///
/// See [`Self::find_cycle_path`] for the actual cycle computation that
/// will eventually be performed.
fn flag_if_cycle(
visited: &FixedBitSet,
oi: ObjectIndex<Object>,
) -> Result<ObjectIndex<Object>, ObjectIndex<Object>> {
if visited.contains(oi.into()) {
Err(oi)
} else {
Ok(oi)
}
}
/// Attempt to retrieve the next [`ObjectIndex`] from the stack for
/// processing,
/// leaving it on the stack.
///
/// If the object atop of the stack has been flagged as a cycle by
/// [`Self::flag_if_cycle`],
/// then the actual path associated with the cycle will be computed
/// by [`Self::find_cycle_path`] and an a [`Cycle`] returned.
///
/// See also [`Self::pop_next_oi`].
fn next_oi(&self) -> Option<Result<ObjectIndex<Object>, Cycle>> {
self.stack
.last()
.map(|result| result.map_err(|oi| self.find_cycle_path(oi)))
}
/// Remove an item from [`Self::stack`].
///
/// A better API for the future would take ownership over the stack and
/// know for certain that the element being removed is the element
/// previously returned.
///
/// See also [`Self::next_oi`].
fn pop_next_oi(&mut self) {
self.stack.pop();
}
/// Knowing that the provided [`ObjectIndex`] would produce a cycle if
/// added to the current path,
/// calculate the path representing the cycle.
///
/// This is a linear-time (`O(n)`) operation that performs a new heap
/// allocation.
/// Since cycles are an error case,
/// it is expected that they will not often occur and so the DFS
/// algorithm is optimized for the most common case;
/// it is not worth computing the path during the course of the
/// search since that path would almost always be discarded.
///
/// Deriving a path relies on understanding that:
///
/// 1. An [`ObjectIndex`] in [`Self::stack`] is either awaiting
/// processing or is _currently_ being processed.
/// This means that it contains the path,
/// but it also contains neighbors of objects in the path.
/// We must filter out those neighbors.
///
/// 2. The [`Result`] in [`Self::stack`] indicates whether the object
/// causes a cycle.
/// A previous object in the path must therefore be [`Ok`],
/// otherwise it would not have been traversed,
/// and so we must filter all [`Err`]s.
/// In doing so,
/// we also filter out `next` at the top of the stack,
/// and so _this function works correctly regardless of whether
/// `next` has already been `pop`'d from the stack_.
///
/// 3. [`Self::visited`] is set just before neighbors of an object are
/// pushed onto [`Self::stack`].
/// Therefore,
/// only objects marked as visited are part of the active path,
/// and so to discover that path we need only filter out
/// non-visited objects.
///
/// 4. [`Self::stack`] contains a path from a provided root.
/// We want to cut off the path at the beginning of the cycle.
/// The easiest way to do this is to iterate through the stack in
/// reverse,
/// stopping as soon as we encounter an [`ObjectIndex`]
/// matching `next`.
/// This has the effect of producing a cycle path in post-order,
/// which is consistent with the ordering of [`Self`]'s
/// traversal.
///
/// 5. The [`ObjectIndex`]es sourced from the [`Asg`] do not contain
/// the spans of the target objects.
/// Cycles will almost certainly result in diagnostic messages,
/// which require accurate spans,
/// and so we must resolve the [`ObjectIndex`] to retrieve the
/// target [`Span`].
///
/// The path produced will therefore be reversed,
/// with `next` as the last element.
/// `next` will _not_ be duplicated as the first element,
/// which means that if you were to repeat the returned path
/// indefinitely end-to-end
/// (e.g. using [`Iterator::cycle`]),
/// you would have precisely this cycle.
///
/// With all of that said,
/// the implementation is fairly straightforward and concise.
fn find_cycle_path(&self, next: ObjectIndex<Object>) -> Cycle {
let mut path = self
.stack
.iter()
.rev()
.copied()
.filter_map(Result::ok)
.take_while(|&oi| oi != next)
.filter(|&oi| self.visited.contains(oi.into()))
.map(|oi| oi.resolve_span(self.asg))
.collect::<Vec<_>>();
// We stopped _at_ `next`,
// so we need to manually add it to the path.
path.push(next.resolve_span(self.asg));
Cycle { path }
}
}
impl<'a> Iterator for TopoPostOrderDfs<'a> {
type Item = Result<ObjectIndex<Object>, AsgError>;
type Item = Result<ObjectIndex<Object>, Cycle>;
fn next(&mut self) -> Option<Self::Item> {
// Rust doesn't have guaranteed TCO as of 2023-04
loop {
let next = *self.stack.last()?;
match self.next_oi()? {
Ok(next) => {
if self.visited.put(next.into()) {
self.pop_next_oi();
if self.visited.put(next.into()) {
self.stack.pop(); // next
// See `Self::push_neighbors` for explanation.
if !self.finished.put(next.into()) {
break Some(Ok(next));
}
} else {
self.push_neighbors(next);
}
}
if !self.finished.put(next.into()) {
break Some(Ok(next));
} else {
// Must have been visited by another path.
continue;
};
}
self.asg
.edges_dyn(next)
.map(|dyn_oi| *dyn_oi.target())
.filter(|&oi| {
let finished = self.finished.contains(oi.into());
// TODO:
let _is_cycle =
!finished && self.visited.contains(oi.into());
!finished
})
.collect_into(&mut self.stack);
Err(cycle) => {
self.pop_next_oi();
return Some(Err(cycle));
}
};
}
}
}
/// A graph cycle.
///
/// A cycle means that a path contains a duplicate node,
/// as if it looped back on itself.
/// In terms of TAME,
/// a cycle implies a circular dependency.
///
/// Identifying Cycle Objects
/// =========================
/// TODO: Object names need to be derived from the cycle to display
/// concisely to the user.
/// The cycle very likely contains identifiers that can be used to describe
/// the cycle in more concise terms.
///
/// It used to be the case that cycles contained identifier names,
/// but that was before the topological sort was generalized to include
/// all graph objects;
/// see the commit that introduced this message for more information.
///
/// TODO: We also ought to represent the spans associated with _references_,
/// _in addition to_ just the referenced object.
#[derive(Debug, PartialEq)]
pub struct Cycle {
/// The path representing the cycle in post-order (reversed).
///
/// It is expected that [`ObjectIndex`]'s associated [`Span`] has been
/// resolved to that of the target object
/// (e.g. using [`ObjectIndex::resolve_span`]).
/// This allows the indexes to be useful in a diagnostic context.
///
/// See [`Self::path_rev`] for more information.
path: Vec<ObjectIndexResolvedSpan<Object>>,
}
impl Cycle {
/// The path representing the cycle in post-order (reversed).
///
/// The path is truncated such that the first node in the path is the
/// beginning of the cycle.
/// The final node in the cycle is omitted,
/// since it is the same as the first;
/// if you repeated this path indefinitely
/// (e.g. with [`Iterator::cycle`])
/// then you would have precisely the cycle.
///
/// The [`ObjectIndex`]es should have [`Span`]s that are resolved
/// against the target so that they are useful in a diagnostic
/// context.
pub fn path_rev(&self) -> &Vec<ObjectIndexResolvedSpan<Object>> {
&self.path
}
}
impl Display for Cycle {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// TODO: See note on [`Cycle`] about deriving names.
write!(f, "[...]")
}
}
impl Error for Cycle {}
impl Diagnostic for Cycle {
fn describe(&self) -> Vec<AnnotatedSpan> {
let path = &self.path;
let n = path.len();
let ident = path.last().unwrap();
// TODO: See note on [`Cycle`] about deriving names.
path.iter()
.rev()
.enumerate()
.map(|(i, oi)| {
oi.note(match i {
0 => format!(
"[0/{n}] the cycle begins here, depending on..."
),
// TODO: s/object/<TYPE OF OBJECT>/
_ => {
format!("[{i}/{n}] ...this object, which depends on...")
}
})
})
.chain(once(ident.error(format!(
"[{n}/{n}] ...the object once again, \
creating the cycle"
))))
.collect::<Vec<_>>()
}
}
#[cfg(test)]
mod test;

View File

@ -34,19 +34,29 @@ use Air::*;
fn topo_report_only(
asg: &Asg,
edges: impl Iterator<Item = ObjectIndex<Object>>,
) -> Vec<Result<(ObjectTy, Span), AsgError>> {
) -> Vec<Result<(ObjectTy, Span), Vec<(ObjectTy, Span)>>> {
topo_sort(asg, edges)
.map(|result| {
result
.map(|oi| oi.resolve(asg))
.map(|obj| (obj.ty(), obj.span()))
.map_err(|cycle| {
cycle
.path_rev()
.iter()
// Retain the resolved span from Cycle so that our
// assertions verify that it is being resolved
// correctly.
.map(|oirs| (oirs.oi().resolve(asg).ty(), oirs.span()))
.collect()
})
})
.collect()
}
fn topo_report<I: IntoIterator<Item = Air>>(
toks: I,
) -> Vec<Result<(ObjectTy, Span), AsgError>>
) -> Vec<Result<(ObjectTy, Span), Vec<(ObjectTy, Span)>>>
where
I::IntoIter: Debug,
{
@ -329,3 +339,69 @@ fn sorts_objects_given_multiple_roots() {
topo_report(toks).into_iter().collect(),
);
}
// Most cycles are unsupported by TAME.
// Recovery allows compilation/linking to continue so that additional errors
// can be discovered and reported.
#[test]
fn unsupported_cycles_with_recovery() {
let id_a = SPair("expr_a".into(), S3);
let id_b = SPair("expr_b".into(), S8);
#[rustfmt::skip]
let toks = vec![
PkgStart(S1),
ExprStart(ExprOp::Sum, S2),
BindIdent(id_a), // <----. self-cycle
RefIdent(SPair(id_a.symbol(), S4)), // ____/ \
RefIdent(SPair(id_b.symbol(), S5)), // ---. \ a->b->a
ExprEnd(S6), // ) ) cycle
// / /
ExprStart(ExprOp::Sum, S7), // / /
BindIdent(id_b), // <' /
RefIdent(SPair(id_a.symbol(), S9)), // ----'
ExprEnd(S10),
PkgEnd(S11),
];
use ObjectTy::*;
let m = |a: Span, b: Span| a.merge(b).unwrap();
assert_eq!(
#[rustfmt::skip]
vec![
// Pkg -> Ident (id_a) -> Ref (id_a) gives us a self-cycle.
Err(vec![
(Expr, m(S2, S6)), // -.
(Ident, S3 ), // <' id_a
]),
// RECOVERY: We do not traverse into the cycle and continue as
// if the edge causing the cycle was not taken.
// ...which unfortunately lands us on another cycle caused by
// a->b->a before we can emit the parent Expr.
// TODO: In the future we ought to represent the reference here
// as well.
Err(vec![
(Expr, m(S7, S10)), // -.
(Ident, S8 ), // | id_b
(Expr, m(S2, S6)), // |
(Ident, S3 ), // <' id_a
]),
// RECOVERY: We ignore the edge leading to the cycle,
// which means that id_b Expr has no more dependencies.
Ok((Expr, m(S7, S10))),
Ok((Ident, S8 )),
// And id_a is now also complete,
// since the cycle was the last dependency.
Ok((Expr, m(S2, S6 ))),
Ok((Ident, S3 )),
Ok((Pkg, m(S1, S11))),
],
topo_report(toks).into_iter().collect::<Vec<_>>(),
);
}

View File

@ -70,7 +70,8 @@ pub use graph::{
FragmentText, Ident, IdentKind, Source, TransitionError,
TransitionResult, UnresolvedError,
},
Object, ObjectIndex, ObjectIndexRelTo, ObjectKind,
Object, ObjectIndex, ObjectIndexRelTo, ObjectIndexResolvedSpan,
ObjectKind,
},
visit,
xmli::AsgTreeToXirf,