diff --git a/tamer/src/asg/error.rs b/tamer/src/asg/error.rs
index c48cac2e..bfa187ff 100644
--- a/tamer/src/asg/error.rs
+++ b/tamer/src/asg/error.rs
@@ -31,7 +31,7 @@ use crate::{
     span::Span,
 };
 
-use super::TransitionError;
+use super::{visit::Cycle, TransitionError};
 
 /// An error from an ASG operation.
 ///
@@ -129,6 +129,13 @@ pub enum AsgError {
     /// The spans represent the expression and the documentation text
     ///   respectively.
     InvalidDocContextExpr(Span, Span),
+
+    /// A circular dependency was found where it is not permitted.
+    ///
+    /// A cycle almost always means that computing the value of an object
+    ///   depends on first having computed itself,
+    ///     which is not possible.
+    UnsupportedCycle(Cycle),
 }
 
 impl Display for AsgError {
@@ -171,6 +178,9 @@ impl Display for AsgError {
             InvalidDocContextExpr(_, _) => {
                 write!(f, "document text is not permitted within expressions")
             }
+            UnsupportedCycle(cycle) => {
+                write!(f, "circular dependency: {cycle}")
+            }
         }
     }
 }
@@ -290,6 +300,34 @@ impl Diagnostic for AsgError {
                     likely be lifted in the future",
                 ),
             ],
+            UnsupportedCycle(cycle) => {
+                // The cycle description clearly describes the cycle,
+                //   but in neutral terms,
+                //   since cycles may not necessarily be errors.
+                let mut desc = cycle.describe();
+
+                // (this will always be non-empty)
+                if let Some(obj) = cycle.path_rev().last() {
+                    // But in this context,
+                    //   this _is_ a problem,
+                    //   so make clear why we're pointing this out.
+                    // TODO: Include an identifier name,
+                    //   once `Cycle` supports it.
+                    desc.extend(
+                        [
+                            obj.help(
+                                "the value cannot be computed because its",
+                            ),
+                            obj.help(
+                                " definition requires first computing itself.",
+                            ),
+                        ]
+                        .into_iter(),
+                    );
+                }
+
+                desc
+            }
         }
     }
 }
diff --git a/tamer/src/asg/graph/object.rs b/tamer/src/asg/graph/object.rs
index 6182cd9c..2ec78418 100644
--- a/tamer/src/asg/graph/object.rs
+++ b/tamer/src/asg/graph/object.rs
@@ -492,6 +492,9 @@ where
 ///   this is intended to be used to provide diagnostic information in the
 ///   event that the object somehow becomes unavailable for later
 ///   operations.
+/// [`Self::resolve_span`] may be used to replace the inner [`Span`] with
+///   that of the object that [`Self`] represents,
+///     which is useful in a different diagnostic context.
 ///
 /// _The span is not accounted for in [`PartialEq`] or [`Hash`]_,
 ///   since it represents the context in which the [`ObjectIndex`] was
@@ -625,6 +628,17 @@ impl<O: ObjectKind> ObjectIndex<O> {
         move |oi| oi.resolve(asg)
     }
 
+    /// Replace our associated [`Span`] with the span of the resolved
+    ///   [`Object`].
+    ///
+    /// This is useful to utilize [`ObjectIndex`] in a diagnostic context
+    ///   without having to allocate space for additional metadata.
+    pub fn resolve_span(self, asg: &Asg) -> ObjectIndexResolvedSpan<O> {
+        ObjectIndexResolvedSpan(
+            self.overwrite(self.widen().resolve(asg).span()),
+        )
+    }
+
     /// Resolve the identifier and map over the resulting [`Object`]
     ///   narrowed to [`ObjectKind`] `O`,
     ///     replacing the object on the given [`Asg`].
@@ -875,6 +889,47 @@ impl<O: ObjectKind> From<ObjectIndex<O>> for Span {
     }
 }
 
+/// An [`ObjectKind`] whose associated [`Span`] has been resolved.
+///
+/// This exists simply to provide proof via the type system that resolution
+///   occurred.
+///
+/// See [`ObjectIndex::resolve_span`].
+#[derive(Debug, PartialEq, Eq)]
+pub struct ObjectIndexResolvedSpan<O: ObjectKind>(ObjectIndex<O>);
+
+impl<O: ObjectKind> ObjectIndexResolvedSpan<O> {
+    pub fn oi(&self) -> ObjectIndex<O> {
+        (*self).into()
+    }
+
+    pub fn span(&self) -> Span {
+        self.oi().span()
+    }
+}
+
+impl<O: ObjectKind> From<ObjectIndexResolvedSpan<O>> for ObjectIndex<O> {
+    fn from(value: ObjectIndexResolvedSpan<O>) -> Self {
+        match value {
+            ObjectIndexResolvedSpan(oi) => oi,
+        }
+    }
+}
+
+impl<O: ObjectKind> From<ObjectIndexResolvedSpan<O>> for Span {
+    fn from(value: ObjectIndexResolvedSpan<O>) -> Self {
+        ObjectIndex::from(value).span()
+    }
+}
+
+impl<O: ObjectKind> Clone for ObjectIndexResolvedSpan<O> {
+    fn clone(&self) -> Self {
+        Self(self.0)
+    }
+}
+
+impl<O: ObjectKind> Copy for ObjectIndexResolvedSpan<O> {}
+
 /// A container for an [`Object`] allowing for owned borrowing of data.
 ///
 /// The purpose of allowing this owned borrowing is to permit a functional
diff --git a/tamer/src/asg/graph/visit.rs b/tamer/src/asg/graph/visit.rs
index 4c9d0574..38b51400 100644
--- a/tamer/src/asg/graph/visit.rs
+++ b/tamer/src/asg/graph/visit.rs
@@ -27,4 +27,4 @@ mod ontree;
 mod topo;
 
 pub use ontree::{tree_reconstruction, Depth, TreePreOrderDfs, TreeWalkRel};
-pub use topo::{topo_sort, TopoPostOrderDfs};
+pub use topo::{topo_sort, Cycle, TopoPostOrderDfs};
diff --git a/tamer/src/asg/graph/visit/topo.rs b/tamer/src/asg/graph/visit/topo.rs
index 77544d71..ea9e154e 100644
--- a/tamer/src/asg/graph/visit/topo.rs
+++ b/tamer/src/asg/graph/visit/topo.rs
@@ -42,8 +42,15 @@
 //!     to a particular object and cannot be used in that way.
 
 use super::super::{Asg, ObjectIndex};
-use crate::asg::{graph::object::DynObjectRel, AsgError, Object};
+use crate::{
+    asg::{graph::object::DynObjectRel, Object, ObjectIndexResolvedSpan},
+    diagnose::{Annotate, AnnotatedSpan, Diagnostic},
+};
 use fixedbitset::FixedBitSet;
+use std::{error::Error, fmt::Display, iter::once};
+
+#[cfg(doc)]
+use crate::span::Span;
 
 pub fn topo_sort(
     asg: &Asg,
@@ -54,7 +61,7 @@
 /// Topological sort implemented as a post-order depth-first search (DFS).
 ///
-/// See the [module-level documentation](super) for important information
+/// See the [module-level documentation](self) for important information
 ///   about this traversal.
 pub struct TopoPostOrderDfs<'a> {
     /// Reference [`Asg`].
     asg: &'a Asg,
@@ -69,10 +76,22 @@ pub struct TopoPostOrderDfs<'a> {
     ///   its relationships (edge targets) are pushed onto the stack.
     /// Each iterator pops a relationship off the stack and visits it.
     ///
+    /// The inner [`Result`] serves as a cycle flag set by
+    ///   [`Self::flag_if_cycle`].
+    /// Computing the proper [`Cycle`] error before placing it on the stack
+    ///   would not only bloat the size of each element of this stack,
+    ///     but also use unnecessary memory on the heap.
+    /// The proper [`Cycle`] error will be computed when this element is
+    ///   retrieved by [`Self::next_oi`].
+    ///
+    /// _This may contain duplicate [`ObjectIndex`]es even if the graph
+    ///   contains no cycles;_
+    ///     see [`Self::push_neighbors`] for an explanation.
+    ///
     /// The traversal ends once the stack becomes empty.
     /// It is expected the stack is initialized with at least one initial
     ///   object prior to beginning the traversal.
-    stack: Vec<ObjectIndex<Object>>,
+    stack: Vec<Result<ObjectIndex<Object>, ObjectIndex<Object>>>,
 
     /// Objects that have already been added to [`Self::stack`].
     ///
@@ -118,7 +137,7 @@ impl<'a> TopoPostOrderDfs<'a> {
         let set_cap = asg.object_count();
 
         let mut stack = Vec::with_capacity(INIT_STACK_CAP);
-        init.collect_into(&mut stack);
+        init.map(Ok).collect_into(&mut stack);
 
         Self {
             asg,
@@ -127,43 +146,339 @@ impl<'a> TopoPostOrderDfs<'a> {
             finished: FixedBitSet::with_capacity(set_cap),
         }
     }
+
+    /// Push the neighbors of the given [`ObjectIndex`] onto [`Self::stack`]
+    ///   for later processing.
+    ///
+    /// Placing neighbors on the stack allows us to yield elements from the
+    ///   iterator without having to keep track of where we are on the graph
+    ///   for each node in the path.
+    ///
+    /// When visiting a node for the first time,
+    ///   its neighbors
+    ///     (objects to which `src_oi` has an edge)
+    ///   are pushed onto [`Self::stack`].
+    /// It is expected that `src_oi` is left on the stack,
+    ///   ensuring that its neighbors are processed before `src_oi` is,
+    ///     leading to a post-order traversal.
+    ///
+    /// Objects that have already been emitted will _not_ be pushed onto the
+    ///   stack;
+    ///     this determination is made by consulting [`Self::finished`].
+    ///
+    /// Each object that is pushed onto the stack will be checked by
+    ///   [`Self::flag_if_cycle`];
+    ///     see that function for more information.
+    /// It is important that each cycle be flagged individually,
+    ///   rather than returning an error from this function,
+    ///     otherwise only one cycle per object would be found.
+    ///
+    /// Duplicate Stack Entries Without Cycles
+    /// ======================================
+    /// [`Self::stack`] may contain duplicate [`ObjectIndex`]es even if
+    ///   there is no cycle.
+    ///
+    /// The reason for this is that a cycle only occurs when an
+    ///   [`ObjectIndex`] is part of the path currently being visited.
+    /// But [`Self::stack`] contains objects that have _not yet been visited_;
+    ///   they've been placed onto the stack by this method to be visited at
+    ///   a future point.
+    ///
+    /// Consider this graph:
+    ///
+    /// ```text
+    ///   (A) -> (B) -> (D)
+    ///    '---> (C) <---'
+    /// ```
+    ///
+    /// A traversal might yield this stack if `C` is visited before `B`:
+    ///
+    /// ```text
+    ///   [A]              // root
+    ///   [A, C, B]        // self.push_neighbors(A)
+    ///   [A, C, B, D]     // self.push_neighbors(B)
+    ///   [A, C, B, D, C]  // self.push_neighbors(D)
+    /// ```
+    ///
+    /// Since `C` does not contain an edge _to_ any previous object,
+    ///   there is no cycle.
+    ///
+    /// For this reason,
+    ///   it is important for the implementation to check [`Self::finished`]
+    ///   when removing objects from the stack to ensure that they have not
+    ///   already been emitted.
+    fn push_neighbors(&mut self, src_oi: ObjectIndex<Object>) {
+        self.asg
+            .edges_dyn(src_oi)
+            .map(|dyn_oi| *dyn_oi.target())
+            .filter(|&oi| !self.finished.contains(oi.into()))
+            .map(|oi| Self::flag_if_cycle(&self.visited, oi))
+            .collect_into(&mut self.stack);
+    }
+
+    /// Determine if the provided [`ObjectIndex`] would introduce a cycle if
+    ///   appended to the current path and flag it if so.
+    ///
+    /// This should be called only after having checked [`Self::finished`],
+    ///   which means that a node is _not_ in the path because it has
+    ///   already been emitted.
+    ///
+    /// With [`Self::finished`] having been ruled out,
+    ///   this uses [`Self::visited`] to determine if a node must be part of
+    ///   the active path of the DFS.
+    /// If so,
+    ///   then introducing it again would produce a cycle.
+    ///
+    /// We use [`Result`] where `E` is [`ObjectIndex`] to simply flag the
+    ///   object as containing a cycle;
+    ///     this allows us to defer computation of the cycle and allocation
+    ///     of memory for that path until we actually visit the node on
+    ///     [`Self::stack`].
+    /// This allows the element size of [`Self::stack`] to remain small.
+    ///
+    /// See [`Self::find_cycle_path`] for the actual cycle computation that
+    ///   will eventually be performed.
+    fn flag_if_cycle(
+        visited: &FixedBitSet,
+        oi: ObjectIndex<Object>,
+    ) -> Result<ObjectIndex<Object>, ObjectIndex<Object>> {
+        if visited.contains(oi.into()) {
+            Err(oi)
+        } else {
+            Ok(oi)
+        }
+    }
+
+    /// Attempt to retrieve the next [`ObjectIndex`] from the stack for
+    ///   processing,
+    ///     leaving it on the stack.
+    ///
+    /// If the object atop of the stack has been flagged as a cycle by
+    ///   [`Self::flag_if_cycle`],
+    ///     then the actual path associated with the cycle will be computed
+    ///     by [`Self::find_cycle_path`] and a [`Cycle`] returned.
+    ///
+    /// See also [`Self::pop_next_oi`].
+    fn next_oi(&self) -> Option<Result<ObjectIndex<Object>, Cycle>> {
+        self.stack
+            .last()
+            .map(|result| result.map_err(|oi| self.find_cycle_path(oi)))
+    }
+
+    /// Remove an item from [`Self::stack`].
+    ///
+    /// A better API for the future would take ownership over the stack and
+    ///   know for certain that the element being removed is the element
+    ///   previously returned.
+    ///
+    /// See also [`Self::next_oi`].
+    fn pop_next_oi(&mut self) {
+        self.stack.pop();
+    }
+
+    /// Knowing that the provided [`ObjectIndex`] would produce a cycle if
+    ///   added to the current path,
+    ///     calculate the path representing the cycle.
+    ///
+    /// This is a linear-time (`O(n)`) operation that performs a new heap
+    ///   allocation.
+    /// Since cycles are an error case,
+    ///   it is expected that they will not often occur and so the DFS
+    ///   algorithm is optimized for the most common case;
+    ///     it is not worth computing the path during the course of the
+    ///     search since that path would almost always be discarded.
+    ///
+    /// Deriving a path relies on understanding that:
+    ///
+    ///   1. An [`ObjectIndex`] in [`Self::stack`] is either awaiting
+    ///        processing or is _currently_ being processed.
+    ///      This means that it contains the path,
+    ///        but it also contains neighbors of objects in the path.
+    ///      We must filter out those neighbors.
+    ///
+    ///   2. The [`Result`] in [`Self::stack`] indicates whether the object
+    ///        causes a cycle.
+    ///      A previous object in the path must therefore be [`Ok`],
+    ///        otherwise it would not have been traversed,
+    ///        and so we must filter all [`Err`]s.
+    ///      In doing so,
+    ///        we also filter out `next` at the top of the stack,
+    ///        and so _this function works correctly regardless of whether
+    ///        `next` has already been `pop`'d from the stack_.
+    ///
+    ///   3. [`Self::visited`] is set just before neighbors of an object are
+    ///        pushed onto [`Self::stack`].
+    ///      Therefore,
+    ///        only objects marked as visited are part of the active path,
+    ///        and so to discover that path we need only filter out
+    ///        non-visited objects.
+    ///
+    ///   4. [`Self::stack`] contains a path from a provided root.
+    ///      We want to cut off the path at the beginning of the cycle.
+    ///      The easiest way to do this is to iterate through the stack in
+    ///        reverse,
+    ///          stopping as soon as we encounter an [`ObjectIndex`]
+    ///          matching `next`.
+    ///      This has the effect of producing a cycle path in post-order,
+    ///        which is consistent with the ordering of [`Self`]'s
+    ///        traversal.
+    ///
+    ///   5. The [`ObjectIndex`]es sourced from the [`Asg`] do not contain
+    ///        the spans of the target objects.
+    ///      Cycles will almost certainly result in diagnostic messages,
+    ///        which require accurate spans,
+    ///        and so we must resolve the [`ObjectIndex`] to retrieve the
+    ///        target [`Span`].
+    ///
+    /// The path produced will therefore be reversed,
+    ///   with `next` as the last element.
+    /// `next` will _not_ be duplicated as the first element,
+    ///   which means that if you were to repeat the returned path
+    ///   indefinitely end-to-end
+    ///     (e.g. using [`Iterator::cycle`]),
+    ///   you would have precisely this cycle.
+    ///
+    /// With all of that said,
+    ///   the implementation is fairly straightforward and concise.
+    fn find_cycle_path(&self, next: ObjectIndex<Object>) -> Cycle {
+        let mut path = self
+            .stack
+            .iter()
+            .rev()
+            .copied()
+            .filter_map(Result::ok)
+            .take_while(|&oi| oi != next)
+            .filter(|&oi| self.visited.contains(oi.into()))
+            .map(|oi| oi.resolve_span(self.asg))
+            .collect::<Vec<_>>();
+
+        // We stopped _at_ `next`,
+        //   so we need to manually add it to the path.
+        path.push(next.resolve_span(self.asg));
+
+        Cycle { path }
+    }
 }
 
 impl<'a> Iterator for TopoPostOrderDfs<'a> {
-    type Item = Result<ObjectIndex<Object>, AsgError>;
+    type Item = Result<ObjectIndex<Object>, Cycle>;
 
     fn next(&mut self) -> Option<Self::Item> {
         // Rust doesn't have guaranteed TCO as of 2023-04
         loop {
-            let next = *self.stack.last()?;
+            match self.next_oi()? {
+                Ok(next) => {
+                    if self.visited.put(next.into()) {
+                        self.pop_next_oi();
 
-            if self.visited.put(next.into()) {
-                self.stack.pop(); // next
+                        // See `Self::push_neighbors` for explanation.
+                        if !self.finished.put(next.into()) {
+                            break Some(Ok(next));
+                        }
+                    } else {
+                        self.push_neighbors(next);
+                    }
+                }
 
-                if !self.finished.put(next.into()) {
-                    break Some(Ok(next));
-                } else {
-                    // Must have been visited by another path.
-                    continue;
-                };
-            }
-
-            self.asg
-                .edges_dyn(next)
-                .map(|dyn_oi| *dyn_oi.target())
-                .filter(|&oi| {
-                    let finished = self.finished.contains(oi.into());
-
-                    // TODO:
-                    let _is_cycle =
-                        !finished && self.visited.contains(oi.into());
-
-                    !finished
-                })
-                .collect_into(&mut self.stack);
+                Err(cycle) => {
+                    self.pop_next_oi();
+                    return Some(Err(cycle));
+                }
+            };
         }
     }
 }
 
+/// A graph cycle.
+///
+/// A cycle means that a path contains a duplicate node,
+///   as if it looped back on itself.
+/// In terms of TAME,
+///   a cycle implies a circular dependency.
+///
+/// Identifying Cycle Objects
+/// =========================
+/// TODO: Object names need to be derived from the cycle to display
+///   concisely to the user.
+/// The cycle very likely contains identifiers that can be used to describe
+///   the cycle in more concise terms.
+///
+/// It used to be the case that cycles contained identifier names,
+///   but that was before the topological sort was generalized to include
+///   all graph objects;
+///     see the commit that introduced this message for more information.
+///
+/// TODO: We also ought to represent the spans associated with _references_,
+///   _in addition to_ just the referenced object.
+#[derive(Debug, PartialEq)]
+pub struct Cycle {
+    /// The path representing the cycle in post-order (reversed).
+    ///
+    /// It is expected that [`ObjectIndex`]'s associated [`Span`] has been
+    ///   resolved to that of the target object
+    ///     (e.g. using [`ObjectIndex::resolve_span`]).
+    /// This allows the indexes to be useful in a diagnostic context.
+    ///
+    /// See [`Self::path_rev`] for more information.
+    path: Vec<ObjectIndexResolvedSpan<Object>>,
+}
+
+impl Cycle {
+    /// The path representing the cycle in post-order (reversed).
+    ///
+    /// The path is truncated such that the first node in the path is the
+    ///   beginning of the cycle.
+    /// The final node in the cycle is omitted,
+    ///   since it is the same as the first;
+    ///     if you repeated this path indefinitely
+    ///       (e.g. with [`Iterator::cycle`])
+    ///     then you would have precisely the cycle.
+    ///
+    /// The [`ObjectIndex`]es should have [`Span`]s that are resolved
+    ///   against the target so that they are useful in a diagnostic
+    ///   context.
+    pub fn path_rev(&self) -> &Vec<ObjectIndexResolvedSpan<Object>> {
+        &self.path
+    }
+}
+
+impl Display for Cycle {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // TODO: See note on [`Cycle`] about deriving names.
+        write!(f, "[...]")
+    }
+}
+
+impl Error for Cycle {}
+
+impl Diagnostic for Cycle {
+    fn describe(&self) -> Vec<AnnotatedSpan> {
+        let path = &self.path;
+        let n = path.len();
+        let ident = path.last().unwrap();
+
+        // TODO: See note on [`Cycle`] about deriving names.
+        path.iter()
+            .rev()
+            .enumerate()
+            .map(|(i, oi)| {
+                oi.note(match i {
+                    0 => format!(
+                        "[0/{n}] the cycle begins here, depending on..."
+                    ),
+                    // TODO: s/object//
+                    _ => {
+                        format!("[{i}/{n}] ...this object, which depends on...")
+                    }
+                })
+            })
+            .chain(once(ident.error(format!(
+                "[{n}/{n}] ...the object once again, \
+                    creating the cycle"
+            ))))
+            .collect::<Vec<_>>()
+    }
+}
+
 #[cfg(test)]
 mod test;
diff --git a/tamer/src/asg/graph/visit/topo/test.rs b/tamer/src/asg/graph/visit/topo/test.rs
index 7eb1fc50..a4adba57 100644
--- a/tamer/src/asg/graph/visit/topo/test.rs
+++ b/tamer/src/asg/graph/visit/topo/test.rs
@@ -34,19 +34,29 @@ use Air::*;
 fn topo_report_only(
     asg: &Asg,
     edges: impl Iterator<Item = ObjectIndex<Object>>,
-) -> Vec<Result<(ObjectTy, Span), AsgError>> {
+) -> Vec<Result<(ObjectTy, Span), Vec<(ObjectTy, Span)>>> {
     topo_sort(asg, edges)
         .map(|result| {
             result
                 .map(|oi| oi.resolve(asg))
                 .map(|obj| (obj.ty(), obj.span()))
+                .map_err(|cycle| {
+                    cycle
+                        .path_rev()
+                        .iter()
+                        // Retain the resolved span from Cycle so that our
+                        //   assertions verify that it is being resolved
+                        //   correctly.
+                        .map(|oirs| (oirs.oi().resolve(asg).ty(), oirs.span()))
+                        .collect()
+                })
         })
         .collect()
 }
 
 fn topo_report<I: IntoIterator<Item = Air>>(
     toks: I,
-) -> Vec<Result<(ObjectTy, Span), AsgError>>
+) -> Vec<Result<(ObjectTy, Span), Vec<(ObjectTy, Span)>>>
 where
     I::IntoIter: Debug,
 {
@@ -329,3 +339,69 @@ fn sorts_objects_given_multiple_roots() {
         topo_report(toks).into_iter().collect(),
     );
 }
+
+// Most cycles are unsupported by TAME.
+// Recovery allows compilation/linking to continue so that additional errors
+//   can be discovered and reported.
+#[test]
+fn unsupported_cycles_with_recovery() {
+    let id_a = SPair("expr_a".into(), S3);
+    let id_b = SPair("expr_b".into(), S8);
+
+    #[rustfmt::skip]
+    let toks = vec![
+        PkgStart(S1),
+          ExprStart(ExprOp::Sum, S2),
+            BindIdent(id_a),                     // <----. self-cycle
+            RefIdent(SPair(id_a.symbol(), S4)),  // ____/  \
+            RefIdent(SPair(id_b.symbol(), S5)),  // ---.    \  a->b->a
+          ExprEnd(S6),                           //     )    )   cycle
+                                                 //    /    /
+          ExprStart(ExprOp::Sum, S7),            //   /    /
+            BindIdent(id_b),                     // <'    /
+            RefIdent(SPair(id_a.symbol(), S9)),  // ----'
+          ExprEnd(S10),
+        PkgEnd(S11),
+    ];
+
+    use ObjectTy::*;
+    let m = |a: Span, b: Span| a.merge(b).unwrap();
+
+    assert_eq!(
+        #[rustfmt::skip]
+        vec![
+            // Pkg -> Ident (id_a) -> Ref (id_a) gives us a self-cycle.
+            Err(vec![
+                (Expr,  m(S2, S6)),  // -.
+                (Ident, S3       ),  // <'  id_a
+            ]),
+
+            // RECOVERY: We do not traverse into the cycle and continue as
+            //   if the edge causing the cycle was not taken.
+
+            // ...which unfortunately lands us on another cycle caused by
+            //   a->b->a before we can emit the parent Expr.
+            // TODO: In the future we ought to represent the reference here
+            //   as well.
+            Err(vec![
+                (Expr,  m(S7, S10)),  // -.
+                (Ident, S8        ),  //  |  id_b
+                (Expr,  m(S2, S6) ),  //  |
+                (Ident, S3        ),  // <'  id_a
+            ]),
+
+            // RECOVERY: We ignore the edge leading to the cycle,
+            //   which means that id_b Expr has no more dependencies.
+            Ok((Expr,  m(S7, S10))),
+            Ok((Ident, S8        )),
+
+            // And id_a is now also complete,
+            //   since the cycle was the last dependency.
+            Ok((Expr,  m(S2, S6 ))),
+            Ok((Ident, S3        )),
+
+            Ok((Pkg, m(S1, S11))),
+        ],
+        topo_report(toks).into_iter().collect::<Vec<_>>(),
+    );
+}
diff --git a/tamer/src/asg/mod.rs b/tamer/src/asg/mod.rs
index 985fbf71..444c5b46 100644
--- a/tamer/src/asg/mod.rs
+++ b/tamer/src/asg/mod.rs
@@ -70,7 +70,8 @@ pub use graph::{
             FragmentText, Ident, IdentKind, Source, TransitionError,
             TransitionResult, UnresolvedError,
         },
-        Object, ObjectIndex, ObjectIndexRelTo, ObjectKind,
+        Object, ObjectIndex, ObjectIndexRelTo, ObjectIndexResolvedSpan,
+        ObjectKind,
     },
     visit,
     xmli::AsgTreeToXirf,