// Graph abstraction
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see .
//! Abstract semantic graph.
//!
//! ![Visualization of ASG ontology](../ontviz.svg)
use self::object::{
DynObjectRel, ObjectIndexRelTo, ObjectRelFrom, ObjectRelTy,
ObjectRelatable, Root,
};
use super::{
AsgError, FragmentText, Ident, IdentKind, Object, ObjectIndex, ObjectKind,
Source, TransitionResult,
};
use crate::{
diagnose::{panic::DiagnosticPanic, Annotate, AnnotatedSpan},
f::Functor,
fmt::{DisplayWrapper, TtQuote},
global,
parse::{util::SPair, Token},
span::Span,
sym::SymbolId,
};
use petgraph::{
graph::{DiGraph, Graph, NodeIndex},
visit::EdgeRef,
Direction,
};
use std::{fmt::Debug, result::Result};
pub mod object;
pub mod visit;
pub mod xmli;
use object::{ObjectContainer, ObjectRelTo};
/// Datatype representing node and edge indexes.
pub trait IndexType = petgraph::graph::IndexType;
/// A [`Result`] with a hard-coded [`AsgError`] error type.
///
/// This is the result of every [`Asg`] operation that could potentially
/// fail in error.
pub type AsgResult = Result;
/// The [`ObjectRelTy`] (representing the [`ObjectKind`]) of the source and
/// destination [`Node`]s respectively.
///
/// This small memory expense allows for bidirectional edge filtering
/// and [`ObjectIndex`] [`ObjectKind`] resolution without an extra layer
/// of indirection to look up the source/target [`Node`].
///
/// The edge may also optionally contain a [`Span`] that provides additional
/// context in situations where the distinction between the span of the
/// target object and the span of the _reference_ to that object is
/// important.
type AsgEdge = (ObjectRelTy, ObjectRelTy, Option);
/// Each node of the graph.
type Node = ObjectContainer;
/// Index size for Graph nodes and edges.
type Ix = global::ProgSymSize;
/// An abstract semantic graph (ASG) of [objects](object).
///
/// This implementation is currently based on [`petgraph`].
///
/// Identifiers are cached by name for `O(1)` lookup.
/// Since [`SymbolId`][crate::sym::SymbolId] is used for this purpose,
/// the index may contain more entries than nodes and may contain gaps.
///
/// This IR focuses on the definition and manipulation of objects and their
/// dependencies.
/// See [`Ident`]for a summary of valid identifier object state
/// transitions.
///
/// Objects are never deleted from the graph,
/// so [`ObjectIndex`]s will remain valid for the lifetime of the ASG.
///
/// For more information,
/// see the [module-level documentation][self].
pub struct Asg {
// TODO: private; see `ld::xmle::lower`.
/// Directed graph on which objects are stored.
pub graph: DiGraph,
/// Map of [`SymbolId`][crate::sym::SymbolId] to node indexes.
///
/// This allows for `O(1)` lookup of identifiers in the graph.
/// Note that,
/// while we store [`NodeIndex`] internally,
/// the public API encapsulates it within an [`ObjectIndex`].
index: Vec>,
/// Empty node indicating that no object exists for a given index.
empty_node: NodeIndex,
/// The root node used for reachability analysis and topological
/// sorting.
root_node: NodeIndex,
}
impl Debug for Asg {
/// Trimmed-down Asg [`Debug`] output.
///
/// This primarily hides the large `self.index` that takes up so much
/// space in parser traces,
/// but also hides irrelevant information.
///
/// The better option in the future may be to create a newtype for
/// `index` if it sticks around in its current form,
/// which in turn can encapsulate `self.empty_node`.
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("Asg")
.field("root_node", &self.root_node)
.field("graph", &self.graph)
.finish_non_exhaustive()
}
}
impl Default for Asg {
fn default() -> Self {
Self::new()
}
}
impl Asg {
/// Create a new ASG.
///
/// See also [`with_capacity`](Asg::with_capacity).
pub fn new() -> Self {
// TODO: Determine a proper initial capacity.
Self::with_capacity(0, 0)
}
/// Create an ASG with the provided initial capacity.
///
/// The value for `objects` will be used as the capacity for the nodes
/// in the graph,
/// as well as the initial index capacity.
/// The value for `edges` may be more difficult to consider,
/// since edges are used to represent various relationships between
/// different types of objects,
/// but it's safe to say that each object will have at least one
/// edge to another object.
pub fn with_capacity(objects: usize, edges: usize) -> Self {
let mut graph = Graph::with_capacity(objects, edges);
let mut index = Vec::with_capacity(objects);
// Exhaust the first index to be used as a placeholder
// (its value does not matter).
let empty_node = graph.add_node(Object::Root(Root).into());
index.push(empty_node);
// Automatically add the root which will be used to determine what
// identifiers ought to be retained by the final program.
// This is not indexed and is not accessable by name.
let root_node = graph.add_node(Object::Root(Root).into());
Self {
graph,
index,
empty_node,
root_node,
}
}
/// Get the underlying Graph
pub fn into_inner(self) -> DiGraph {
self.graph
}
/// Index the provided symbol `name` as representing the identifier `node`.
///
/// This index permits `O(1)` identifier lookups.
///
/// After an identifier is indexed it is not expected to be reassigned
/// to another node.
/// Debug builds contain an assertion that will panic in this instance.
///
/// Panics
/// ======
/// Will panic if unable to allocate more space for the index.
fn index_identifier(&mut self, name: SymbolId, node: NodeIndex) {
let i = name.as_usize();
if i >= self.index.len() {
// If this is ever a problem we can fall back to usize max and
// re-compare before panicing
let new_size = (i + 1)
.checked_next_power_of_two()
.expect("internal error: cannot allocate space for ASG index");
self.index.resize(new_size, self.empty_node);
}
// We should never overwrite indexes
debug_assert!(self.index[i] == self.empty_node);
self.index[i] = node;
}
/// Lookup `ident` or add a missing identifier to the graph and return a
/// reference to it.
///
/// The provided span is necessary to seed the missing identifier with
/// some sort of context to aid in debugging why a missing identifier
/// was introduced to the graph.
/// The provided span will be used even if an identifier exists on the
/// graph,
/// which can be used for retaining information on the location that
/// requested the identifier.
/// To retrieve the span of a previously declared identifier,
/// you must resolve the [`Ident`] object and inspect it.
///
/// See [`Ident::declare`] for more information.
pub(super) fn lookup_global_or_missing(
&mut self,
ident: SPair,
) -> ObjectIndex {
self.lookup_global(ident).unwrap_or_else(|| {
let index = self.graph.add_node(Ident::declare(ident).into());
self.index_identifier(ident.symbol(), index);
ObjectIndex::new(index, ident.span())
})
}
/// Perform a state transition on an identifier by name.
///
/// Look up `ident` or add a missing identifier if it does not yet exist
/// (see [`Self::lookup_global_or_missing`]).
/// Then invoke `f` with the located identifier and replace the
/// identifier on the graph with the result.
///
/// This will safely restore graph state to the original identifier
/// value on transition failure.
fn with_ident_lookup_global(
&mut self,
name: SPair,
f: F,
) -> AsgResult>
where
F: FnOnce(Ident) -> TransitionResult,
{
let identi = self.lookup_global_or_missing(name);
self.with_ident(identi, f)
}
/// Perform a state transition on an identifier by [`ObjectIndex`].
///
/// Invoke `f` with the located identifier and replace the identifier on
/// the graph with the result.
///
/// This will safely restore graph state to the original identifier
/// value on transition failure.
fn with_ident(
&mut self,
identi: ObjectIndex,
f: F,
) -> AsgResult>
where
F: FnOnce(Ident) -> TransitionResult,
{
let container = self.graph.node_weight_mut(identi.into()).unwrap();
container
.try_replace_with(f)
.map(|()| identi)
.map_err(Into::into)
}
/// Root object.
///
/// All [`Object`]s reachable from the root will be included in the
/// compilation unit or linked executable.
///
/// The `witness` is used in the returned [`ObjectIndex`] and is
/// intended for diagnostic purposes to highlight the source entity that
/// triggered the request of the root.
pub fn root(&self, witness: Span) -> ObjectIndex {
ObjectIndex::new(self.root_node, witness)
}
/// Add an object as a root.
///
/// Roots are always included during a topological sort and any
/// reachability analysis.
///
/// Ideally,
/// roots would be minimal and dependencies properly organized such
/// that objects will be included if they are a transitive dependency
/// of some included subsystem.
///
/// See also [`IdentKind::is_auto_root`].
pub fn add_root(&mut self, identi: ObjectIndex) {
self.graph.add_edge(
self.root_node,
identi.into(),
(ObjectRelTy::Root, ObjectRelTy::Ident, None),
);
}
/// Whether an object is rooted.
///
/// See [`Asg::add_root`] for more information about roots.
#[cfg(test)]
pub(super) fn is_rooted(&self, identi: ObjectIndex) -> bool {
self.graph.contains_edge(self.root_node, identi.into())
}
/// Declare a concrete identifier.
///
/// An identifier declaration is similar to a declaration in a header
/// file in a language like C,
/// describing the structure of the identifier.
/// Once declared,
/// this information cannot be changed.
///
/// Identifiers are uniquely identified by a [`SymbolId`] `name`.
/// If an identifier of the same `name` already exists,
/// then the provided declaration is compared against the existing
/// declaration---should
/// they be incompatible,
/// then the operation will fail;
/// otherwise,
/// the existing identifier will be returned.
///
/// If a concrete identifier has already been declared (see
/// [`Asg::declare`]),
/// then extern declarations will be compared and,
/// if compatible,
/// the identifier will be immediately _resolved_ and the object
/// on the graph will not be altered.
/// Resolution will otherwise fail in error.
///
/// For more information on state transitions that can occur when
/// redeclaring an identifier that already exists,
/// see [`Ident::resolve`].
///
/// A successful declaration will add an identifier to the graph
/// and return an [`ObjectIndex`] reference.
pub fn declare(
&mut self,
name: SPair,
kind: IdentKind,
src: Source,
) -> AsgResult> {
let is_auto_root = kind.is_auto_root();
self.with_ident_lookup_global(name, |obj| {
obj.resolve(name.span(), kind, src)
})
.map(|node| {
is_auto_root.then(|| self.add_root(node));
node
})
}
/// Declare an abstract identifier.
///
/// An _extern_ declaration declares an identifier the same as
/// [`Asg::declare`],
/// but omits source information.
/// Externs are identifiers that are expected to be defined somewhere
/// else ("externally"),
/// and are resolved at [link-time][crate::ld].
///
/// If a concrete identifier has already been declared (see
/// [`Asg::declare`]),
/// then the declarations will be compared and,
/// if compatible,
/// the identifier will be immediately _resolved_ and the object
/// on the graph will not be altered.
/// Resolution will otherwise fail in error.
///
/// See [`Ident::extern_`] and
/// [`Ident::resolve`] for more information on
/// compatibility related to extern resolution.
pub fn declare_extern(
&mut self,
name: SPair,
kind: IdentKind,
src: Source,
) -> AsgResult> {
self.with_ident_lookup_global(name, |obj| {
obj.extern_(name.span(), kind, src)
})
}
/// Set the fragment associated with a concrete identifier.
///
/// Fragments are intended for use by the [linker][crate::ld].
/// For more information,
/// see [`Ident::set_fragment`].
pub fn set_fragment(
&mut self,
name: SPair,
text: FragmentText,
) -> AsgResult> {
self.with_ident_lookup_global(name, |obj| obj.set_fragment(text))
}
/// Create a new object on the graph.
///
/// The provided [`ObjectIndex`] will be augmented with the span
/// of `obj`.
pub(super) fn create(&mut self, obj: O) -> ObjectIndex {
let o = obj.into();
let span = o.span();
let node_id = self.graph.add_node(ObjectContainer::from(o.into()));
ObjectIndex::new(node_id, span)
}
/// Add an edge from the [`Object`] represented by the
/// [`ObjectIndex`] `from_oi` to the object represented by `to_oi`.
///
/// The edge may optionally contain a _contextual [`Span`]_,
/// in cases where it is important to distinguish between the span
/// associated with the target and the span associated with the
/// _reference_ to the target.
///
/// For more information on how the ASG's ontology is enforced statically,
/// see [`ObjectRelTo`].
fn add_edge(
&mut self,
from_oi: impl ObjectIndexRelTo,
to_oi: ObjectIndex,
ctx_span: Option,
) {
self.graph.add_edge(
from_oi.widen().into(),
to_oi.into(),
(from_oi.src_rel_ty(), OB::rel_ty(), ctx_span),
);
}
/// Retrieve an object from the graph by [`ObjectIndex`].
///
/// Since an [`ObjectIndex`] should only be produced by an [`Asg`],
/// and since objects are never deleted from the graph,
/// this should never fail so long as references are not shared
/// between multiple graphs.
/// It is nevertheless wrapped in an [`Option`] just in case.
#[inline]
pub fn get(&self, index: ObjectIndex) -> Option<&O> {
self.graph
.node_weight(index.into())
.map(ObjectContainer::get)
}
/// Attempt to map over an inner [`Object`] referenced by
/// [`ObjectIndex`].
///
/// The type `O` is the expected type of the [`Object`],
/// which should be known to the caller based on the provied
/// [`ObjectIndex`].
/// This method will attempt to narrow to that object type,
/// panicing if there is a mismatch;
/// see the [`object` module documentation](object) for more
/// information and rationale on this behavior.
///
/// Panics
/// ======
/// This method chooses to simplify the API by choosing panics for
/// situations that ought never to occur and represent significant bugs
/// in the compiler.
/// Those situations are:
///
/// 1. If the provided [`ObjectIndex`] references a node index that is
/// not present on the graph;
/// 2. If the node referenced by [`ObjectIndex`] exists but its container
/// is empty because an object was taken but never returned; and
/// 3. If an object cannot be narrowed (downcast) to type `O`,
/// representing a type mismatch between what the caller thinks
/// this object represents and what the object actually is.
#[must_use = "returned ObjectIndex has a possibly-updated and more relevant span"]
pub(super) fn try_map_obj(
&mut self,
index: ObjectIndex,
f: impl FnOnce(O) -> Result,
) -> Result, E> {
let obj_container =
self.graph.node_weight_mut(index.into()).diagnostic_expect(
|| diagnostic_node_missing_desc(index),
"invalid ObjectIndex: data are missing from the ASG",
);
obj_container
.try_replace_with(f)
.map(|()| index.overwrite(obj_container.get::