tame/tamer/src/asg/graph.rs

// Graph abstraction
//
//  Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Abstract graph as the basis for concrete ASGs.

use super::{
    AsgError, FragmentText, Ident, IdentKind, Object, Source, TransitionResult,
};
use crate::global;
use crate::sym::SymbolId;
use petgraph::graph::{DiGraph, Graph, NodeIndex};
use std::fmt::Debug;
use std::result::Result;

/// Datatype representing node and edge indexes.
///
/// This is an alias of [`petgraph::graph::IndexType`].
pub trait IndexType = petgraph::graph::IndexType;

/// A [`Result`] with a hard-coded [`AsgError`] error type.
///
/// This is the result of every [`Asg`] operation that could potentially
///   fail in error.
pub type AsgResult<T> = Result<T, AsgError>;

/// There is currently no data stored on edges ("edge weights").
pub type AsgEdge = ();

/// Each node of the graph represents an object.
///
/// Enclosed in an [`Option`] to permit moving owned values out of the
///   graph.
/// The placeholder node created by `Asg::with_capacity` holds [`None`],
///   as does a node whose identifier has been taken for a state
///   transition that has not yet completed.
pub type Node = Option<Object>;

/// Index size for Graph nodes and edges.
///
/// This is the integer type that parameterizes the [`NodeIndex`] values
///   and the [`DiGraph`] used by [`Asg`].
type Ix = global::ProgSymSize;

/// An abstract semantic graph (ASG) of [objects][super::object].
///
/// This implementation is currently based on [`petgraph`].
///
/// Identifiers are cached by name for `O(1)` lookup.
/// Since [`SymbolId`][crate::sym::SymbolId] is used for this purpose,
///   the index may contain more entries than nodes and may contain gaps.
///
/// This IR focuses on the definition and manipulation of objects and their
///   dependencies.
/// See [`Ident`]for a summary of valid identifier object state
///   transitions.
///
/// Objects are never deleted from the graph,
///   so [`ObjectRef`]s will remain valid for the lifetime of the ASG.
///
/// For more information,
///   see the [module-level documentation][self].
#[derive(Debug, Default)]
pub struct Asg {
    // TODO: private; see `ld::xmle::lower`.
    /// Directed graph on which objects are stored.
    pub graph: DiGraph<Node, AsgEdge, Ix>,

    /// Map of [`SymbolId`][crate::sym::SymbolId] to node indexes.
    ///
    /// This allows for `O(1)` lookup of identifiers in the graph.
    /// Note that,
    ///   while we store [`NodeIndex`] internally,
    ///   the public API encapsulates it within an [`ObjectRef`].
    index: Vec<NodeIndex<Ix>>,

    /// Empty node indicating that no object exists for a given index.
    empty_node: NodeIndex<Ix>,

    /// The root node used for reachability analysis and topological
    ///   sorting.
    root_node: NodeIndex<Ix>,
}

impl Asg {
    /// Create a new ASG without reserving any capacity up front.
    ///
    /// This is shorthand for [`Asg::with_capacity`] with zero objects and
    ///   zero edges.
    pub fn new() -> Self {
        let (objects, edges) = (0, 0);

        Self::with_capacity(objects, edges)
    }

    /// Create an ASG with the provided initial capacity.
    ///
    /// `objects` is used both as the node capacity of the graph and as the
    ///   initial capacity of the identifier index.
    /// `edges` is used as the edge capacity of the graph.
    /// The latter may be more difficult to estimate,
    ///   since edges are used to represent various relationships between
    ///   different types of objects,
    ///     but it's safe to say that each object will have at least one
    ///     edge to another object.
    ///
    /// Two nodes are always added to the new graph:
    ///   an empty placeholder node followed by the root node.
    pub fn with_capacity(objects: usize, edges: usize) -> Self {
        let mut graph = Graph::with_capacity(objects, edges);

        // The first node added to the graph serves as a placeholder meaning
        //   "no object".
        let empty_node = graph.add_node(None);

        // The root is used to determine what identifiers ought to be
        //   retained by the final program.
        // It is not indexed and is not accessible by name.
        let root_node = graph.add_node(Some(Object::Root));

        // The first index slot is occupied by the placeholder.
        let mut index = Vec::with_capacity(objects);
        index.push(empty_node);

        Self {
            graph,
            index,
            empty_node,
            root_node,
        }
    }

    /// Get the underlying Graph
    pub fn into_inner(self) -> DiGraph<Node, AsgEdge, Ix> {
        self.graph
    }

    /// Record that the symbol `name` refers to the identifier at `node`.
    ///
    /// The index is what permits `O(1)` identifier lookups.
    /// If `name` lies beyond the end of the index,
    ///   the index is grown to the next power of two able to hold it,
    ///   with every new slot marked as empty.
    ///
    /// After an identifier is indexed it is not expected to be reassigned
    ///   to another node.
    /// Debug builds contain an assertion that will panic in this instance.
    ///
    /// Panics
    /// ======
    /// Will panic if unable to allocate more space for the index.
    fn index_identifier(&mut self, name: SymbolId, node: NodeIndex<Ix>) {
        let i = name.as_usize();
        let len = self.index.len();

        if len <= i {
            // If this is ever a problem we can fall back to usize max and
            //   re-compare before panicking.
            let grown = (i + 1)
                .checked_next_power_of_two()
                .expect("internal error: cannot allocate space for ASG index");

            // Newly created slots are marked as unoccupied.
            self.index.resize(grown, self.empty_node);
        }

        // An occupied slot must never be overwritten.
        debug_assert!(self.index[i] == self.empty_node);

        self.index[i] = node;
    }

    /// Return a reference to the identifier named `ident`,
    ///   adding it to the graph first if it is not yet known.
    ///
    /// A newly added identifier is created using [`Ident::declare`] and is
    ///   indexed by name so that later lookups will find it.
    ///
    /// See [`Ident::declare`] for more information.
    fn lookup_or_missing(&mut self, ident: SymbolId) -> ObjectRef {
        if let Some(existing) = self.lookup(ident) {
            return existing;
        }

        let missing = Ident::declare(ident);
        let node = self.graph.add_node(Some(missing.into()));

        self.index_identifier(ident, node);

        ObjectRef::new(node)
    }

    /// Perform a state transition on an identifier by name.
    ///
    /// The identifier `name` is located,
    ///   or added as a missing identifier if it does not yet exist
    ///   (see `lookup_or_missing`),
    ///   and the transition `f` is then applied to it
    ///   (see `with_ident`).
    ///
    /// On transition failure the identifier on the graph is restored to
    ///   its original value and the error is returned.
    fn with_ident_lookup<F>(
        &mut self,
        name: SymbolId,
        f: F,
    ) -> AsgResult<ObjectRef>
    where
        F: FnOnce(Ident) -> TransitionResult<Ident>,
    {
        let target = self.lookup_or_missing(name);

        self.with_ident(target, f)
    }

    /// Perform a state transition on an identifier by [`ObjectRef`].
    ///
    /// The identifier is moved out of its node and given to `f`.
    /// If `f` succeeds,
    ///   the resulting identifier is stored in the node and `identi` is
    ///   returned.
    /// If `f` fails,
    ///   the original identifier yielded by the failed transition is put
    ///   back into the node and the error is returned,
    ///     so the graph is left as it was.
    ///
    /// Panics
    /// ======
    /// Will panic if `identi` does not refer to a node on this graph or if
    ///   that node does not currently hold an object;
    ///     both indicate an internal error.
    /// A non-identifier object is passed to `unwrap_ident`,
    ///   which presumably panics as well (not visible from here).
    fn with_ident<F>(&mut self, identi: ObjectRef, f: F) -> AsgResult<ObjectRef>
    where
        F: FnOnce(Ident) -> TransitionResult<Ident>,
    {
        let node = self
            .graph
            .node_weight_mut(identi.into())
            .expect("internal error: ObjectRef does not exist on graph");

        // The node is empty for the duration of the transition;
        //   both arms below fill it again.
        let ident = node
            .take()
            .expect("internal error: missing object")
            .unwrap_ident();

        match f(ident) {
            Ok(transitioned) => {
                node.replace(transitioned.into());
                Ok(identi)
            }
            Err((orig, err)) => {
                // Undo the `take` above so that the failure is not
                //   observable on the graph.
                node.replace(orig.into());
                Err(err.into())
            }
        }
    }

    // TODO: This is transitional;
    //   remove once [`crate::xmlo::asg_builder`] is removed.
    /// Index of the root node of the graph.
    ///
    /// This is the node from which [`Asg::add_root`] adds its edges.
    pub fn root(&self) -> NodeIndex<Ix> {
        self.root_node
    }

    /// Add an object as a root.
    ///
    /// Roots are always included during a topological sort and any
    ///   reachability analysis.
    ///
    /// Ideally,
    ///   roots would be minimal and dependencies properly organized such
    ///   that objects will be included if they are a transitive dependency
    ///   of some included subsystem.
    ///
    /// See also [`IdentKind::is_auto_root`].
    pub fn add_root(&mut self, identi: ObjectRef) {
        self.graph
            .add_edge(self.root_node, identi.into(), Default::default());
    }

    /// Declare a concrete identifier.
    ///
    /// An identifier declaration is similar to a declaration in a header
    ///   file in a language like C,
    ///     describing the structure of the identifier.
    ///
    /// Identifiers are uniquely identified by a [`SymbolId`] `name`.
    /// The identifier is looked up by `name`,
    ///   with a missing identifier added to the graph if none exists yet,
    ///   and is then transitioned using [`Ident::resolve`] with the
    ///   provided `kind` and `src`.
    /// Whether an existing identifier of the same `name` may be redeclared
    ///   is decided entirely by that transition;
    ///     see [`Ident::resolve`] for the state transitions that can
    ///     occur.
    /// If the transition fails,
    ///   the error is returned and the identifier on the graph is left as
    ///   it was.
    ///
    /// If `kind` is an auto-root
    ///   (see [`IdentKind::is_auto_root`]),
    ///   the identifier is additionally added as a root
    ///   (see [`Asg::add_root`])
    ///   once the transition has succeeded.
    ///
    /// A successful declaration returns an [`ObjectRef`] to the identifier
    ///   on the graph.
    pub fn declare(
        &mut self,
        name: SymbolId,
        kind: IdentKind,
        src: Source,
    ) -> AsgResult<ObjectRef> {
        // `kind` is moved into the transition below,
        //   so this has to be determined first.
        let is_auto_root = kind.is_auto_root();

        let node =
            self.with_ident_lookup(name, |ident| ident.resolve(kind, src))?;

        if is_auto_root {
            self.add_root(node);
        }

        Ok(node)
    }

    /// Declare an abstract identifier.
    ///
    /// An _extern_ declaration declares an identifier the same as
    ///   [`Asg::declare`],
    ///     but omits source information.
    /// Externs are identifiers that are expected to be defined somewhere
    ///   else ("externally"),
    ///     and are resolved at [link-time][crate::ld].
    ///
    /// If a concrete identifier has already been declared (see
    ///   [`Asg::declare`]),
    ///     then the declarations will be compared and,
    ///       if compatible,
    ///       the identifier will be immediately _resolved_ and the object
    ///         on the graph will not be altered.
    /// Resolution will otherwise fail in error.
    ///
    /// See [`Ident::extern_`] and
    ///   [`Ident::resolve`] for more information on
    ///   compatibility related to extern resolution.
    pub fn declare_extern(
        &mut self,
        name: SymbolId,
        kind: IdentKind,
        src: Source,
    ) -> AsgResult<ObjectRef> {
        self.with_ident_lookup(name, |obj| obj.extern_(kind, src))
    }

    /// Set the fragment associated with a concrete identifier.
    ///
    /// Fragments are intended for use by the [linker][crate::ld].
    /// For more information,
    ///   see [`Ident::set_fragment`].
    pub fn set_fragment(
        &mut self,
        name: SymbolId,
        text: FragmentText,
    ) -> AsgResult<ObjectRef> {
        self.with_ident_lookup(name, |obj| obj.set_fragment(text))
    }

    /// Retrieve an object from the graph by [`ObjectRef`].
    ///
    /// Since an [`ObjectRef`] should only be produced by an [`Asg`],
    ///   and since objects are never deleted from the graph,
    ///   this should never fail so long as references are not shared
    ///   between multiple graphs.
    /// It is nevertheless wrapped in an [`Option`] just in case;
    ///   [`None`] is returned if no node exists for `index`.
    ///
    /// Panics
    /// ======
    /// Will panic if the node exists but does not hold an object.
    #[inline]
    pub fn get<I: Into<ObjectRef>>(&self, index: I) -> Option<&Object> {
        let objref: ObjectRef = index.into();
        let node = self.graph.node_weight(objref.into())?;

        Some(
            node.as_ref()
                .expect("internal error: Asg::get missing Node data"),
        )
    }

    /// Retrieve an identifier from the graph by [`ObjectRef`].
    ///
    /// If the object exists but is not an identifier,
    ///   [`None`] will be returned.
    #[inline]
    pub fn get_ident<I: Into<ObjectRef>>(&self, index: I) -> Option<&Ident> {
        self.get(index).and_then(Object::as_ident_ref)
    }

    /// Attempt to retrieve an identifier from the graph by name.
    ///
    /// [`None`] is returned if the index has no slot for `name` or if the
    ///   slot holds the empty placeholder node,
    ///     meaning that no identifier has been indexed under that name.
    ///
    /// Since only identifiers carry a name,
    ///   this method cannot be used to retrieve all possible objects on the
    ///   graph---for
    ///     that, see [`Asg::get`].
    #[inline]
    pub fn lookup(&self, name: SymbolId) -> Option<ObjectRef> {
        self.index
            .get(name.as_usize())
            .copied()
            // Compare against the same sentinel that `index_identifier`
            //   fills unused slots with rather than a hard-coded index.
            .filter(|&node| node != self.empty_node)
            .map(ObjectRef::new)
    }

    /// Declare that `dep` is a dependency of `ident`.
    ///
    /// An object must be declared as a dependency if its value must be
    ///   computed before computing the value of `ident`.
    /// The [linker][crate::ld] will ensure this ordering.
    ///
    /// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
    ///   be looked up by [`SymbolId`] or if they may not yet have been
    ///   declared.
    pub fn add_dep(&mut self, identi: ObjectRef, depi: ObjectRef) {
        self.graph
            .update_edge(identi.into(), depi.into(), Default::default());
    }

    /// Check whether `dep` is a dependency of `ident`.
    #[inline]
    pub fn has_dep(&self, ident: ObjectRef, dep: ObjectRef) -> bool {
        self.graph.contains_edge(ident.into(), dep.into())
    }

    /// Declare that `dep` is a dependency of `ident`,
    ///   regardless of whether they are known.
    ///
    /// In contrast to [`add_dep`][Asg::add_dep],
    ///   this method accepts names rather than references and will add the
    ///   dependency even if one or both of `ident` or `dep` have not yet
    ///   been declared.
    /// In such a case,
    ///   a missing identifier will be added as a placeholder,
    ///     allowing the ASG to be built with partial information as
    ///     identifiers continue to be discovered.
    /// See [`Ident::declare`] for more information.
    ///
    /// References to both identifiers are returned in argument order.
    pub fn add_dep_lookup(
        &mut self,
        ident: SymbolId,
        dep: SymbolId,
    ) -> (ObjectRef, ObjectRef) {
        // `ident` is resolved before `dep`,
        //   so if both are missing then `ident` is added to the graph
        //   first.
        let identi = self.lookup_or_missing(ident);
        let depi = self.lookup_or_missing(dep);

        self.add_dep(identi, depi);

        (identi, depi)
    }
}

/// Reference to an [object][super::object] stored within the [`Asg`].
///
/// Object references are integer offsets,
///   not pointers;
///     each wraps the [`NodeIndex`] of the object's node on the graph.
/// See the [module-level documentation][self] for more information.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub struct ObjectRef(NodeIndex);

impl ObjectRef {
    pub fn new(index: NodeIndex) -> Self {
        Self(index)
    }
}

impl From<NodeIndex> for ObjectRef {
    fn from(index: NodeIndex) -> Self {
        Self(index)
    }
}

impl From<ObjectRef> for NodeIndex {
    fn from(objref: ObjectRef) -> Self {
        objref.0
    }
}

#[cfg(test)]
mod test {
    use super::super::error::AsgError;
    use super::*;
    use crate::num::Dim;
    use crate::sym::GlobalSymbolIntern;
    use std::assert_matches::assert_matches;

    /// The type exercised by the tests in this module.
    type Sut = Asg;

    /// The requested capacities must be honored by both the graph and the
    ///   identifier index.
    #[test]
    fn create_with_capacity() {
        let (want_nodes, want_edges) = (100, 300);
        let sut = Sut::with_capacity(want_nodes, want_edges);

        let (have_nodes, have_edges) = sut.graph.capacity();
        assert!(have_nodes >= want_nodes);
        assert!(have_edges >= want_edges);

        // The index is sized by the number of objects.
        assert!(sut.index.capacity() >= want_nodes);
    }

    /// Two distinct names must yield two distinct identifiers,
    ///   each retaining the kind and source it was declared with.
    #[test]
    fn declare_new_unique_idents() -> AsgResult<()> {
        let mut sut = Sut::new();

        // NB(review): this test was originally annotated as relying on
        // index ordering (a larger index first to create a gap, then an
        // index within that gap so that it is not considered an
        // already-defined identifier).  The interning and declaration
        // order below is preserved as-is; confirm whether it still
        // exercises that gap.
        let syma = "syma".intern();
        let symb = "symab".intern();

        let srca = Source {
            desc: Some("a".into()),
            ..Default::default()
        };
        let nodea = sut.declare(syma, IdentKind::Meta, srca)?;

        let srcb = Source {
            desc: Some("b".into()),
            ..Default::default()
        };
        let nodeb = sut.declare(symb, IdentKind::Worksheet, srcb)?;

        assert_ne!(nodea, nodeb);

        let givena = sut.get_ident(nodea).unwrap();
        let expected_srca = Source {
            desc: Some("a".into()),
            ..Default::default()
        };
        assert_eq!(syma, givena.name());
        assert_eq!(Some(&IdentKind::Meta), givena.kind());
        assert_eq!(Some(&expected_srca), givena.src());

        let givenb = sut.get_ident(nodeb).unwrap();
        let expected_srcb = Source {
            desc: Some("b".into()),
            ..Default::default()
        };
        assert_eq!(symb, givenb.name());
        assert_eq!(Some(&IdentKind::Worksheet), givenb.kind());
        assert_eq!(Some(&expected_srcb), givenb.src());

        Ok(())
    }

    /// Declaring an identifier whose kind is an auto-root must add an edge
    ///   from the root node,
    ///     and declaring any other kind must not.
    #[test]
    fn declare_kind_auto_root() -> AsgResult<()> {
        let mut sut = Sut::new();

        // Sanity check, in case this changes.
        let auto_kind = IdentKind::Worksheet;
        assert!(auto_kind.is_auto_root());

        let auto_root_node =
            sut.declare("auto_root".intern(), auto_kind, Default::default())?;

        // Should have been automatically added as a root.
        let auto_is_rooted = sut
            .graph
            .contains_edge(sut.root_node, auto_root_node.into());
        assert!(auto_is_rooted);

        // Sanity check for the opposite case as well.
        let no_auto_kind = IdentKind::Tpl;
        assert!(!no_auto_kind.is_auto_root());

        let no_auto_root_node = sut.declare(
            "no_auto_root".intern(),
            no_auto_kind,
            Default::default(),
        )?;

        // Non-auto-roots should _not_ be added as roots automatically.
        let no_auto_is_rooted = sut
            .graph
            .contains_edge(sut.root_node, no_auto_root_node.into());
        assert!(!no_auto_is_rooted);

        Ok(())
    }

    /// A declared identifier must be found again by its name.
    #[test]
    fn lookup_by_symbol() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "lookup".into();
        let src = Source {
            generated: true,
            ..Default::default()
        };

        let declared = sut.declare(sym, IdentKind::Meta, src)?;
        let found = sut.lookup(sym);

        assert_eq!(Some(declared), found);

        Ok(())
    }

    /// A failed redeclaration must surface the transition error and leave
    ///   the original identifier on the graph.
    #[test]
    fn declare_fails_if_transition_fails() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "symdup".intern();
        let orig_src = Source {
            desc: Some("orig".into()),
            ..Default::default()
        };

        // Set up an object to fail redeclaration.
        let node = sut.declare(sym, IdentKind::Meta, orig_src.clone())?;

        // Declaring the same name a second time is what fails.
        let redeclared = sut.declare(sym, IdentKind::Meta, Source::default());
        assert_matches!(redeclared, Err(AsgError::ObjectTransition(..)));

        // The node should have been restored.
        let restored = sut.get_ident(node).unwrap();
        assert_eq!(Some(&orig_src), restored.src());

        Ok(())
    }

    /// Declaring the same extern twice with the same kind must succeed and
    ///   yield the same reference both times.
    #[test]
    fn declare_extern_returns_existing() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "symext".intern();
        let kind = IdentKind::Class(Dim::Matrix);

        let first = sut.declare_extern(sym, kind.clone(), Source::default())?;

        // Only the source differs for the second declaration.
        let resrc = Source {
            desc: Some("redeclare".into()),
            ..Default::default()
        };
        let second = sut.declare_extern(sym, kind, resrc)?;

        assert_eq!(first, second);

        Ok(())
    }

    // NOTE(review): this was annotated as building upon
    // `declare_returns_existing`, which does not exist in this module;
    // `declare_extern_returns_existing` above is presumably what was
    // meant.
    /// A failed extern declaration must surface the transition error and
    ///   leave the original identifier on the graph.
    #[test]
    fn declare_extern_fails_if_transition_fails() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "symdup".intern();
        let orig_src = Source {
            desc: Some("orig".into()),
            ..Default::default()
        };

        let node = sut.declare(sym, IdentKind::Meta, orig_src.clone())?;

        // Changes kind, which is invalid.
        let as_extern =
            sut.declare_extern(sym, IdentKind::Worksheet, Source::default());
        assert_matches!(as_extern, Err(AsgError::ObjectTransition(..)));

        // The node should have been restored.
        let restored = sut.get_ident(node).unwrap();
        assert_eq!(Some(&orig_src), restored.src());

        Ok(())
    }

    /// Setting a fragment must update the existing identifier in place and
    ///   retain its name, kind and source.
    #[test]
    fn add_fragment_to_ident() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "tofrag".intern();
        let src = Source {
            generated: true,
            ..Default::default()
        };
        let declared = sut.declare(sym, IdentKind::Meta, src.clone())?;

        let fragment = "a fragment".intern();
        let with_frag = sut.set_fragment(sym, fragment)?;

        // Attaching a fragment should _replace_ the node, not create a
        // new one
        assert_eq!(
            declared, with_frag,
            "fragment node does not match original node"
        );

        let ident = sut.get_ident(declared).unwrap();

        assert_eq!(sym, ident.name());
        assert_eq!(Some(&IdentKind::Meta), ident.kind());
        assert_eq!(Some(&src), ident.src());
        assert_eq!(Some(fragment), ident.fragment());

        Ok(())
    }

    /// A failed attempt to set a fragment must surface the transition
    ///   error and leave the identifier on the graph.
    #[test]
    fn add_fragment_to_ident_fails_if_transition_fails() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "failfrag".intern();
        let src = Source {
            generated: true,
            ..Default::default()
        };

        // This declaration is expected to succeed;
        //   the failure under test comes from the second `set_fragment`
        //   below.
        let node = sut.declare(sym, IdentKind::Meta, src.clone())?;

        // The first set will succeed.
        sut.set_fragment(sym, "".into())?;

        // This will fail.
        let second_set = sut.set_fragment(sym, "".into());

        // The node should have been restored.
        let restored = sut.get_ident(node).unwrap();

        assert_eq!(sym, restored.name());
        assert_matches!(second_set, Err(AsgError::ObjectTransition(..)));

        Ok(())
    }

    /// A dependency added between two declared identifiers must be
    ///   observable,
    ///     including after adding it a second time.
    #[test]
    fn add_ident_dep_to_ident() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "sym".intern();
        let dep = "dep".intern();

        let symnode = sut.declare(sym, IdentKind::Meta, Source::default())?;
        let depnode = sut.declare(dep, IdentKind::Meta, Source::default())?;

        // The first pass adds the dependency;
        //   the second is a sanity check that re-adding it is harmless.
        for _ in 0..2 {
            sut.add_dep(symnode, depnode);
            assert!(sut.has_dep(symnode, depnode));
        }

        Ok(())
    }

    // Same as `add_ident_dep_to_ident`,
    //   but the dependency is added by name.
    /// A dependency added by name between two declared identifiers must be
    ///   observable.
    #[test]
    fn add_dep_lookup_existing() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "sym".intern();
        let dep = "dep".intern();

        // Both identifiers exist before the dependency is added;
        //   the references returned here are not needed.
        sut.declare(sym, IdentKind::Meta, Source::default())?;
        sut.declare(dep, IdentKind::Meta, Source::default())?;

        let (symnode, depnode) = sut.add_dep_lookup(sym, dep);
        assert!(sut.has_dep(symnode, depnode));

        Ok(())
    }

    /// A dependency added by name between two undeclared identifiers must
    ///   add both identifiers to the graph under their names.
    #[test]
    fn add_dep_lookup_missing() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "sym".intern();
        let dep = "dep".intern();

        // both of these are missing
        let (symnode, depnode) = sut.add_dep_lookup(sym, dep);
        assert!(sut.has_dep(symnode, depnode));

        let sym_ident = sut.get_ident(symnode).unwrap();
        let dep_ident = sut.get_ident(depnode).unwrap();

        assert_eq!(sym, sym_ident.name());
        assert_eq!(dep, dep_ident.name());

        Ok(())
    }

    /// Declaring an identifier that was previously added as missing must
    ///   reuse its node and record the declared kind and source.
    #[test]
    fn declare_return_missing_symbol() -> AsgResult<()> {
        let mut sut = Sut::new();

        let sym = "sym".intern();
        let dep = "dep".intern();

        // both of these are missing, see add_dep_lookup_missing
        let (missing, _) = sut.add_dep_lookup(sym, dep);

        let src = Source {
            desc: Some("redeclare missing".into()),
            ..Default::default()
        };

        // Check with a declared value
        let declared = sut.declare(sym, IdentKind::Meta, src.clone())?;

        assert_eq!(missing, declared);

        let ident = sut.get_ident(declared).unwrap();

        assert_eq!(sym, ident.name());
        assert_eq!(Some(&IdentKind::Meta), ident.kind());
        assert_eq!(Some(&src), ident.src());

        Ok(())
    }
}