tame/tamer/src/asg/graph.rs

// Graph abstraction
//
//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Abstract semantic graph.
//!
//! ![Visualization of ASG ontology](../ontviz.svg)

use self::object::{
    DynObjectRel, ObjectRelFrom, ObjectRelTy, ObjectRelatable, Root,
};

use super::{
    AsgError, FragmentText, Ident, IdentKind, Object, ObjectIndex, ObjectKind,
    Source, TransitionResult,
};
use crate::{
    diagnose::{panic::DiagnosticPanic, Annotate, AnnotatedSpan},
    f::Functor,
    fmt::{DisplayWrapper, TtQuote},
    global,
    parse::{util::SPair, Token},
    span::Span,
    sym::SymbolId,
};
use petgraph::{
    graph::{DiGraph, Graph, NodeIndex},
    visit::EdgeRef,
    Direction,
};
use std::{fmt::Debug, result::Result};

pub mod object;
pub mod visit;
pub mod xmli;

use object::{ObjectContainer, ObjectRelTo};

/// Datatype representing node and edge indexes.
///
/// This is a trait alias for [`petgraph::graph::IndexType`].
pub trait IndexType = petgraph::graph::IndexType;

/// A [`Result`] with a hard-coded [`AsgError`] error type.
///
/// This is the result of every [`Asg`] operation that could potentially
///   fail in error.
pub type AsgResult<T> = Result<T, AsgError>;

/// The [`ObjectRelTy`] (representing the [`ObjectKind`]) of the source and
///   destination [`Node`]s respectively.
///
/// The tuple is laid out as
///   `(source type, target type, optional contextual span)`.
///
/// This small memory expense allows for bidirectional edge filtering
///   and [`ObjectIndex`] [`ObjectKind`] resolution without an extra layer
///   of indirection to look up the source/target [`Node`].
///
/// The edge may also optionally contain a [`Span`] that provides additional
///   context in situations where the distinction between the span of the
///   target object and the span of the _reference_ to that object is
///   important.
type AsgEdge = (ObjectRelTy, ObjectRelTy, Option<Span>);

/// Each node of the graph.
type Node = ObjectContainer;

/// Index size for Graph nodes and edges.
///
/// This is used as the index parameter of both the [`DiGraph`] and its
///   [`NodeIndex`]es.
type Ix = global::ProgSymSize;

/// An abstract semantic graph (ASG) of [objects](object).
///
/// This implementation is currently based on [`petgraph`].
///
/// Identifiers are cached by name for `O(1)` lookup.
/// Since [`SymbolId`][crate::sym::SymbolId] is used for this purpose,
///   the index may contain more entries than nodes and may contain gaps.
///
/// This IR focuses on the definition and manipulation of objects and their
///   dependencies.
/// See [`Ident`] for a summary of valid identifier object state
///   transitions.
///
/// Objects are never deleted from the graph,
///   so [`ObjectIndex`]s will remain valid for the lifetime of the ASG.
///
/// For more information,
///   see the [module-level documentation][self].
pub struct Asg {
    // TODO: private; see `ld::xmle::lower`.
    /// Directed graph on which objects are stored.
    pub graph: DiGraph<Node, AsgEdge, Ix>,

    /// Map of [`SymbolId`][crate::sym::SymbolId] to node indexes.
    ///
    /// This allows for `O(1)` lookup of identifiers in the graph.
    /// Note that,
    ///   while we store [`NodeIndex`] internally,
    ///   the public API encapsulates it within an [`ObjectIndex`].
    ///
    /// Slots that have not been assigned an identifier hold `empty_node`.
    index: Vec<NodeIndex<Ix>>,

    /// Empty node indicating that no object exists for a given index.
    ///
    /// This is the sentinel value used to fill unoccupied slots of `index`.
    empty_node: NodeIndex<Ix>,

    /// The root node used for reachability analysis and topological
    ///   sorting.
    root_node: NodeIndex<Ix>,
}

impl Debug for Asg {
    /// Abbreviated [`Debug`] representation of the ASG.
    ///
    /// Only `root_node` and `graph` are emitted;
    ///   the remaining fields
    ///     (notably the large `index`,
    ///       which would otherwise dominate parser traces)
    ///   are elided and the output is marked as non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let Self {
            root_node, graph, ..
        } = self;

        let mut out = f.debug_struct("Asg");
        out.field("root_node", root_node);
        out.field("graph", graph);
        out.finish_non_exhaustive()
    }
}

impl Default for Asg {
    fn default() -> Self {
        Self::new()
    }
}

impl Asg {
    /// Construct an ASG that requests zero initial capacity for both
    ///   objects and edges.
    ///
    /// See also [`with_capacity`](Asg::with_capacity).
    pub fn new() -> Self {
        // TODO: Determine a proper initial capacity.
        let (objects, edges) = (0, 0);

        Self::with_capacity(objects, edges)
    }

    /// Construct an ASG,
    ///   preallocating space for the given number of objects and edges.
    ///
    /// `objects` sizes both the node storage of the graph and the
    ///   identifier index.
    /// `edges` sizes the edge storage of the graph;
    ///   a sensible value is harder to determine since edges represent
    ///   many kinds of relationships between objects,
    ///     but each object can be expected to have at least one edge to
    ///     another object.
    ///
    /// Two nodes are always created, in this order:
    ///   a placeholder node that marks unoccupied index slots,
    ///   and the root node.
    pub fn with_capacity(objects: usize, edges: usize) -> Self {
        let mut graph = Graph::with_capacity(objects, edges);

        // The placeholder must be the very first node added
        //   (its value does not matter).
        let empty_node = graph.add_node(Object::Root(Root).into());

        // The root is used to determine which identifiers ought to be
        //   retained by the final program.
        // It is not indexed and is not accessible by name.
        let root_node = graph.add_node(Object::Root(Root).into());

        // Slot zero of the index is consumed by the placeholder.
        let mut index = Vec::with_capacity(objects);
        index.push(empty_node);

        Self {
            graph,
            index,
            empty_node,
            root_node,
        }
    }

    /// Get the underlying Graph
    pub fn into_inner(self) -> DiGraph<Node, AsgEdge, Ix> {
        self.graph
    }

    /// Index the provided symbol `name` as representing the identifier `node`.
    ///
    /// This index permits `O(1)` identifier lookups.
    ///
    /// If `name` lies beyond the current end of the index,
    ///   the index is grown to the next power of two that can hold it,
    ///   with the new slots filled with the empty-node sentinel.
    ///
    /// After an identifier is indexed it is not expected to be reassigned
    ///   to another node.
    /// Debug builds contain an assertion that will panic in this instance.
    ///
    /// Panics
    /// ======
    /// Will panic if unable to allocate more space for the index,
    ///   including when the required size cannot be represented by
    ///   [`usize`].
    fn index_identifier(&mut self, name: SymbolId, node: NodeIndex<Ix>) {
        let i = name.as_usize();

        if i >= self.index.len() {
            // Both the increment and the rounding are checked;
            //   an unchecked `i + 1` could wrap to zero in release builds
            //   and cause `resize` to truncate the index rather than
            //   produce the intended panic.
            // If this is ever a problem we can fall back to usize max and
            //   re-compare before panicking.
            let new_size = i
                .checked_add(1)
                .and_then(usize::checked_next_power_of_two)
                .expect("internal error: cannot allocate space for ASG index");

            self.index.resize(new_size, self.empty_node);
        }

        // We should never overwrite indexes
        debug_assert!(self.index[i] == self.empty_node);

        self.index[i] = node;
    }

    /// Resolve `ident` to its [`ObjectIndex`],
    ///   first adding it to the graph via [`Ident::declare`] if it has not
    ///   yet been indexed.
    ///
    /// The span of `ident` seeds a newly created identifier with context
    ///   to aid in debugging why it was introduced to the graph.
    /// The returned [`ObjectIndex`] carries the span of `ident` even when
    ///   the identifier already exists,
    ///     which retains information about the location that requested
    ///     the identifier.
    /// To retrieve the span of a previously declared identifier,
    ///   resolve the [`Ident`] object and inspect it.
    ///
    /// See [`Ident::declare`] for more information.
    pub(super) fn lookup_or_missing(
        &mut self,
        ident: SPair,
    ) -> ObjectIndex<Ident> {
        if let Some(existing) = self.lookup(ident) {
            return existing;
        }

        // Not yet known: add a placeholder and index it by name.
        let node = self.graph.add_node(Ident::declare(ident).into());
        self.index_identifier(ident.symbol(), node);

        ObjectIndex::new(node, ident.span())
    }

    /// Apply the state transition `f` to the identifier named `name`.
    ///
    /// The identifier is first located using [`Self::lookup_or_missing`],
    ///   which adds a missing identifier if it does not yet exist.
    /// The transition itself is then delegated to [`Self::with_ident`],
    ///   which replaces the identifier on the graph with the result of
    ///   `f`.
    ///
    /// This will safely restore graph state to the original identifier
    ///   value on transition failure.
    fn with_ident_lookup<F>(
        &mut self,
        name: SPair,
        f: F,
    ) -> AsgResult<ObjectIndex<Ident>>
    where
        F: FnOnce(Ident) -> TransitionResult<Ident>,
    {
        let oi = self.lookup_or_missing(name);

        self.with_ident(oi, f)
    }

    /// Perform a state transition on an identifier by [`ObjectIndex`].
    ///
    /// Invoke `f` with the located identifier and replace the identifier on
    ///   the graph with the result.
    ///
    /// This will safely restore graph state to the original identifier
    ///   value on transition failure.
    fn with_ident<F>(
        &mut self,
        identi: ObjectIndex<Ident>,
        f: F,
    ) -> AsgResult<ObjectIndex<Ident>>
    where
        F: FnOnce(Ident) -> TransitionResult<Ident>,
    {
        let container = self.graph.node_weight_mut(identi.into()).unwrap();

        container
            .try_replace_with(f)
            .map(|()| identi)
            .map_err(Into::into)
    }

    /// Index of the root object.
    ///
    /// All [`Object`]s reachable from the root will be included in the
    ///   compilation unit or linked executable.
    ///
    /// `witness` becomes the span of the returned [`ObjectIndex`];
    ///   it is intended for diagnostic purposes to highlight the source
    ///   entity that triggered the request of the root.
    pub fn root(&self, witness: Span) -> ObjectIndex<Root> {
        let node = self.root_node;

        ObjectIndex::new(node, witness)
    }

    /// Add an object as a root.
    ///
    /// Roots are always included during a topological sort and any
    ///   reachability analysis.
    ///
    /// Ideally,
    ///   roots would be minimal and dependencies properly organized such
    ///   that objects will be included if they are a transitive dependency
    ///   of some included subsystem.
    ///
    /// See also [`IdentKind::is_auto_root`].
    pub fn add_root(&mut self, identi: ObjectIndex<Ident>) {
        self.graph.add_edge(
            self.root_node,
            identi.into(),
            (ObjectRelTy::Root, ObjectRelTy::Ident, None),
        );
    }

    /// Whether an edge exists from the root node to `identi`.
    ///
    /// See [`Asg::add_root`] for more information about roots.
    #[cfg(test)]
    pub(super) fn is_rooted(&self, identi: ObjectIndex<Ident>) -> bool {
        let target: NodeIndex<Ix> = identi.into();

        self.graph.contains_edge(self.root_node, target)
    }

    /// Declare a concrete identifier.
    ///
    /// An identifier declaration is similar to a declaration in a header
    ///   file in a language like C,
    ///     describing the structure of the identifier.
    /// Once declared,
    ///   this information cannot be changed.
    ///
    /// Identifiers are uniquely identified by `name`.
    /// The identifier is looked up
    ///   (and added as a missing identifier if it does not yet exist)
    ///   and then transitioned using [`Ident::resolve`] with the span of
    ///   `name`, `kind`, and `src`.
    /// If that transition fails,
    ///   the error is returned and the identifier is not rooted.
    ///
    /// For more information on state transitions that can occur when
    ///   redeclaring an identifier that already exists,
    ///     see [`Ident::resolve`].
    ///
    /// On success,
    ///   the identifier is additionally added as a root
    ///     (see [`Asg::add_root`])
    ///   when [`IdentKind::is_auto_root`] holds for `kind`,
    ///     and an [`ObjectIndex`] reference to it is returned.
    pub fn declare(
        &mut self,
        name: SPair,
        kind: IdentKind,
        src: Source,
    ) -> AsgResult<ObjectIndex<Ident>> {
        // Must be determined before `kind` is moved into the transition.
        let is_auto_root = kind.is_auto_root();

        let identi = self
            .with_ident_lookup(name, |obj| obj.resolve(name.span(), kind, src))?;

        if is_auto_root {
            self.add_root(identi);
        }

        Ok(identi)
    }

    /// Declare an abstract identifier.
    ///
    /// An _extern_ declaration declares an identifier the same as
    ///   [`Asg::declare`],
    ///     but omits source information.
    /// Externs are identifiers that are expected to be defined somewhere
    ///   else ("externally"),
    ///     and are resolved at [link-time][crate::ld].
    ///
    /// The identifier is looked up
    ///   (and added as a missing identifier if it does not yet exist)
    ///   and then transitioned using [`Ident::extern_`] with the span of
    ///   `name`, `kind`, and `src`.
    /// Unlike [`Asg::declare`],
    ///   the identifier is never automatically rooted.
    ///
    /// See [`Ident::extern_`] and
    ///   [`Ident::resolve`] for more information on
    ///   compatibility related to extern resolution.
    pub fn declare_extern(
        &mut self,
        name: SPair,
        kind: IdentKind,
        src: Source,
    ) -> AsgResult<ObjectIndex<Ident>> {
        let span = name.span();

        self.with_ident_lookup(name, move |ident| {
            ident.extern_(span, kind, src)
        })
    }

    /// Attach the fragment `text` to the identifier named `name`.
    ///
    /// The identifier is looked up
    ///   (and added as a missing identifier if it does not yet exist)
    ///   and then transitioned using [`Ident::set_fragment`].
    ///
    /// Fragments are intended for use by the [linker][crate::ld].
    /// For more information,
    ///   see [`Ident::set_fragment`].
    pub fn set_fragment(
        &mut self,
        name: SPair,
        text: FragmentText,
    ) -> AsgResult<ObjectIndex<Ident>> {
        let attach = move |ident: Ident| ident.set_fragment(text);

        self.with_ident_lookup(name, attach)
    }

    /// Create a new object on the graph.
    ///
    /// The object is added as a new node;
    ///   no edges are created and nothing is indexed by name.
    ///
    /// The returned [`ObjectIndex`] will be augmented with the span
    ///   of `obj`.
    pub(super) fn create<O: ObjectKind>(&mut self, obj: O) -> ObjectIndex<O> {
        // NOTE(review): the target type of this conversion is inferred;
        //   presumably it is `Object` — confirm against `ObjectKind`.
        let o = obj.into();
        // Capture the span before `o` is moved into its container.
        let span = o.span();
        let node_id = self.graph.add_node(ObjectContainer::from(o.into()));

        ObjectIndex::new(node_id, span)
    }

    /// Add an edge from the [`Object`] represented by the
    ///   [`ObjectIndex`] `from_oi` to the object represented by `to_oi`.
    ///
    /// The edge may optionally contain a _contextual [`Span`]_,
    ///   in cases where it is important to distinguish between the span
    ///   associated with the target and the span associated with the
    ///   _reference_ to the target.
    ///
    /// For more information on how the ASG's ontology is enforced statically,
    ///   see [`ObjectRelTo`].
    fn add_edge<OA: ObjectKind, OB: ObjectKind>(
        &mut self,
        from_oi: ObjectIndex<OA>,
        to_oi: ObjectIndex<OB>,
        ctx_span: Option<Span>,
    ) where
        OA: ObjectRelTo<OB>,
    {
        self.graph.add_edge(
            from_oi.into(),
            to_oi.into(),
            (OA::rel_ty(), OB::rel_ty(), ctx_span),
        );
    }

    /// Retrieve an object from the graph by [`ObjectIndex`].
    ///
    /// [`None`] is returned only when no node exists on the graph for
    ///   `index`.
    /// Since an [`ObjectIndex`] should only be produced by an [`Asg`],
    ///   and since objects are never deleted from the graph,
    ///   this should never happen so long as references are not shared
    ///   between multiple graphs;
    ///     the [`Option`] exists just in case.
    #[inline]
    pub fn get<O: ObjectKind>(&self, index: ObjectIndex<O>) -> Option<&O> {
        let container = self.graph.node_weight(index.into())?;

        Some(container.get())
    }

    /// Attempt to map over an inner [`Object`] referenced by
    ///   [`ObjectIndex`].
    ///
    /// The type `O` is the expected type of the [`Object`],
    ///   which should be known to the caller based on the provided
    ///   [`ObjectIndex`].
    /// This method will attempt to narrow to that object type,
    ///   panicking if there is a mismatch;
    ///     see the [`object` module documentation](object) for more
    ///     information and rationale on this behavior.
    ///
    /// On success,
    ///   the returned [`ObjectIndex`] is `index` with its span overwritten
    ///   by the span of the object now stored on the graph.
    /// On failure,
    ///   the error produced by `f` is returned.
    ///
    /// Panics
    /// ======
    /// This method chooses to simplify the API by choosing panics for
    ///   situations that ought never to occur and represent significant bugs
    ///   in the compiler.
    /// Those situations are:
    ///
    ///   1. If the provided [`ObjectIndex`] references a node index that is
    ///        not present on the graph;
    ///   2. If the node referenced by [`ObjectIndex`] exists but its container
    ///        is empty because an object was taken but never returned; and
    ///   3. If an object cannot be narrowed (downcast) to type `O`,
    ///        representing a type mismatch between what the caller thinks
    ///        this object represents and what the object actually is.
    #[must_use = "returned ObjectIndex has a possibly-updated and more relevant span"]
    pub(super) fn try_map_obj<O: ObjectKind, E>(
        &mut self,
        index: ObjectIndex<O>,
        f: impl FnOnce(O) -> Result<O, (O, E)>,
    ) -> Result<ObjectIndex<O>, E> {
        let container =
            self.graph.node_weight_mut(index.into()).diagnostic_expect(
                || diagnostic_node_missing_desc(index),
                "invalid ObjectIndex: data are missing from the ASG",
            );

        match container.try_replace_with(f) {
            // Re-read the span from the graph since `f` may have changed it.
            Ok(()) => Ok(index.overwrite(container.get::<Object>().span())),
            Err(e) => Err(e),
        }
    }

    /// Create an iterator over the outgoing edges of `oi`,
    ///   each narrowed to the relationship type `O::Rel`.
    ///
    /// Edges are read dynamically using [`Self::edges_dyn`] and the target
    ///   of each is then narrowed for `O`.
    /// Narrowing is expected to always succeed,
    ///   since the graph cannot be constructed without adhering to the edge
    ///   ontology defined by [`ObjectRelTo`].
    ///
    /// Panics
    /// ======
    /// The returned iterator panics with a diagnostic message when it
    ///   encounters an edge whose target cannot be narrowed,
    ///     which means that edge invariants were not enforced during
    ///     construction of the graph.
    fn edges<'a, O: ObjectKind + ObjectRelatable + 'a>(
        &'a self,
        oi: ObjectIndex<O>,
    ) -> impl Iterator<Item = O::Rel> + 'a {
        self.edges_dyn(oi.widen()).map(move |rel| {
            // Retained for the diagnostic message below.
            let ty = rel.target_ty();
            let narrowed = rel.narrow_target::<O>();

            narrowed.diagnostic_unwrap(|| {
                vec![
                    oi.internal_error(format!(
                        "encountered invalid outgoing edge type {:?}",
                        ty,
                    )),
                    oi.help(
                        "this means that Asg did not enforce edge invariants \
                            during construction, which is a significant bug",
                    ),
                ]
            })
        })
    }

    /// Create an iterator over the outgoing edges of `oi` in a dynamic
    ///   context,
    ///     yielding a [`DynObjectRel`] for each edge.
    ///
    /// Each relationship is built from the source and target types and
    ///   the contextual span stored on the edge,
    ///     together with `oi` as the source and an [`ObjectIndex`] to the
    ///     edge's target.
    ///
    /// _This method should be used only when the types of objects cannot be
    ///   statically known,_
    ///     which is generally true only for code paths operating on
    ///     significant portions of
    ///       (or the entirety of)
    ///       the graph without distinction.
    /// See [`Self::edges`] for more information.
    fn edges_dyn<'a>(
        &'a self,
        oi: ObjectIndex<Object>,
    ) -> impl Iterator<Item = DynObjectRel> + 'a {
        self.graph.edges(oi.into()).map(move |edge| {
            let &(src_ty, target_ty, ctx_span) = edge.weight();
            let target = ObjectIndex::<Object>::new(edge.target(), oi);

            DynObjectRel::new(src_ty, target_ty, oi, target, ctx_span)
        })
    }

    /// Sources of the incoming edges to `oi`,
    ///   restricted to edges whose recorded source type matches the
    ///   [`ObjectKind`] `OI`.
    ///
    /// The rationale behind the filtering is that objects ought to focus
    ///   primarily on what they _relate to_,
    ///     which is what the ontology is designed around.
    /// If an object cares about what has an edge _to_ it,
    ///   it should have good reason and a specific use case in mind.
    fn incoming_edges_filtered<'a, OI: ObjectKind + ObjectRelatable + 'a>(
        &'a self,
        oi: ObjectIndex<impl ObjectKind + ObjectRelFrom<OI> + 'a>,
    ) -> impl Iterator<Item = ObjectIndex<OI>> + 'a {
        let incoming =
            self.graph.edges_directed(oi.into(), Direction::Incoming);

        incoming.filter_map(move |edge| {
            // The first component of the edge weight is the source type.
            (edge.weight().0 == OI::rel_ty())
                .then(|| ObjectIndex::<OI>::new(edge.source(), oi))
        })
    }

    /// Retrieve the [`ObjectIndex`] to which the given `ident` is bound,
    ///   if any.
    ///
    /// The binding is taken to be the first outgoing neighbor of the
    ///   identifier on the graph.
    ///
    /// The type parameter `O` indicates the _expected_ [`ObjectKind`] to be
    ///   bound to the returned [`ObjectIndex`],
    ///     which will be used for narrowing (downcasting) the object after
    ///     lookup.
    /// An incorrect kind will not cause any failures until such a lookup
    ///   occurs.
    ///
    /// This will return [`None`] if the identifier is either opaque or does
    ///   not exist.
    fn get_ident_oi<O: ObjectKind>(
        &self,
        ident: SPair,
    ) -> Option<ObjectIndex<O>> {
        let identi = self.lookup(ident)?;

        let bound = self
            .graph
            .neighbors_directed(identi.into(), Direction::Outgoing)
            .next()?;

        // Note that this use of `O` for `ObjectIndex` here means "I
        //   _expect_ this to be `O`";
        //     the type will be verified during narrowing but will panic
        //     if this expectation is not met.
        Some(ObjectIndex::<O>::new(bound, ident.span()))
    }

    /// Retrieve the [`ObjectIndex`] to which the given `ident` is bound,
    ///   panicking if the identifier is either opaque or does not exist.
    ///
    /// Panics
    /// ======
    /// This method will panic with a diagnostic message if the identifier
    ///   is opaque
    ///   (has no edge to the object to which it is bound)
    ///   or does not exist on the graph.
    pub fn expect_ident_oi<O: ObjectKind>(
        &self,
        ident: SPair,
    ) -> ObjectIndex<O> {
        let bound = self.get_ident_oi::<O>(ident);

        bound.diagnostic_expect(
            || diagnostic_opaque_ident_desc(ident),
            || {
                format!(
                    "opaque identifier: {} has no object binding",
                    TtQuote::wrap(ident),
                )
            },
        )
    }

    /// Attempt to retrieve the [`Object`] to which the given `ident` is bound.
    ///
    /// If the identifier either does not exist on the graph or is opaque
    ///   (is not bound to any expression),
    ///   then [`None`] will be returned.
    ///
    /// If the system expects that the identifier must exist and would
    ///   otherwise represent a bug in the compiler,
    ///     see [`Self::expect_ident_obj`].
    ///
    /// Panics
    /// ======
    /// Once a binding has been found,
    ///   the object is retrieved using [`Self::expect_obj`],
    ///     which panics if certain graph invariants are not met.
    /// Such a panic represents an invalid system state that should not be
    ///   able to occur through this API;
    ///     violations of these invariants represent either a bug in the API
    ///     (that allows for the invariant to be violated)
    ///     or direct manipulation of the underlying graph.
    pub fn get_ident_obj<O: ObjectKind>(&self, ident: SPair) -> Option<&O> {
        let oi = self.get_ident_oi::<O>(ident)?;

        Some(self.expect_obj(oi))
    }

    /// Retrieve the object referenced by `oi`,
    ///   panicking if it is missing from the graph.
    ///
    /// Panics
    /// ======
    /// This method will panic with a diagnostic message if no node exists
    ///   on the graph for `oi`.
    pub(super) fn expect_obj<O: ObjectKind>(&self, oi: ObjectIndex<O>) -> &O {
        self.graph
            .node_weight(oi.into())
            .diagnostic_expect(
                || diagnostic_node_missing_desc(oi),
                "invalid ObjectIndex: data are missing from the ASG",
            )
            .get()
    }

    /// Retrieve the [`Object`] to which the given `ident` is bound,
    ///   panicking if the identifier is opaque or does not exist.
    ///
    /// This method represents a compiler invariant;
    ///   it should _only_ be used when the identifier _must_ exist,
    ///     otherwise there is a bug in the compiler.
    /// If this is _not_ the case,
    ///   use [`Self::get_ident_obj`] to get [`None`] in place of a panic.
    ///
    /// Panics
    /// ======
    /// This method will panic with a diagnostic message if
    ///
    ///   1. The identifier does not exist on the graph; or
    ///   2. The identifier is opaque (has no edge to any object on the
    ///        graph).
    pub fn expect_ident_obj<O: ObjectKind>(&self, ident: SPair) -> &O {
        let obj = self.get_ident_obj::<O>(ident);

        obj.diagnostic_expect(
            || diagnostic_opaque_ident_desc(ident),
            || {
                format!(
                    "opaque identifier: {} has no object binding",
                    TtQuote::wrap(ident),
                )
            },
        )
    }

    /// Retrieve an identifier from the graph by [`ObjectIndex`].
    ///
    /// This is [`Asg::get`] specialized to [`Ident`],
    ///   and so [`None`] is returned only when no node exists on the graph
    ///   for `index`.
    ///
    /// NOTE(review): if the object exists but is not an identifier,
    ///   [`None`] cannot be produced by [`Asg::get`];
    ///     the mismatch is left to `ObjectContainer::get`,
    ///       which presumably panics — confirm.
    #[inline]
    pub fn get_ident(&self, index: ObjectIndex<Ident>) -> Option<&Ident> {
        self.get::<Ident>(index)
    }

    /// Attempt to retrieve an identifier from the graph by name.
    ///
    /// [`None`] is returned when the symbol of `id` lies outside of the
    ///   index or when its slot holds the empty-node sentinel,
    ///     meaning that no identifier has been indexed under that name.
    /// The returned [`ObjectIndex`] carries the span of `id`,
    ///   not the span of the identifier on the graph.
    ///
    /// Since only identifiers carry a name,
    ///   this method cannot be used to retrieve all possible objects on the
    ///   graph---for
    ///     that, see [`Asg::get`].
    #[inline]
    pub fn lookup(&self, id: SPair) -> Option<ObjectIndex<Ident>> {
        let ni = *self.index.get(id.symbol().as_usize())?;

        // Compare against the same sentinel that is used to fill the index
        //   rather than assuming its numeric value.
        (ni != self.empty_node).then(|| ObjectIndex::new(ni, id.span()))
    }

    /// Declare that `dep` is a dependency of `ident`.
    ///
    /// An object must be declared as a dependency if its value must be
    ///   computed before computing the value of `ident`.
    /// The [linker][crate::ld] will ensure this ordering.
    ///
    /// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
    ///   be looked up by [`SymbolId`] or if they may not yet have been
    ///   declared.
    pub fn add_dep<O: ObjectKind>(
        &mut self,
        identi: ObjectIndex<Ident>,
        depi: ObjectIndex<O>,
    ) where
        Ident: ObjectRelTo<O>,
    {
        self.graph.update_edge(
            identi.into(),
            depi.into(),
            (Ident::rel_ty(), O::rel_ty(), None),
        );
    }

    /// Check whether `dep` is a dependency of `ident`.
    #[inline]
    pub fn has_dep(
        &self,
        ident: ObjectIndex<Ident>,
        dep: ObjectIndex<Ident>,
    ) -> bool {
        self.graph.contains_edge(ident.into(), dep.into())
    }

    /// Declare that `dep` is a dependency of `ident`,
    ///   regardless of whether they are known.
    ///
    /// In contrast to [`add_dep`][Asg::add_dep],
    ///   this method will add the dependency even if one or both of `ident`
    ///   or `dep` have not yet been declared.
    /// In such a case,
    ///   a missing identifier will be added as a placeholder,
    ///     allowing the ASG to be built with partial information as
    ///     identifiers continue to be discovered.
    /// See [`Ident::declare`] for more information.
    ///
    /// References to both identifiers are returned in argument order.
    pub fn add_dep_lookup(
        &mut self,
        ident: SPair,
        dep: SPair,
    ) -> (ObjectIndex<Ident>, ObjectIndex<Ident>) {
        let identi = self.lookup_or_missing(ident);
        let depi = self.lookup_or_missing(dep);

        self.graph.update_edge(
            identi.into(),
            depi.into(),
            (Ident::rel_ty(), Ident::rel_ty(), None),
        );

        (identi, depi)
    }
}

/// Diagnostic description for an [`ObjectIndex`] that references a node
///   that is not present on the graph.
///
/// The result is an internal error followed by a series of help messages,
///   all annotating `index`.
fn diagnostic_node_missing_desc<O: ObjectKind>(
    index: ObjectIndex<O>,
) -> Vec<AnnotatedSpan<'static>> {
    const HELP: [&str; 5] = [
        "this means that either an ObjectIndex was malformed, or",
        "  the object no longer exists on the graph, both of",
        "  which are unexpected and possibly represent data",
        "  corruption.",
        "The system cannot proceed with confidence.",
    ];

    let mut desc =
        vec![index.internal_error("this object is missing from the ASG")];
    desc.extend(HELP.iter().map(|line| index.help(*line)));

    desc
}

/// Produce the diagnostic description for an identifier that is not bound
///   to any object on the ASG.
///
/// The result is a single internal-error annotation followed by help
///   annotations,
///     all attached to `ident`.
fn diagnostic_opaque_ident_desc(ident: SPair) -> Vec<AnnotatedSpan<'static>> {
    let headline = ident
        .internal_error("this identifier is not bound to any object on the ASG");

    let mut desc = vec![headline];

    // One help message split across consecutive annotations;
    //   order must be preserved.
    desc.extend(
        [
            "the system expects to be able to reach the object that",
            "  this identifies, but this identifier has no",
            "  corresponding object present on the graph.",
        ]
        .map(|line| ident.help(line)),
    );

    desc
}

#[cfg(test)]
mod test;
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								// Graph abstraction
 								//
-												Copyright year and name update

Ryan Specialty Group (RSG) rebranded to Ryan Specialty after its IPO.

											
										
										
											2023-01-17 23:09:25 -05:00
+								//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
-												Copyright year 2020 update

											
										
										
											2020-03-06 11:05:18 -05:00
+								//
 								//  This file is part of TAME.
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								//
 								//  This program is free software: you can redistribute it and/or modify
 								//  it under the terms of the GNU General Public License as published by
 								//  the Free Software Foundation, either version 3 of the License, or
 								//  (at your option) any later version.
 								//
 								//  This program is distributed in the hope that it will be useful,
 								//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 								//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								//  GNU General Public License for more details.
 								//
 								//  You should have received a copy of the GNU General Public License
 								//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-												tamer: asg::graph::test: Extract into own file

DEV-13597

											
										
										
											2023-01-17 21:57:50 -05:00
+								//! Abstract semantic graph.
-												tamer: Embed ASG ontology visualization in rustdoc-generated docs

There, in-your-face and not hidden in some tools directory.

DEV-13708

											
										
										
											2023-03-10 14:11:55 -05:00
+								//!
 								//! ![Visualization of ASG ontology](../ontviz.svg)
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								use self::object::{
 								    DynObjectRel, ObjectRelFrom, ObjectRelTy, ObjectRelatable, Root,
 								};
-												tamer: asg::graph: Static- and runtime-enforced multi-kind edge ontolgoy

This allows for edges to be multiple types, and gives us two important
benefits:

  (a) Compiler-verified correctness to ensure that we don't generate graphs
      that do not adhere to the ontology; and
  (b) Runtime verification of types, so that bugs are still memory safe.

There is a lot more information in the documentation within the patch.

This took a lot of iterating to get something that was tolerable.  There's
quite a bit of boilerplate here, and maybe that'll be abstracted away better
in the future as the graph grows.

In particular, it was challenging to determine how I wanted to actually go
about narrowing and looking up edges.  Initially I had hoped to represent
the subsets as `ObjectKind`s as well so that you could use them anywhere
`ObjectKind` was expected, but that proved to be far too difficult because I
cannot return a reference to a subset of `Object` (the value would be owned
on generation).  And while in a language like C maybe I'd pad structures and
cast between them safely, since they _do_ overlap, I can't confidently do
that here since Rust's discriminant and layout are not under my control.

I tried playing around with `std::mem::Discriminant` as well, but
`discriminant` (the function) requires a _value_, meaning I couldn't get the
discriminant of a static `Object` variant without some dummy value; wasn't
worth it over `ObjectRelTy.`  We further can't assign values to enum
variants unless they hold no data.  Rust a decade from now may be different
and will be interesting to look back on this struggle.

DEV-13597

											
										
										
											2023-01-23 11:40:10 -05:00
-												tamer: asg::object: Merge into asg::ident

Everything in this file relates to identifiers, and I'm about to introduce a
higher-level object, one of which may be an identifier.

DEV-11864

											
										
										
											2022-05-19 11:05:20 -04:00
+								use super::{
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    AsgError, FragmentText, Ident, IdentKind, Object, ObjectIndex, ObjectKind,
 								    Source, TransitionResult,
-												tamer: asg::object: Merge into asg::ident

Everything in this file relates to identifiers, and I'm about to introduce a
higher-level object, one of which may be an identifier.

DEV-11864

											
										
										
											2022-05-19 11:05:20 -04:00
+								};
-												tamer: asg::graph::test: Extract into own file

DEV-13597

											
										
										
											2023-01-17 21:57:50 -05:00
+								use crate::{
 								    diagnose::{panic::DiagnosticPanic, Annotate, AnnotatedSpan},
 								    f::Functor,
 								    fmt::{DisplayWrapper, TtQuote},
 								    global,
 								    parse::{util::SPair, Token},
-												tamer: asg: Root package definition

This causes a package definition to be rooted (so that it can be easily
accessed for a graph walk).  This keeps consistent with the new
`ObjectIndex`-based API by introducing a unit `Root` `ObjectKind` and the
boilerplate that goes with it.

This boilerplate, now glaringly obvious, will be refactored at some point,
since its repetition is onerous and distracting.

DEV-13159

											
										
										
											2023-01-31 22:00:51 -05:00
+								    span::Span,
-												tamer: asg::graph::test: Extract into own file

DEV-13597

											
										
										
											2023-01-17 21:57:50 -05:00
+								    sym::SymbolId,
 								};
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								use petgraph::{
 								    graph::{DiGraph, Graph, NodeIndex},
 								    visit::EdgeRef,
 								    Direction,
 								};
-												tamer: asg::graph::test: Extract into own file

DEV-13597

											
										
										
											2023-01-17 21:57:50 -05:00
+								use std::{fmt::Debug, result::Result};
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												tamer: asg::object: Move into graph module

The ASG delegates certain operations to Objects so that they may enforce
their own invariants and ontology.  It is therefore important that only
objects have access to certain methods on `Asg`, otherwise those invariants
could be circumvented.

It should be noted that the nesting of this module is such that AIR should
_not_ have privileged access to the ASG---it too must utilize objects to
ensure those invariants are enforced in a single place.

DEV-13597

											
										
										
											2023-01-17 22:58:41 -05:00
+								pub mod object;
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is to, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								pub mod visit;
-												tamer: asg::graph::{object::xir=>xmli}: Rename module

This better reflects what is being done and makes it easier for someone to
find.

DEV-13708

											
										
										
											2023-02-22 23:16:53 -05:00
+								pub mod xmli;
-												tamer: asg::object: Move into graph module

The ASG delegates certain operations to Objects so that they may enforce
their own invariants and ontology.  It is therefore important that only
objects have access to certain methods on `Asg`, otherwise those invariants
could be circumvented.

It should be noted that the nesting of this module is such that AIR should
_not_ have privileged access to the ASG---it too must utilize objects to
ensure those invariants are enforced in a single place.

DEV-13597

											
										
										
											2023-01-17 22:58:41 -05:00
 								use object::{ObjectContainer, ObjectRelTo};
-												[DEV-7084] TAMER: Finish encapsulating petgraph

This will allow us to migrate away from Petgraph in the future should we
choose to do so.

											
										
										
											2020-04-09 11:34:30 -04:00
+								/// Datatype representing node and edge indexes.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								pub trait IndexType = petgraph::graph::IndexType;
-												[DEV-7084] TAMER: Finish encapsulating petgraph

This will allow us to migrate away from Petgraph in the future should we
choose to do so.

											
										
										
											2020-04-09 11:34:30 -04:00
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								/// A [`Result`] with a hard-coded [`AsgError`] error type.
 								///
 								/// This is the result of every [`Asg`] operation that could potentially
 								///   fail in error.
 								pub type AsgResult<T> = Result<T, AsgError>;
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								/// The [`ObjectRelTy`] (representing the [`ObjectKind`]) of the source and
 								///   destination [`Node`]s respectively.
 								///
 								/// This small memory expense allows for bidirectional edge filtering
 								///   and [`ObjectIndex`] [`ObjectKind`] resolution without an extra layer
 								///   of indirection to look up the source/target [`Node`].
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								///
 								/// The edge may also optionally contain a [`Span`] that provides additional
 								///   context in situations where the distinction between the span of the
 								///   target object and the span of the _reference_ to that object is
 								///   important.
 								type AsgEdge = (ObjectRelTy, ObjectRelTy, Option<Span>);
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								/// Each node of the graph.
-												tamer: Remove graphml target

This was originally created to populate Neo4J for querying, but it has not
been utilized.  It's become a maintenance burden as I try to change the API
of and encapsulate the graph, which is important for upholding its
invariants.

This feature, or one like it, will return in the future.  I have other
related plans; we'll see if they materialize.

The graph can't be encapsulated fully just yet because of the linker; those
commits will come in the following days.

DEV-13597

											
										
										
											2023-01-23 13:35:14 -05:00
+								type Node = ObjectContainer;
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
 								/// Index size for Graph nodes and edges.
 								type Ix = global::ProgSymSize;
-												tamer: asg::object: Move into graph module

The ASG delegates certain operations to Objects so that they may enforce
their own invariants and ontology.  It is therefore important that only
objects have access to certain methods on `Asg`, otherwise those invariants
could be circumvented.

It should be noted that the nesting of this module is such that AIR should
_not_ have privileged access to the ASG---it too must utilize objects to
ensure those invariants are enforced in a single place.

DEV-13597

											
										
										
											2023-01-17 22:58:41 -05:00
+								/// An abstract semantic graph (ASG) of [objects](object).
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								///
 								/// This implementation is currently based on [`petgraph`].
 								///
 								/// Identifiers are cached by name for `O(1)` lookup.
 								/// Since [`SymbolId`][crate::sym::SymbolId] is used for this purpose,
 								///   the index may contain more entries than nodes and may contain gaps.
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								///
 								/// This IR focuses on the definition and manipulation of objects and their
 								///   dependencies.
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								/// See [`Ident`]for a summary of valid identifier object state
-												tamer: asg::object: Merge into asg::ident

Everything in this file relates to identifiers, and I'm about to introduce a
higher-level object, one of which may be an identifier.

DEV-11864

											
										
										
											2022-05-19 11:05:20 -04:00
+								///   transitions.
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								///
 								/// Objects are never deleted from the graph,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								///   so [`ObjectIndex`]s will remain valid for the lifetime of the ASG.
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								///
 								/// For more information,
 								///   see the [module-level documentation][self].
-												tamer: asg::graph::Asg: Remove type parameter O

This removes the generic on the Asg (which was formerly BaseAsg),
hard-coding `IdentObject`, which will further evolve.  This makes the IR an
actual concrete IR rather than an abstract data structure.

These tests bring me back a bit, since they were written as I was still
becoming familiar with Rust.

DEV-11864

											
										
										
											2022-05-12 15:44:32 -04:00
+								pub struct Asg {
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    // TODO: private; see `ld::xmle::lower`.
 								    /// Directed graph on which objects are stored.
-												tamer: asg::graph::Asg: Remove type parameter O

This removes the generic on the Asg (which was formerly BaseAsg),
hard-coding `IdentObject`, which will further evolve.  This makes the IR an
actual concrete IR rather than an abstract data structure.

These tests bring me back a bit, since they were written as I was still
becoming familiar with Rust.

DEV-11864

											
										
										
											2022-05-12 15:44:32 -04:00
+								    pub graph: DiGraph<Node, AsgEdge, Ix>,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
 								    /// Map of [`SymbolId`][crate::sym::SymbolId] to node indexes.
 								    ///
 								    /// This allows for `O(1)` lookup of identifiers in the graph.
 								    /// Note that,
 								    ///   while we store [`NodeIndex`] internally,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///   the public API encapsulates it within an [`ObjectIndex`].
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								    index: Vec<NodeIndex<Ix>>,
 								    /// Empty node indicating that no object exists for a given index.
 								    empty_node: NodeIndex<Ix>,
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
 								    /// The root node used for reachability analysis and topological
 								    ///   sorting.
 								    root_node: NodeIndex<Ix>,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								}
-												tamer: asg::graph::Asg: Non-exhaustive Debug impl

This hides information that's taking up a lot of space in the parser traces
and is not useful information.  In particular, the `index` contains a lot of
empty space due to pre-interned symbols.

The index was going to be converted into a HashMap, but that was reverted
because the tradeoff did not make sense, and so this problem remains; see
the previous commit for more information.

DEV-13159

											
										
										
											2023-01-27 10:22:54 -05:00
+								impl Debug for Asg {
 								    /// Trimmed-down Asg [`Debug`] output.
 								    ///
 								    /// This primarily hides the large `self.index` that takes up so much
 								    ///   space in parser traces,
 								    ///     but also hides irrelevant information.
 								    ///
 								    /// The better option in the future may be to create a newtype for
 								    ///   `index` if it sticks around in its current form,
 								    ///     which in turn can encapsulate `self.empty_node`.
 								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 								        f.debug_struct("Asg")
 								            .field("root_node", &self.root_node)
 								            .field("graph", &self.graph)
 								            // Only the two fields above are rendered;
 								            //   the non-exhaustive finish marks the output as
 								            //   intentionally incomplete.
 								            .finish_non_exhaustive()
 								    }
 								}
-												tamer: Refactor asg_builder into obj::xmlo::lower and asg::air

This finally uses `parse` all the way up to aggregation into the ASG, as can
be seen by the mess in `poc`.  This will be further simplified---I just need
to get this committed so that I can mentally get it off my plate.  I've been
separating this commit into smaller commits, but there's a point where it's
just not worth the effort anymore.  I don't like making large changes such
as this one.

There is still work to do here.  First, it's worth re-mentioning that
`poc` means "proof-of-concept", and represents things that still need a
proper home/abstraction.

Secondly, `poc` is retrieving the context of two parsers---`LowerContext`
and `Asg`.  The latter is desirable, since it's the final aggregation point,
but the former needs to be eliminated; in particular, packages need to be
worked into the ASG so that `found` can be removed.

Recursively loading `xmlo` files still happens in `poc`, but the compiler
will need this as well.  Once packages are on the ASG, along with their
state, that responsibility can be generalized as well.

That will then simplify lowering even further, to the point where hopefully
everything has the same shape (once final aggregation has an abstraction),
after which we can then create a final abstraction to concisely stitch
everything together.  Right now, Rust isn't able to infer `S` for
`Lower<S, LS>`, which is unfortunate, but we'll be able to help it along
with a more explicit abstraction.

DEV-11864

											
										
										
											2022-05-27 13:51:29 -04:00
+								impl Default for Asg {
 								    /// Create a default ASG by delegating to [`Asg::new`].
 								    fn default() -> Self {
 								        Self::new()
 								    }
 								}
-												tamer: asg::graph::Asg: Remove type parameter O

This removes the generic on the Asg (which was formerly BaseAsg),
hard-coding `IdentObject`, which will further evolve.  This makes the IR an
actual concrete IR rather than an abstract data structure.

These tests bring me back a bit, since they were written as I was still
becoming familiar with Rust.

DEV-11864

											
										
										
											2022-05-12 15:44:32 -04:00
+								impl Asg {
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    /// Create a new ASG.
 								    ///
 								    /// See also [`with_capacity`](Asg::with_capacity).
 								    pub fn new() -> Self {
-												tamer: Refactor asg_builder into obj::xmlo::lower and asg::air

This finally uses `parse` all the way up to aggregation into the ASG, as can
be seen by the mess in `poc`.  This will be further simplified---I just need
to get this committed so that I can mentally get it off my plate.  I've been
separating this commit into smaller commits, but there's a point where it's
just not worth the effort anymore.  I don't like making large changes such
as this one.

There is still work to do here.  First, it's worth re-mentioning that
`poc` means "proof-of-concept", and represents things that still need a
proper home/abstraction.

Secondly, `poc` is retrieving the context of two parsers---`LowerContext`
and `Asg`.  The latter is desirable, since it's the final aggregation point,
but the former needs to be eliminated; in particular, packages need to be
worked into the ASG so that `found` can be removed.

Recursively loading `xmlo` files still happens in `poc`, but the compiler
will need this as well.  Once packages are on the ASG, along with their
state, that responsibility can be generalized as well.

That will then simplify lowering even further, to the point where hopefully
everything has the same shape (once final aggregation has an abstraction),
after which we can then create a final abstraction to concisely stitch
everything together.  Right now, Rust isn't able to infer `S` for
`Lower<S, LS>`, which is unfortunate, but we'll be able to help it along
with a more explicit abstraction.

DEV-11864

											
										
										
											2022-05-27 13:51:29 -04:00
+								        // TODO: Determine a proper initial capacity.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        Self::with_capacity(0, 0)
 								    }
 								    /// Create an ASG with the provided initial capacity.
 								    ///
 								    /// The value for `objects` will be used as the capacity for the nodes
 								    ///   in the graph,
 								    ///     as well as the initial index capacity.
 								    /// The value for `edges` may be more difficult to consider,
 								    ///   since edges are used to represent various relationships between
 								    ///   different types of objects,
 								    ///     but it's safe to say that each object will have at least one
 								    ///     edge to another object.
 								    pub fn with_capacity(objects: usize, edges: usize) -> Self {
 								        let mut graph = Graph::with_capacity(objects, edges);
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								        let mut index = Vec::with_capacity(objects);
 								        // Exhaust the first index to be used as a placeholder
 								        //   (its value does not matter).
-												tamer: asg: Root package definition

This causes a package definition to be rooted (so that it can be easily
accessed for a graph walk).  This keeps consistent with the new
`ObjectIndex`-based API by introducing a unit `Root` `ObjectKind` and the
boilerplate that goes with it.

This boilerplate, now glaringly obvious, will be refactored at some point,
since its repetition is onerous and distracting.

DEV-13159

											
										
										
											2023-01-31 22:00:51 -05:00
+								        let empty_node = graph.add_node(Object::Root(Root).into());
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								        index.push(empty_node);
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								        // Automatically add the root which will be used to determine what
 								        //   identifiers ought to be retained by the final program.
 								        // This is not indexed and is not accessible by name.
-												tamer: asg: Root package definition

This causes a package definition to be rooted (so that it can be easily
accessed for a graph walk).  This keeps consistent with the new
`ObjectIndex`-based API by introducing a unit `Root` `ObjectKind` and the
boilerplate that goes with it.

This boilerplate, now glaringly obvious, will be refactored at some point,
since its repetition is onerous and distracting.

DEV-13159

											
										
										
											2023-01-31 22:00:51 -05:00
+								        let root_node = graph.add_node(Object::Root(Root).into());
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        Self {
 								            graph,
 								            index,
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								            empty_node,
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								            root_node,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        }
 								    }
 								    /// Get the underlying Graph
-												tamer: asg::graph::Asg: Remove type parameter O

This removes the generic on the Asg (which was formerly BaseAsg),
hard-coding `IdentObject`, which will further evolve.  This makes the IR an
actual concrete IR rather than an abstract data structure.

These tests bring me back a bit, since they were written as I was still
becoming familiar with Rust.

DEV-11864

											
										
										
											2022-05-12 15:44:32 -04:00
+								    pub fn into_inner(self) -> DiGraph<Node, AsgEdge, Ix> {
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        self.graph
 								    }
 								    /// Index the provided symbol `name` as representing the identifier `node`.
 								    ///
 								    /// This index permits `O(1)` identifier lookups.
 								    ///
 								    /// After an identifier is indexed it is not expected to be reassigned
 								    ///   to another node.
 								    /// Debug builds contain an assertion that will panic in this instance.
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								    ///
 								    /// Panics
 								    /// ======
 								    /// Will panic if unable to allocate more space for the index.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    fn index_identifier(&mut self, name: SymbolId, node: NodeIndex<Ix>) {
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								        let i = name.as_usize();
 								        if i >= self.index.len() {
 								            // If this is ever a problem we can fall back to usize max and
 								            // re-compare before panicking
 								            let new_size = (i + 1)
 								                .checked_next_power_of_two()
 								                .expect("internal error: cannot allocate space for ASG index");
 								            self.index.resize(new_size, self.empty_node);
 								        }
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
 								        // We should never overwrite indexes
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								        debug_assert!(self.index[i] == self.empty_node);
 								        self.index[i] = node;
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
 								    /// Lookup `ident` or add a missing identifier to the graph and return a
 								    ///   reference to it.
 								    ///
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								    /// The provided span is necessary to seed the missing identifier with
 								    ///   some sort of context to aid in debugging why a missing identifier
 								    ///   was introduced to the graph.
-												tamer: asg: Bind transparent ident

This provides the initial implementation allowing an identifier to be
defined (bound to an object and made transparent).

I'm not yet entirely sure whether I'll stick with the "transparent" and
"opaque" terminology when there's also "declare" and "define", but a
`Missing` state is a type of declaration and so the distinction does still
seem to be important.

There is still work to be done on `ObjectIndex::<Ident>::bind_definition`,
which will follow.  I'm going to be balancing work to provide type-level
guarantees, since I don't have the time to go as far as I'd like.

DEV-13597

											
										
										
											2023-01-17 16:31:13 -05:00
+								    /// The provided span will be used even if an identifier exists on the
 								    ///   graph,
 								    ///     which can be used for retaining information on the location that
 								    ///     requested the identifier.
 								    /// To retrieve the span of a previously declared identifier,
 								    ///   you must resolve the [`Ident`] object and inspect it.
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								    ///
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								    /// See [`Ident::declare`] for more information.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub(super) fn lookup_or_missing(
 								        &mut self,
 								        ident: SPair,
 								    ) -> ObjectIndex<Ident> {
-												tamer: asg::Asg::lookup: SymbolId=>SPair

This seems to have been an oversight from when I recently introduced SPairs
to ASG; I noticed it while working on another change and receiving back a
`DUMMY_SPAN`.

DEV-13597

											
										
										
											2023-01-17 14:42:43 -05:00
+								        self.lookup(ident).unwrap_or_else(|| {
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								            let index = self.graph.add_node(Ident::declare(ident).into());
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
-												tamer: asg::Asg::lookup: SymbolId=>SPair

This seems to have been an oversight from when I recently introduced SPairs
to ASG; I noticed it while working on another change and receiving back a
`DUMMY_SPAN`.

DEV-13597

											
										
										
											2023-01-17 14:42:43 -05:00
+								            self.index_identifier(ident.symbol(), index);
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								            ObjectIndex::new(index, ident.span())
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        })
 								    }
 								    /// Perform a state transition on an identifier by name.
 								    ///
 								    /// Look up `ident` or add a missing identifier if it does not yet exist
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								    ///   (see [`Self::lookup_or_missing`]).
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    /// Then invoke `f` with the located identifier and replace the
 								    ///   identifier on the graph with the result.
 								    ///
 								    /// This will safely restore graph state to the original identifier
 								    ///   value on transition failure.
 								    fn with_ident_lookup<F>(
 								        &mut self,
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        name: SPair,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        f: F,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ) -> AsgResult<ObjectIndex<Ident>>
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    where
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								        F: FnOnce(Ident) -> TransitionResult<Ident>,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    {
 								        let identi = self.lookup_or_missing(name);
 								        self.with_ident(identi, f)
 								    }
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    /// Perform a state transition on an identifier by [`ObjectIndex`].
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    ///
 								    /// Invoke `f` with the located identifier and replace the identifier on
 								    ///   the graph with the result.
 								    ///
 								    /// This will safely restore graph state to the original identifier
 								    ///   value on transition failure.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    fn with_ident<F>(
 								        &mut self,
 								        identi: ObjectIndex<Ident>,
 								        f: F,
 								    ) -> AsgResult<ObjectIndex<Ident>>
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    where
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								        F: FnOnce(Ident) -> TransitionResult<Ident>,
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    {
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								        let container = self.graph.node_weight_mut(identi.into()).unwrap();
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								        container
 								            .try_replace_with(f)
 								            .map(|()| identi)
 								            .map_err(Into::into)
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												tamer: asg: Root package definition

This causes a package definition to be rooted (so that it can be easily
accessed for a graph walk).  This keeps consistent with the new
`ObjectIndex`-based API by introducing a unit `Root` `ObjectKind` and the
boilerplate that goes with it.

This boilerplate, now glaringly obvious, will be refactored at some point,
since its repetition is onerous and distracting.

DEV-13159

											
										
										
											2023-01-31 22:00:51 -05:00
+								    /// Root object.
 								    ///
 								    /// All [`Object`]s reachable from the root will be included in the
 								    ///   compilation unit or linked executable.
 								    ///
 								    /// The `witness` is used in the returned [`ObjectIndex`] and is
 								    ///   intended for diagnostic purposes to highlight the source entity that
 								    ///   triggered the request of the root.
 								    pub fn root(&self, witness: Span) -> ObjectIndex<Root> {
 								        ObjectIndex::new(self.root_node, witness)
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								    }
 								    /// Add an object as a root.
 								    ///
 								    /// Roots are always included during a topological sort and any
 								    ///   reachability analysis.
 								    ///
 								    /// Ideally,
 								    ///   roots would be minimal and dependencies properly organized such
 								    ///   that objects will be included if they are a transitive dependency
 								    ///   of some included subsystem.
 								    ///
 								    /// See also [`IdentKind::is_auto_root`].
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub fn add_root(&mut self, identi: ObjectIndex<Ident>) {
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        self.graph.add_edge(
 								            self.root_node,
 								            identi.into(),
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								            (ObjectRelTy::Root, ObjectRelTy::Ident, None),
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        );
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								    }
-												tamer: Refactor asg_builder into obj::xmlo::lower and asg::air

This finally uses `parse` all the way up to aggregation into the ASG, as can
be seen by the mess in `poc`.  This will be further simplified---I just need
to get this committed so that I can mentally get it off my plate.  I've been
separating this commit into smaller commits, but there's a point where it's
just not worth the effort anymore.  I don't like making large changes such
as this one.

There is still work to do here.  First, it's worth re-mentioning that
`poc` means "proof-of-concept", and represents things that still need a
proper home/abstraction.

Secondly, `poc` is retrieving the context of two parsers---`LowerContext`
and `Asg`.  The latter is desirable, since it's the final aggregation point,
but the former needs to be eliminated; in particular, packages need to be
worked into the ASG so that `found` can be removed.

Recursively loading `xmlo` files still happens in `poc`, but the compiler
will need this as well.  Once packages are on the ASG, along with their
state, that responsibility can be generalized as well.

That will then simplify lowering even further, to the point where hopefully
everything has the same shape (once final aggregation has an abstraction),
after which we can then create a final abstraction to concisely stitch
everything together.  Right now, Rust isn't able to infer `S` for
`Lower<S, LS>`, which is unfortunate, but we'll be able to help it along
with a more explicit abstraction.

DEV-11864

											
										
										
											2022-05-27 13:51:29 -04:00
+								    /// Whether an object is rooted.
 								    ///
 								    /// See [`Asg::add_root`] for more information about roots.
 								    #[cfg(test)]
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub(super) fn is_rooted(&self, identi: ObjectIndex<Ident>) -> bool {
-												tamer: Refactor asg_builder into obj::xmlo::lower and asg::air

This finally uses `parse` all the way up to aggregation into the ASG, as can
be seen by the mess in `poc`.  This will be further simplified---I just need
to get this committed so that I can mentally get it off my plate.  I've been
separating this commit into smaller commits, but there's a point where it's
just not worth the effort anymore.  I don't like making large changes such
as this one.

There is still work to do here.  First, it's worth re-mentioning that
`poc` means "proof-of-concept", and represents things that still need a
proper home/abstraction.

Secondly, `poc` is retrieving the context of two parsers---`LowerContext`
and `Asg`.  The latter is desirable, since it's the final aggregation point,
but the former needs to be eliminated; in particular, packages need to be
worked into the ASG so that `found` can be removed.

Recursively loading `xmlo` files still happens in `poc`, but the compiler
will need this as well.  Once packages are on the ASG, along with their
state, that responsibility can be generalized as well.

That will then simplify lowering even further, to the point where hopefully
everything has the same shape (once final aggregation has an abstraction),
after which we can then create a final abstraction to concisely stitch
everything together.  Right now, Rust isn't able to infer `S` for
`Lower<S, LS>`, which is unfortunate, but we'll be able to help it along
with a more explicit abstraction.

DEV-11864

											
										
										
											2022-05-27 13:51:29 -04:00
+								        self.graph.contains_edge(self.root_node, identi.into())
 								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    /// Declare a concrete identifier.
 								    ///
 								    /// An identifier declaration is similar to a declaration in a header
 								    ///   file in a language like C,
 								    ///     describing the structure of the identifier.
 								    /// Once declared,
 								    ///   this information cannot be changed.
 								    ///
-												tamer: Global interners

This is a major change, and I apologize for it all being in one commit.  I
had wanted to break it up, but doing so would have required a significant
amount of temporary work that was not worth doing while I'm the only one
working on this project at the moment.

This accomplishes a number of important things, now that I'm preparing to
write the first compiler frontend for TAMER:

  1. `Symbol` has been removed; `SymbolId` is used in its place.
  2. Consequently, symbols use 16 or 32 bits, rather than a 64-bit pointer.
  3. Using symbols no longer requires dereferencing.
  4. **Lifetimes no longer pollute the entire system! (`'i`)**
  5. Two global interners are offered to produce `SymbolStr` with `'static`
     lifetimes, simplifying lifetime management and borrowing where strings
     are still needed.
  6. A nice API is provided for interning and lookups (e.g. "foo".intern())
     which makes this look like a core feature of Rust.

Unfortunately, making this change required modifications to...virtually
everything.  And that serves to emphasize why this change was needed:
_everything_ used symbols, and so there's no use in not providing globals.

I implemented this in a way that still provides for loose coupling through
Rust's trait system.  Indeed, Rustc offers a global interner, and I decided
not to go that route initially because it wasn't clear to me that such a
thing was desirable.  It didn't become apparent to me, in fact, until the
recent commit where I introduced `SymbolIndexSize` and saw how many things
had to be touched; the linker evolved so rapidly as I was trying to learn
Rust that I lost track of how bad it got.

Further, this shows how the design of the internment system was a bit
naive---I assumed certain requirements that never panned out.  In
particular, everything using symbols stored `&'i Symbol<'i>`---that is, a
reference (usize) to an object containing an index (32-bit) and a string
slice (128-bit).  So it was a reference to a pretty large value, which was
allocated in the arena alongside the interned string itself.

But, that was assuming that something would need both the symbol index _and_
a readily available string.  That's not the case.  In fact, it's pretty
clear that interning happens at the beginning of execution, that `SymbolId`
is all that's needed during processing (unless an error occurs; more on that
below); and it's not until _the very end_ that we need to retrieve interned
strings from the pool to write either to a file or to display to the
user.  It was horribly wasteful!

So `SymbolId` solves the lifetime issue in itself for most systems, but it
still requires that an interner be available for anything that needs to
create or resolve symbols, which, as it turns out, is still a lot of
things.  Therefore, I decided to implement them as thread-local static
variables, which is very similar to what Rustc does itself (Rustc's are
scoped).  TAMER does not use threads, so the resulting `'static` lifetime
should be just fine for now.  Eventually I'd like to implement `!Send` and
`!Sync`, though, to prevent references from escaping the thread (as noted in
the patch); I can't do that yet, since the feature has not yet been
stabilized.

In the end, this leaves us with a system that's much easier to use and
maintain; hopefully easier for newcomers to get into without having to deal
with so many complex lifetimes; and a nice API that makes it a pleasure to
work with symbols.

Admittedly, the `SymbolIndexSize` adds some complexity, and we'll see if I
end up regretting that down the line, but it exists for an important reason:
the `Span` and other structures that'll be introduced need to pack a lot of
data into 64 bits so they can be freely copied around to keep lifetimes
simple without wreaking havoc in other ways, but a 32-bit symbol size needed
by the linker is too large for that.  (Actually, the linker doesn't yet need
32 bits for our systems, but it's going to in the somewhat near future
unless we optimize away a bunch of symbols...but I'd really rather not have
the linker hit a limit that requires a lot of code changes to resolve).

Rustc uses interned spans when they exceed 8 bytes, but I'd prefer to avoid
that for now.  Most systems can just use one of the `PkgSymbolId` or
`ProgSymbolId` type aliases and not have to worry about it.  Systems that
are actually shared between the compiler and the linker do, though, but it's
not like we don't already have a bunch of trait bounds.

Of course, as we implement link-time optimizations (LTO) in the future, it's
possible most things will need the size and I'll grow frustrated with that
and possibly revisit this.  We shall see.

Anyway, this was exhausting...and...onward to the first frontend!

											
										
										
											2021-08-02 23:54:37 -04:00
+								    /// Identifiers are uniquely identified by a [`SymbolId`] `name`.
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    /// If an identifier of the same `name` already exists,
 								    ///   then the provided declaration is compared against the existing
 								    ///   declaration---should
 								    ///     they be incompatible,
 								    ///       then the operation will fail;
 								    ///     otherwise,
 								    ///       the existing identifier will be returned.
-												TAMER: Virtual symbol override

											
										
										
											2020-01-15 11:24:56 -05:00
+								    ///
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    /// If a concrete identifier has already been declared (see
 								    ///   [`Asg::declare`]),
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
+								    ///     then extern declarations will be compared and,
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    ///       if compatible,
 								    ///       the identifier will be immediately _resolved_ and the object
 								    ///         on the graph will not be altered.
 								    /// Resolution will otherwise fail in error.
-												TAMER: Make Asg generic over object

There's a lot here to make the object stored on the `Asg` generic.  This
introduces `ObjectState` for state transitions and `ObjectData` for pure
data retrieval.  This will allow not only for mocking, but will be useful to
enforce compile-time restrictions on the type of objects expected by the
linker vs. the compiler (e.g. the linker will not have expressions).

This commit intentionally leaves the corresponding tests in their original
location to prove that the functionality has not changed; they'll be moved
in a future commit.

This also leaves the names as "Object" to reduce the cognitive
overhead of this commit.  It will be renamed to something like "IdentObject"
in the near future to clarify the intent of the current object type and to
open the way for expressions and a type that marries both of them in the
future.

Once all of this is done, we'll finally be able to make changes to the
compatibility logic in state transitions to implement extern compatibility
checks during resolution.

DEV-7087

											
										
										
											2020-03-14 00:10:03 -04:00
+								    ///
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
+								    /// For more information on state transitions that can occur when
 								    ///   redeclaring an identifier that already exists,
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								    ///     see [`Ident::resolve`].
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
+								    ///
 								    /// A successful declaration will add an identifier to the graph
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///   and return an [`ObjectIndex`] reference.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    pub fn declare(
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								        &mut self,
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        name: SPair,
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
+								        kind: IdentKind,
-												tamer: Remove Ix generalization throughout system

This had the writing on the wall all the same as the `'i` interner lifetime
that came before it.  It was too much of a maintenance burden trying to
accommodate both 16-bit and 32-bit symbols generically.

There is a situation where we do still want 16-bit symbols---the
`Span`.  Therefore, I have left generic support for symbol sizes, as well as
the different global interners, but `SymbolId` now defaults to 32-bit, as
does `Asg`.  Further, the size parameter has been removed from the rest of
the code, with the exception of `Span`.

This cleans things up quite a bit, and is much nicer to work with.  If we
want 16-bit symbols in the future for packing to increase CPU cache
performance, we can handle that situation then in that specific case; it's a
premature optimization that's not at all worth the effort here.

											
										
										
											2021-09-23 14:52:53 -04:00
+								        src: Source,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ) -> AsgResult<ObjectIndex<Ident>> {
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								        let is_auto_root = kind.is_auto_root();
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        self.with_ident_lookup(name, |obj| obj.resolve(name.span(), kind, src))
-												tamer: Integrate clippy

This invokes clippy as part of `make check` now, which I had previously
avoided doing (I'll elaborate on that below).

This commit represents the changes needed to resolve all the warnings
presented by clippy.  Many changes have been made where I find the lints to
be useful and agreeable, but there are a number of lints, rationalized in
`src/lib.rs`, where I found the lints to be disagreeable.  I have provided
rationale, primarily for those wondering why I desire to deviate from the
default lints, though it does feel backward to rationalize why certain lints
ought to be applied (the reverse should be true).

With that said, this did catch some legitimate issues, and it was also
helpful in getting some older code up-to-date with new language additions
that perhaps I used in new code but hadn't gone back and updated old code
for.  My goal was to get clippy working without errors so that, in the
future, when others get into TAMER and are still getting used to Rust,
clippy is able to help guide them in the right direction.

One of the reasons I went without clippy for so long (though I admittedly
forgot I wasn't using it for a period of time) was because there were a
number of suggestions that I found disagreeable, and I didn't take the time
to go through them and determine what I wanted to follow.  Furthermore, it
was hard to make that judgment when I was new to the language and lacked
the necessary experience to do so.

One thing I would like to comment further on is the use of `format!` with
`expect`, which is also what the diagnostic system convenience methods
do (which clippy does not cover).  Because of all the work I've done trying
to understand Rust and looking at disassemblies and seeing what it
optimizes, I falsely assumed that Rust would convert such things into
conditionals in my otherwise-pure code...but apparently that's not the case,
when `format!` is involved.

I noticed that, after making the suggested fix with `get_ident`, Rust
proceeded to then inline it into each call site and then apply further
optimizations.  It was also previously invoking the thread lock (for the
interner) unconditionally and invoking the `Display` implementation.  That
is not at all what I intended for, despite knowing the eager semantics of
function calls in Rust.

Anyway, possibly more to come on that, I'm just tired of typing and need to
move on.  I'll be returning to investigate further diagnostic messages soon.

											
										
										
											2023-01-12 10:46:48 -05:00
+								            .map(|node| {
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								                is_auto_root.then(|| self.add_root(node));
-												tamer: Integrate clippy

This invokes clippy as part of `make check` now, which I had previously
avoided doing (I'll elaborate on that below).

This commit represents the changes needed to resolve all the warnings
presented by clippy.  Many changes have been made where I find the lints to
be useful and agreeable, but there are a number of lints, rationalized in
`src/lib.rs`, where I found the lints to be disagreeable.  I have provided
rationale, primarily for those wondering why I desire to deviate from the
default lints, though it does feel backward to rationalize why certain lints
ought to be applied (the reverse should be true).

With that said, this did catch some legitimate issues, and it was also
helpful in getting some older code up-to-date with new language additions
that perhaps I used in new code but hadn't gone back and updated old code
for.  My goal was to get clippy working without errors so that, in the
future, when others get into TAMER and are still getting used to Rust,
clippy is able to help guide them in the right direction.

One of the reasons I went without clippy for so long (though I admittedly
forgot I wasn't using it for a period of time) was because there were a
number of suggestions that I found disagreeable, and I didn't take the time
to go through them and determine what I wanted to follow.  Furthermore, it
was hard to make that judgment when I was new to the language and lacked
the necessary experience to do so.

One thing I would like to comment further on is the use of `format!` with
`expect`, which is also what the diagnostic system convenience methods
do (which clippy does not cover).  Because of all the work I've done trying
to understand Rust and looking at disassemblies and seeing what it
optimizes, I falsely assumed that Rust would convert such things into
conditionals in my otherwise-pure code...but apparently that's not the case,
when `format!` is involved.

I noticed that, after making the suggested fix with `get_ident`, Rust
proceeded to then inline it into each call site and then apply further
optimizations.  It was also previously invoking the thread lock (for the
interner) unconditionally and invoking the `Display` implementation.  That
is not at all what I intended for, despite knowing the eager semantics of
function calls in Rust.

Anyway, possibly more to come on that, I'm just tired of typing and need to
move on.  I'll be returning to investigate further diagnostic messages soon.

											
										
										
											2023-01-12 10:46:48 -05:00
+								                node
-												tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolidate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864

											
										
										
											2022-05-17 10:42:05 -04:00
+								            })
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
+								    /// Declare an abstract identifier.
-												[DEV-7087] TAMER: Asg: Reintroduce declare_extern

There is some duplication here with `declare` that will be cleared up in a
following commit.  Reintroducing this method is necessary so that Source can
be used to represent the source location of the extern itself; it's
currently None to indicate an extern in `declare`.

											
										
										
											2020-03-25 23:49:37 -04:00
+								    ///
 								    /// An _extern_ declaration declares an identifier the same as
 								    ///   [`Asg::declare`],
 								    ///     but omits source information.
 								    /// Externs are identifiers that are expected to be defined somewhere
 								    ///   else ("externally"),
 								    ///     and are resolved at [link-time][crate::ld].
 								    ///
 								    /// If a concrete identifier has already been declared (see
 								    ///   [`Asg::declare`]),
 								    ///     then the declarations will be compared and,
 								    ///       if compatible,
 								    ///       the identifier will be immediately _resolved_ and the object
 								    ///         on the graph will not be altered.
 								    /// Resolution will otherwise fail in error.
 								    ///
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								    /// See [`Ident::extern_`] and
 								    ///   [`Ident::resolve`] for more information on
-												[DEV-7087] TAMER: Asg: Reintroduce declare_extern

There is some duplication here with `declare` that will be cleared up in a
following commit.  Reintroducing this method is necessary so that Source can
be used to represent the source location of the extern itself; it's
currently None to indicate an extern in `declare`.

											
										
										
											2020-03-25 23:49:37 -04:00
+								    ///   compatibility related to extern resolution.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    pub fn declare_extern(
-												[DEV-7087] TAMER: Asg: Reintroduce declare_extern

There is some duplication here with `declare` that will be cleared up in a
following commit.  Reintroducing this method is necessary so that Source can
be used to represent the source location of the extern itself; it's
currently None to indicate an extern in `declare`.

											
										
										
											2020-03-25 23:49:37 -04:00
+								        &mut self,
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        name: SPair,
-												[DEV-7087] TAMER: Asg: Reintroduce declare_extern

There is some duplication here with `declare` that will be cleared up in a
following commit.  Reintroducing this method is necessary so that Source can
be used to represent the source location of the extern itself; it's
currently None to indicate an extern in `declare`.

											
										
										
											2020-03-25 23:49:37 -04:00
+								        kind: IdentKind,
-												tamer: Remove Ix generalization throughout system

This had the writing on the wall all the same as the `'i` interner lifetime
that came before it.  It was too much of a maintenance burden trying to
accommodate both 16-bit and 32-bit symbols generically.

There is a situation where we do still want 16-bit symbols---the
`Span`.  Therefore, I have left generic support for symbol sizes, as well as
the different global interners, but `SymbolId` now defaults to 32-bit, as
does `Asg`.  Further, the size parameter has been removed from the rest of
the code, with the exception of `Span`.

This cleans things up quite a bit, and is much nicer to work with.  If we
want 16-bit symbols in the future for packing to increase CPU cache
performance, we can handle that situation then in that specific case; it's a
premature optimization that's not at all worth the effort here.

											
										
										
											2021-09-23 14:52:53 -04:00
+								        src: Source,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ) -> AsgResult<ObjectIndex<Ident>> {
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        self.with_ident_lookup(name, |obj| obj.extern_(name.span(), kind, src))
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												[DEV-7087] TAMER: Type compatability check during extern resolution

This properly verifies extern types, and cleans up Asg's API a little so
that externs aren't handled much differently than other declarations.

With that said, after making src optional, I realized that we will indeed
want source information for externs themselves so we can direct the user to
what package is expecting that symbol (as the old linker does).  So this
approach will not work, and I'll have to undo some of those changes.

											
										
										
											2020-03-25 15:37:55 -04:00
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    /// Set the fragment associated with a concrete identifier.
 								    ///
-												TAMER: Make Asg generic over object

There's a lot here to make the object stored on the `Asg` generic.  This
introduces `ObjectState` for state transitions and `ObjectData` for pure
data retrieval.  This will allow not only for mocking, but will be useful to
enforce compile-time restrictions on the type of objects expected by the
linker vs. the compiler (e.g. the linker will not have expressions).

This commit intentionally leaves the corresponding tests in their original
location to prove that the functionality has not changed; they'll be moved
in a future commit.

This also leaves the names as "Object" to reduce the cognitive
overhead of this commit.  It will be renamed to something like "IdentObject"
in the near future to clarify the intent of the current object type and to
open the way for expressions and a type that marries both of them in the
future.

Once all of this is done, we'll finally be able to make changes to the
compatibility logic in state transitions to implement extern compatibility
checks during resolution.

DEV-7087

											
										
										
											2020-03-14 00:10:03 -04:00
+								    /// Fragments are intended for use by the [linker][crate::ld].
 								    /// For more information,
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								    ///   see [`Ident::set_fragment`].
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    pub fn set_fragment(
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								        &mut self,
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        name: SPair,
-												tamer: Remove Ix generalization throughout system

This had the writing on the wall all the same as the `'i` interner lifetime
that came before it.  It was too much of a maintenance burden trying to
accommodate both 16-bit and 32-bit symbols generically.

There is a situation where we do still want 16-bit symbols---the
`Span`.  Therefore, I have left generic support for symbol sizes, as well as
the different global interners, but `SymbolId` now defaults to 32-bit, as
does `Asg`.  Further, the size parameter has been removed from the rest of
the code, with the exception of `Span`.

This cleans things up quite a bit, and is much nicer to work with.  If we
want 16-bit symbols in the future for packing to increase CPU cache
performance, we can handle that situation then in that specific case; it's a
premature optimization that's not at all worth the effort here.

											
										
										
											2021-09-23 14:52:53 -04:00
+								        text: FragmentText,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ) -> AsgResult<ObjectIndex<Ident>> {
-												tamer: asg::Asg::set_fragment: {ObjectRef=>SymbolId}

In the actual implementation (outside of tests), this is always looking up
before adding the symbol.  This will simplify the API, while still retaining
errors, since the identifier will fail the state transition if the
identifier did not exist before attempting to set a fragment.  So while this
is slower in microbenchmarks, this has no effect on real-world performance.

Further, I'm refactoring toward a streaming ASG aggregation, which is a lot
easier if we do not need to perform lookups in a separate step from the
ASG's primitives.

DEV-11864

											
										
										
											2022-05-16 10:53:07 -04:00
+								        self.with_ident_lookup(name, |obj| obj.set_fragment(text))
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    /// Create a new object on the graph.
 								    ///
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    /// The provided [`ObjectIndex`] will be augmented with the span
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    ///   of `obj`.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub(super) fn create<O: ObjectKind>(&mut self, obj: O) -> ObjectIndex<O> {
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								        let o = obj.into();
 								        let span = o.span();
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								        let node_id = self.graph.add_node(ObjectContainer::from(o.into()));
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								        ObjectIndex::new(node_id, span)
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    }
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    /// Add an edge from the [`Object`] represented by the
 								    ///   [`ObjectIndex`] `from_oi` to the object represented by `to_oi`.
 								    ///
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								    /// The edge may optionally contain a _contextual [`Span`]_,
 								    ///   in cases where it is important to distinguish between the span
 								    ///   associated with the target and the span associated with the
 								    ///   _reference_ to the target.
 								    ///
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    /// For more information on how the ASG's ontology is enforced statically,
 								    ///   see [`ObjectRelTo`].
-												tamer: asg::object: Move into graph module

The ASG delegates certain operations to Objects so that they may enforce
their own invariants and ontology.  It is therefore important that only
objects have access to certain methods on `Asg`, otherwise those invariants
could be circumvented.

It should be noted that the nesting of this module is such that AIR should
_not_ have privileged access to the ASG---it too must utilize objects to
ensure those invariants are enforced in a single place.

DEV-13597

											
										
										
											2023-01-17 22:58:41 -05:00
+								    fn add_edge<OA: ObjectKind, OB: ObjectKind>(
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								        &mut self,
 								        from_oi: ObjectIndex<OA>,
 								        to_oi: ObjectIndex<OB>,
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								        ctx_span: Option<Span>,
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    ) where
 								        OA: ObjectRelTo<OB>,
 								    {
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        self.graph.add_edge(
 								            from_oi.into(),
 								            to_oi.into(),
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								            (OA::rel_ty(), OB::rel_ty(), ctx_span),
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        );
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    }
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    /// Retrieve an object from the graph by [`ObjectIndex`].
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    ///
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    /// Since an [`ObjectIndex`] should only be produced by an [`Asg`],
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    ///   and since objects are never deleted from the graph,
 								    ///   this should never fail so long as references are not shared
 								    ///   between multiple graphs.
 								    /// It is nevertheless wrapped in an [`Option`] just in case.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    #[inline]
-												tamer: asg::Asg::get: Narrow object type

This uses `ObjectIndex` to automatically narrow the type to what is
expected.

Given that `ObjectIndex` is supposed to mean that there must be an object
with that index, perhaps the next step is to remove the `Option` from `get`
as well.

DEV-13160

											
										
										
											2022-12-22 16:32:21 -05:00
+								    pub fn get<O: ObjectKind>(&self, index: ObjectIndex<O>) -> Option<&O> {
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								        self.graph
 								            .node_weight(index.into())
 								            .map(ObjectContainer::get)
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												tamer: asg: Bind transparent ident

This provides the initial implementation allowing an identifier to be
defined (bound to an object and made transparent).

I'm not yet entirely sure whether I'll stick with the "transparent" and
"opaque" terminology when there's also "declare" and "define", but a
`Missing` state is a type of declaration and so the distinction does still
seem to be important.

There is still work to be done on `ObjectIndex::<Ident>::bind_definition`,
which will follow.  I'm going to be balancing work to provide type-level
guarantees, since I don't have the time to go as far as I'd like.

DEV-13597

											
										
										
											2023-01-17 16:31:13 -05:00
+								    /// Attempt to map over an inner [`Object`] referenced by
 								    ///   [`ObjectIndex`].
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    ///
 								    /// The type `O` is the expected type of the [`Object`],
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///   which should be known to the caller based on the provided
 								    ///   [`ObjectIndex`].
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    /// This method will attempt to narrow to that object type,
 								    ///   panicking if there is a mismatch;
-												tamer: asg::object: Move into graph module

The ASG delegates certain operations to Objects so that they may enforce
their own invariants and ontology.  It is therefore important that only
objects have access to certain methods on `Asg`, otherwise those invariants
could be circumvented.

It should be noted that the nesting of this module is such that AIR should
_not_ have privileged access to the ASG---it too must utilize objects to
ensure those invariants are enforced in a single place.

DEV-13597

											
										
										
											2023-01-17 22:58:41 -05:00
+								    ///     see the [`object` module documentation](object) for more
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    ///     information and rationale on this behavior.
 								    ///
 								    /// Panics
 								    /// ======
 								    /// This method chooses to simplify the API by choosing panics for
 								    ///   situations that ought never to occur and represent significant bugs
 								    ///   in the compiler.
 								    /// Those situations are:
 								    ///
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///   1. If the provided [`ObjectIndex`] references a node index that is
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    ///        not present on the graph;
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///   2. If the node referenced by [`ObjectIndex`] exists but its container
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    ///        is empty because an object was taken but never returned; and
 								    ///   3. If an object cannot be narrowed (downcast) to type `O`,
 								    ///        representing a type mismatch between what the caller thinks
 								    ///        this object represents and what the object actually is.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    #[must_use = "returned ObjectIndex has a possibly-updated and more relevant span"]
-												tamer: asg: Bind transparent ident

This provides the initial implementation allowing an identifier to be
defined (bound to an object and made transparent).

I'm not yet entirely sure whether I'll stick with the "transparent" and
"opaque" terminology when there's also "declare" and "define", but a
`Missing` state is a type of declaration and so the distinction does still
seem to be important.

There is still work to be done on `ObjectIndex::<Ident>::bind_definition`,
which will follow.  I'm going to be balancing work to provide type-level
guarantees, since I don't have the time to go as far as I'd like.

DEV-13597

											
										
										
											2023-01-17 16:31:13 -05:00
+								    pub(super) fn try_map_obj<O: ObjectKind, E>(
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to created a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								        &mut self,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								        index: ObjectIndex<O>,
-												tamer: asg: Bind transparent ident

This provides the initial implementation allowing an identifier to be
defined (bound to an object and made transparent).

I'm not yet entirely sure whether I'll stick with the "transparent" and
"opaque" terminology when there's also "declare" and "define", but a
`Missing` state is a type of declaration and so the distinction does still
seem to be important.

There is still work to be done on `ObjectIndex::<Ident>::bind_definition`,
which will follow.  I'm going to be balancing work to provide type-level
guarantees, since I don't have the time to go as far as I'd like.

DEV-13597

											
										
										
											2023-01-17 16:31:13 -05:00
+								        f: impl FnOnce(O) -> Result<O, (O, E)>,
 								    ) -> Result<ObjectIndex<O>, E> {
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								        let obj_container =
 								            self.graph.node_weight_mut(index.into()).diagnostic_expect(
-												tamer: diagnose::panic: Require thunk or static ref for diagnostic data

Some investigation into the disassembly of TAMER's binaries showed that Rust
was not able to conditionalize `expect`-like expressions as I was hoping due
to eager evaluation language semantics in combination with the use of
`format!`.

This solves the problem for the diagnostic system by creating types that
prevent this situation from occurring statically, without the need for a
lint.

											
										
										
											2023-01-12 16:17:41 -05:00
+								                || diagnostic_node_missing_desc(index),
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								                "invalid ObjectIndex: data are missing from the ASG",
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								            );
-												tamer: asg: Bind transparent ident

This provides the initial implementation allowing an identifier to be
defined (bound to an object and made transparent).

I'm not yet entirely sure whether I'll stick with the "transparent" and
"opaque" terminology when there's also "declare" and "define", but a
`Missing` state is a type of declaration and so the distinction does still
seem to be important.

There is still work to be done on `ObjectIndex::<Ident>::bind_definition`,
which will follow.  I'm going to be balancing work to provide type-level
guarantees, since I don't have the time to go as far as I'd like.

DEV-13597

											
										
										
											2023-01-17 16:31:13 -05:00
+								        obj_container
 								            .try_replace_with(f)
 								            .map(|()| index.overwrite(obj_container.get::<Object>().span()))
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								    }
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    /// Create an iterator over the [`ObjectIndex`]es of the outgoing edges
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								    ///   of `oi`.
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    ///
 								    /// This is a generic method that simply returns an [`ObjectKind`] of
 								    ///   [`Object`] for each [`ObjectIndex`];
 								    ///     it is the responsibility of the caller to narrow the type to
 								    ///     what is intended.
 								    /// This is sufficient in practice,
 								    ///   since the graph cannot be constructed without adhering to the edge
 								    ///   ontology defined by [`ObjectRelTo`],
 								    ///     but this API is not helpful for catching problems at
 								    ///     compile-time.
 								    ///
 								    /// The reason for providing a generic index to [`Object`] is that it
 								    ///   allows the caller to determine how strict it wants to be with
 								    ///   reading from the graph;
 								    ///     for example,
 								    ///       it may prefer to filter unwanted objects rather than panicing
 								    ///       if they do not match a given [`ObjectKind`],
 								    ///         depending on its ontology.
-												tamer: asg::graph: Static- and runtime-enforced multi-kind edge ontolgoy

This allows for edges to be multiple types, and gives us two important
benefits:

  (a) Compiler-verified correctness to ensure that we don't generate graphs
      that do not adhere to the ontology; and
  (b) Runtime verification of types, so that bugs are still memory safe.

There is a lot more information in the documentation within the patch.

This took a lot of iterating to get something that was tolerable.  There's
quite a bit of boilerplate here, and maybe that'll be abstracted away better
in the future as the graph grows.

In particular, it was challenging to determine how I wanted to actually go
about narrowing and looking up edges.  Initially I had hoped to represent
the subsets as `ObjectKind`s as well so that you could use them anywhere
`ObjectKind` was expected, but that proved to be far too difficult because I
cannot return a reference to a subset of `Object` (the value would be owned
on generation).  And while in a language like C maybe I'd pad structures and
cast between them safely, since they _do_ overlap, I can't confidently do
that here since Rust's discriminant and layout are not under my control.

I tried playing around with `std::mem::Discriminant` as well, but
`discriminant` (the function) requires a _value_, meaning I couldn't get the
discriminant of a static `Object` variant without some dummy value; wasn't
worth it over `ObjectRelTy`.  We further can't assign values to enum
variants unless they hold no data.  Rust a decade from now may be different
and will be interesting to look back on this struggle.

DEV-13597

											
										
										
											2023-01-23 11:40:10 -05:00
+								    fn edges<'a, O: ObjectKind + ObjectRelatable + 'a>(
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								        &'a self,
 								        oi: ObjectIndex<O>,
-												tamer: asg::graph: Static- and runtime-enforced multi-kind edge ontolgoy

This allows for edges to be multiple types, and gives us two important
benefits:

  (a) Compiler-verified correctness to ensure that we don't generate graphs
      that do not adhere to the ontology; and
  (b) Runtime verification of types, so that bugs are still memory safe.

There is a lot more information in the documentation within the patch.

This took a lot of iterating to get something that was tolerable.  There's
quite a bit of boilerplate here, and maybe that'll be abstracted away better
in the future as the graph grows.

In particular, it was challenging to determine how I wanted to actually go
about narrowing and looking up edges.  Initially I had hoped to represent
the subsets as `ObjectKind`s as well so that you could use them anywhere
`ObjectKind` was expected, but that proved to be far too difficult because I
cannot return a reference to a subset of `Object` (the value would be owned
on generation).  And while in a language like C maybe I'd pad structures and
cast between them safely, since they _do_ overlap, I can't confidently do
that here since Rust's discriminant and layout are not under my control.

I tried playing around with `std::mem::Discriminant` as well, but
`discriminant` (the function) requires a _value_, meaning I couldn't get the
discriminant of a static `Object` variant without some dummy value; wasn't
worth it over `ObjectRelTy`.  We further can't assign values to enum
variants unless they hold no data.  Rust a decade from now may be different
and will be interesting to look back on this struggle.

DEV-13597

											
										
										
											2023-01-23 11:40:10 -05:00
+								    ) -> impl Iterator<Item = O::Rel> + 'a {
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								        self.edges_dyn(oi.widen()).map(move |dyn_rel| {
 								            let target_ty = dyn_rel.target_ty();
-												tamer: asg::graph::object::rel::DynObjectRel: Store source data

This is generic over the source, just as the target, defaulting just the
same to `ObjectIndex`.

This allows us to use only the edge information provided rather than having
to perform another lookup on the graph and then assert that we found the
correct edge.  In this case, we're dealing with an `Ident->Expr` edge, of
which there is only one, but in other cases, there may be many such edges,
and it wouldn't be possible to know _which_ was referred to without also
keeping context of the previous edge in the walk.

So, in addition to avoiding more indirection and being more immune to logic
bugs, this also allows us to avoid states in `AsgTreeToXirf` for the purpose
of tracking previous edges in the current path.  And it means that the tree
walk can seed further traversals in conjunction with it, if that is so
needed for deriving sources.

More cleanup will be needed, but this does well to set us up for moving
forward; I was too uncomfortable with having to do the separate
lookup.  This is also a more intuitive API.

But it does have the awkward effect that now I don't need the pair---I just
need the `Object`---but I'm not going to remove it because I suspect I may
need it in the future.  We'll see.

The TODO references the fact that I'm using a convenient `resolve_oi_pairs`
instead of resolving only the target first and then the source only in the
code path that needs it.  I'll want to verify that Rust will properly
optimize to avoid the source resolution in branches that do not need it.

DEV-13708

											
										
										
											2023-02-23 22:45:09 -05:00
+								            dyn_rel.narrow_target::<O>().diagnostic_unwrap(|| {
-												tamer: asg::graph::object::new_rel_dyn: Use Option

Rather than panicking at this level, let's panic at the caller, simplifying
impls and keeping them total.

This can't occur now, but an upcoming change introducing a package type will
allow for such a thing.

DEV-13159

											
										
										
											2023-01-30 11:27:40 -05:00
+								                vec![
 								                    oi.internal_error(format!(
 								                        "encountered invalid outgoing edge type {:?}",
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								                        target_ty,
-												tamer: asg::graph::object::new_rel_dyn: Use Option

Rather than panicking at this level, let's panic at the caller, simplifying
impls and keeping them total.

This can't occur now, but an upcoming change introducing a package type will
allow for such a thing.

DEV-13159

											
										
										
											2023-01-30 11:27:40 -05:00
+								                    )),
 								                    oi.help(
 								                        "this means that Asg did not enforce edge invariants \
 								                            during construction, which is a significant bug",
 								                    ),
 								                ]
 								            })
-												tamer: asg::graph: Static- and runtime-enforced multi-kind edge ontolgoy

This allows for edges to be multiple types, and gives us two important
benefits:

  (a) Compiler-verified correctness to ensure that we don't generate graphs
      that do not adhere to the ontology; and
  (b) Runtime verification of types, so that bugs are still memory safe.

There is a lot more information in the documentation within the patch.

This took a lot of iterating to get something that was tolerable.  There's
quite a bit of boilerplate here, and maybe that'll be abstracted away better
in the future as the graph grows.

In particular, it was challenging to determine how I wanted to actually go
about narrowing and looking up edges.  Initially I had hoped to represent
the subsets as `ObjectKind`s as well so that you could use them anywhere
`ObjectKind` was expected, but that proved to be far too difficult because I
cannot return a reference to a subset of `Object` (the value would be owned
on generation).  And while in a language like C maybe I'd pad structures and
cast between them safely, since they _do_ overlap, I can't confidently do
that here since Rust's discriminant and layout are not under my control.

I tried playing around with `std::mem::Discriminant` as well, but
`discriminant` (the function) requires a _value_, meaning I couldn't get the
discriminant of a static `Object` variant without some dummy value; wasn't
worth it over `ObjectRelTy`.  We further can't assign values to enum
variants unless they hold no data.  Rust a decade from now may be different
and will be interesting to look back on this struggle.

DEV-13597

											
										
										
											2023-01-23 11:40:10 -05:00
+								        })
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    }
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								    /// Create an iterator over the [`ObjectIndex`]es of the outgoing edges
 								    ///   of `oi` in a dynamic context.
 								    ///
 								    /// _This method should be used only when the types of objects cannot be
 								    ///   statically known,_
 								    ///     which is generally true only for code paths operating on
 								    ///     significant portions of
 								    ///       (or the entirety of)
 								    ///       the graph without distinction.
 								    /// See [`Self::edges`] for more information.
 								    fn edges_dyn<'a>(
 								        &'a self,
 								        oi: ObjectIndex<Object>,
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								    ) -> impl Iterator<Item = DynObjectRel> + 'a {
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								        self.graph.edges(oi.into()).map(move |edge| {
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								            let (src_ty, target_ty, ctx_span) = edge.weight();
 								            DynObjectRel::new(
 								                *src_ty,
 								                *target_ty,
-												tamer: asg::graph::object::rel::DynObjectRel: Store source data

This is generic over the source, just as the target, defaulting just the
same to `ObjectIndex`.

This allows us to use only the edge information provided rather than having
to perform another lookup on the graph and then assert that we found the
correct edge.  In this case, we're dealing with an `Ident->Expr` edge, of
which there is only one, but in other cases, there may be many such edges,
and it wouldn't be possible to know _which_ was referred to without also
keeping context of the previous edge in the walk.

So, in addition to avoiding more indirection and being more immune to logic
bugs, this also allows us to avoid states in `AsgTreeToXirf` for the purpose
of tracking previous edges in the current path.  And it means that the tree
walk can seed further traversals in conjunction with it, if that is so
needed for deriving sources.

More cleanup will be needed, but this does well to set us up for moving
forward; I was too uncomfortable with having to do the separate
lookup.  This is also a more intuitive API.

But it does have the awkward effect that now I don't need the pair---I just
need the `Object`---but I'm not going to remove it because I suspect I may
need it in the future.  We'll see.

The TODO references the fact that I'm using a convenient `resolve_oi_pairs`
instead of resolving only the target first and then the source only in the
code path that needs it.  I'll want to verify that Rust will properly
optimize to avoid the source resolution in branches that do not need it.

DEV-13708

											
										
										
											2023-02-23 22:45:09 -05:00
+								                oi,
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								                ObjectIndex::<Object>::new(edge.target(), oi),
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								                *ctx_span,
-												tamer: asg::graph::visit::tree_reconstruction: New graph traversal

This begins to introduce a graph traversal useful for a source
reconstruction from the current state of the ASG.  The idea is, after
having parsed and ingested the source through the lowering pipeline, to
re-output it to (a) prove that we have parsed correctly and (b) allow
progressively moving things from the XSLT-based compiler into TAMER.

There's quite a bit of documentation here; see that for more
information.  Generalizing this in an appropriate way took some time, but I
think this makes sense (that work began with the introduction of cross edges
in terms of the tree described by the graph's ontology).  But I do need to
come up with an illustration to include in the documentation.

DEV-13708

											
										
										
											2023-02-07 14:59:36 -05:00
+								            )
 								        })
 								    }
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								    /// Incoming edges to `oi` filtered by [`ObjectKind`] `OI`.
 								    ///
 								    /// The rationale behind the filtering is that objects ought to focus
 								    ///   primarily on what they _relate to_,
 								    ///     which is what the ontology is designed around.
 								    /// If an object cares about what has an edge _to_ it,
 								    ///   it should have good reason and a specific use case in mind.
 								    ///
 								    /// Only incoming edges whose recorded source type equals
 								    ///   `OI::rel_ty()` are yielded;
 								    ///     each yielded [`ObjectIndex`] is constructed from the source
 								    ///     node of such an edge.
 								    /// The returned iterator is lazy and borrows the graph for `'a`.
 								    fn incoming_edges_filtered<'a, OI: ObjectKind + ObjectRelatable + 'a>(
 								        &'a self,
 								        oi: ObjectIndex<impl ObjectKind + ObjectRelFrom<OI> + 'a>,
 								    ) -> impl Iterator<Item = ObjectIndex<OI>> + 'a {
 								        self.graph
 								            // Restrict the walk to edges pointing _at_ `oi`.
 								            .edges_directed(oi.into(), Direction::Incoming)
 								            // The first component of the edge weight is the source
 								            //   type,
 								            //     so this retains only edges originating from an `OI`.
 								            .filter(|edge| edge.weight().0 == OI::rel_ty())
 								            // `oi` is forwarded as the second argument of
 								            //   `ObjectIndex::new`
 								            //     (presumably as the span source for the new index;
 								            //       confirm against `ObjectIndex::new`).
 								            .map(move |edge| ObjectIndex::<OI>::new(edge.source(), oi))
 								    }
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    /// Retrieve the [`ObjectIndex`] to which the given `ident` is bound,
 								    ///   if any.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    /// The type parameter `O` indicates the _expected_ [`ObjectKind`] to be
 								    ///   bound to the returned [`ObjectIndex`],
 								    ///     which will be used for narrowing (downcasting) the object after
 								    ///     lookup.
 								    /// An incorrect kind will not cause any failures until such a lookup
 								    ///   occurs.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ///
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    /// This will return [`None`] if the identifier is either opaque or does
 								    ///   not exist.
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    fn get_ident_oi<O: ObjectKind>(
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								        &self,
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								        ident: SPair,
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    ) -> Option<ObjectIndex<O>> {
-												tamer: asg::Asg::lookup: SymbolId=>SPair

This seems to have been an oversight from when I recently introduced SPairs
to ASG; I noticed it while working on another change and receiving back a
`DUMMY_SPAN`.

DEV-13597

											
										
										
											2023-01-17 14:42:43 -05:00
+								        self.lookup(ident)
-												tamer: Initial concept for AIR/ASG Expr

This begins to place expressions on the graph---something that I've been
thinking about for a couple of years now, so it's interesting to finally be
doing it.

This is going to evolve; I want to get some things committed so that it's
clear how I'm moving forward.  The ASG makes things a bit awkward for a
number of reasons:

  1. I'm dealing with older code where I had a different model of doing
       things;
  2. It's mutable, rather than the mostly-functional lowering pipeline;
  3. We're dealing with an aggregate ever-evolving blob of data (the graph)
       rather than a stream of tokens; and
  4. We don't have as many type guarantees.

I've shown with the lowering pipeline that I'm able to take a mutable
reference and convert it into something that's both functional and
performant, where I remove it from its container (an `Option`), create a new
version of it, and place it back.  Rust is able to optimize away the memcpys
and such and just directly manipulate the underlying value, which is often a
register with all of the inlining.

_But_ this is a different scenario now.  The lowering pipeline has a narrow
context.  The graph has to keep hitting memory.  So we'll see how this
goes.  But it's most important to get this working and measure how it
performs; I'm not trying to prematurely optimize.  My attempts right now are
for the way that I wish to develop.

Speaking to #4 above, it also sucks that I'm not able to type the
relationships between nodes on the graph.  Rather, it's not that I _can't_,
but a project to create a typed graph library is beyond the scope of this
work and would take far too much time.  I'll leave that to a personal,
non-work project.  Instead, I'm going to have to narrow the type any time
the graph is accessed.  And while that sucks, I'm going to do my best to
encapsulate those details to make it as seamless as possible API-wise.  The
performance hit of performing the narrowing I'm hoping will be very small
relative to all the business logic going on (a single cache miss is bound to
be far more expensive than many narrowings which are just integer
comparisons and branching)...but we'll see.  Introducing branching sucks,
but branch prediction is pretty damn good in modern CPUs.

DEV-13160

											
										
										
											2022-12-21 16:47:04 -05:00
+								            .and_then(|identi| {
 								                self.graph
 								                    .neighbors_directed(identi.into(), Direction::Outgoing)
 								                    .next()
 								            })
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								            // Note that this use of `O` for `ObjectIndex` here means "I
 								            //   _expect_ this to be `O`";
 								            //     the type will be verified during narrowing but will panic
 								            //     if this expectation is not met.
 								            .map(|ni| ObjectIndex::<O>::new(ni, ident.span()))
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    }
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								    /// Retrieve the [`ObjectIndex`] to which the given `ident` is bound,
 								    ///   panicing if the identifier is either opaque or does not exist.
 								    ///
 								    /// Panics
 								    /// ======
 								    /// This method will panic if the identifier is opaque
 								    ///   (has no edge to the object to which it is bound)
 								    ///   or does not exist on the graph.
 								    pub fn expect_ident_oi<O: ObjectKind>(
 								        &self,
 								        ident: SPair,
 								    ) -> ObjectIndex<O> {
 								        self.get_ident_oi(ident).diagnostic_expect(
-												tamer: diagnose::panic: Require thunk or static ref for diagnostic data

Some investigation into the disassembly of TAMER's binaries showed that Rust
was not able to conditionalize `expect`-like expressions as I was hoping due
to eager evaluation language semantics in combination with the use of
`format!`.

This solves the problem for the diagnostic system by creating types that
prevent this situation from occurring statically, without the need for a
lint.

											
										
										
											2023-01-12 16:17:41 -05:00
+								            || diagnostic_opaque_ident_desc(ident),
 								            || {
 								                format!(
 								                    "opaque identifier: {} has no object binding",
 								                    TtQuote::wrap(ident),
 								                )
 								            },
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								        )
 								    }
-												tamer: asg::air: Expression building error cases

This addresses the two outstanding `todo!` match arms representing errors in
lowering expressions into the graph.  As noted in the comments, these errors
are unlikely to be hit when using TAME in the traditional way, since
e.g. XIR and NIR are going to catch the equivalent problems within their own
contexts (unbalanced tags and a valid expression grammar respectively).

_But_, the IR does need to stand on its own, and I further hope that some
tooling maybe can interact more directly with AIR in the future.

DEV-13160

											
										
										
											2023-01-09 12:02:59 -05:00
+								    /// Attempt to retrieve the [`Object`] to which the given `ident` is bound.
 								    ///
 								    /// If the identifier either does not exist on the graph or is opaque
 								    ///   (is not bound to any expression),
 								    ///   then [`None`] will be returned.
 								    ///
 								    /// If the system expects that the identifier must exist and would
 								    ///   otherwise represent a bug in the compiler,
 								    ///     see [`Self::expect_ident_obj`].
 								    ///
 								    /// Panics
 								    /// ======
 								    /// This method will panic if certain graph invariants are not met,
 								    ///   representing an invalid system state that should not be able to
 								    ///   occur through this API.
 								    /// Violations of these invariants represent either a bug in the API
 								    ///   (that allows for the invariant to be violated)
 								    ///   or direct manipulation of the underlying graph.
 								    pub fn get_ident_obj<O: ObjectKind>(&self, ident: SPair) -> Option<&O> {
-												tamer: asg: Add expression edges

This introduces a number of abstractions, whose concepts are not fully
documented yet since I want to see how it evolves in practice first.

This introduces the concept of edge ontology (similar to a schema) using the
type system.  Even though we are not able to determine what the graph will
look like statically---since that's determined by data fed to us at
runtime---we _can_ ensure that the code _producing_ the graph from those
data will produce a graph that adheres to its ontology.

Because of the typed `ObjectIndex`, we're also able to implement operations
that are specific to the type of object that we're operating on.  Though,
since the type is not (yet?) stored on the edge itself, it is possible to
walk the graph without looking at node weights (the `ObjectContainer`) and
therefore avoid panics for invalid type assumptions, which is bad, but I
don't think that'll happen in practice, since we'll want to be resolving
nodes at some point.  But I'll address that more in the future.

Another thing to note is that walking edges is only done in tests right now,
and so there's no filtering or anything; once there are nodes (if there are
nodes) that allow for different outgoing edge types, we'll almost certainly
want filtering as well, rather than panicking.  We'll also want to be able to
query for any object type, but filter only to what's permitted by the
ontology.

DEV-13160

											
										
										
											2023-01-11 15:49:37 -05:00
+								        self.get_ident_oi::<O>(ident).map(|oi| self.expect_obj(oi))
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								    }
 								    pub(super) fn expect_obj<O: ObjectKind>(&self, oi: ObjectIndex<O>) -> &O {
 								        let obj_container =
 								            self.graph.node_weight(oi.into()).diagnostic_expect(
-												tamer: diagnose::panic: Require thunk or static ref for diagnostic data

Some investigation into the disassembly of TAMER's binaries showed that Rust
was not able to conditionalize `expect`-like expressions as I was hoping due
to eager evaluation language semantics in combination with the use of
`format!`.

This solves the problem for the diagnostic system by creating types that
prevent this situation from occurring statically, without the need for a
lint.

											
										
										
											2023-01-12 16:17:41 -05:00
+								                || diagnostic_node_missing_desc(oi),
-												tamer: asg: New ObjectContainer for Node type

Working with the graph can be confusing with all of the layers
involved.  This begins to provide a better layer of abstraction that can
encapsulate the concept and enforce invariants.

Since I'm better able to enforce invariants now, this also removes the span
from the diagnostic message, since the invariant is now always enforced with
certainty.  I'm not removing the runtime panic, though; we can revisit that
if future profiling shows that it makes a negative impact.

DEV-13160

											
										
										
											2023-01-10 15:06:24 -05:00
+								                "invalid ObjectIndex: data are missing from the ASG",
 								            );
 								        obj_container.get()
-												tamer: asg::air: Expression building error cases

This addresses the two outstanding `todo!` match arms representing errors in
lowering expressions into the graph.  As noted in the comments, these errors
are unlikely to be hit when using TAME in the traditional way, since
e.g. XIR and NIR are going to catch the equivalent problems within their own
contexts (unbalanced tags and a valid expression grammar respectively).

_But_, the IR does need to stand on its own, and I further hope that some
tooling maybe can interact more directly with AIR in the future.

DEV-13160

											
										
										
											2023-01-09 12:02:59 -05:00
+								    }
 								    /// Attempt to retrieve the [`Object`] to which the given `ident` is bound,
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    ///   panicing if the identifier is opaque or does not exist.
 								    ///
-												tamer: asg::air: Expression building error cases

This addresses the two outstanding `todo!` match arms representing errors in
lowering expressions into the graph.  As noted in the comments, these errors
are unlikely to be hit when using TAME in the traditional way, since
e.g. XIR and NIR are going to catch the equivalent problems within their own
contexts (unbalanced tags and a valid expression grammar respectively).

_But_, the IR does need to stand on its own, and I further hope that some
tooling maybe can interact more directly with AIR in the future.

DEV-13160

											
										
										
											2023-01-09 12:02:59 -05:00
+								    /// This method represents a compiler invariant;
 								    ///   it should _only_ be used when the identifier _must_ exist,
 								    ///     otherwise there is a bug in the compiler.
 								    /// If this is _not_ the case,
 								    ///   use [`Self::get_ident_obj`] to get [`None`] in place of a panic.
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    ///
 								    /// Panics
 								    /// ======
-												tamer: asg::air: Expression building error cases

This addresses the two outstanding `todo!` match arms representing errors in
lowering expressions into the graph.  As noted in the comments, these errors
are unlikely to be hit when using TAME in the traditional way, since
e.g. XIR and NIR are going to catch the equivalent problems within their own
contexts (unbalanced tags and a valid expression grammar respectively).

_But_, the IR does need to stand on its own, and I further hope that some
tooling maybe can interact more directly with AIR in the future.

DEV-13160

											
										
										
											2023-01-09 12:02:59 -05:00
+								    /// This method will panic if
 								    ///
 								    ///   1. The identifier does not exist on the graph; or
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								    ///   2. The identifier is opaque (has no edge to any object on the
 								    ///        graph).
-												tamer: asg::air: Expression building error cases

This addresses the two outstanding `todo!` match arms representing errors in
lowering expressions into the graph.  As noted in the comments, these errors
are unlikely to be hit when using TAME in the traditional way, since
e.g. XIR and NIR are going to catch the equivalent problems within their own
contexts (unbalanced tags and a valid expression grammar respectively).

_But_, the IR does need to stand on its own, and I further hope that some
tooling maybe can interact more directly with AIR in the future.

DEV-13160

											
										
										
											2023-01-09 12:02:59 -05:00
+								    pub fn expect_ident_obj<O: ObjectKind>(&self, ident: SPair) -> &O {
 								        self.get_ident_obj(ident).diagnostic_expect(
-												tamer: diagnose::panic: Require thunk or static ref for diagnostic data

Some investigation into the disassembly of TAMER's binaries showed that Rust
was not able to conditionalize `expect`-like expressions as I was hoping due
to eager evaluation language semantics in combination with the use of
`format!`.

This solves the problem for the diagnostic system by creating types that
prevent this situation from occurring statically, without the need for a
lint.

											
										
										
											2023-01-12 16:17:41 -05:00
+								            || diagnostic_opaque_ident_desc(ident),
 								            || {
 								                format!(
 								                    "opaque identifier: {} has no object binding",
 								                    TtQuote::wrap(ident),
 								                )
 								            },
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
+								        )
 								    }
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    /// Retrieve an identifier from the graph by [`ObjectIndex`].
-												tamer: asg::Object: Introduce Object::Ident

This wraps `Ident` in a new `Object` variant and modifies `Asg` so that its
nodes are of type `Object`.

This unfortunately requires runtime type checking.  Whether or not that's
worth alleviating in the future depends on a lot of different things, since
it'll require my own graph implementation, and I have to focus on other
things right now.  Maybe it'll be worth it in the future.

Note that this also gets rid of some doc examples that simply aren't worth
maintaining as the API evolves.

DEV-11864

											
										
										
											2022-05-19 12:31:37 -04:00
+								    ///
 								    /// If the object exists but is not an identifier,
 								    ///   [`None`] will be returned.
 								    #[inline]
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub fn get_ident(&self, index: ObjectIndex<Ident>) -> Option<&Ident> {
-												tamer: asg::Asg::get: Narrow object type

This uses `ObjectIndex` to automatically narrow the type to what is
expected.

Given that `ObjectIndex` is supposed to mean that there must be an object
with that index, perhaps the next step is to remove the `Option` from `get`
as well.

DEV-13160

											
										
										
											2022-12-22 16:32:21 -05:00
+								        self.get(index)
-												tamer: asg::Object: Introduce Object::Ident

This wraps `Ident` in a new `Object` variant and modifies `Asg` so that its
nodes are of type `Object`.

This unfortunately requires runtime type checking.  Whether or not that's
worth alleviating in the future depends on a lot of different things, since
it'll require my own graph implementation, and I have to focus on other
things right now.  Maybe it'll be worth it in the future.

Note that this also gets rid of some doc examples that simply aren't worth
maintaining as the API evolves.

DEV-11864

											
										
										
											2022-05-19 12:31:37 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
+								    /// Attempt to retrieve an identifier from the graph by name.
 								    ///
 								    /// Since only identifiers carry a name,
 								    ///   this method cannot be used to retrieve all possible objects on the
 								    ///   graph---for
 								    ///     that, see [`Asg::get`].
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    #[inline]
-												tamer: asg::Asg::lookup: SymbolId=>SPair

This seems to have been an oversight from when I recently introduced SPairs
to ASG; I noticed it while working on another change and receiving back a
`DUMMY_SPAN`.

DEV-13597

											
										
										
											2023-01-17 14:42:43 -05:00
+								    pub fn lookup(&self, id: SPair) -> Option<ObjectIndex<Ident>> {
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								        let i = id.symbol().as_usize();
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        self.index
-												Revert "tamer: asg::graph::index: Use FxHashMap in place of Vec"

This reverts commit 1b7eac337cd5909c01ede3a5b3fba577898d5961.

I don't actually think this ends up being worth it in the end.  Sure, the
implementation is simpler at a glance, but it is more complex at runtime,
adding more cycles for little benefit.

There are ~220 pre-interned symbols at the time of writing, so ~880 bytes (4
bytes per symbol) are potentially wasted if _none_ of the pre-interned
symbols end up serving as identifiers in the graph.  The reality is that
some of them _will_ be, but using HashMap also introduces overhead, so in
practice, the savings is much less.  On a fairly small package, it was <100
bytes memory saving in `tamec`.  For `tameld`, it actually uses _more_
memory, especially on larger packages, because there are 10s of thousands of
symbols involved.  And we're incurring a rehashing cost on resize, unlike
this original plain `Vec` implementation.

So, I'm leaving this in the history to reference in the future or return to
it if others ask; maybe it'll be worth it in the future.

											
										
										
											2023-01-27 09:54:26 -05:00
+								            .get(i)
 								            .filter(|ni| ni.index() > 0)
 								            .map(|ni| ObjectIndex::new(*ni, id.span()))
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
 								    /// Declare that `dep` is a dependency of `ident`.
 								    ///
 								    /// An object must be declared as a dependency if its value must be
 								    ///   computed before computing the value of `ident`.
 								    /// The [linker][crate::ld] will ensure this ordering.
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
+								    ///
 								    /// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
-												tamer: Global interners

This is a major change, and I apologize for it all being in one commit.  I
had wanted to break it up, but doing so would have required a significant
amount of temporary work that was not worth doing while I'm the only one
working on this project at the moment.

This accomplishes a number of important things, now that I'm preparing to
write the first compiler frontend for TAMER:

  1. `Symbol` has been removed; `SymbolId` is used in its place.
  2. Consequently, symbols use 16 or 32 bits, rather than a 64-bit pointer.
  3. Using symbols no longer requires dereferencing.
  4. **Lifetimes no longer pollute the entire system! (`'i`)**
  5. Two global interners are offered to produce `SymbolStr` with `'static`
     lifetimes, simplifying lifetime management and borrowing where strings
     are still needed.
  6. A nice API is provided for interning and lookups (e.g. "foo".intern())
     which makes this look like a core feature of Rust.

Unfortunately, making this change required modifications to...virtually
everything.  And that serves to emphasize why this change was needed:
_everything_ used symbols, and so there's no use in not providing globals.

I implemented this in a way that still provides for loose coupling through
Rust's trait system.  Indeed, Rustc offers a global interner, and I decided
not to go that route initially because it wasn't clear to me that such a
thing was desirable.  It didn't become apparent to me, in fact, until the
recent commit where I introduced `SymbolIndexSize` and saw how many things
had to be touched; the linker evolved so rapidly as I was trying to learn
Rust that I lost track of how bad it got.

Further, this shows how the design of the internment system was a bit
naive---I assumed certain requirements that never panned out.  In
particular, everything using symbols stored `&'i Symbol<'i>`---that is, a
reference (usize) to an object containing an index (32-bit) and a string
slice (128-bit).  So it was a reference to a pretty large value, which was
allocated in the arena alongside the interned string itself.

But, that was assuming that something would need both the symbol index _and_
a readily available string.  That's not the case.  In fact, it's pretty
clear that interning happens at the beginning of execution, that `SymbolId`
is all that's needed during processing (unless an error occurs; more on that
below); and it's not until _the very end_ that we need to retrieve interned
strings from the pool to write either to a file or to display to the
user.  It was horribly wasteful!

So `SymbolId` solves the lifetime issue in itself for most systems, but it
still requires that an interner be available for anything that needs to
create or resolve symbols, which, as it turns out, is still a lot of
things.  Therefore, I decided to implement them as thread-local static
variables, which is very similar to what Rustc does itself (Rustc's are
scoped).  TAMER does not use threads, so the resulting `'static` lifetime
should be just fine for now.  Eventually I'd like to implement `!Send` and
`!Sync`, though, to prevent references from escaping the thread (as noted in
the patch); I can't do that yet, since the feature has not yet been
stabilized.

In the end, this leaves us with a system that's much easier to use and
maintain; hopefully easier for newcomers to get into without having to deal
with so many complex lifetimes; and a nice API that makes it a pleasure to
work with symbols.

Admittedly, the `SymbolIndexSize` adds some complexity, and we'll see if I
end up regretting that down the line, but it exists for an important reason:
the `Span` and other structures that'll be introduced need to pack a lot of
data into 64 bits so they can be freely copied around to keep lifetimes
simple without wreaking havoc in other ways, but a 32-bit symbol size needed
by the linker is too large for that.  (Actually, the linker doesn't yet need
32 bits for our systems, but it's going to in the somewhat near future
unless we optimize away a bunch of symbols...but I'd really rather not have
the linker hit a limit that requires a lot of code changes to resolve).

Rustc uses interned spans when they exceed 8 bytes, but I'd prefer to avoid
that for now.  Most systems can just use one of the `PkgSymbolId` or
`ProgSymbolId` type aliases and not have to worry about it.  Systems that
are actually shared between the compiler and the linker do, though, but it's
not like we don't already have a bunch of trait bounds.

Of course, as we implement link-time optimizations (LTO) in the future, it's
possible most things will need the size and I'll grow frustrated with that
and possibly revisit this.  We shall see.

Anyway, this was exhausting...and...onward to the first frontend!

											
										
										
											2021-08-02 23:54:37 -04:00
+								    ///   be looked up by [`SymbolId`] or if they may not yet have been
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
+								    ///   declared.
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub fn add_dep<O: ObjectKind>(
 								        &mut self,
 								        identi: ObjectIndex<Ident>,
 								        depi: ObjectIndex<O>,
-												tamer: asg::graph: Static- and runtime-enforced multi-kind edge ontology

This allows for edges to be multiple types, and gives us two important
benefits:

  (a) Compiler-verified correctness to ensure that we don't generate graphs
      that do not adhere to the ontology; and
  (b) Runtime verification of types, so that bugs are still memory safe.

There is a lot more information in the documentation within the patch.

This took a lot of iterating to get something that was tolerable.  There's
quite a bit of boilerplate here, and maybe that'll be abstracted away better
in the future as the graph grows.

In particular, it was challenging to determine how I wanted to actually go
about narrowing and looking up edges.  Initially I had hoped to represent
the subsets as `ObjectKind`s as well so that you could use them anywhere
`ObjectKind` was expected, but that proved to be far too difficult because I
cannot return a reference to a subset of `Object` (the value would be owned
on generation).  And while in a language like C maybe I'd pad structures and
cast between them safely, since they _do_ overlap, I can't confidently do
that here since Rust's discriminant and layout are not under my control.

I tried playing around with `std::mem::Discriminant` as well, but
`discriminant` (the function) requires a _value_, meaning I couldn't get the
discriminant of a static `Object` variant without some dummy value; wasn't
worth it over `ObjectRelTy`.  We further can't assign values to enum
variants unless they hold no data.  Rust a decade from now may be different
and will be interesting to look back on this struggle.

DEV-13597

											
										
										
											2023-01-23 11:40:10 -05:00
+								    ) where
 								        Ident: ObjectRelTo<O>,
 								    {
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        self.graph.update_edge(
 								            identi.into(),
 								            depi.into(),
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								            (Ident::rel_ty(), O::rel_ty(), None),
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        );
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    }
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
 								    /// Check whether `dep` is a dependency of `ident`.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    #[inline]
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    pub fn has_dep(
 								        &self,
 								        ident: ObjectIndex<Ident>,
 								        dep: ObjectIndex<Ident>,
 								    ) -> bool {
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        self.graph.contains_edge(ident.into(), dep.into())
 								    }
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
 								    /// Declare that `dep` is a dependency of `ident`,
 								    ///   regardless of whether they are known.
 								    ///
 								    /// In contrast to [`add_dep`][Asg::add_dep],
 								    ///   this method will add the dependency even if one or both of `ident`
 								    ///   or `dep` have not yet been declared.
 								    /// In such a case,
-												TAMER: Make Asg generic over object

There's a lot here to make the object stored on the `Asg` generic.  This
introduces `ObjectState` for state transitions and `ObjectData` for pure
data retrieval.  This will allow not only for mocking, but will be useful to
enforce compile-time restrictions on the type of objects expected by the
linker vs. the compiler (e.g. the linker will not have expressions).

This commit intentionally leaves the corresponding tests in their original
location to prove that the functionality has not changed; they'll be moved
in a future commit.

This also leaves the names as "Object" to reduce the cognitive
overhead of this commit.  It will be renamed to something like "IdentObject"
in the near future to clarify the intent of the current object type and to
open the way for expressions and a type that marries both of them in the
future.

Once all of this is done, we'll finally be able to make changes to the
compatibility logic in state transitions to implement extern compatibility
checks during resolution.

DEV-7087

											
										
										
											2020-03-14 00:10:03 -04:00
+								    ///   a missing identifier will be added as a placeholder,
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
+								    ///     allowing the ASG to be built with partial information as
 								    ///     identifiers continue to be discovered.
-												tamer: asg::Ident{Object=>}: Rename

I think this may have been renamed _from_ `Ident` some time ago, but I'm too
lazy to check.  In any case, the name is redundant.

DEV-11864

											
										
										
											2022-05-19 11:17:04 -04:00
+								    /// See [`Ident::declare`] for more information.
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
+								    ///
 								    /// References to both identifiers are returned in argument order.
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								    pub fn add_dep_lookup(
-												TAMER: xmle output changes to support Summary Page

Co-Authored-By: Joseph Frazer <joseph.frazer@ryansg.com>

											
										
										
											2020-01-14 16:26:36 -05:00
+								        &mut self,
-												tamer: asg: Associate spans with identifiers and introduce diagnostics

This ASG implementation is a refactored form of original code from the
proof-of-concept linker, which was well before the span and diagnostic
implementations, and well before I knew for certain how I was going to solve
that problem.

This was quite the pain in the ass, but introduces spans to the AIR tokens
and graph so that we always have useful diagnostic information.  With that
said, there are some important things to note:

  1. Linker spans will originate from the `xmlo` files until we persist
     spans to those object files during `tamec`'s compilation.  But it's
     better than nothing.
  2. Some additional refactoring is still needed for consistency, e.g. use
     of `SPair`.
  3. This is just a preliminary introduction.  More refactoring will come as
     tamec is continued.

DEV-13041

											
										
										
											2022-12-15 12:07:58 -05:00
+								        ident: SPair,
 								        dep: SPair,
-												tamer: asg::object::Object{Ref=>Index}: Associate object type

This makes the system a bit more ergonomic and introduces additional type
safety by associating the narrowed object type with the
`ObjectIndex` (previously `ObjectRef`).  Not only does this allow us to
explicitly state the type of object wherever those indices are stored, but
it also allows the API to automatically narrow to that type when operating
on it again without the caller having to worry about it.

DEV-13160

											
										
										
											2022-12-22 14:24:40 -05:00
+								    ) -> (ObjectIndex<Ident>, ObjectIndex<Ident>) {
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								        let identi = self.lookup_or_missing(ident);
 								        let depi = self.lookup_or_missing(dep);
-												TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).

											
										
										
											2020-01-12 22:59:16 -05:00
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        self.graph.update_edge(
 								            identi.into(),
 								            depi.into(),
-												tamer: asg::graph: Formalize dynamic relationships (edges)

The `TreePreOrderDfs` iterator needed to expose additional edge context to
the caller (specifically, the `Span`).  This was getting a bit messy, so
this consolidates everything into a new `DynObjectRel`, which also
emphasizes that it is in need of narrowing.

Packing everything up like that also allows us to return more information to
the caller without complicating the API, since the caller does not need to
be concerned with all of those values individually.

Depth is kept separate, since that is a property of the traversal and is not
stored on the graph.  (Rather, it _is_ a property of the graph, but it's not
calculated until traversal.  But, depth will also vary for a given node
because of cross edges, and so we cannot store any concrete depth on the
graph for a given node.  Not even a canonical one, because once we start
doing inlining and common subexpression elimination, there will be shared
edges that are _not_ cross edges (the node is conceptually part of _both_
trees).  Okay, enough of this rambling parenthetical.)

DEV-13708

											
										
										
											2023-02-09 13:11:27 -05:00
+								            (Ident::rel_ty(), Ident::rel_ty(), None),
-												tamer: asg: Introduce edge from Package to Ident

Included in this diff are the corresponding changes to the graph to support
the change.  Adding the edge was easy, but we also need a way to get the
package for an identifier.  The easiest way to do that is to modify the edge
weight to include not just the target node type, but also the source.

DEV-13159

											
										
										
											2023-01-31 16:37:25 -05:00
+								        );
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
 								        (identi, depi)
 								    }
 								}
-												[DEV-8000] ir::asg: Introduce SortableAsgError

This will be used for the next commit, but this change has been isolated
both because it distracts from the implementation change in the next commit,
and because it cleans up the code by removing the need for a type parameter
on `AsgError`.

Note that the sort test cases now use `unwrap` instead of having
`{,Sortable}AsgError` support one or the other---this is because that does
not currently happen in practice, and there is not supposed to be a
hierarchy; they are siblings (though perhaps their name may imply otherwise).

											
										
										
											2020-07-01 13:38:01 -04:00
-												tamer: asg::air::AirAggregate: Initial impl of nested exprs

This introduces a number of concepts together, again to demonstrate that
they were derived.

This introduces support for nested expressions, extending the previous
work.  It also supports error recovery for dangling expressions.

The parser states are a mess; there is a lot of duplicate code here that
needs refactoring, but I wanted to commit this first at a known-good state
so that the diff will demonstrate the need for the change that will
follow; the opportunities for abstraction are plainly visible.

The immutable stack introduced here could be generalized, if needed, in the
future.

Another important note is that Rust optimizes away the `memcpy`s for the
stack that was introduced here.  The initial Parser Context was introduced
because of `ArrayVec` inhibiting that elision, but Vec never had that
problem.  In the future, I may choose to go back and remove ArrayVec, but I
had wanted to keep memory allocation out of the picture as much as possible
to make the disassembly and call graph easier to reason about and to have
confidence that optimizations were being performed as intended.

With that said---it _should_ be eliding in tamec, since we're not doing
anything meaningful yet with the graph.  It does also elide in tameld, but
it's possible that Rust recognizes that those code paths are never taken
because tameld does nothing with expressions.  So I'll have to monitor this
as I progress and adjust accordingly; it's possible a future commit will
call BS on everything I just said.

Of course, the counter-point to that is that Rust is optimizing them away
anyway, but Vec _does_ still require allocation; I was hoping to keep such
allocation at the fringes.  But another counter-point is that it _still_ is
allocated at the fringe, when the context is initialized for the parser as
part of the lowering pipeline.  But I didn't know how that would all come
together back then.

...alright, enough rambling.

DEV-13160

											
										
										
											2023-01-05 15:57:06 -05:00
 								/// Build the diagnostic description used when an object referenced by
 								///   an [`ObjectIndex`] cannot be found on the ASG.
 								///
 								/// The returned vector holds one internal-error annotation followed by
 								///   five help annotations,
 								///     all produced from `index`.
 								/// The help text explains that either the index was malformed or the
 								///   object no longer exists on the graph,
 								///     and that the system cannot proceed with confidence.
+								fn diagnostic_node_missing_desc<O: ObjectKind>(
 								    index: ObjectIndex<O>,
 								) -> Vec<AnnotatedSpan<'static>> {
 								    vec![
 								        index.internal_error("this object is missing from the ASG"),
 								        index.help("this means that either an ObjectIndex was malformed, or"),
 								        index.help("  the object no longer exists on the graph, both of"),
 								        index.help("  which are unexpected and possibly represent data"),
 								        index.help("  corruption."),
 								        index.help("The system cannot proceed with confidence."),
 								    ]
 								}
 								/// Build the diagnostic description used when an identifier is not
 								///   bound to any object on the ASG.
 								///
 								/// The returned vector holds one internal-error annotation followed by
 								///   three help annotations,
 								///     all produced from `ident`.
 								/// The help text explains that the system expected to reach the object
 								///   named by the identifier,
 								///     but no corresponding object is present on the graph.
 								fn diagnostic_opaque_ident_desc(ident: SPair) -> Vec<AnnotatedSpan<'static>> {
 								    vec![
 								        ident.internal_error(
 								            "this identifier is not bound to any object on the ASG",
 								        ),
 								        ident.help("the system expects to be able to reach the object that"),
 								        ident.help("  this identifies, but this identifier has no"),
 								        ident.help("  corresponding object present on the graph."),
 								    ]
 								}
-												tamer: asg: Remove generic Asg, rename {Base=>}Asg

This is the beginning of an incremental refactoring to remove generics, to
simplify the ASG.  When I initially wrote the linker, I wasn't sure what
direction I was going in, but I was also negatively influenced by more
traditional approaches to both design and unit testing.

If we're going to call the ASG an IR, then it needs to be one---if the core
of the IR is generic, then it's more like an abstract data structure than
anything.  We can abstract around the IR to slice it up into components that
are a little easier to reason about and understand how responsibilities are
segregated.

DEV-11864

											
										
										
											2022-05-11 16:38:59 -04:00
+								#[cfg(test)]
-												tamer: asg::graph::test: Extract into own file

DEV-13597

											
										
										
											2023-01-17 21:57:50 -05:00
+								mod test;