TAMER: Initial abstract semantic graph (ASG)

This begins to introduce the ASG, backed by Petgraph.  The API will continue
to evolve, and Petgraph will likely be encapsulated so that our
implementation can vary independently from it (or even remove it in the
future).
master
Mike Gerwitz 2020-01-12 22:59:16 -05:00
parent f177b6ae5d
commit bcc2ab1221
11 changed files with 1433 additions and 249 deletions

View File

@ -0,0 +1,81 @@
// Global constants across the entirety of TAMER
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! System-wide static configuration.
//!
//! This module provides a system-wide configuration.
//! Subsystems should reference these values rather than defining their own
//! and risk incompatibilities or maintenance issues as requirements
//! change.
//!
//! By convention,
//! import this entire module rather than individual members and reference
//! them as `global::foo` to emphasize their nature and risk.
use std::num;
/// A size capable of representing every interned string in a package.
pub type PkgSymSize = u16;
/// A non-zero equivalent of [`PkgSymSize`].
pub type NonZeroPkgSymSize = num::NonZeroU16;
/// A size capable of representing every interned string in a program.
pub type ProgSymSize = u32;
/// A non-zero equivalent of [`ProgSymSize`].
pub type NonZeroProgSymSize = num::NonZeroU32;
/// A size capable of representing indexes of each individual identifier
/// within a single package.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
///
/// This must be ≥ [`PkgSymSize`].
pub type PkgIdentSize = u16;
/// A size capable of representing every individual identifier and
/// expression within a single package.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
pub type PkgIdentExprSize = u32;
/// A size capable of representing the union of every identifier of every
/// package used by an entire program.
///
/// This must be ≥ [`ProgSymSize`].
pub type ProgIdentSize = u32;
/// A size capable of representing the union of every identifier and every
/// expression of every package used by an entire program.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
///
/// This must be ≥ [`ProgSymSize`].
pub type ProgIdentExprSize = u32;

View File

@ -0,0 +1,420 @@
// Concrete ASG
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Base concrete [`Asg`] implementation.
use super::graph::{Asg, AsgEdge, AsgError, AsgResult, Node, ObjectRef};
use super::ident::IdentKind;
use super::object::{FragmentText, Object};
use crate::sym::Symbol;
use fixedbitset::FixedBitSet;
use petgraph::graph::{
DiGraph, EdgeIndex, Graph, IndexType, Neighbors, NodeIndex,
};
use petgraph::visit::{GraphBase, IntoNeighbors, Visitable};
/// Concrete ASG.
///
/// This implementation is currently based on [`petgraph`].
///
/// Identifiers are cached by name for `O(1)` lookup.
/// Since [`SymbolIndex`][crate::sym::SymbolIndex] is used for this purpose,
/// the index may contain more entries than nodes and may contain gaps.
///
/// For more information,
/// see [`Asg`].
pub struct BaseAsg<'i, Ix: IndexType> {
/// Directed graph on which objects are stored.
graph: DiGraph<Node<'i>, AsgEdge, Ix>,
/// Map of [`SymbolIndex`][crate::sym::SymbolIndex] to node indexes.
///
/// This allows for `O(1)` lookup of identifiers in the graph.
/// Note that,
/// while we store [`NodeIndex`] internally,
/// the public API encapsulates it within an [`ObjectRef`].
///
/// Slots that have not been assigned an identifier hold `empty_node`.
index: Vec<NodeIndex<Ix>>,
/// Empty node indicating that no object exists for a given index.
///
/// This is the first node added to the graph during construction and
/// is used as the fill value whenever `index` grows.
empty_node: NodeIndex<Ix>,
}
impl<'i, Ix: IndexType> BaseAsg<'i, Ix> {
    /// Construct an ASG with pre-allocated storage.
    ///
    /// `objects` sizes both the node storage of the graph and the initial
    ///   identifier index.
    /// `edges` sizes the edge storage;
    ///   it is harder to estimate because edges model many kinds of
    ///   relationships between objects,
    ///     but every object can be expected to have at least one edge to
    ///     another object.
    ///
    /// No plain `new` constructor exists,
    ///   which forces callers to think about capacity up front,
    ///   since graphs can become quite large.
    pub fn with_capacity(objects: usize, edges: usize) -> Self {
        let mut graph = Graph::with_capacity(objects, edges);

        // The first node of the graph is reserved as the placeholder that
        // marks unassigned index slots.
        let empty_node = graph.add_node(Some(Object::Empty));

        let mut index = Vec::with_capacity(objects);
        index.push(empty_node);

        Self {
            graph,
            index,
            empty_node,
        }
    }

    /// Record that the symbol `name` identifies the graph node `node`.
    ///
    /// The index is what makes identifier lookups `O(1)`.
    ///
    /// An indexed identifier is not expected to ever be pointed at a
    ///   different node;
    ///     debug builds assert this and panic if it is violated.
    ///
    /// Panics
    /// ======
    /// Will panic if unable to allocate more space for the index.
    fn index_identifier(&mut self, name: &'i Symbol<'i>, node: NodeIndex<Ix>) {
        let slot: usize = name.index().into();

        if self.index.len() <= slot {
            // Grow to the next power of two that can hold `slot`, filling
            // the new slots with the placeholder node.  If the overflow
            // ever becomes a problem we could fall back to usize max and
            // re-compare before panicking.
            let grown = (slot + 1)
                .checked_next_power_of_two()
                .expect("internal error: cannot allocate space for ASG index");

            self.index.resize(grown, self.empty_node);
        }

        // An occupied slot must never be overwritten.
        debug_assert!(self.index[slot] == self.empty_node);

        self.index[slot] = node;
    }
}
impl<'i, Ix> Asg<'i, Ix> for BaseAsg<'i, Ix>
where
Ix: IndexType,
{
fn declare(
&mut self,
name: &'i Symbol<'i>,
kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>> {
// TODO: src check
if let Some(existing) = self.lookup(name) {
return Ok(existing);
}
let node = self.graph.add_node(Some(Object::Ident(name, kind)));
self.index_identifier(name, node);
Ok(ObjectRef(node))
}
fn declare_extern(
&mut self,
name: &'i Symbol<'i>,
expected_kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>> {
// TODO: resolution!
let node = self
.graph
.add_node(Some(Object::Extern(name, expected_kind)));
self.index_identifier(name, node);
Ok(ObjectRef(node))
}
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
) -> AsgResult<ObjectRef<Ix>> {
// This should _never_ happen as long as you're only using ObjectRef
// values produced by these methods.
let node = self
.graph
.node_weight_mut(identi.0)
.expect("internal error: BaseAsg::set_fragment bogus identi");
// This should also never happen, since we immediately repopulate
// the node below.
let ty = node
.take()
.expect("internal error: BaseAsg::set_fragment missing Node data");
let result = match ty {
Object::Ident(sym, kind) => {
Ok(Object::IdentFragment(sym, kind, text))
}
_ => {
let err = Err(AsgError::BadFragmentDest(format!(
"identifier is not a Object::Ident): {:?}",
ty,
)));
node.replace(ty);
err
}
}?;
node.replace(result);
Ok(identi)
}
#[inline]
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>> {
self.graph.node_weight(index.into().0).map(|node| {
node.as_ref()
.expect("internal error: BaseAsg::get missing Node data")
})
}
#[inline]
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>> {
let i: usize = name.index().into();
self.index
.get(i)
.filter(|ni| ni.index() > 0)
.map(|ni| ObjectRef(*ni))
}
fn add_dep(&mut self, identi: ObjectRef<Ix>, depi: ObjectRef<Ix>) {
self.graph.update_edge(identi.0, depi.0, Default::default());
}
#[inline]
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool {
self.graph.contains_edge(ident.0, dep.0)
}
}
// TODO: encapsulate Petgraph API (N.B. this is untested!)
impl<'i, Ix> Visitable for BaseAsg<'i, Ix>
where
Ix: IndexType,
{
type Map = FixedBitSet;
fn visit_map(&self) -> Self::Map {
self.graph.visit_map()
}
fn reset_map(&self, map: &mut Self::Map) {
self.graph.reset_map(map)
}
}
/// Node and edge identifiers are the index types of the underlying graph.
impl<'i, Ix: IndexType> GraphBase for BaseAsg<'i, Ix> {
    type NodeId = NodeIndex<Ix>;
    type EdgeId = EdgeIndex<Ix>;
}
/// Neighbor iteration is delegated to the underlying graph.
impl<'a, 'i, Ix: IndexType> IntoNeighbors for &'a BaseAsg<'i, Ix> {
    type Neighbors = Neighbors<'a, AsgEdge, Ix>;

    fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
        let graph = &self.graph;
        graph.neighbors(n)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::sym::SymbolIndex;

    /// System under test, using a deliberately small index type.
    type Sut<'i> = BaseAsg<'i, u8>;

    #[test]
    fn create_with_capacity() {
        let node_capacity = 100;
        let edge_capacity = 300;
        let sut = Sut::with_capacity(node_capacity, edge_capacity);

        // breaks encapsulation to introspect; the behavior is
        // transparent to callers (aside from performance
        // characteristics)
        let (nc, ec) = sut.graph.capacity();
        assert!(nc >= node_capacity);
        assert!(ec >= edge_capacity);
        assert!(sut.index.capacity() >= node_capacity);
    }

    #[test]
    fn declare_new_unique_idents() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        // NB: The index ordering is important!  We first use a larger
        // index to create a gap, and then use an index within that gap
        // to ensure that it's not considered an already-defined
        // identifier.
        let syma = Symbol::new_dummy(SymbolIndex::from_u32(5), "syma");
        let symb = Symbol::new_dummy(SymbolIndex::from_u32(1), "symab");

        let nodea = sut.declare(&syma, IdentKind::Meta)?;
        let nodeb = sut.declare(&symb, IdentKind::Worksheet)?;

        assert_ne!(nodea, nodeb);

        assert_eq!(
            Some(&Object::Ident(&syma, IdentKind::Meta)),
            sut.get(nodea),
        );

        assert_eq!(
            Some(&Object::Ident(&symb, IdentKind::Worksheet)),
            sut.get(nodeb),
        );

        Ok(())
    }

    #[test]
    fn lookup_by_symbol() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "lookup");
        let node = sut.declare(&sym, IdentKind::Meta)?;

        assert_eq!(Some(node), sut.lookup(&sym));

        Ok(())
    }

    #[test]
    fn declare_extern() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "extern");
        let node = sut.declare_extern(&sym, IdentKind::Meta)?;

        assert_eq!(Some(&Object::Extern(&sym, IdentKind::Meta)), sut.get(node),);

        Ok(())
    }

    // TODO: incompatible
    #[test]
    fn declare_returns_existing_compatible() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "symdup");
        let node = sut.declare(&sym, IdentKind::Meta)?;

        // Same declaration a second time
        let redeclare = sut.declare(&sym, IdentKind::Meta)?;

        assert_eq!(node, redeclare);

        Ok(())
    }

    #[test]
    fn add_fragment_to_ident() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "tofrag");
        let node = sut.declare(&sym, IdentKind::Meta)?;

        let fragment = "a fragment".to_string();
        let node_with_frag = sut.set_fragment(node, fragment.clone())?;

        // Attaching a fragment should _replace_ the node, not create a
        // new one
        assert_eq!(
            node, node_with_frag,
            "fragment node does not match original node"
        );

        assert_eq!(
            Some(&Object::IdentFragment(&sym, IdentKind::Meta, fragment)),
            sut.get(node)
        );

        Ok(())
    }

    #[test]
    fn add_fragment_to_fragment_fails() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let node = sut.declare(&sym, IdentKind::Meta)?;
        let fragment = "orig fragment".to_string();

        sut.set_fragment(node, fragment.clone())?;

        // Since it's already a fragment, this should fail.
        let err = sut
            .set_fragment(node, "replacement".to_string())
            .expect_err("Expected failure");

        match err {
            AsgError::BadFragmentDest(str) if str.contains("sym") => (),
            _ => panic!("expected AsgError::BadFragmentDest: {:?}", err),
        }

        // Make sure we didn't leave the node in an inconsistent state
        assert_eq!(
            Some(&Object::IdentFragment(&sym, IdentKind::Meta, fragment)),
            sut.get(node)
        );

        Ok(())
    }

    #[test]
    fn add_ident_dep_to_ident() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        // The two symbols must have distinct indexes; otherwise the
        // second declaration resolves to the first identifier and this
        // would only ever test a self-edge.
        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");

        let symnode = sut.declare(&sym, IdentKind::Meta)?;
        let depnode = sut.declare(&dep, IdentKind::Meta)?;

        assert_ne!(symnode, depnode);

        sut.add_dep(symnode, depnode);
        assert!(sut.has_dep(symnode, depnode));

        // sanity check if we re-add a dep
        sut.add_dep(symnode, depnode);
        assert!(sut.has_dep(symnode, depnode));

        Ok(())
    }
}

View File

@ -0,0 +1,219 @@
// Graph abstraction
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Abstract graph as the basis for concrete ASGs.
use super::ident::IdentKind;
use super::object::{FragmentText, Object};
use crate::sym::Symbol;
use petgraph::graph::{IndexType, NodeIndex};
use std::result::Result;
/// An abstract semantic graph of [objects][Object].
///
/// This IR focuses on the definition and manipulation of objects and their
/// dependencies.
/// See [`Object`] for a summary of valid object state transitions.
///
/// Objects are never deleted from the graph,
/// so [`ObjectRef`]s will remain valid for the lifetime of the ASG.
///
/// For more information,
/// see the [module-level documentation][self].
pub trait Asg<'i, Ix: IndexType> {
/// Declare a concrete identifier.
///
/// An identifier declaration is similar to a declaration in a header
/// file in a language like C,
/// describing the structure of the identifier.
/// Once declared,
/// this information cannot be changed.
///
/// Identifiers are uniquely identified by a [`Symbol`] `name`.
/// If an identifier of the same `name` already exists,
/// then the provided declaration is compared against the existing
/// declaration---should
/// they be incompatible,
/// then the operation will fail;
/// otherwise,
/// the existing identifier will be returned.
/// A successful declaration will add an [`Object::Ident`] to the graph
/// and return an [`ObjectRef`] reference.
///
/// If an existing identifier is an extern (see
/// [`Asg::declare_extern`]),
/// then the declaration will be compared just the same,
/// but the identifier will be converted from an
/// [`Object::Extern`] into an [`Object::Ident`].
/// When this happens,
/// the extern is said to be _resolved_.
fn declare(
&mut self,
name: &'i Symbol<'i>,
kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>>;
/// Declare an abstract identifier.
///
/// An _extern_ declaration declares an identifier the same as
/// [`Asg::declare`],
/// but instead as [`Object::Extern`].
/// Externs are identifiers that are expected to be defined somewhere
/// else ("externally"),
/// and are resolved at [link-time][crate::ld].
///
/// If a concrete identifier has already been declared (see
/// [`Asg::declare`]),
/// then the declarations will be compared and,
/// if compatible,
/// the identifier will be immediately _resolved_ and the object
/// on the graph will not be altered.
/// Resolution will otherwise fail in error.
fn declare_extern(
&mut self,
name: &'i Symbol<'i>,
expected_kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>>;
/// Set the fragment associated with a concrete identifier.
///
/// This changes the type of the identifier from [`Object::Ident`]
/// into [`Object::IdentFragment`],
/// which is intended for use by the [linker][crate::ld].
///
/// If the identifier is not in a state that permits receiving a
/// fragment,
/// the operation fails with [`AsgError::BadFragmentDest`].
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
) -> AsgResult<ObjectRef<Ix>>;
/// Retrieve an object from the graph by [`ObjectRef`].
///
/// Since an [`ObjectRef`] should only be produced by an [`Asg`],
/// and since objects are never deleted from the graph,
/// this should never fail so long as references are not shared
/// between multiple graphs.
/// It is nevertheless wrapped in an [`Option`] just in case.
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>>;
/// Attempt to retrieve an identifier from the graph by name.
///
/// Since only identifiers carry a name,
/// this method cannot be used to retrieve all possible objects on the
/// graph---for
/// that, see [`Asg::get`].
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>>;
/// Declare that `dep` is a dependency of `ident`.
///
/// An object must be declared as a dependency if its value must be
/// computed before computing the value of `ident`.
/// The [linker][crate::ld] will ensure this ordering.
fn add_dep(&mut self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>);
/// Check whether `dep` is a dependency of `ident`.
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool;
}
/// A [`Result`] with a hard-coded [`AsgError`] error type.
///
/// This is the result of every [`Asg`] operation that could potentially
/// fail in error.
pub type AsgResult<T> = Result<T, AsgError>;
/// Reference to an [object][Object] stored within the [`Asg`].
///
/// Object references are integer offsets,
/// not pointers.
/// This is a thin wrapper around a [`NodeIndex`],
/// and conversions to and from [`NodeIndex`] are provided via [`From`].
/// See the [module-level documentation][self] for more information.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub struct ObjectRef<Ix>(pub NodeIndex<Ix>);
impl<Ix> From<NodeIndex<Ix>> for ObjectRef<Ix>
where
Ix: IndexType,
{
fn from(index: NodeIndex<Ix>) -> Self {
Self(index)
}
}
impl<Ix> From<ObjectRef<Ix>> for NodeIndex<Ix>
where
Ix: IndexType,
{
fn from(objref: ObjectRef<Ix>) -> Self {
objref.0
}
}
/// Edge weight type.
///
/// There is currently no data stored on edges ("edge weights").
pub type AsgEdge = ();
/// Each node of the graph represents an object.
///
/// Enclosed in an [`Option`] to permit moving owned values out of the
/// graph.
pub type Node<'i> = Option<Object<'i>>;
/// Failure of an ASG operation.
///
/// Errors hold only owned data:
///   carrying a [`Symbol`] would force a lifetime onto this type,
///   which is very inconvenient when chaining [`Result`].
/// The caller already knows which values were problematic.
#[derive(Debug, PartialEq)]
pub enum AsgError {
    /// A fragment was provided for an identifier whose current state
    ///   does not permit receiving one.
    ///
    /// See [`Asg::set_fragment`] for more information.
    BadFragmentDest(String),
}

impl std::fmt::Display for AsgError {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // There is only a single variant, so it can be destructured
        // directly.
        let Self::BadFragmentDest(detail) = self;

        write!(fmt, "bad fragment destination: {}", detail)
    }
}

// There is never an underlying cause, which is precisely what the default
// `source` implementation reports.
impl std::error::Error for AsgError {}
#[cfg(test)]
mod test {
    use super::*;

    mod objref {
        use super::*;

        /// An index must survive a round trip through `ObjectRef`.
        #[test]
        fn to_from_nodeindex() {
            let index = NodeIndex::<u32>::new(5);

            let objref: ObjectRef<u32> = index.into();
            assert_eq!(index, objref.0);

            let roundtrip: NodeIndex<u32> = NodeIndex::from(objref);
            assert_eq!(index, roundtrip);
        }
    }
}

View File

@ -0,0 +1,333 @@
// ASG identifiers
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Identifiers (a type of [object][super::object::Object]).
use crate::ir::legacyir::{SymAttrs, SymDtype, SymType};
use std::convert::TryFrom;
/// Types of identifiers.
///
/// Here, the term _calculation_ refers to a composable expression that
/// produces a numeric result.
///
/// These are derived from [`legacyir::SymType`][crate::ir::legacyir::SymType]
/// and will be generalized in the future.
#[derive(Debug, PartialEq, Eq)]
pub enum IdentKind {
/// Classification generator.
///
/// This has the same number of dimensions as its highest-dimension
/// predicate.
/// Every [`Class`][IdentKind::Class] has an associated generator.
Cgen(Dim),
/// Boolean classification.
///
/// This is an artifact of an ancient system.
/// The dimensions here refer to the dimensions of the associated
/// [`Cgen`][IdentKind::Cgen].
Class(Dim),
/// Constant value.
Const(Dim, DataType),
/// Re-usable encapsulated expression.
///
/// Functions are nothing more than expressions that can be re-used with
/// dynamic values at runtime.
/// See also [`Lparam`][IdentKind::Lparam].
Func(Dim, DataType),
/// Generating calculation.
///
/// Generators are associated with iterative expressions,
/// such as sums and products.
/// They always have a parent [`Rate`][IdentKind::Rate].
Gen(Dim, DataType),
/// Local (non-global) parameter.
///
/// Local parameters are lexically scoped to their parent expression:
/// - [`Func`][IdentKind::Func], where there exists one per defined
/// function parameter; and
/// - `let` expression bindings.
///
/// This is not to be confused with the global
/// [`Param`][IdentKind::Param].
Lparam(Dim, DataType),
/// Global parameter.
///
/// These parameters serve as inputs to the system.
/// Input values are bound using [`Map`][IdentKind::Map].
Param(Dim, DataType),
/// Scalar result of a named calculation.
///
/// The verb "rate" is historical,
/// since TAME was developed for insurance rating systems.
/// This represents a named expression that yields a scalar value.
///
/// This serves as a parent to [`Gen`][IdentKind::Gen].
Rate(DataType),
/// Template definition.
///
/// A template is used only at expansion-time and,
/// unlike most other things in the system,
/// has no runtime value.
Tpl,
/// User-defined data type.
///
/// The only types typically defined are enums and unions of enums.
/// The type itself has no runtime value,
/// but each of the enum variants has an associated value of type
/// [`DataType`].
Type(DataType),
/// Input map head (meta identifier generated by compiler for each input
/// map).
MapHead,
/// Input field→param mapping.
///
/// These may only map to [`Param`][IdentKind::Param].
/// The source data is arbitrary and provided at runtime.
Map,
/// Input map tail (meta identifier generated by compiler for each input
/// map).
MapTail,
/// Return map head (meta identifier generated by compiler for each
/// return map).
RetMapHead,
/// Return param→field mapping.
///
/// Return mappings export data to calling systems.
/// They can map back any globally defined numeric expression.
RetMap,
/// Return map tail (meta identifier generated by compiler for each
/// return map).
RetMapTail,
/// Arbitrary metadata.
///
/// This permits the definition of static key/value data that is
/// compiled into the final executable.
Meta,
/// Rating worksheet (generated by compiler for worksheet packages).
///
/// The worksheet exposes intermediate calculation values in a much more
/// concise form than that of the Summary Page.
Worksheet,
}
impl<'i> TryFrom<SymAttrs<'i>> for IdentKind {
type Error = &'static str;
/// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
///
/// Certain [`IdentKind`] require that certain attributes be present,
/// otherwise the conversion will fail.
fn try_from(attrs: SymAttrs<'i>) -> Result<Self, Self::Error> {
let ty = attrs.ty.as_ref().ok_or("missing symbol type")?;
macro_rules! ident {
($to:expr) => {
Ok($to)
};
($to:expr, dim) => {
Ok($to(Dim(attrs.dim.ok_or("missing dim")?)))
};
($to:expr, dtype) => {
Ok($to(attrs.dtype.ok_or("missing dtype")?))
};
($to:expr, dim, dtype) => {
Ok($to(
Dim(attrs.dim.ok_or("missing dim")?),
attrs.dtype.ok_or("missing dtype")?,
))
};
}
match ty {
SymType::Cgen => ident!(Self::Cgen, dim),
SymType::Class => ident!(Self::Class, dim),
SymType::Const => ident!(Self::Const, dim, dtype),
SymType::Func => ident!(Self::Func, dim, dtype),
SymType::Gen => ident!(Self::Gen, dim, dtype),
SymType::Lparam => ident!(IdentKind::Lparam, dim, dtype),
SymType::Param => ident!(IdentKind::Param, dim, dtype),
SymType::Rate => ident!(IdentKind::Rate, dtype),
SymType::Tpl => ident!(IdentKind::Tpl),
SymType::Type => ident!(IdentKind::Type, dtype),
SymType::MapHead => ident!(IdentKind::MapHead),
SymType::Map => ident!(IdentKind::Map),
SymType::MapTail => ident!(IdentKind::MapTail),
SymType::RetMapHead => ident!(IdentKind::RetMapHead),
SymType::RetMap => ident!(IdentKind::RetMap),
SymType::RetMapTail => ident!(IdentKind::RetMapTail),
SymType::Meta => ident!(IdentKind::Meta),
SymType::Worksheet => ident!(IdentKind::Worksheet),
}
}
}
/// Identifier dimensions.
///
/// This determines the number of subscripts needed to access a scalar
/// value.
/// A value of `0` indicates a scalar;
/// a value of `1` indicates a vector;
/// a value of `2` indicates a matrix;
/// and a value of `n` indicates a multi-dimensional array of
/// depth `n`.
///
/// The inner value is private to this module.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct Dim(u8);
/// Underlying datatype of identifier.
///
/// This is currently an alias of the legacy IR's [`SymDtype`].
pub type DataType = SymDtype;
#[cfg(test)]
mod test {
use super::*;
use std::convert::TryInto;
// Generate a test named `$name` asserting that `SymAttrs` with symbol type
// `$src` converts into the identifier kind `$dest`.  The optional trailing
// `dim` and/or `dtype` markers select which attributes `$dest` requires;
// for each required attribute the test also asserts that the conversion
// fails when that attribute is absent.
macro_rules! test_kind {
// Kind requiring no attributes beyond the symbol type.
($name:ident, $src:expr => $dest:expr) => {
#[test]
fn $name() {
assert_eq!(
Ok($dest),
SymAttrs {
ty: Some($src),
..Default::default()
}
.try_into()
);
}
};
// Kind requiring `dim`.
($name:ident, $src:expr => $dest:expr, dim) => {
#[test]
fn $name() {
let dim = 1;
assert_eq!(
Ok($dest(Dim(dim))),
SymAttrs {
ty: Some($src),
dim: Some(dim),
..Default::default()
}
.try_into()
);
// no dim
IdentKind::try_from(SymAttrs {
ty: Some($src),
..Default::default()
})
.expect_err("must fail when missing dim");
}
};
// Kind requiring `dtype`.
($name:ident, $src:expr => $dest:expr, dtype) => {
#[test]
fn $name() {
let dtype = SymDtype::Float;
assert_eq!(
Ok($dest(dtype)),
SymAttrs {
ty: Some($src),
dtype: Some(dtype),
..Default::default()
}
.try_into()
);
// no dtype
IdentKind::try_from(SymAttrs {
ty: Some($src),
..Default::default()
})
.expect_err("must fail when missing dtype");
}
};
// Kind requiring both `dim` and `dtype`; each is omitted in turn to
// check that either one missing is an error.
($name:ident, $src:expr => $dest:expr, dim, dtype) => {
#[test]
fn $name() {
let dim = 1;
let dtype = SymDtype::Float;
assert_eq!(
Ok($dest(Dim(dim), dtype)),
SymAttrs {
ty: Some($src),
dim: Some(dim),
dtype: Some(dtype),
..Default::default()
}
.try_into()
);
// no dim
IdentKind::try_from(SymAttrs {
ty: Some($src),
dtype: Some(dtype),
..Default::default()
})
.expect_err("must fail when missing dim");
// no dtype
IdentKind::try_from(SymAttrs {
ty: Some($src),
dim: Some(dim),
..Default::default()
})
.expect_err("must fail when missing dtype");
}
};
}
// One test per symbol type.  `const` and `type` are keywords, so those test
// names are written as raw identifiers.
test_kind!(cgen, SymType::Cgen => IdentKind::Cgen, dim);
test_kind!(class, SymType::Class => IdentKind::Class, dim);
test_kind!(r#const, SymType::Const => IdentKind::Const, dim, dtype);
test_kind!(func, SymType::Func => IdentKind::Func, dim, dtype);
test_kind!(gen, SymType::Gen => IdentKind::Gen, dim, dtype);
test_kind!(lparam, SymType::Lparam => IdentKind::Lparam, dim, dtype);
test_kind!(param, SymType::Param => IdentKind::Param, dim, dtype);
test_kind!(rate, SymType::Rate => IdentKind::Rate, dtype);
test_kind!(tpl, SymType::Tpl => IdentKind::Tpl);
test_kind!(r#type, SymType::Type => IdentKind::Type, dtype);
test_kind!(maphead, SymType::MapHead => IdentKind::MapHead);
test_kind!(map, SymType::Map => IdentKind::Map);
test_kind!(maptail, SymType::MapTail => IdentKind::MapTail);
test_kind!(retmaphead, SymType::RetMapHead => IdentKind::RetMapHead);
test_kind!(retmap, SymType::RetMap => IdentKind::RetMap);
test_kind!(retmaptail, SymType::RetMapTail => IdentKind::RetMapTail);
test_kind!(meta, SymType::Meta => IdentKind::Meta);
test_kind!(worksheet, SymType::Worksheet => IdentKind::Worksheet);
}

View File

@ -0,0 +1,144 @@
// Abstract semantic graph (ASG) intermediate representation (IR)
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Abstract semantic graph.
//!
//! The [abstract semantic graph][asg] (ASG) is an IR representing the
//! relationship between objects using a directed [graph][].
//! An _object_ is an identifier or expression.
//!
//! Since TAME is a declarative language,
//! the ASG does not represent control flow;
//! instead, it represents the relationship between objects and their
//! dependencies.
//! Control flow is determined solely by the [linker][crate::ld] based on
//! these dependencies.
//!
//! See [`crate::global`] for available index sizes depending on context.
//! For example,
//! a linker may choose to use [`crate::global::ProgIdentSize`].
//!
//!
//! Graph Structure
//! ===============
//! Each node (vertex) in the graph represents an [object][Object],
//! such as an identifier or an expression.
//! Each directed edge `(A->B)` represents that `A` depends upon `B`.
//!
//! Graphs may contain cycles for recursive functions—that is,
//! TAME's ASG is _not_ a DAG.
//! Mutually recursive functions are therefore represented as
//! [strongly connected components][scc].
//!
//! [asg]: https://en.wikipedia.org/wiki/Abstract_semantic_graph
//! [graph]: https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)
//! [scc]: https://en.wikipedia.org/wiki/Strongly_connected_component
//!
//! Each object may have a number of valid states;
//! see [`Object`] for valid object states and transitions.
//!
//!
//! How To Use
//! ==========
//! A suitable concrete [`Asg`] implementation is provided by
//! [`DefaultAsg`].
//!
//! ```
//! use tamer::global;
//! use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object};
//! use tamer::sym::{Interner, DefaultInterner};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Be sure to choose size and initial capacities appropriate for your
//! // situation.
//! let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
//!
//! let interner = DefaultInterner::new();
//! let identa_sym = interner.intern("identa");
//! let identb_sym = interner.intern("identb");
//!
//! let identa = asg.declare(identa_sym, IdentKind::Meta)?;
//! let identb = asg.declare_extern(identb_sym, IdentKind::Meta)?;
//!
//! assert_eq!(
//! Some(&Object::Extern(identb_sym, IdentKind::Meta)),
//! asg.get(identb),
//! );
//!
//! // Dependencies can be declared even if an identifier is
//! // unresolved. This declares `(identa)->(identb)`.
//! asg.add_dep(identa, identb);
//! assert!(asg.has_dep(identa, identb));
//!
//! // TODO: extern resolution
//!
//! // Identifiers are indexed by symbol name.
//! assert_eq!(Some(identa), asg.lookup(identa_sym));
//! #
//! # Ok(()) // main
//! # }
//! ```
//!
//! Fragments
//! ---------
//! A compiled fragment can be attached to any resolved identifier (see
//! [`Object::Ident`]) using [`Asg::set_fragment`].
//! Doing so changes the state of the identifier to [`Object::IdentFragment`],
//! and it is an error to attempt to overwrite that fragment once it is
//! set.
//!
//! ```
//! # use tamer::global;
//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText};
//! # use tamer::sym::{Interner, DefaultInterner};
//! #
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
//! # let interner = DefaultInterner::new();
//! #
//! // Fragments can be attached to resolved identifiers.
//! let ident = asg.declare(interner.intern("ident"), IdentKind::Meta)?;
//! asg.set_fragment(ident, FragmentText::from("test fragment"))?;
//!
//! assert_eq!(
//! Some(&Object::IdentFragment(
//! interner.intern("ident"),
//! IdentKind::Meta,
//! FragmentText::from("test fragment"),
//! )),
//! asg.get(ident),
//! );
//!
//! // But overwriting will fail
//! let bad = asg.set_fragment(ident, FragmentText::from("overwrite"));
//! assert!(bad.is_err());
//! #
//! # Ok(()) // main
//! # }
//! ```
mod base;
mod graph;
mod ident;
mod object;
pub use graph::{Asg, AsgResult, ObjectRef};
pub use ident::IdentKind;
pub use object::{FragmentText, Object};
/// Default concrete ASG implementation.
///
/// This is an alias for `base::BaseAsg`;
/// the `base` module itself is private,
/// so this alias is how the concrete type is named from outside.
/// The lifetime `'i` and index type `Ix` are passed through unchanged.
pub type DefaultAsg<'i, Ix> = base::BaseAsg<'i, Ix>;

View File

@ -0,0 +1,77 @@
// Objects represented on ASG
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Objects represented by the ASG.
//!
//! _This is a private module.
//! See [`super`] for available exports._
use super::ident::IdentKind;
use crate::sym::Symbol;
/// Type of object.
///
/// These types represent object states:
///
/// ```text
/// ((Empty)) -> (Extern) -> ((Ident)) -> ((IdentFragment)).
///     \                        ^
///      \                      /
///       `--------------------`
/// ```
///
/// The [`Empty`][Object::Empty] state is never directly accessible
/// through [`Asg`][super::Asg]'s public API,
/// as it represents the absence of an object at that node within the
/// ASG.
///
/// TODO: Source location (span; see Rustc).
#[derive(Debug, PartialEq)]
pub enum Object<'i> {
/// A resolved identifier.
///
/// This represents an identifier that has been declared with certain
/// type information.
Ident(&'i Symbol<'i>, IdentKind),
/// An identifier that has not yet been resolved.
///
/// Externs are upgraded to [`Object::Ident`] once an identifier of
/// the same name is loaded.
/// It is an error if the loaded identifier does not have a compatible
/// [`IdentKind`].
Extern(&'i Symbol<'i>, IdentKind),
/// Identifier with associated text.
///
/// Code fragments are portions of the target language associated with
/// an identifier.
/// They are produced by the compiler and it is the job of the
/// [linker][crate::ld] to put them into the correct order for the
/// final executable.
IdentFragment(&'i Symbol<'i>, IdentKind, FragmentText),
/// The empty node (default value for indexer).
///
/// This is not a valid state accessible via [`Asg`][super::Asg].
Empty,
}
/// Compiled fragment for identifier.
///
/// This represents the text associated with an identifier.
/// It is currently a plain alias for [`String`],
/// so it adds no validation or structure of its own.
pub type FragmentText = String;

View File

@ -123,7 +123,8 @@ pub enum SymType {
Func,
/// Generator (from `lv:rate/@generates`).
Gen,
/// Local function parameter (from `lv:function/lv:param/@name`).
/// Local function parameter (from `lv:function/lv:param/@name`) or let
/// binding (from `lv:let/lv:values/lv:value/@name`).
Lparam,
/// Global parameter (from `lv:param/@name`).
Param,

View File

@ -18,7 +18,7 @@
//! Intermediate representations for TAME programs.
//!
//! [Intermediate representations][ir] (IRs) are data structures used to
//! represent source code in a manner most suitable for a particular phase
//! represent source data in a manner most suitable for a particular phase
//! of compilation.
//! A single IR may be used by multiple compilation phases,
//! or by multiple systems (e.g. various compilers or [linkers][]).
@ -26,16 +26,37 @@
//! [ir]: https://en.wikipedia.org/wiki/Intermediate_representation
//! [linkers]: crate::ld
//!
//! Each IR is responsible for raising lower-level IRs or source formats.
//!
//! Summary of IRs
//! --------------
//! Implicit AST
//! ============
//! Each input language begins as an [abstract syntax tree][ast] (AST),
//! produced by the parser.
//! For TAME languages that are XML-based,
//! the production of the AST is handled by [`quick_xml`],
//! and is effectively the same as the source XML.
//! There is no explicit data structure to represent the AST of XML
//! sources.
//!
//! [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
//!
//!
//! Summary of IRs
//! ==============
//! There are currently two IRs:
//!
//! 1. **[Legacy IR](legacyir)** corresponds very closely to the structure
//! of [`xmlo` object files](super::obj::xmlo).
//! It contains a lot of cruft and will be replaced in the future with
//! a more suitable IR.
//! This stores very limited context for the information it provides,
//! so it must quickly translate it to a higher-level IR for further
//! processing before context is lost.
//! 2. The **[Abstract Semantic Graph (ASG)](asg)** is created from
//! lower-level IRs.
//! It stores relationships between identifiers and expressions within
//! a graph data structure,
//! and is capable of representing entire programs composed of many
//! different packages.
pub mod asg;
pub mod legacyir;

View File

@ -18,204 +18,26 @@
//! **This is a poorly-written proof of concept; do not use!** It has been
//! banished to its own file to try to make that more clear.
use crate::obj::xmlo::reader::{XmloEvent, XmloReader};
use crate::global;
use crate::ir::asg::IdentKind;
use crate::ir::asg::{Asg, DefaultAsg, Object, ObjectRef};
use crate::obj::xmlo::reader::{XmloError, XmloEvent, XmloReader};
use crate::sym::{DefaultInterner, Interner};
use fixedbitset::FixedBitSet;
use petgraph::graph::{DiGraph, EdgeIndex, Neighbors, NodeIndex};
use petgraph::visit::{DfsPostOrder, GraphBase, IntoNeighbors, Visitable};
use std::collections::hash_map::{Entry, Iter};
use petgraph::visit::DfsPostOrder;
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::error::Error;
use std::fs;
use std::io::BufReader;
use std::ops::{Deref, Index};
use std::rc::Rc;
// The term "sym" is used throughout because it's easier to search for that
// in source code than "symbol", which is a generic term with many different
// meanings.
// if mutability is needed:
//#[derive(Debug)]
//struct SymRecord {
// data: SymData,
//
// // the idea is to keep the index encapsulated so that nothing else can
// // ever hold a reference to it, ensuring that it's freed when the node
// // is removed
// index: Rc<RefCell<Option<NodeIndex>>>,
//}
/// Data held for a symbol; at present this is only its name.
#[derive(Debug)]
struct SymData {
// Symbol name as a reference-counted string slice.
name: Rc<str>,
}
/// Node weight stored in the dependency graph.
type DepGraphNode = SymEntry;
/// Edge weight for the dependency graph; edges carry no data.
type DepGraphEdge = ();
/// Dependency graph of symbols, paired with an index from symbol name to
/// the node that represents it.
struct DepGraph {
// Directed graph whose nodes are `SymEntry` values and whose edges
// carry no data.
graph: DiGraph<DepGraphNode, DepGraphEdge>,
// serves as both a string internment system and graph indexer
index: HashMap<Rc<str>, SymRef>,
// if removals are permitted:
//index: HashMap<Rc<str>, Weak<RefCell<Option<NodeIndex>>>>,
}
// This encapsulates the underlying Graph to enforce certain
// assumptions. For example, we do not permit removing nodes because that
// would invalidate the NodeIndex reference in the index, which would then
// require workarounds like the commented-out code above and below.
//
// While Petgraph's use of indexes to represent graph and edge references
// makes it easy to bypass the borrow checker, it does just that---it's no
// different than a pointer reference (albeit guaranteed to safely reference
// a node rather than an arbitrary memory location) that can change out from
// under you at any moment. As such, much of the planning that went into
// this was determining how to best mitigate that.
//
// The linker has certain needs that may differ as the compiler evolves, so
// it may be desirable to permit deletions in the future. In the meantime,
// if a node needs to be deleted, we can simply remove all edges from it and
// possibly mark it in a way that states it was removed.
//
// This graph uses a separate map to serve a dual role: a string internment
// system and an indexer by symbol name. This will have to evolve in the
// future as the graph ends up containing more stuff.
//
// This is currently called a dependency graph, since that's what we're
// using it for, but in the future the compiler will also use it as an IR,
// so this will likely be renamed.
impl DepGraph {
    /// Create an empty graph with an empty symbol index.
    fn new() -> Self {
        Self {
            // TODO: with_capacity
            graph: DiGraph::new(),
            index: HashMap::new(),
        }
    }

    /// Return the node for `name`, creating it if it is not yet indexed.
    ///
    /// A newly created node is a [`SymEntry::MissingSym`] that shares its
    /// name allocation with the index key.
    fn declare(&mut self, name: &str) -> SymRef {
        match self.index.entry(name.into()) {
            Entry::Occupied(o) => *o.get(),
            Entry::Vacant(v) => {
                // Reuse the key's `Rc` rather than allocating the name
                // a second time.
                let entry = SymEntry::MissingSym {
                    name: Rc::clone(v.key()),
                };
                let index = SymRef(self.graph.add_node(entry));

                v.insert(index);
                index
            }
        }
    }

    /// Record that `symbol` depends on `dep` as an edge `symbol -> dep`.
    ///
    /// Will not duplicate dependencies if they already exist.
    fn declare_dep(&mut self, symbol: SymRef, dep: SymRef) {
        self.graph.update_edge(*symbol, *dep, ());
    }

    /// Look up the node for `name`, if one has been declared.
    fn lookup(&self, name: &str) -> Option<SymRef> {
        // `Rc<str>` borrows as `str`, so the key can be queried directly
        // without any conversion.
        self.index.get(name).copied()
    }

    /// Iterate over every `(name, node)` pair in the symbol index.
    fn index_iter(&self) -> Iter<Rc<str>, SymRef> {
        self.index.iter()
    }

    // POC when removals were permitted:
    //fn add_symbol(&mut self, sym: SymData) -> NodeIndex {
    //    let name = Rc::clone(&sym.name);
    //    let record = SymRecord { data: sym, index: Rc::new(RefCell::new(None)) };
    //    let index = self.graph.add_node(record);
    //    let index = Rc::downgrade(&self.graph[index].index);
    //    self.graph[index].index.replace(Some(index));
    //    self.index.insert(name, index);
    //    index
    //}
}
// Identify nodes and edges using petgraph's own index types.
impl GraphBase for DepGraph {
type NodeId = NodeIndex;
type EdgeId = EdgeIndex;
}
impl Visitable for DepGraph {
type Map = FixedBitSet;
fn visit_map(&self) -> Self::Map {
self.graph.visit_map()
}
fn reset_map(&self, map: &mut Self::Map) {
self.graph.reset_map(map)
}
}
impl<'a> IntoNeighbors for &'a DepGraph {
type Neighbors = Neighbors<'a, DepGraphEdge>;
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
self.graph.neighbors(n)
}
}
impl Index<SymRef> for DepGraph {
type Output = DepGraphNode;
fn index(&self, index: SymRef) -> &Self::Output {
&self.graph[*index]
}
}
// TODO: we may not to allow this; using SymRef could be a means to
// guarantee that a lookup has occurred and that it actually exists. We
// don't need this if we set NodeId = SymRef in GraphBase, but that requires
// implementing other traits as well.
impl Index<NodeIndex> for DepGraph {
type Output = DepGraphNode;
fn index(&self, index: NodeIndex) -> &Self::Output {
&self.graph[index]
}
}
/// Reference to a node in a `DepGraph`, wrapping a petgraph `NodeIndex`.
#[derive(Debug, Clone, Copy, PartialEq)]
struct SymRef(NodeIndex);
impl From<SymRef> for NodeIndex {
    /// Unwrap a `SymRef` into the graph index it holds.
    fn from(symref: SymRef) -> Self {
        symref.0
    }
}
impl From<NodeIndex> for SymRef {
fn from(index: NodeIndex) -> Self {
Self(index)
}
}
impl Deref for SymRef {
type Target = NodeIndex;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Node stored in a `DepGraph`.
#[derive(Debug, PartialEq)]
enum SymEntry {
// A symbol known only by name; `DepGraph::declare` creates one of these
// for any name not already present in its index.
MissingSym { name: Rc<str> },
}
/// ASG type used by this linker, indexed using `global::ProgIdentSize`.
type LinkerAsg<'i> = DefaultAsg<'i, global::ProgIdentSize>;
/// Object reference type matching the index size of `LinkerAsg`.
type LinkerObjectRef = ObjectRef<global::ProgIdentSize>;
pub fn main() -> Result<(), Box<dyn Error>> {
let mut pkgs_seen = HashSet::<String>::new();
let mut fragments = HashMap::<&str, String>::new();
let mut depgraph = DepGraph::new();
let mut depgraph = LinkerAsg::with_capacity(65536, 65536);
let mut roots = Vec::new();
let interner = DefaultInterner::new();
let package_path = std::env::args().nth(1).expect("Missing argument");
@ -229,6 +51,7 @@ pub fn main() -> Result<(), Box<dyn Error>> {
&mut fragments,
&mut depgraph,
&interner,
&mut roots,
)?;
// println!(
@ -241,7 +64,14 @@ pub fn main() -> Result<(), Box<dyn Error>> {
// .collect::<Vec<_>>()
// );
let sorted = sort_deps(&depgraph);
roots.extend(
vec!["___yield", "___worksheet"]
.iter()
.map(|name| interner.intern(name))
.filter_map(|sym| depgraph.lookup(sym)),
);
let sorted = sort_deps(&depgraph, &roots);
println!("Sorted ({}): {:?}", sorted.len(), sorted);
@ -252,8 +82,9 @@ fn load_xmlo<'a, 'i, I: Interner<'i>>(
path_str: &'a str,
pkgs_seen: &mut HashSet<String>,
fragments: &mut HashMap<&'i str, String>,
depgraph: &mut DepGraph,
depgraph: &mut LinkerAsg<'i>,
interner: &'i I,
roots: &mut Vec<LinkerObjectRef>,
) -> Result<(), Box<dyn Error>> {
let path = fs::canonicalize(path_str)?;
let path_str = path.to_str().unwrap();
@ -269,41 +100,97 @@ fn load_xmlo<'a, 'i, I: Interner<'i>>(
let file = fs::File::open(&path)?;
let reader = BufReader::new(file);
let mut xmlo = XmloReader::new(reader, interner);
let mut elig = None;
loop {
match xmlo.read_event()? {
XmloEvent::Package(_) => {}
match xmlo.read_event() {
Ok(XmloEvent::Package(attrs)) => {
elig = attrs.elig;
}
XmloEvent::SymDeps(sym, deps) => {
Ok(XmloEvent::SymDeps(sym, deps)) => {
// TODO: API needs to expose whether a symbol is already
// known so that we can warn on them
//
// note: using from_utf8_unchecked here did _not_ improve
// performance
let sym_node = depgraph.declare(sym);
let sym_node = depgraph
.lookup(sym)
.expect(&format!("missing sym for deps: `{}`", sym));
for dep_sym in deps {
let dep_node = depgraph.declare(dep_sym);
depgraph.declare_dep(sym_node, dep_node);
let dep_node = depgraph.lookup(dep_sym).expect(&format!(
"missing dep sym for deps: `{}` -> `{}`",
sym, dep_sym
));
depgraph.add_dep(sym_node, dep_node);
}
}
XmloEvent::SymDecl(_sym, attrs) => {
Ok(XmloEvent::SymDecl(sym, attrs)) => {
if let Some(sym_src) = attrs.src {
found.insert(sym_src);
}
let owned = attrs.src.is_none();
let kind = attrs.try_into().map_err(|err| {
format!("sym `{}` attrs error: {}", sym, err)
});
// TODO: should probably track these down in the XSLT linker...
match kind {
Ok(kindval) => {
// TODO: inefficient
let link_root = owned
&& (kindval == IdentKind::Meta
|| sym.starts_with(":map:")
|| sym.starts_with(":retmap:"));
let node = depgraph.declare(sym, kindval)?;
if link_root {
roots.push(node);
}
}
Err(e) => println!("{:?}; skipping...", e),
};
}
XmloEvent::Fragment(sym, text) => {
fragments.insert(sym, text);
Ok(XmloEvent::Fragment(sym, text)) => {
let result = depgraph.set_fragment(
depgraph.lookup(sym).expect(&format!(
"missing symbol for fragment: {}",
sym
)),
text,
);
match result {
Ok(_) => (),
Err(e) => println!("{:?}; skipping...", e),
};
}
// We don't need to read any further than the end of the
// header (symtable, sym-deps, fragments)
XmloEvent::Eoh => break,
Ok(XmloEvent::Eoh) => break,
Err(err @ XmloError::UnassociatedFragment) => {
println!("{:?}; skipping...", err);
}
err @ Err(_) => err.map(|_| ())?,
}
}
if let Some(elig_sym) = elig {
roots.push(depgraph.lookup(elig_sym).expect(
"internal error: package elig references nonexistant symbol",
));
}
let mut dir = path.clone();
dir.pop();
@ -316,64 +203,40 @@ fn load_xmlo<'a, 'i, I: Interner<'i>>(
let path_abs = path_buf.canonicalize().unwrap();
let path = path_abs.to_str().unwrap();
load_xmlo(path, pkgs_seen, fragments, depgraph, interner)?;
load_xmlo(path, pkgs_seen, fragments, depgraph, interner, roots)?;
}
Ok(())
}
fn sort_deps(depgraph: &DepGraph) -> Vec<&SymEntry> {
fn sort_deps<'a, 'i>(
depgraph: &'a LinkerAsg<'i>,
roots: &Vec<LinkerObjectRef>,
) -> Vec<&'a Object<'i>> {
// @type=meta, @preproc:elig-class-yields
// @type={ret}map{,:head,:tail}
let roots = discover_roots(depgraph);
// This is technically a topological sort, but functions have
// cycles. Once we have more symbol metadata, we can filter them out
// and actually invoke toposort.
let mut dfs = DfsPostOrder::empty(&depgraph);
let mut sorted = Vec::new();
//println!("discovered roots: {:?}", roots);
// TODO: we'll be processing various roots separately
for index in roots {
dfs.stack.push(*index);
dfs.stack.push((*index).into());
}
// TODO: can we encapsulate NodeIndex?
while let Some(index) = dfs.next(&depgraph) {
sorted.push(&depgraph[index]);
sorted.push(depgraph.get(index).unwrap());
}
sorted
}
/// Collect the graph nodes that serve as roots for dependency sorting.
///
/// The result holds, in order:
///
///   1. the nodes for `___yield` and `___worksheet`, for whichever of
///      those names are present in the index; then
///   2. every indexed symbol whose name begins with `:map:` or
///      `:retmap:`, in index iteration order.
fn discover_roots(depgraph: &DepGraph) -> Vec<SymRef> {
    // A fixed-size array avoids a heap allocation for the well-known
    // names, and chaining the two sources avoids building and then
    // appending an intermediate vector.
    let roots: Vec<SymRef> = ["___yield", "___worksheet"]
        .iter()
        .filter_map(|sym| depgraph.lookup(sym))
        .chain(
            depgraph
                .index_iter()
                .filter(|(key, _)| {
                    key.starts_with(":map:") || key.starts_with(":retmap:")
                })
                .map(|(_, value)| *value),
        )
        .collect();

    //println!(
    //    "found roots: {:?}",
    //    roots
    //        .iter()
    //        .map(|index| &depgraph.graph[*index])
    //        .collect::<Vec<_>>()
    //);

    roots
}
#[cfg(test)]
mod test {
#[test]

View File

@ -17,6 +17,7 @@
//! An incremental rewrite of TAME in Rust.
pub mod global;
pub mod ir;
pub mod ld;
pub mod obj;

View File

@ -227,9 +227,9 @@
//! global pool and unsafe rust to cast to a `static` slice.
//! - Rustc identifies symbols by integer value encapsulated within a
//! `Symbol`.
//! - Rustc's [`newtype_index!` macro][rustc-nt] uses [`NonZeroU32`] so
//! that [`Option`] uses no additional space
//! (see [pull request `53315`][rustc-nt-pr]).
//! - Rustc's [`newtype_index!` macro][rustc-nt] uses
//! [`global::NonZeroProgSymSize`] so that [`Option`] uses no
//! additional space (see [pull request `53315`][rustc-nt-pr]).
//! - Differences between TAMER and Rustc's implementations are outlined
//! above.
//!
@ -259,13 +259,14 @@
//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
//! [hash-rs]: https://github.com/Gankra/hash-rs
use crate::global;
use bumpalo::Bump;
use fxhash::FxBuildHasher;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::convert::TryInto;
use std::fmt;
use std::hash::BuildHasher;
use std::num::NonZeroU32;
use std::ops::Deref;
/// Unique symbol identifier.
@ -278,10 +279,16 @@ use std::ops::Deref;
/// Note, however, that it provides no defense against mixing symbol indexes
/// between multiple [`Interner`]s.
///
/// The index `0` is never valid because of [`NonZeroU32`],
/// The index `0` is never valid because of [`global::NonZeroProgSymSize`],
/// which allows us to have `Option<SymbolIndex>` at no space cost.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SymbolIndex(NonZeroU32);
pub struct SymbolIndex(global::NonZeroProgSymSize);
impl From<SymbolIndex> for usize {
fn from(value: SymbolIndex) -> usize {
value.0.get().try_into().unwrap()
}
}
impl SymbolIndex {
/// Construct index from a non-zero `u32` value.
@ -290,7 +297,7 @@ impl SymbolIndex {
/// ------
/// Will panic if `n == 0`.
pub fn from_u32(n: u32) -> SymbolIndex {
SymbolIndex(NonZeroU32::new(n).unwrap())
SymbolIndex(global::NonZeroProgSymSize::new(n).unwrap())
}
/// Construct index from an unchecked non-zero `u32` value.
@ -300,7 +307,7 @@ impl SymbolIndex {
/// Unlike [`from_u32`](SymbolIndex::from_u32),
/// this never panics.
unsafe fn from_u32_unchecked(n: u32) -> SymbolIndex {
SymbolIndex(NonZeroU32::new_unchecked(n))
SymbolIndex(global::NonZeroProgSymSize::new_unchecked(n))
}
}
@ -336,6 +343,11 @@ impl<'i> Symbol<'i> {
/// Construct a new interned value.
///
/// _This must only be done by an [`Interner`]._
/// As such,
/// this function is not public.
///
/// For test builds (when `cfg(test)`),
/// `new_dummy` is available to create symbols for tests.
#[inline]
fn new(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
Self { index, str }
@ -350,6 +362,18 @@ impl<'i> Symbol<'i> {
pub fn index(&self) -> SymbolIndex {
self.index
}
/// Build a symbol directly from its parts _for testing_.
///
/// This exposes [`Symbol::new`] publicly,
/// but only in test builds.
/// The distinct name is a strong hint that symbols should otherwise be
/// created only by an interner.
#[cfg(test)]
#[inline(always)]
pub fn new_dummy(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
    Symbol::new(index, str)
}
}
impl<'i> PartialEq for Symbol<'i> {
@ -467,9 +491,9 @@ where
/// Next available symbol index.
///
/// This must always be ≥1.
/// It is not defined as `NonZeroU32` because
/// It is not defined as `NonZeroProgSymSize` because
/// `intern` enforces the invariant.
next_index: Cell<u32>,
next_index: Cell<global::ProgSymSize>,
/// Map of interned strings to their respective [`Symbol`].
///