tamer: asg: Track roots on graph

Previously, since the graph contained only identifiers, discovered roots
were stored in a separate vector and exposed to the caller.  This not only
leaked details, but added complexity; this was left over from the
refactoring of the proof-of-concept linker some time ago.

This moves the root management into the ASG itself, mostly, with one item
being left over for now in the asg_builder (eligibility classifications).

There are two roots that were added automatically:

  - __yield
  - __worksheet

The former has been removed and is now expected to be explicitly mapped in
the return map, which is now enforced with an extern in `core/base`.  This
is still special, in the sense that it is explicitly referenced by the
generated code, but there's nothing inherently special about it and I'll
continue to generalize it into oblivion in the future, such that the final
yield is just a convention.

`__worksheet` is the only symbol of type `IdentKind::Worksheet`, and so that
was generalized just as the meta and map entries were.

The goal in the future will be to have this more under the control of the
source language, and to consolodate individual roots under packages, so that
the _actual_ roots are few.

As far as the actual ASG goes: this introduces a single root node that is
used as the sole reference for reachability analysis and topological
sorting.  The edges of that root node replace the vector that was removed.

DEV-11864
main
Mike Gerwitz 2022-05-17 10:42:05 -04:00
parent 5a866f7735
commit 3e277270a7
8 changed files with 196 additions and 78 deletions

View File

@ -58,8 +58,10 @@ fn sort_1_with_1_000_existing_supernode(bench: &mut Bencher) {
sut.add_dep(root, *to);
});
sut.add_root(root);
bench.iter(|| {
drop(sort(&sut, &[root], Sections::new()));
drop(sort(&sut, Sections::new()));
});
}
@ -92,7 +94,9 @@ fn sort_1_with_1_000_existing_one_edge_per_node_one_path(bench: &mut Bencher) {
let root = orefs[0];
sut.add_root(root);
bench.iter(|| {
drop(sort(&sut, &[root], Sections::new()));
drop(sort(&sut, Sections::new()));
});
}

View File

@ -83,6 +83,10 @@ pub struct Asg {
/// Empty node indicating that no object exists for a given index.
empty_node: NodeIndex<Ix>,
/// The root node used for reachability analysis and topological
/// sorting.
root_node: NodeIndex<Ix>,
}
impl Asg {
@ -111,10 +115,16 @@ impl Asg {
let empty_node = graph.add_node(None);
index.push(empty_node);
// Automatically add the root which will be used to determine what
// identifiers ought to be retained by the final program.
// This is not indexed and is not accessable by name.
let root_node = graph.add_node(Some(IdentObject::Root));
Self {
graph,
index,
empty_node,
root_node,
}
}
@ -215,6 +225,28 @@ impl Asg {
})
}
// TODO: This is transitional;
// remove once [`crate::xmlo::asg_builder`] is removed.
pub fn root(&self) -> NodeIndex<Ix> {
self.root_node
}
/// Add an object as a root.
///
/// Roots are always included during a topological sort and any
/// reachability analysis.
///
/// Ideally,
/// roots would be minimal and dependencies properly organized such
/// that objects will be included if they are a transitive dependency
/// of some included subsystem.
///
/// See also [`IdentKind::is_auto_root`].
pub fn add_root(&mut self, identi: ObjectRef) {
self.graph
.add_edge(self.root_node, identi.into(), Default::default());
}
/// Declare a concrete identifier.
///
/// An identifier declaration is similar to a declaration in a header
@ -252,7 +284,13 @@ impl Asg {
kind: IdentKind,
src: Source,
) -> AsgResult<ObjectRef> {
let is_auto_root = kind.is_auto_root();
self.with_ident_lookup(name, |obj| obj.resolve(kind, src))
.and_then(|node| {
is_auto_root.then(|| self.add_root(node));
Ok(node)
})
}
/// Declare an abstract identifier.
@ -480,6 +518,39 @@ mod test {
Ok(())
}
#[test]
fn declare_kind_auto_root() -> AsgResult<()> {
let mut sut = Sut::new();
let auto_kind = IdentKind::Worksheet;
// Sanity check, in case this changes.
assert!(auto_kind.is_auto_root());
let auto_root_node =
sut.declare("auto_root".intern(), auto_kind, Default::default())?;
// Should have been automatically added as a root.
assert!(sut
.graph
.contains_edge(sut.root_node, auto_root_node.into()));
let no_auto_kind = IdentKind::Tpl;
assert!(!no_auto_kind.is_auto_root());
let no_auto_root_node = sut.declare(
"no_auto_root".intern(),
no_auto_kind,
Default::default(),
)?;
// Non-auto-roots should _not_ be added as roots automatically.
assert!(!sut
.graph
.contains_edge(sut.root_node, no_auto_root_node.into()));
Ok(())
}
#[test]
fn lookup_by_symbol() -> AsgResult<()> {
let mut sut = Sut::new();

View File

@ -169,6 +169,15 @@ impl IdentKind {
Self::Worksheet => st::L_WORKSHEET.as_sym(),
}
}
/// Whether this identifier should be automatically added as a root when
/// declared.
pub fn is_auto_root(&self) -> bool {
match self {
Self::Meta | Self::Map | Self::RetMap | Self::Worksheet => true,
_ => false,
}
}
}
impl std::fmt::Display for IdentKind {

View File

@ -78,6 +78,18 @@ pub enum IdentObject {
/// [linker][crate::ld] to put them into the correct order for the
/// final executable.
IdentFragment(SymbolId, IdentKind, Source, FragmentText),
/// Represents the root of all reachable identifiers.
///
/// Any identifier not reachable from the root will not be linked into
/// the final executable.
///
/// There should be only one identifier of this kind.
///
/// TODO: This is _not_ an identifier;
/// once the ASG supports other types,
/// do not associate with identifiers.
Root,
}
impl IdentObject {
@ -88,6 +100,11 @@ impl IdentObject {
| Self::Ident(name, ..)
| Self::Extern(name, ..)
| Self::IdentFragment(name, ..) => *name,
// This should never be called,
// and can go away when we can stop pretending that the root
// is an identifier.
Self::Root => unimplemented!("IdentObject::name for Root"),
}
}
@ -99,9 +116,12 @@ impl IdentObject {
pub fn kind(&self) -> Option<&IdentKind> {
match self {
Self::Missing(..) => None,
Self::Ident(_, kind, ..)
| Self::Extern(_, kind, ..)
| Self::IdentFragment(_, kind, ..) => Some(kind),
Self::Root => None,
}
}
@ -112,7 +132,8 @@ impl IdentObject {
/// [`None`] is returned.
pub fn src(&self) -> Option<&Source> {
match self {
Self::Missing(..) | Self::Extern(..) => None,
Self::Missing(..) | Self::Extern(..) | Self::Root => None,
Self::Ident(_, _, src, ..) | Self::IdentFragment(_, _, src, ..) => {
Some(src)
}
@ -125,7 +146,11 @@ impl IdentObject {
/// [`None`] is returned.
pub fn fragment(&self) -> Option<FragmentText> {
match self {
Self::Missing(..) | Self::Ident(..) | Self::Extern(..) => None,
Self::Missing(..)
| Self::Ident(..)
| Self::Extern(..)
| Self::Root => None,
Self::IdentFragment(_, _, _, text) => Some(*text),
}
}
@ -295,6 +320,11 @@ impl IdentObject {
}
IdentObject::Ident(..) | IdentObject::IdentFragment(..) => Ok(self),
// This should never be called.
IdentObject::Root => {
unimplemented!("IdentObject::resolved on Root")
}
}
}

View File

@ -39,7 +39,7 @@ use crate::{
},
parse::ParseError,
parse::{ParseState, Parsed},
sym::{GlobalSymbolIntern, GlobalSymbolResolve, SymbolId},
sym::{GlobalSymbolResolve, SymbolId},
xir::reader::XmlXirReader,
xir::{
flat::{self, Object as XirfToken, StateError as XirfError},
@ -74,20 +74,12 @@ pub fn xmle(package_path: &str, output: &str) -> Result<(), TameldError> {
)?;
let AsgBuilderState {
mut roots,
name,
relroot,
found: _,
} = state;
roots.extend(
vec!["___yield", "___worksheet"]
.iter()
.map(|name| name.intern())
.filter_map(|sym| depgraph.lookup(sym)),
);
let sorted = match sort(&depgraph, &roots, Sections::new()) {
let sorted = match sort(&depgraph, Sections::new()) {
Ok(sections) => sections,
Err(SortError::Cycles(cycles)) => {
return Err(TameldError::CycleError(

View File

@ -36,23 +36,16 @@ pub type SortResult<T> = Result<T, SortError>;
/// This performs the equivalent of a topological sort,
/// although function cycles are permitted.
/// The actual operation performed is a post-order depth-first traversal.
pub fn sort<'a, S: XmleSections<'a>>(
asg: &'a Asg,
roots: &[ObjectRef],
mut dest: S,
) -> SortResult<S>
pub fn sort<'a, S: XmleSections<'a>>(asg: &'a Asg, mut dest: S) -> SortResult<S>
where
S: XmleSections<'a>,
{
// TODO: we should check for cycles as we sort (as the POC did).
check_cycles(asg)?;
// TODO: Encapsulate petgraph.
// This is technically a topological sort, but functions have cycles.
let mut dfs = DfsPostOrder::empty(&asg.graph);
for index in roots {
dfs.stack.push((*index).into());
}
let mut dfs = DfsPostOrder::new(&asg.graph, asg.root());
// These are always generated by the map compiler,
// but do not have edges that would allow them to be properly ordered
@ -274,8 +267,9 @@ mod test {
asg.add_dep(a, adep);
asg.add_dep(adep, adepdep);
let sections =
sort(&asg, &vec![a], StubSections { pushed: Vec::new() })?;
asg.add_root(a);
let sections = sort(&asg, StubSections { pushed: Vec::new() })?;
assert_eq!(
sections.pushed,
@ -289,6 +283,10 @@ mod test {
asg.get(adepdep),
asg.get(adep),
asg.get(a),
// TODO: This will go away once Root is no longer considered
// an identifier.
// The real Sections filters this out.
Some(&IdentObject::Root),
// Static tail
asg.lookup(st::L_MAP_UUUTAIL.into())
.and_then(|id| asg.get(id)),
@ -325,7 +323,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym, dep);
match sort(&asg, &vec![sym_node], Sections::new()) {
asg.add_root(sym_node);
match sort(&asg, Sections::new()) {
Ok(_) => panic!("Unexpected success - dependency is not in graph"),
Err(SortError::SectionsError(SectionsError::UnresolvedObject(
_,
@ -351,8 +351,8 @@ mod test {
asg_nonempty_no_roots.add_dep_lookup(sym, dep);
assert_eq!(
sort(&asg_nonempty_no_roots, &vec![], Sections::new()),
sort(&asg_empty, &vec![], Sections::new())
sort(&asg_nonempty_no_roots, Sections::new()),
sort(&asg_empty, Sections::new())
);
Ok(())
@ -393,7 +393,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym, dep);
let (_, _) = asg.add_dep_lookup(dep, sym);
let result = sort(&asg, &vec![sym_node], Sections::new());
asg.add_root(sym_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> =
vec![vec![dep_node.into(), sym_node.into()]];
@ -469,7 +471,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym2, dep2);
let (_, _) = asg.add_dep_lookup(dep2, sym2);
let result = sort(&asg, &vec![sym_node], Sections::new());
asg.add_root(sym_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> = vec![
vec![dep_node.into(), sym_node.into()],
@ -518,7 +522,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym, dep);
let (_, _) = asg.add_dep_lookup(sym, dep);
let result = sort(&asg, &vec![sym_node], Sections::new());
asg.add_root(sym_node);
let result = sort(&asg, Sections::new());
match result {
Ok(_) => (),
@ -577,7 +583,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym2, sym3);
let (_, _) = asg.add_dep_lookup(sym3, sym1);
let result = sort(&asg, &vec![sym1_node], Sections::new());
asg.add_root(sym1_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> =
vec![vec![sym3_node.into(), sym2_node.into(), sym1_node.into()]];
@ -640,7 +648,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym2, sym3);
let (_, _) = asg.add_dep_lookup(sym3, sym1);
let result = sort(&asg, &vec![sym1_node], Sections::new());
asg.add_root(sym1_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> =
vec![vec![sym3_node.into(), sym2_node.into(), sym1_node.into()]];
@ -702,7 +712,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym2, sym3);
let (_, _) = asg.add_dep_lookup(sym3, sym1);
let result = sort(&asg, &vec![sym1_node], Sections::new());
asg.add_root(sym1_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> =
vec![vec![sym3_node.into(), sym2_node.into(), sym1_node.into()]];
@ -749,7 +761,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym, dep);
let (_, _) = asg.add_dep_lookup(dep, sym);
let result = sort(&asg, &vec![sym_node], Sections::new());
asg.add_root(sym_node);
let result = sort(&asg, Sections::new());
match result {
Ok(_) => (),
@ -808,7 +822,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym2, sym3);
let (_, _) = asg.add_dep_lookup(sym3, sym1);
let result = sort(&asg, &vec![sym1_node], Sections::new());
asg.add_root(sym1_node);
let result = sort(&asg, Sections::new());
let expected: Vec<Vec<ObjectRef>> =
vec![vec![sym3_node.into(), sym2_node.into(), sym1_node.into()]];
@ -868,7 +884,9 @@ mod test {
let (_, _) = asg.add_dep_lookup(sym, dep);
let (_, _) = asg.add_dep_lookup(ignored, sym);
let result = sort(&asg, &vec![sym_node], Sections::new());
asg.add_root(sym_node);
let result = sort(&asg, Sections::new());
match result {
Ok(_) => (),

View File

@ -106,6 +106,11 @@ impl<'a> Sections<'a> {
impl<'a> XmleSections<'a> for Sections<'a> {
fn push(&mut self, ident: &'a IdentObject) -> PushResult {
// TODO: This can go away once we stop treating root as an ident
if matches!(ident, IdentObject::Root) {
return Ok(());
}
self.deps.push(ident);
// TODO: This cannot happen, so use an API without Option.
@ -304,6 +309,16 @@ mod test {
Ok(())
}
// TODO: This can be removed once we no longer treat Root as an
// identifier.
#[test]
fn push_ignores_root() {
let mut sut = Sut::new();
sut.push(&IdentObject::Root).unwrap();
assert!(sut.take_deps().is_empty());
}
// Certain identifiers have no fragments because the code is associated
// with their parents (for now, anyway).
#[test]

View File

@ -41,7 +41,7 @@ use super::{
reader::{XmloResult, XmloToken},
XmloError,
};
use crate::asg::{Asg, AsgError, IdentKind, IdentKindError, ObjectRef, Source};
use crate::asg::{Asg, AsgError, IdentKindError, Source};
use crate::sym::SymbolId;
use std::collections::HashSet;
use std::convert::TryInto;
@ -64,24 +64,11 @@ pub type Result<S> = std::result::Result<AsgBuilderState<S>, AsgBuilderError>;
/// [`relroot`](AsgBuilderState::relroot) are set only once when the first
/// package is processed.
/// A package is considered to be the first if `name` is [`None`].
///
/// [`roots`](AsgBuilderState::roots) is added to as certain identifiers are
/// discovered that should be used as starting points for a topological
/// sort.
/// This is used by the linker to only include dependencies that are
/// actually used by a particular program.
#[derive(Debug, Default)]
pub struct AsgBuilderState<S>
where
S: BuildHasher,
{
/// Discovered roots.
///
/// Roots represent starting points for a topological sort of the
/// graph.
/// They are meaningful to the linker.
pub roots: Vec<ObjectRef>,
/// Relative paths to imported packages that have been discovered.
///
/// The caller will use these to perform recursive loads.
@ -231,22 +218,10 @@ where
if first {
src.pkg_name = None;
}
let link_root = matches!(
kindval,
IdentKind::Meta
| IdentKind::Map
| IdentKind::RetMap
);
if extern_ {
self.declare_extern(sym, kindval, src)?;
} else {
let node = self.declare(sym, kindval, src)?;
if link_root {
state.roots.push(node);
}
self.declare(sym, kindval, src)?;
}
}
}
@ -274,7 +249,7 @@ where
}
if let Some(elig_sym) = elig {
state.roots.push(
self.add_root(
self.lookup(elig_sym)
.ok_or(AsgBuilderError::BadEligRef(elig_sym))?,
);
@ -290,7 +265,7 @@ pub enum AsgBuilderError {
/// Error with the source `xmlo` file.
XmloError(XmloError),
/// Error parsing into [`IdentKind`].
/// Error parsing into [`crate::asg::IdentKind`].
IdentKindError(IdentKindError),
/// [`Asg`] operation error.
@ -352,7 +327,7 @@ impl Error for AsgBuilderError {
#[cfg(test)]
mod test {
use super::*;
use crate::asg::{DefaultAsg, FragmentText, IdentObject};
use crate::asg::{DefaultAsg, FragmentText, IdentKind, IdentObject};
use crate::obj::xmlo::{SymAttrs, SymType};
use crate::span::{DUMMY_SPAN, UNKNOWN_SPAN};
use crate::sym::GlobalSymbolIntern;
@ -404,9 +379,10 @@ mod test {
let evs = vec![Ok(XmloToken::PkgEligClassYields(elig_sym))];
let state = sut.import_xmlo(evs.into_iter(), SutState::new()).unwrap();
sut.import_xmlo(evs.into_iter(), SutState::new()).unwrap();
assert!(state.roots.contains(&elig_node));
// TODO: Graph should be encapsulated.
sut.graph.contains_edge(sut.root(), elig_node.into());
}
#[test]
@ -540,14 +516,17 @@ mod test {
// Both above symbols were local (no `src`).
assert!(state.found.unwrap().len() == 0);
assert_eq!(
vec![
sut.lookup(sym_non_extern).unwrap(),
sut.lookup(sym_map).unwrap(),
sut.lookup(sym_retmap).unwrap(),
],
state.roots
);
// TODO: Graph should be encapsulated.
let root = sut.root();
assert!(sut
.graph
.contains_edge(root, sut.lookup(sym_non_extern).unwrap().into()));
assert!(sut
.graph
.contains_edge(root, sut.lookup(sym_map).unwrap().into()));
assert!(sut
.graph
.contains_edge(root, sut.lookup(sym_retmap).unwrap().into()));
// Note that each of these will have their package names cleared
// since this is considered to be the first package encountered.