TAMER: tameld: Proof-of-concept

This is a POC playing around with Rust to demonstrate how the linker could
be approached and to gather benchmarks.
master
Mike Gerwitz 2019-12-02 15:21:46 -05:00
commit 2c1ff90d0a
14 changed files with 623 additions and 12 deletions

View File

@ -24,6 +24,7 @@ pages:
script:
- mkdir -p public/doc
- mv doc/tame.html/* doc/tame.pdf doc/tame.info public/
- mv tamer/target/doc public/tamer/
artifacts:
paths:
- public/

View File

@ -1 +0,0 @@
state.xml

View File

@ -25,7 +25,7 @@
<import package="base" />
<!-- contains template dependencies -->
<import package="/rater/core/vector/cmatch" export="true" />
<import package="vector/cmatch" export="true" />
This package provides elementary integration with the UI through

43
tamer/Cargo.lock generated
View File

@ -1,6 +1,49 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ordermap"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "petgraph"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quick-xml"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tamer"
version = "0.0.0"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcdba8c8d71275493d966ef052a88726ac8590c15a09968b32158205c672ef"

View File

@ -2,10 +2,20 @@
name = "tamer"
version = "0.0.0"
authors = ["Mike Gerwitz <mike.gerwitz@ryansg.com>"]
description="TAME on Rust"
description="TAME in Rust"
license="GPLv3+"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.dev]
# Release-level optimizations. Spending the extra couple of moments
# compile-time is well worth the huge savings we get at runtime. Note that
# this is still ever so slightly slower than a release build; see other
# profile options for release at
# <https://doc.rust-lang.org/cargo/reference/manifest.html>.
opt-level = 3
[dependencies]
quick-xml = ">= 0.17.0"
petgraph = ">= 0.4.13"
# used by petgraph
fixedbitset = ">= 0.1"

View File

@ -16,16 +16,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
##
path_src = src
path_test = test
path_aux = build-aux
.DELETE_ON_ERROR:
.PHONY: all
CARGO_BUILD_FLAGS=@CARGO_BUILD_FLAGS@
all:
cargo build
@CARGO@ build $(CARGO_BUILD_FLAGS)
doc: html
html-am:
@CARGO@ doc
# note that 'cargo check' is something else; see 'cargo --help'
test: check

View File

@ -24,3 +24,26 @@ To bootstrap from the source repository, run `./bootstrap`.
To configure the build for your system, run `./configure`. To build, run
`make`. To run tests, run `make check`.
You may also invoke `cargo` directly, which `make` will do for you using
options provided to `configure`.
*Note that the default development build results in terrible runtime
performance!* See [Build Flags](#build-flags) below for instructions on how to
generate a release binary.
### Build Flags
The environment variable `CARGO_BUILD_FLAGS` can be used to provide
additional arguments to `cargo build` when invoked via `make`. This can be
provided optionally during `configure` and can be overridden when invoking
`make`. For example:
```sh
# release build
$ ./configure && make CARGO_BUILD_FLAGS=--release
$ ./configure CARGO_BUILD_FLAGS=--release && make
# dev build
$ ./configure && make
$ ./configure CARGO_BUILD_FLAGS=--release && make CARGO_BUILD_FLAGS=
```

View File

@ -52,6 +52,9 @@ AX_COMPARE_VERSION([$rustc_version], [ge], [$rustc_ver_req],
[AC_MSG_RESULT([yes ($rustc_version)])],
[AC_MSG_ERROR([no ($rustc_version)])])
AC_ARG_VAR([CARGO_BUILD_FLAGS],
[Flags to be passed to `cargo build' when invoked via Make])
AC_CONFIG_FILES([Makefile])
AC_OUTPUT

View File

@ -0,0 +1,2 @@
max_width = 80

View File

@ -0,0 +1,43 @@
// TAME linker
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! This is the TAME linker, so named after the traditional `ld` Unix
//! utility. Its job is to take each of the compiled object files and
//! produce a final executable.
//!
//! # Backwards-Compatibility (XSLT System)
//! This linker is part of the TAMER (TAME in Rust) project, which aims to
//! incrementally rewrite TAME in Rust. Consequently, it must be able to
//! serve as a drop-in replacement for the existing (XSLT) linker, which
//! takes as input `xmlo` files and produces as output an `xmle` file. This
//! is not efficient, and future versions will begin to migrate away from
//! this strategy.
//!
//! The output `xmle` file is then fed to a `standalone` command which
//! extracts the JavaScript fragment and places it into its own file. Even
//! when that is replaced (when this just outputs a final JS file directly),
//! the `xmle` file is still needed for other purposes, such as `summary`
//! and `dote` generation.
extern crate tamer;
use std::error::Error;
use tamer::ld::poc;
/// Entry point of the `tameld` binary.
///
/// All of the work is delegated to the proof-of-concept linker in
/// `tamer::ld::poc`; its result is returned unchanged.
pub fn main() -> Result<(), Box<dyn Error>> {
poc::main()
}

18
tamer/src/ld.rs 100644
View File

@ -0,0 +1,18 @@
// TAME linker library
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
pub mod poc;

452
tamer/src/ld/poc.rs 100644
View File

@ -0,0 +1,452 @@
// Proof-of-concept TAME linker
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! **This is a poorly-written proof of concept; do not use!** It has been
//! banished to its own file to try to make that more clear.
use fixedbitset::FixedBitSet;
use petgraph::graph::{DiGraph, EdgeIndex, Neighbors, NodeIndex};
use petgraph::visit::{DfsPostOrder, GraphBase, IntoNeighbors, Visitable};
use quick_xml::events::Event;
use quick_xml::Reader;
use std::collections::hash_map::{Entry, Iter};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs;
use std::io::BufRead;
use std::ops::{Deref, Index};
use std::rc::Rc;
// The term "sym" is used throughout because it's easier to search for that
// in source code than "symbol", which is a generic term with many different
// meanings.
// if mutability is needed:
//#[derive(Debug)]
//struct SymRecord {
// data: SymData,
//
// // the idea is to keep the index encapsulated so that nothing else can
// // ever hold a reference to it, ensuring that it's freed when the node
// // is removed
// index: Rc<RefCell<Option<NodeIndex>>>,
//}
/// Data associated with a symbol.
///
/// NOTE(review): within this file the type is only referenced from
/// commented-out code (`SymRecord`, `add_symbol`); confirm whether it is
/// still needed.
#[derive(Debug)]
struct SymData {
/// Symbol name as a shared, reference-counted string.
name: Rc<str>,
}
/// Node weight stored in the dependency graph: one [`SymEntry`] per symbol.
type DepGraphNode = SymEntry;
/// Edge weight of the dependency graph; edges carry no data of their own.
type DepGraphEdge = ();
/// Symbol dependency graph paired with a by-name index of its nodes.
struct DepGraph {
/// Underlying directed graph holding the symbols and their dependencies.
graph: DiGraph<DepGraphNode, DepGraphEdge>,
// serves as both a string interning system and graph indexer
/// Maps each symbol name to the reference of its node in `graph`.
index: HashMap<Rc<str>, SymRef>,
// if removals are permitted:
//index: HashMap<Rc<str>, Weak<RefCell<Option<NodeIndex>>>>,
}
// This encapsulates the underlying Graph to enforce certain
// assumptions. For example, we do not permit removing nodes because that
// would invalidate the NodeIndex reference in the index, which would then
// require workarounds like the commented-out code above and below.
//
// While Petgraph's use of indexes to represent node and edge references
// makes it easy to bypass the borrow checker, it does just that---it's no
// different than a pointer reference (albeit guaranteed to safely reference
// a node rather than an arbitrary memory location) that can change out from
// under you at any moment. As such, much of the planning that went into
// this was determining how to best mitigate that.
//
// The linker has certain needs that may differ as the compiler evolves, so
// it may be desirable to permit deletions in the future. In the meantime,
// if a node needs to be deleted, we can simply remove all edges from it and
// possibly mark it in a way that states it was removed.
//
// This graph uses a separate map to serve a dual role: a string interning
// system and an indexer by symbol name. This will have to evolve in the
// future as the graph ends up containing more stuff.
//
// This is currently called a dependency graph, since that's what we're
// using it for, but in the future the compiler will also use it as an IR,
// so this will likely be renamed.
impl DepGraph {
    /// Creates a dependency graph with no symbols and an empty name index.
    fn new() -> Self {
        // TODO: with_capacity
        let graph = DiGraph::new();
        let index = HashMap::new();

        Self { graph, index }
    }

    /// Returns the reference for the symbol `name`, adding it to the graph
    /// first if it has not been seen before.
    ///
    /// A newly added symbol is stored as [`SymEntry::MissingSym`], and its
    /// node shares the name allocation with the index key.
    fn declare(&mut self, name: &str) -> SymRef {
        match self.index.entry(name.into()) {
            Entry::Vacant(slot) => {
                let placeholder = SymEntry::MissingSym {
                    name: Rc::clone(slot.key()),
                };
                let symref = SymRef(self.graph.add_node(placeholder));

                *slot.insert(symref)
            }
            Entry::Occupied(known) => *known.get(),
        }
    }

    /// Records that `symbol` depends on `dep` by adding an edge from
    /// `symbol` to `dep`.
    ///
    /// Will not duplicate dependencies if they already exist.
    fn declare_dep(&mut self, symbol: SymRef, dep: SymRef) -> () {
        self.graph.update_edge(symbol.0, dep.0, ());
    }

    /// Looks up a previously declared symbol by name, yielding `None` when
    /// the name is not in the index.
    fn lookup(&self, name: &str) -> Option<SymRef> {
        self.index.get(name).copied()
    }

    /// Iterates over every `(name, reference)` pair in the symbol index.
    fn index_iter(&self) -> Iter<Rc<str>, SymRef> {
        self.index.iter()
    }

    // POC when removals were permitted:
    //fn add_symbol(&mut self, sym: SymData) -> NodeIndex {
    //    let name = Rc::clone(&sym.name);
    //    let record = SymRecord { data: sym, index: Rc::new(RefCell::new(None)) };
    //    let index = self.graph.add_node(record);
    //    let index = Rc::downgrade(&self.graph[index].index);
    //    self.graph[index].index.replace(Some(index));
    //    self.index.insert(name, index);
    //    index
    //}
}
/// Declares the identifier types that petgraph's traversal traits use for
/// `DepGraph`; they are the index types of the wrapped graph.
impl GraphBase for DepGraph {
/// Nodes are identified by petgraph's `NodeIndex`.
type NodeId = NodeIndex;
/// Edges are identified by petgraph's `EdgeIndex`.
type EdgeId = EdgeIndex;
}
/// Lets graph walkers track visited nodes of a `DepGraph` by forwarding to
/// the wrapped graph.
impl Visitable for DepGraph {
/// Visited-node set type.
type Map = FixedBitSet;
/// Returns a visit map produced by the wrapped graph.
fn visit_map(&self) -> Self::Map {
self.graph.visit_map()
}
/// Resets `map` via the wrapped graph so that it can be reused.
fn reset_map(&self, map: &mut Self::Map) {
self.graph.reset_map(map)
}
}
/// Exposes neighbor iteration on a borrowed `DepGraph` by forwarding to the
/// wrapped graph.
impl<'a> IntoNeighbors for &'a DepGraph {
/// Neighbor iterator type of the wrapped graph.
type Neighbors = Neighbors<'a, DepGraphEdge>;
/// Returns the neighbors of node `n` as reported by the wrapped graph.
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
self.graph.neighbors(n)
}
}
/// Allows `depgraph[symref]` to access the node stored for a symbol
/// reference.
impl Index<SymRef> for DepGraph {
type Output = DepGraphNode;
/// Returns the node weight at the node index wrapped by `index`.
fn index(&self, index: SymRef) -> &Self::Output {
&self.graph[*index]
}
}
// TODO: we may not want to allow this; using SymRef could be a means to
// guarantee that a lookup has occurred and that it actually exists. We
// don't need this if we set NodeId = SymRef in GraphBase, but that requires
// implementing other traits as well.
/// Allows `depgraph[node_index]` with a raw petgraph index, bypassing
/// `SymRef`.
impl Index<NodeIndex> for DepGraph {
type Output = DepGraphNode;
/// Returns the node weight stored at `index` in the wrapped graph.
fn index(&self, index: NodeIndex) -> &Self::Output {
&self.graph[index]
}
}
/// Reference to a symbol in a `DepGraph`: a copyable newtype around the
/// petgraph `NodeIndex` of the symbol's node.
#[derive(Debug, Clone, Copy, PartialEq)]
struct SymRef(NodeIndex);
/// Unwraps a `SymRef` into the `NodeIndex` it holds.
impl From<SymRef> for NodeIndex {
fn from(symref: SymRef) -> Self {
*symref
}
}
/// Wraps a raw `NodeIndex` in a `SymRef`; no check is made here that the
/// index refers to an existing node.
impl From<NodeIndex> for SymRef {
fn from(index: NodeIndex) -> Self {
Self(index)
}
}
/// Lets `*symref` yield the wrapped `NodeIndex`, which is how the rest of
/// this file passes symbol references to the underlying graph.
impl Deref for SymRef {
type Target = NodeIndex;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// A node of the dependency graph.
#[derive(Debug, PartialEq)]
enum SymEntry {
/// Symbol known only by name; `DepGraph::declare` stores every newly
/// declared symbol as this variant.
MissingSym { name: Rc<str> },
}
/// Runs the proof-of-concept linker.
///
/// The first command-line argument is the path of the package to link.
/// That package and every package it references are loaded into a
/// dependency graph, and the sorted list of symbols reachable from the
/// graph roots is printed to standard output.
///
/// # Errors
///
/// Returns an error, instead of panicking, when the argument is missing,
/// when the path cannot be canonicalized or is not valid UTF-8, and when
/// loading a package fails.
pub fn main() -> Result<(), Box<dyn Error>> {
    let mut pkgs_seen = HashSet::<String>::new();
    let mut fragments = HashMap::<String, String>::new();
    let mut depgraph = DepGraph::new();

    let package_path = std::env::args().nth(1).ok_or("Missing argument")?;
    let abs_path = fs::canonicalize(package_path)?;
    let abs_path_str = abs_path
        .to_str()
        .ok_or("Package path is not valid UTF-8")?;

    println!("WARNING: This is proof-of-concept; do not use!");

    load_xmlo(abs_path_str, &mut pkgs_seen, &mut fragments, &mut depgraph)?;

    //    println!(
    //        "Graph {:?}",
    //        depgraph
    //            .graph
    //            .raw_nodes()
    //            .iter()
    //            .map(|node| &node.weight)
    //            .collect::<Vec<_>>()
    //    );

    let sorted = sort_deps(&depgraph);

    println!("Sorted ({}): {:?}", sorted.len(), sorted);

    Ok(())
}
fn load_xmlo<'a>(
path_str: &'a str,
pkgs_seen: &mut HashSet<String>,
fragments: &mut HashMap<String, String>,
depgraph: &mut DepGraph,
) -> Result<(), Box<dyn Error>> {
let path = fs::canonicalize(path_str)?;
let path_str = path.to_str().unwrap();
if !pkgs_seen.insert(path_str.to_string()) {
return Ok(());
}
println!("processing {}", path_str);
let mut found = HashSet::<String>::new();
match Reader::from_file(&path) {
Ok(mut reader) => loop {
let mut buf = Vec::new();
// we know that the XML produced by Saxon is valid
reader.check_end_names(false);
match reader.read_event(&mut buf) {
Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
let mut attrs = ele.attributes();
let mut filtered =
attrs.with_checks(false).filter_map(Result::ok);
match ele.name() {
b"preproc:sym-dep" => filtered
.find(|attr| attr.key == b"name")
.map(|attr| attr.value)
.and_then(|mut name| {
read_deps(&mut reader, depgraph, name.to_mut())
})
.ok_or("Missing name"),
b"preproc:sym" => {
filtered
.find(|attr| attr.key == b"src")
.map(|attr| attr.value.to_owned())
.and_then(|src| {
let path_str =
std::str::from_utf8(&src).unwrap();
found.insert(path_str.to_string());
Some(())
});
Ok(())
}
b"preproc:fragment" => filtered
.find(|attr| attr.key == b"id")
.map(|attr| String::from_utf8(attr.value.to_vec()))
.and_then(|id| {
let fragment = reader
.read_text(ele.name(), &mut Vec::new())
.unwrap_or("".to_string());
fragments.insert(id.unwrap(), fragment);
Some(())
})
.ok_or("Missing fragment id"),
_ => Ok(()),
}
}
Ok(Event::Eof) => break (),
Err(e) => {
panic!("Error at {}: {:?}", reader.buffer_position(), e);
}
_ => Ok(()),
}
.unwrap_or_else(|r| panic!("Parse error: {:?}", r));
buf.clear();
},
Err(e) => panic!("Error {:?}", e),
}
let mut dir = path.clone();
dir.pop();
for relpath in found.iter() {
let mut path_buf = dir.clone();
path_buf.push(relpath);
path_buf.set_extension("xmlo");
//println!("Trying {:?}", path_buf);
let path_abs = path_buf.canonicalize().unwrap();
let path = path_abs.to_str().unwrap();
load_xmlo(path, pkgs_seen, fragments, depgraph)?;
}
Ok(())
}
/// Reads the dependencies of the symbol `name` from `reader` and records
/// them in `depgraph`.
///
/// The symbol is declared first.  Then, for every element read that has a
/// `name` attribute, the named symbol is declared and added as a
/// dependency of `name`.  Reading stops at the first end tag or at the end
/// of input.
///
/// Always returns `Some(())`; failures are reported by panicking.
///
/// # Panics
///
/// Panics if `name` or a dependency name is not valid UTF-8, or if the
/// reader reports an error.
fn read_deps<B>(
    reader: &mut Reader<B>,
    depgraph: &mut DepGraph,
    name: &[u8],
) -> Option<()>
where
    B: BufRead,
{
    // TODO: API needs to expose whether a symbol is already known so that
    // we can warn on them

    // note: using from_utf8_unchecked here did _not_ improve performance
    let sym_node = depgraph.declare(std::str::from_utf8(name).unwrap());

    //println!("processing deps for {}", sym_name);

    // One buffer is reused for every event rather than allocating a new
    // one each time.
    let mut buf = Vec::new();

    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
                let mut attrs = ele.attributes();
                let mut filtered =
                    attrs.with_checks(false).filter_map(Result::ok);

                let name_attr = filtered.find(|attr| attr.key == b"name");

                if let Some(attr) = name_attr {
                    let dep_name = std::str::from_utf8(&attr.value).unwrap();
                    let dep_node = depgraph.declare(dep_name);

                    depgraph.declare_dep(sym_node, dep_node);
                }

                //println!("{:?}", ele.attributes().collect::<Vec<_>>());
            }

            Ok(Event::Eof) | Ok(Event::End(_)) => break Some(()),

            Err(e) => {
                panic!("Error at {}: {:?}", reader.buffer_position(), e);
            }

            _ => (),
        }

        buf.clear();
    }
}
/// Returns the symbols reachable from the graph roots in depth-first
/// post-order.
///
/// The roots are those found by `discover_roots`.
fn sort_deps(depgraph: &DepGraph) -> Vec<&SymEntry> {
    // This is technically a topological sort, but functions have
    // cycles.  Once we have more symbol metadata, we can filter them out
    // and actually invoke toposort.
    let mut walker = DfsPostOrder::empty(&depgraph);

    // Candidate roots noted for later:
    //   @type=meta, @preproc:elig-class-yields
    //   @type={ret}map{,:head,:tail}
    //
    // TODO: we'll be processing various roots separately
    walker
        .stack
        .extend(discover_roots(depgraph).into_iter().map(|root| *root));

    let mut ordered = Vec::new();

    while let Some(node) = walker.next(&depgraph) {
        ordered.push(&depgraph[node]);
    }

    ordered
}
/// Collects the symbols from which the dependency walk starts.
///
/// The result lists `___yield` and `___worksheet` first (each only if it
/// has been declared), followed by every indexed symbol whose name starts
/// with `:map:` or `:retmap:`.
fn discover_roots(depgraph: &DepGraph) -> Vec<SymRef> {
    let well_known = ["___yield", "___worksheet"];

    let named_roots = well_known
        .iter()
        .filter_map(|name| depgraph.lookup(name));

    let map_roots = depgraph.index_iter().filter_map(|(name, symref)| {
        if name.starts_with(":map:") || name.starts_with(":retmap:") {
            Some(*symref)
        } else {
            None
        }
    });

    let roots = named_roots.chain(map_roots).collect::<Vec<_>>();

    //println!(
    //    "found roots: {:?}",
    //    roots
    //        .iter()
    //        .map(|index| &depgraph.graph[*index])
    //        .collect::<Vec<_>>()
    //);

    roots
}
// Placeholder test module: the single test below is empty and asserts
// nothing.
#[cfg(test)]
mod tests {
#[test]
fn placeholder() {}
}

18
tamer/src/lib.rs 100644
View File

@ -0,0 +1,18 @@
// TAME in Rust (TAMER)
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
pub mod ld;

View File

@ -1,3 +0,0 @@
// "Hello, world!" stub; this hunk removes it.
fn main() {
println!("Hello, world!");
}