TAMER linker (still partly proof-of-concept)

We will continue to finalize this as we go.  It is currently used in
production, both for performance and because it fixes a bug in the
XSLT-based linker.
master
Mike Gerwitz 2020-03-03 11:29:37 -05:00
commit 777494a602
41 changed files with 7050 additions and 1593 deletions

View File

@ -1,4 +1,4 @@
# This number is incremented for every linker change to force rebuilding
# of xmle files.
0
4

View File

@ -1,4 +1,4 @@
# This number is incremented for every compiler change to force rebuilding
# of xmlo files.
0
2

View File

@ -132,7 +132,8 @@ standalones: $(dest_standalone)
strip: $(dest_standalone_strip) ui/package.strip.js
%.xmle: %.xmlo $(path_tame)/.rev-xmle
$(TAME_TS)
$(TAME) link $< $@
@echo "WARNING: using WIP proof-of-concept linker!"
set -o pipefail; $(path_tame)/tamer/target/release/tameld $< | awk '/^<package/{p=1};p' > $@
%.js: %.xmle
$(TAME_TS)
$(TAME) standalone $< $@

View File

@ -31,21 +31,42 @@
<template mode="preproc:compile-fragments" priority="9"
match="lv:package">
<copy>
<sequence select="@*" />
<apply-templates select="*" mode="preproc:compile-fragments-root" />
</copy>
</template>
<template mode="preproc:compile-fragments-root" priority="1"
match="node()">
<sequence select="." />
</template>
<!-- Position fragments directly after dependencies. This allows TAMER to
halt processing early on, rather than having to read the rest of the
file (fragments used to be placed at the end). -->
<template mode="preproc:compile-fragments-root" priority="5"
match="preproc:sym-deps">
<sequence select="." />
<variable name="package" as="element( lv:package )"
select="parent::lv:package" />
<variable name="symtable-map" as="map( xs:string, element( preproc:sym ) )"
select="map:merge(
for $sym in preproc:symtable/preproc:sym
for $sym in $package/preproc:symtable/preproc:sym
return map{ string( $sym/@name ) : $sym } )" />
<copy>
<sequence select="@*, *" />
<preproc:fragments>
<apply-templates mode="preproc:compile-fragments">
<with-param name="symtable-map" select="$symtable-map"
tunnel="yes" />
</apply-templates>
</preproc:fragments>
</copy>
<preproc:fragments>
<apply-templates select="$package/*"
mode="preproc:compile-fragments">
<with-param name="symtable-map" select="$symtable-map"
tunnel="yes" />
</apply-templates>
</preproc:fragments>
</template>
@ -117,6 +138,11 @@
<apply-templates mode="preproc:compile-fragments" />
</template>
<!-- Do not compile these as consts -->
<template mode="preproc:compile-fragments" priority="9"
match="lv:meta/lv:prop/lv:const">
<!-- ignore -->
</template>
<template mode="preproc:compile-fragments" priority="5"
match="lv:meta/lv:prop">

View File

@ -62,7 +62,7 @@
@return compiled JS
-->
<template match="lv:package" mode="compiler:entry">
<template name="compiler:entry">
<!-- enclose everything in a self-executing function to sandbox our data -->
<text>( function() { </text>
<!-- to store debug information for equations (we have to put this out here
@ -76,7 +76,7 @@
</template>
<template match="lv:package" mode="compiler:entry-rater">
<template name="compiler:entry-rater">
<!-- the rater itself -->
<value-of select="$compiler:nl" />
<text>function rater( arglist, _canterm ) {</text>
@ -107,16 +107,13 @@
<text>/**@expose*/var genclasses = {};</text>
</template>
<template match="lv:package" mode="compiler:entry-classifier">
<template name="compiler:classifier">
<!-- allow classification of any arbitrary dataset -->
<value-of select="$compiler:nl" />
<text>rater.classify = function( args, _canterm ) {</text>
return rater( args, _canterm ).classes;
<text> };</text>
</template>
<template match="lv:package" mode="compiler:exit-classifier">
<!-- TODO: make sure fromMap has actually been compiled -->
<text>rater.classify.fromMap = function( args_base, _canterm ) { </text>
<text>var ret = {}; </text>
<text>rater.fromMap( args_base, function( args ) {</text>
@ -136,8 +133,10 @@
<text> }; </text>
</template>
<template match="lv:package" mode="compiler:exit-rater">
<template name="compiler:exit-rater">
<param name="name" as="xs:string "/>
<param name="symbols" as="element( preproc:sym )*" />
<param name="mapfrom" as="element()*" />
<value-of select="$compiler:nl" />
<text>return { </text>
@ -152,7 +151,7 @@
<!-- make the name of the supplier available -->
<text>/**@expose*/rater.supplier = '</text>
<value-of select="substring-after( @name, '/' )" />
<value-of select="substring-after( $name, '/' )" />
<text>'; </text>
<text>/**@expose*/rater.meta = meta;</text>
@ -174,20 +173,8 @@
$symbols[ @type='class' ] )" />
<text> }; </text>
<variable name="mapfrom" select="
preproc:symtable/preproc:sym[
@type='map'
]/preproc:from[
not(
@name = parent::preproc:sym
/preceding-sibling::preproc:sym[
@type='map'
]/preproc:from/@name
)
]
" />
<!-- mapped fields (external names) -->
<value-of select="$compiler:nl" />
<text>/**@expose*/rater.knownFields = {</text>
<for-each select="$mapfrom">
<if test="position() > 1">
@ -221,6 +208,7 @@
<text>'</text>
<value-of select="substring-after( @name, ':class:' )" />
<text>':'</text>
<!-- yields -->
<value-of select="@yields" />
<text>'</text>
</for-each>
@ -239,7 +227,7 @@
<value-of select="substring-after( @name, ':class:' )" />
<text>':'</text>
<!-- todo: escape -->
<value-of select="translate( @desc, &quot;'&quot;, '' )" />
<value-of select="translate( normalize-space(@desc), &quot;'&quot;, '' )" />
<text>'</text>
</for-each>
</function>

File diff suppressed because it is too large Load Diff

View File

@ -123,7 +123,8 @@
<!-- final result with compiled fragments -->
<lv:package>
<sequence select="$pkg-with-symtable/@*,
$pkg-with-symtable/node()" />
$pkg-with-symtable/preproc:sym-deps/preceding-sibling::*,
$pkg-with-symtable/preproc:sym-deps" />
<preproc:fragments>
<!-- special fragment to be output as the head -->
@ -148,6 +149,8 @@
<text>};</text>
</preproc:fragment>
</preproc:fragments>
<sequence select="$pkg-with-symtable/preproc:sym-deps/following-sibling::*" />
</lv:package>
</template>
@ -197,7 +200,8 @@
<!-- final result with compiled fragments -->
<lv:package>
<sequence select="$pkg-with-symtable/@*,
$pkg-with-symtable/node()" />
$pkg-with-symtable/preproc:sym-deps/preceding-sibling::*,
$pkg-with-symtable/preproc:sym-deps" />
<preproc:fragments>
<!-- special fragment to be output as the head -->
@ -222,6 +226,8 @@
<text>};</text>
</preproc:fragment>
</preproc:fragments>
<sequence select="$pkg-with-symtable/preproc:sym-deps/following-sibling::*" />
</lv:package>
</template>

View File

@ -80,7 +80,8 @@
$displays )" />
<lv:package name="{@name}"
lvmc:type="worksheet">
__rootpath="{$__relroot}"
lvmc:type="worksheet">
<!-- we provide one special symbol -->
<preproc:symtable>
<preproc:sym name="___worksheet"

View File

@ -66,13 +66,27 @@
<text>[depgen] *determining symbol dependencies...</text>
</message>
<apply-templates select="preproc:symtable" mode="preproc:depgen" />
<sequence select="*" />
<apply-templates select="*" mode="preproc:depgen-root" />
</copy>
</template>
<template mode="preproc:depgen-root" priority="1"
match="node()">
<sequence select="." />
</template>
<template mode="preproc:depgen-root" priority="5"
match="preproc:symtable">
<!-- Place symbol table _before_ dependencies. This simplifies
streaming processing in TAMER. -->
<sequence select="." />
<apply-templates select="." mode="preproc:depgen" />
</template>
<template match="preproc:symtable" mode="preproc:depgen" priority="9">
<variable name="symtable" select="." />

View File

@ -514,10 +514,18 @@
<!-- overridden; we're obsolete :( -->
</when>
<!-- if we've gotten this far, then the override is good; clear it -->
<!-- if we've gotten this far, then the override is good; clear it
so as not to trigger override errors -->
<when test="@override='true'">
<copy>
<sequence select="@*[ not( name()='override' ) ], *" />
<sequence select="@*[ not( name()='override' ) ]" />
<!-- mark this as having been overridden for the linker (see
TAMER; we'll hopefully be getting rid of overrides in the
future) -->
<attribute name="isoverride" select="'true'" />
<sequence select="*" />
</copy>
</when>

View File

@ -36,17 +36,17 @@
xmlns:preproc="http://www.lovullo.com/rater/preproc">
<output
indent="yes"
omit-xml-declaration="yes"
/>
<output method="text" />
<include href="include/dslc-base.xsl" />
<!-- compiler -> JS -->
<include href="compiler/linker.xsl" />
<include href="compiler/map.xsl" />
<include href="compiler/js.xsl" />
<include href="include/depgen.xsl" />
<include href="include/preproc/symtable.xsl" />
<include href="include/util.xsl" />
<!-- path to program XML -->
<param name="path-program-ui" />
@ -54,9 +54,22 @@
<template match="/" priority="5">
<!-- the rater itself -->
<text>var rater = </text>
<!-- (moved from linker during TAMER POC linker) -->
<call-template name="compiler:entry" />
<call-template name="compiler:classifier" />
<value-of disable-output-escaping="yes" select="/lv:package/l:static/text()" />
<call-template name="compiler:entry-rater" />
<value-of disable-output-escaping="yes" select="/lv:package/l:exec/text()" />
<text>; </text>
<!--(moved from linker during TAMER POC linker) -->
<call-template name="compiler:exit-rater">
<with-param name="name" select="/*/@name" />
<with-param name="symbols" select="/*/l:dep/preproc:sym" />
<with-param name="mapfrom" select="/*/l:map-from/l:from" />
</call-template>
<call-template name="compiler:static" />
<!-- maps may or may not exist -->
<variable name="map" select="/lv:package/l:map-exec" />
<variable name="retmap" select="/lv:package/l:retmap-exec" />
@ -64,7 +77,7 @@
<!-- store a reference to the mapper in rater.fromMap() -->
<text>rater.fromMap = </text>
<choose>
<when test="$map">
<when test="/lv:package/l:dep/preproc:sym[@type='map'][1]">
<value-of disable-output-escaping="yes" select="$map/text()" />
</when>
@ -79,7 +92,7 @@
<!-- return map -->
<text>rater._retmap = </text>
<choose>
<when test="$retmap">
<when test="/lv:package/l:dep/preproc:sym[@type='retmap'][1]">
<value-of disable-output-escaping="yes" select="$retmap/text()" />
</when>

23
tamer/Cargo.lock generated
View File

@ -1,10 +1,28 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "bumpalo"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "2.2.1"
@ -36,13 +54,18 @@ dependencies = [
name = "tamer"
version = "0.0.0"
dependencies = [
"bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"

View File

@ -17,8 +17,15 @@ opt-level = 3
[profile.release]
lto = true
[profile.bench]
# We want our benchmarks to be representative of how well TAME will perform
# in a release.
lto = true
[dependencies]
quick-xml = ">= 0.17.0"
petgraph = ">= 0.4.13"
bumpalo = ">= 2.6.0"
# used by petgraph
fixedbitset = ">= 0.1"
fxhash = ">= 0.2.1"
petgraph = ">= 0.4.13"
quick-xml = ">= 0.17.0"

View File

@ -18,7 +18,7 @@
.DELETE_ON_ERROR:
.PHONY: all
.PHONY: all fix fmt check-fmt bench
CARGO_BUILD_FLAGS=@CARGO_BUILD_FLAGS@
@ -27,10 +27,23 @@ all:
doc: html
html-am:
@CARGO@ doc
@CARGO@ test --doc
@CARGO@ @CARGO_DOC_FLAGS@ doc --document-private-items
# note that 'cargo check' is something else; see 'cargo --help'
test: check
check-am:
check-am: check-fmt
@CARGO@ test
check-fmt:
@CARGO@ fmt -- --check
bench:
@CARGO@ @CARGO_BENCH_FLAGS@ bench
fix: fmt
fmt:
@CARGO@ fmt
clean-am:
@CARGO@ clean

View File

@ -47,3 +47,51 @@ $ ./configure CARGO_BUILD_FLAGS=--release && make
$ ./configure && make
$ ./configure CARGO_BUILD_FLAGS=--release && make CARGO_BUILD_FLAGS=
```
## Hacking
This section contains advice for those developing TAMER.
### Running Tests
Developers should be using test-driven development (TDD). `make check` will
run all necessary tests.
### Code Format
Rust provides `rustfmt` that can automatically format code for you. This
project mandates its use and therefore eliminates personal preference in
code style (for better or worse).
Formatting checks are run during `make check` and, on failure, will output
the diff that would be applied if you ran `make fmt` (or `make fix`); this
will run `cargo fmt` for you (and will use the binaries configured via
`configure`).
Since developers should be doing test-driven development (TDD) and therefore
should be running `make check` frequently, the hope is that frequent
feedback on formatting issues will allow developers to quickly adjust their
habits to avoid triggering formatting errors at all.
If you want to automatically fix formatting errors and then run tests:
```sh
$ make fmt check
```
## Benchmarking
Benchmarks serve two purposes: external integration tests (which are subject
to module visibility constraints) and actual benchmarking. To run
benchmarks, invoke `make bench`.
Note that link-time optimizations (LTO) are performed on the binary for
benchmarking so that its performance reflects release builds that will be
used in production.
The `configure` script will automatically detect whether the `test` feature
is unstable (as it was as of the time of writing) and, if so, will
automatically fall back to invoking nightly (by running `cargo +nightly
bench`).
If you do not have nightly, you can install it via `rustup install nightly`.

View File

@ -0,0 +1,174 @@
// String internment benchmarks and baselines
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Note that the baseline tests have a _suffix_ rather than a prefix so that
// they are still grouped with the associated test in the output, since it's
// sorted lexically by function name.
#![feature(test)]
extern crate tamer;
extern crate test;
use std::rc::Rc;
use tamer::sym::*;
use test::Bencher;
/// Generate `n` distinct strings to feed to the interners under test.
///
/// Each string is the decimal representation of its position in `0..n`
/// followed by a fixed suffix.
fn gen_strs(n: usize) -> Vec<String> {
    let mut strs = Vec::with_capacity(n);

    for i in 0..n {
        let mut s = i.to_string();
        s.push_str("foobarbazquuxlongsymbol");
        strs.push(s);
    }

    strs
}
mod interner {
use super::*;
use std::collections::hash_map::RandomState;
use std::collections::HashSet;
use std::hash::BuildHasher;
/// Baseline interner backed by a [`HashSet`] of reference-counted strings.
///
/// This is the system under test for the `*_baseline` benchmarks.
/// `S` selects the hasher used by the underlying set.
pub struct HashSetSut<S = RandomState>
where
    S: BuildHasher,
{
    /// Every string interned so far.
    pub map: HashSet<Rc<str>, S>,
}

impl<S> HashSetSut<S>
where
    S: BuildHasher + Default,
{
    /// Create an empty interner using the default value of the hasher `S`.
    #[inline]
    fn new() -> Self {
        Self {
            map: HashSet::with_hasher(Default::default()),
        }
    }

    /// Intern `value`, returning a shared handle to the stored string.
    ///
    /// Interning the same string more than once yields handles to the same
    /// allocation.
    pub fn intern(&mut self, value: &str) -> Rc<str> {
        // Hit: a single lookup and a reference-count bump.
        if let Some(existing) = self.map.get(value) {
            return Rc::clone(existing);
        }

        // Miss: allocate once and hand back the handle we already hold
        // rather than looking it up again after the insert.
        let interned: Rc<str> = value.into();
        self.map.insert(Rc::clone(&interned));

        interned
    }
}
/// This is our baseline with a raw Rc<str>.
///
/// Interns 1000 distinct strings into a `HashSetSut` parameterized with
/// `RandomState`.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
// Generated outside of `iter` so that string creation is not measured.
let strs = gen_strs(1000);
bench.iter(|| {
// A fresh interner per iteration so that every string is new to it.
let mut sut = HashSetSut::<RandomState>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
/// Interns 1000 distinct strings into an `ArenaInterner` parameterized
/// with `RandomState`.
///
/// Compare against `with_all_new_rc_str_1000_baseline`.
#[bench]
fn with_all_new_1000(bench: &mut Bencher) {
// Generated outside of `iter` so that string creation is not measured.
let strs = gen_strs(1000);
bench.iter(|| {
// A fresh interner per iteration so that every string is new to it.
let sut = ArenaInterner::<RandomState>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
/// This is our baseline with a raw Rc<str>.
///
/// Interns the same string 1000 times into a `HashSetSut` parameterized
/// with `RandomState`, so only the first call inserts anything.
#[bench]
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
let mut sut = HashSetSut::<RandomState>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
/// Interns the same string 1000 times into an `ArenaInterner`
/// parameterized with `RandomState`.
///
/// Compare against `with_one_new_rc_str_1000_baseline`.
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let sut = ArenaInterner::<RandomState>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
mod fx {
use super::*;
use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
///
/// Interns 1000 distinct strings into a `HashSetSut` parameterized with
/// `FxBuildHasher`.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
// Generated outside of `iter` so that string creation is not measured.
let strs = gen_strs(1000);
bench.iter(|| {
// A fresh interner per iteration so that every string is new to it.
let mut sut = HashSetSut::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
/// Interns 1000 distinct strings into an `ArenaInterner` parameterized
/// with `FxBuildHasher`.
///
/// Compare against this module's `with_all_new_rc_str_1000_baseline`.
#[bench]
fn with_all_new_1000(bench: &mut Bencher) {
// Generated outside of `iter` so that string creation is not measured.
let strs = gen_strs(1000);
bench.iter(|| {
// A fresh interner per iteration so that every string is new to it.
let sut = ArenaInterner::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
/// This is our baseline with a raw Rc<str>.
///
/// Interns the same string 1000 times into a `HashSetSut` parameterized
/// with `FxBuildHasher`, so only the first call inserts anything.
#[bench]
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
    bench.iter(|| {
        // Constructed via `new` like every other baseline so that all of
        // them exercise the same construction path.
        let mut sut = HashSetSut::<FxBuildHasher>::new();
        (0..1000).map(|_| sut.intern("first")).for_each(drop);
    });
}
/// Interns the same string 1000 times into an `ArenaInterner`
/// parameterized with `FxBuildHasher`.
///
/// Compare against this module's `with_one_new_rc_str_1000_baseline`.
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let sut = ArenaInterner::<FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
/// Same workload as `with_one_new_1000`, but interning raw bytes through
/// `intern_utf8_unchecked` instead of a `&str` through `intern`.
#[bench]
fn with_one_new_1000_utf8_unchecked(bench: &mut Bencher) {
bench.iter(|| {
let sut = ArenaInterner::<FxBuildHasher>::new();
(0..1000)
// SAFETY: `b"first"` is ASCII and therefore valid UTF-8, which is
// presumably what `intern_utf8_unchecked` requires of its caller
// (TODO confirm against its `# Safety` documentation).
.map(|_| unsafe { sut.intern_utf8_unchecked(b"first") })
.for_each(drop);
});
}
/// Since Fx is the best-performing, let's build upon it to demonstrate
/// the benefits of with_capacity
///
/// Interns `n` distinct strings into an `ArenaInterner` constructed with
/// `with_capacity(n)`; compare against `with_all_new_1000`, which uses
/// `new`.
#[bench]
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
let n = 1000;
// Generated outside of `iter` so that string creation is not measured.
let strs = gen_strs(n);
bench.iter(|| {
let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
}
}

View File

@ -0,0 +1,23 @@
// Feature check for `test`
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As of the time of writing, this feature is unstable and can only be
// enabled in nightly. This file is intended to be used in the `configure`
// script to determine whether a nightly version of Rust must be used to
// invoke benchmarks.
#![feature(test)]

View File

@ -0,0 +1,23 @@
// Feature check for `test`
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// As of the time of writing, this feature is unstable and can only be
// enabled in nightly. This file is intended to be used in the `configure`
// script to determine whether a nightly version of Rust must be used to
// build documentation.
#![feature(intra_rustdoc_links)]

View File

@ -43,18 +43,57 @@ AC_CHECK_PROGS(CARGO, [cargo])
test -n "$CARGO" || AC_MSG_ERROR([cargo not found])
rustc_ver_req=1.39.0
rustc_ver_req=1.41.0
AC_CHECK_PROGS(RUSTC, [rustc])
AC_MSG_CHECKING([rustc version >= $rustc_ver_req])
rustc_version=$("$RUSTC" --version | cut -d' ' -f2)
AX_COMPARE_VERSION([$rustc_version], [ge], [$rustc_ver_req],
[AC_MSG_RESULT([yes ($rustc_version)])],
[AC_MSG_ERROR([no ($rustc_version)])])
[AC_MSG_RESULT([no ($rustc_version)])
AC_MSG_ERROR([If using rustup, run `rustup update'])])
AC_ARG_VAR([CARGO_BUILD_FLAGS],
[Flags to be passed to `cargo build' when invoked via Make])
# The `intra_rustdoc_links` feature is required for building
# documentation.  If unavailable, then it's still an unstable feature and
# we'll need to use nightly.  We don't check for nightly here, though---if
# it's missing, then cargo will tell the user what to do.
#
# Output is discarded using the portable redirection form; the bash-only
# `&>' is parsed by other shells as "run in the background", which makes
# the check succeed unconditionally.
AC_MSG_CHECKING([`intra_rustdoc_links` feature support])
AS_IF(["$RUSTC" --crate-type lib build_aux/intra_rustdoc_links_check.rs >/dev/null 2>&1],
      [AC_MSG_RESULT(available)],
      [AC_MSG_RESULT([no (nightly required)])
       AC_SUBST([CARGO_DOC_FLAGS], [+nightly])])
# The `test` feature is required for benchmarking.  If unavailable, then
# it's still an unstable feature and we'll need to use nightly.  We don't
# check for nightly here, though---if it's missing, then cargo will tell the
# user what to do.
#
# Output is discarded using the portable redirection form; the bash-only
# `&>' is parsed by other shells as "run in the background", which makes
# the check succeed unconditionally.
AC_MSG_CHECKING([`test` feature support])
AS_IF(["$RUSTC" --crate-type lib build_aux/bench_check.rs >/dev/null 2>&1],
      [AC_MSG_RESULT(available)],
      [AC_MSG_RESULT([no (nightly required)])
       AC_SUBST([CARGO_BENCH_FLAGS], [+nightly])])
# Cargo commands may be available but not necessarily installed for the
# active toolchain.  Let's check that.
#
# This uses the cargo binary that was detected above (which is also what
# the Makefile invokes) and the portable redirection form; the bash-only
# `&>' is parsed by other shells as "run in the background", which makes
# the check succeed unconditionally.
AC_MSG_CHECKING([whether cargo-fmt is available for active toolchain])
AS_IF(["$CARGO" fmt --help >/dev/null 2>&1],
      [AC_MSG_RESULT(yes)],
      [AC_MSG_RESULT(no)
       "$CARGO" fmt --help # run again so user can see output
       AC_MSG_ERROR([missing cargo-fmt for active toolchain])])
# Cargo commands may be available but not necessarily installed for the
# toolchain that will be used to build documentation.  Let's check that.
#
# This uses the cargo binary that was detected above (which is also what
# the Makefile invokes) and the portable redirection form; the bash-only
# `&>' is parsed by other shells as "run in the background", which makes
# the check succeed unconditionally.
AC_MSG_CHECKING([whether cargo-doc is available for toolchain])
AS_IF(["$CARGO" $CARGO_DOC_FLAGS doc --help >/dev/null 2>&1],
      [AC_MSG_RESULT(yes)],
      [AC_MSG_RESULT(no)
       "$CARGO" $CARGO_DOC_FLAGS doc --help # run again so user can see output
       AC_MSG_ERROR([missing cargo-doc for toolchain])])
AC_CONFIG_FILES([Makefile])
AC_OUTPUT

View File

@ -19,19 +19,8 @@
//! utility. Its job is to take each of the compiled object files and
//! produce a final executable.
//!
//! # Backwards-Compatibility (XSLT System)
//! This linker is part of the TAMER (TAME in Rust) project, which aims to
//! incrementally rewrite TAME in Rust. Consequently, it must be able to
//! serve as a drop-in replacement for the existing (XSLT) linker, which
//! takes as input `xmlo` files and produces as output an `xmle` file. This
//! is not efficient, and future versions will begin to migrate away from
//! this strategy.
//!
//! The output `xmle` file is then fed to a `standalone` command which
//! extracts the JavaScript fragment and places it into its own file. Even
//! when that is replaced (when this just outputs a final JS file directly),
//! the `xmle` file is still needed for other purposes, such as `summary`
//! and `dote` generation.
//! For more information about the linker,
//! see the [`tamer::ld`] module.
extern crate tamer;

View File

@ -0,0 +1,81 @@
// Global constants across the entirety of TAMER
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! System-wide static configuration.
//!
//! This module provides a system-wide configuration.
//! Subsystems should reference these values rather than defining their own
//! and risk incompatibilities or maintenance issues as requirements
//! change.
//!
//! By convention,
//! import this entire module rather than individual members and reference
//! them as `global::foo` to emphasize their nature and risk.
use std::num;
/// A size capable of representing every interned string in a package.
pub type PkgSymSize = u16;
/// A non-zero equivalent of [`PkgSymSize`].
pub type NonZeroPkgSymSize = num::NonZeroU16;
/// A size capable of representing every interned string in a program.
pub type ProgSymSize = u32;
/// A non-zero equivalent of [`ProgSymSize`].
pub type NonZeroProgSymSize = num::NonZeroU32;
/// A size capable of representing indexes of each individual identifier
/// within a single package.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
///
/// This must be ≥ [`PkgSymSize`].
pub type PkgIdentSize = u16;
/// A size capable of representing every individual identifier and
/// expression within a single package.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
pub type PkgIdentExprSize = u32;
/// A size capable of representing the union of every identifier of every
/// package used by an entire program.
///
/// This must be ≥ [`ProgSymSize`].
pub type ProgIdentSize = u32;
/// A size capable of representing the union of every identifier and every
/// expression of every package used by an entire program.
///
/// Note that,
/// since TAME is a metalanguage and can easily expand into a great
/// deal of code,
/// this must accommodate far more than the user's expectations
/// working within the provided level of abstraction.
///
/// This must be ≥ [`ProgSymSize`].
// NOTE(review): since this covers identifiers _and_ expressions, it
// presumably must also be ≥ `ProgIdentSize` — confirm and document.
pub type ProgIdentExprSize = u32;

View File

@ -0,0 +1,652 @@
// Concrete ASG
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Base concrete [`Asg`] implementation.
use super::graph::{Asg, AsgEdge, AsgError, AsgResult, Node, ObjectRef};
use super::ident::IdentKind;
use super::object::{FragmentText, Object, Source};
use crate::sym::Symbol;
use fixedbitset::FixedBitSet;
use petgraph::graph::{
DiGraph, EdgeIndex, Graph, IndexType, Neighbors, NodeIndex,
};
use petgraph::visit::{GraphBase, IntoNeighbors, Visitable};
/// Concrete ASG.
///
/// This implementation is currently based on [`petgraph`].
///
/// Identifiers are cached by name for `O(1)` lookup.
/// Since [`SymbolIndex`][crate::sym::SymbolIndex] is used for this purpose,
/// the index may contain more entries than nodes and may contain gaps.
///
/// For more information,
/// see [`Asg`].
pub struct BaseAsg<'i, Ix: IndexType> {
/// Directed graph on which objects are stored.
graph: DiGraph<Node<'i>, AsgEdge, Ix>,
/// Map of [`SymbolIndex`][crate::sym::SymbolIndex] to node indexes.
///
/// This allows for `O(1)` lookup of identifiers in the graph.
/// Note that,
/// while we store [`NodeIndex`] internally,
/// the public API encapsulates it within an [`ObjectRef`].
///
/// Slots that have not yet been assigned an identifier hold
/// `empty_node`.
index: Vec<NodeIndex<Ix>>,
/// Empty node indicating that no object exists for a given index.
///
/// This is the first node added to the graph during construction.
empty_node: NodeIndex<Ix>,
}
impl<'i, Ix> BaseAsg<'i, Ix>
where
Ix: IndexType,
{
/// Create an ASG with the provided initial capacity.
///
/// The value for `objects` will be used as the capacity for the nodes
/// in the graph,
/// as well as the initial index capacity.
/// The value for `edges` may be more difficult to consider,
/// since edges are used to represent various relationships between
/// different types of objects,
/// but it's safe to say that each object will have at least one
/// edge to another object.
///
/// A basic `new` method is not provided to ensure that callers consider
/// capacity during construction,
/// since graphs can get quite large.
pub fn with_capacity(objects: usize, edges: usize) -> Self {
let mut graph = Graph::with_capacity(objects, edges);
let mut index = Vec::with_capacity(objects);
// Exhaust the first index to be used as a placeholder.
let empty_node = graph.add_node(Some(Object::Empty));
index.push(empty_node);
Self {
graph,
index,
empty_node,
}
}
/// Record `node` as the graph node representing the identifier `name`.
///
/// The index is what makes identifier lookups `O(1)`.
///
/// An identifier that has been indexed is not expected to be assigned
/// to a different node afterward;
/// debug builds assert this and will panic if it happens.
///
/// # Panics
/// Panics if the index cannot be grown to hold `name`.
fn index_identifier(&mut self, name: &'i Symbol<'i>, node: NodeIndex<Ix>) {
    let slot: usize = name.index().into();

    if self.index.len() <= slot {
        // Grow to the next power of two that can hold `slot`, filling
        // the new entries with the placeholder.
        // Should this ever be a problem we can fall back to the usize
        // max and re-compare before panicking.
        let grown_len = (slot + 1)
            .checked_next_power_of_two()
            .expect("internal error: cannot allocate space for ASG index");

        self.index.resize(grown_len, self.empty_node);
    }

    // An occupied slot must never be overwritten.
    debug_assert!(self.index[slot] == self.empty_node);

    self.index[slot] = node;
}
/// Return a reference to the object for `ident`,
/// first adding (and indexing) an [`Object::Missing`] placeholder
/// for it if the lookup finds nothing.
#[inline]
fn lookup_or_missing(&mut self, ident: &'i Symbol<'i>) -> ObjectRef<Ix> {
    if let Some(found) = self.lookup(ident) {
        return found;
    }

    let missing = self.graph.add_node(Some(Object::Missing(ident)));
    self.index_identifier(ident, missing);

    ObjectRef(missing)
}
}
impl<'i, Ix> Asg<'i, Ix> for BaseAsg<'i, Ix>
where
Ix: IndexType,
{
/// Declare the identifier `name` on the graph.
///
/// If nothing is indexed under `name`,
/// a new [`Object::Ident`] node is added and indexed.
/// Otherwise the existing node is reused and its reference returned:
///
/// - an [`Object::Missing`] placeholder is replaced by the identifier;
/// - an [`Object::Ident`] whose source is `virtual_` has its source
///   replaced by `src` when `src.override_` is set
///   (its name and kind are left as they were);
/// - an [`Object::IdentFragment`] under the same condition is replaced
///   by a fresh [`Object::Ident`],
///   dropping its fragment;
/// - anything else is left untouched.
///
/// As written, every path returns `Ok`.
fn declare(
&mut self,
name: &'i Symbol<'i>,
kind: IdentKind,
src: Source<'i>,
) -> AsgResult<ObjectRef<Ix>> {
// TODO: src check
if let Some(existing) = self.lookup(name) {
// NOTE(review): presumably cannot fail, since `existing` was just
// produced by `lookup` — confirm that the index never refers to a
// node that is absent from the graph.
let node = self.graph.node_weight_mut(existing.0).unwrap();
match node {
// Previously referenced but not yet declared.
Some(Object::Missing(_)) => {
node.replace(Object::Ident(name, kind, src));
return Ok(existing);
}
// TODO: no override-override
Some(Object::Ident(_, _, orig_src))
if orig_src.virtual_ && src.override_ =>
{
// Only the source is replaced here.
*orig_src = src;
return Ok(existing);
}
// TODO: no override-override
Some(Object::IdentFragment(_, _, orig_src, _))
if orig_src.virtual_ && src.override_ =>
{
// clears fragment, which is no longer applicable
node.replace(Object::Ident(name, kind, src));
return Ok(existing);
}
// Any other existing object is kept as it is.
_ => return Ok(existing),
}
}
// First time this name has been seen.
let node = self.graph.add_node(Some(Object::Ident(name, kind, src)));
self.index_identifier(name, node);
Ok(ObjectRef(node))
}
fn declare_extern(
&mut self,
name: &'i Symbol<'i>,
expected_kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>> {
// TODO: resolution!
let node = self
.graph
.add_node(Some(Object::Extern(name, expected_kind)));
self.index_identifier(name, node);
Ok(ObjectRef(node))
}
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
) -> AsgResult<ObjectRef<Ix>> {
// This should _never_ happen as long as you're only using ObjectRef
// values produced by these methods.
let node = self
.graph
.node_weight_mut(identi.0)
.expect("internal error: BaseAsg::set_fragment bogus identi");
// This should also never happen, since we immediately repopulate
// the node below.
let ty = node
.take()
.expect("internal error: BaseAsg::set_fragment missing Node data");
let result = match ty {
Object::Ident(sym, kind, src) => {
Ok(Object::IdentFragment(sym, kind, src, text))
}
_ => {
let err = Err(AsgError::BadFragmentDest(format!(
"identifier is not a Object::Ident): {:?}",
ty,
)));
node.replace(ty);
err
}
}?;
node.replace(result);
Ok(identi)
}
#[inline]
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>> {
self.graph.node_weight(index.into().0).map(|node| {
node.as_ref()
.expect("internal error: BaseAsg::get missing Node data")
})
}
#[inline]
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>> {
let i: usize = name.index().into();
self.index
.get(i)
.filter(|ni| ni.index() > 0)
.map(|ni| ObjectRef(*ni))
}
fn add_dep(&mut self, identi: ObjectRef<Ix>, depi: ObjectRef<Ix>) {
self.graph.update_edge(identi.0, depi.0, Default::default());
}
#[inline]
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool {
self.graph.contains_edge(ident.0, dep.0)
}
fn add_dep_lookup(
&mut self,
ident: &'i Symbol<'i>,
dep: &'i Symbol<'i>,
) -> (ObjectRef<Ix>, ObjectRef<Ix>) {
let identi = self.lookup_or_missing(ident);
let depi = self.lookup_or_missing(dep);
self.graph.update_edge(identi.0, depi.0, Default::default());
(identi, depi)
}
}
// TODO: encapsulate Petgraph API (N.B. this is untested!)
//
// Implements petgraph's `Visitable` for the ASG by delegating to the
// wrapped graph; the visit map type is a `FixedBitSet`.
impl<'i, Ix> Visitable for BaseAsg<'i, Ix>
where
Ix: IndexType,
{
type Map = FixedBitSet;
// Create a visit map by asking the wrapped graph for one.
fn visit_map(&self) -> Self::Map {
self.graph.visit_map()
}
// Reset `map` by handing it to the wrapped graph.
fn reset_map(&self, map: &mut Self::Map) {
self.graph.reset_map(map)
}
}
// Implements petgraph's `GraphBase` for the ASG: nodes and edges are
// identified by petgraph's own `NodeIndex` and `EdgeIndex`, both
// parameterized by the ASG's index type `Ix`.
impl<'i, Ix> GraphBase for BaseAsg<'i, Ix>
where
Ix: IndexType,
{
type NodeId = NodeIndex<Ix>;
type EdgeId = EdgeIndex<Ix>;
}
// Implements petgraph's `IntoNeighbors` for a borrowed ASG by delegating
// to the wrapped graph.
impl<'a, 'i, Ix> IntoNeighbors for &'a BaseAsg<'i, Ix>
where
Ix: IndexType,
{
type Neighbors = Neighbors<'a, AsgEdge, Ix>;
// Neighbors of `n` exactly as reported by the wrapped graph.
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
self.graph.neighbors(n)
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::sym::SymbolIndex;

    // A `u8` index keeps the graph small; every test uses only a handful
    // of nodes.
    type Sut<'i> = BaseAsg<'i, u8>;

    #[test]
    fn create_with_capacity() {
        let node_capacity = 100;
        let edge_capacity = 300;
        let sut = Sut::with_capacity(node_capacity, edge_capacity);

        // breaks encapsulation to introspect; the behavior is
        // transparent to callers (aside from performance
        // characteristics)
        let (nc, ec) = sut.graph.capacity();
        assert!(nc >= node_capacity);
        assert!(ec >= edge_capacity);
        assert!(sut.index.capacity() >= node_capacity);
    }

    #[test]
    fn declare_new_unique_idents() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        // NB: The index ordering is important!  We first use a larger
        // index to create a gap, and then use an index within that gap
        // to ensure that it's not considered an already-defined
        // identifier.
        let syma = Symbol::new_dummy(SymbolIndex::from_u32(5), "syma");
        let symb = Symbol::new_dummy(SymbolIndex::from_u32(1), "symab");

        let nodea = sut.declare(
            &syma,
            IdentKind::Meta,
            Source {
                desc: Some("a".to_string()),
                ..Default::default()
            },
        )?;

        let nodeb = sut.declare(
            &symb,
            IdentKind::Worksheet,
            Source {
                desc: Some("b".to_string()),
                ..Default::default()
            },
        )?;

        assert_ne!(nodea, nodeb);

        assert_eq!(
            Some(&Object::Ident(
                &syma,
                IdentKind::Meta,
                Source {
                    desc: Some("a".to_string()),
                    ..Default::default()
                },
            )),
            sut.get(nodea),
        );

        assert_eq!(
            Some(&Object::Ident(
                &symb,
                IdentKind::Worksheet,
                Source {
                    desc: Some("b".to_string()),
                    ..Default::default()
                },
            )),
            sut.get(nodeb),
        );

        Ok(())
    }

    #[test]
    fn lookup_by_symbol() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "lookup");
        let node = sut.declare(
            &sym,
            IdentKind::Meta,
            Source {
                generated: true,
                ..Default::default()
            },
        )?;

        assert_eq!(Some(node), sut.lookup(&sym));

        Ok(())
    }

    #[test]
    fn declare_extern() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "extern");
        let node = sut.declare_extern(&sym, IdentKind::Meta)?;

        assert_eq!(Some(&Object::Extern(&sym, IdentKind::Meta)), sut.get(node),);

        Ok(())
    }

    // TODO: incompatible
    #[test]
    fn declare_returns_existing_compatible() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "symdup");
        let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;

        // Same declaration a second time
        let redeclare =
            sut.declare(&sym, IdentKind::Meta, Source::default())?;

        assert_eq!(node, redeclare);

        Ok(())
    }

    // TODO: incompatible
    #[test]
    fn declare_override_virtual_ident() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
        let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");

        let virt_node = sut.declare(
            &sym,
            IdentKind::Meta,
            Source {
                virtual_: true,
                ..Default::default()
            },
        )?;

        let over_src = Source {
            override_: true,
            src: Some(&over_src),
            ..Default::default()
        };

        let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;

        assert_eq!(virt_node, over_node);

        assert_eq!(
            sut.get(over_node),
            Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
        );

        Ok(())
    }

    // TODO: incompatible
    #[test]
    fn declare_override_virtual_ident_fragment() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
        let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");

        let virt_node = sut.declare(
            &sym,
            IdentKind::Meta,
            Source {
                virtual_: true,
                ..Default::default()
            },
        )?;

        sut.set_fragment(virt_node, FragmentText::from("remove me"))?;

        let over_src = Source {
            override_: true,
            src: Some(&over_src),
            ..Default::default()
        };

        let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;

        assert_eq!(virt_node, over_node);

        // The act of overriding the node should have cleared any existing
        // fragment, making way for a new fragment to take its place as soon
        // as it is discovered.  (So, back to an Object::Ident.)
        assert_eq!(
            sut.get(over_node),
            Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
        );

        Ok(())
    }

    #[test]
    fn add_fragment_to_ident() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "tofrag");
        let src = Source {
            generated: true,
            ..Default::default()
        };
        let node = sut.declare(&sym, IdentKind::Meta, src.clone())?;

        let fragment = "a fragment".to_string();
        let node_with_frag = sut.set_fragment(node, fragment.clone())?;

        // Attaching a fragment should _replace_ the node, not create a
        // new one
        assert_eq!(
            node, node_with_frag,
            "fragment node does not match original node"
        );

        assert_eq!(
            Some(&Object::IdentFragment(&sym, IdentKind::Meta, src, fragment)),
            sut.get(node)
        );

        Ok(())
    }

    #[test]
    fn add_fragment_to_fragment_fails() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;
        let fragment = "orig fragment".to_string();

        sut.set_fragment(node, fragment.clone())?;

        // Since it's already a fragment, this should fail.
        let err = sut
            .set_fragment(node, "replacement".to_string())
            .expect_err("Expected failure");

        match err {
            AsgError::BadFragmentDest(str) if str.contains("sym") => (),
            _ => panic!("expected AsgError::BadFragmentDest: {:?}", err),
        }

        // Make sure we didn't leave the node in an inconsistent state
        assert_eq!(
            Some(&Object::IdentFragment(
                &sym,
                IdentKind::Meta,
                Default::default(),
                fragment
            )),
            sut.get(node)
        );

        Ok(())
    }

    #[test]
    fn add_ident_dep_to_ident() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        // The two symbols must have distinct indexes; otherwise the
        // second declaration resolves to the first identifier's node and
        // this test would only ever exercise a self-edge.
        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");

        let symnode = sut.declare(&sym, IdentKind::Meta, Source::default())?;
        let depnode = sut.declare(&dep, IdentKind::Meta, Source::default())?;

        assert_ne!(symnode, depnode);

        sut.add_dep(symnode, depnode);
        assert!(sut.has_dep(symnode, depnode));

        // sanity check if we re-add a dep
        sut.add_dep(symnode, depnode);
        assert!(sut.has_dep(symnode, depnode));

        Ok(())
    }

    // Like `add_ident_dep_to_ident`, but the dependency is added by symbol
    // rather than by object reference.
    #[test]
    fn add_dep_lookup_existing() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");

        let _ = sut.declare(&sym, IdentKind::Meta, Source::default())?;
        let _ = sut.declare(&dep, IdentKind::Meta, Source::default())?;

        let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
        assert!(sut.has_dep(symnode, depnode));

        Ok(())
    }

    #[test]
    fn add_dep_lookup_missing() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");

        // both of these are missing
        let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
        assert!(sut.has_dep(symnode, depnode));

        assert_eq!(Some(&Object::Missing(&sym)), sut.get(symnode));
        assert_eq!(Some(&Object::Missing(&dep)), sut.get(depnode));

        Ok(())
    }

    #[test]
    fn declare_return_missing_symbol() -> AsgResult<()> {
        let mut sut = Sut::with_capacity(0, 0);

        let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
        let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");

        // both of these are missing, see add_dep_lookup_missing
        let (symnode, _) = sut.add_dep_lookup(&sym, &dep);

        let src = Source {
            desc: Some("Tamer is NOT lamer.".to_string()),
            ..Default::default()
        };

        // Check with a declared value
        let declared = sut.declare(&sym, IdentKind::Meta, src.clone())?;

        assert_eq!(symnode, declared);

        assert_eq!(
            Some(&Object::Ident(&sym, IdentKind::Meta, src)),
            sut.get(declared),
        );

        Ok(())
    }
}

View File

@ -0,0 +1,249 @@
// Graph abstraction
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Abstract graph as the basis for concrete ASGs.
use super::ident::IdentKind;
use super::object::{FragmentText, Object, Source};
use crate::sym::Symbol;
use petgraph::graph::{IndexType, NodeIndex};
use std::result::Result;
/// An abstract semantic graph of [objects][Object].
///
/// This IR focuses on the definition and manipulation of objects and their
/// dependencies.
/// See [`Object`] for a summary of valid object state transitions.
///
/// Objects are never deleted from the graph,
/// so [`ObjectRef`]s will remain valid for the lifetime of the ASG.
///
/// For more information,
/// see the [module-level documentation][self].
pub trait Asg<'i, Ix: IndexType> {
/// Declare a concrete identifier.
///
/// An identifier declaration is similar to a declaration in a header
/// file in a language like C,
/// describing the structure of the identifier.
/// Once declared,
/// this information cannot be changed.
///
/// Identifiers are uniquely identified by a [`Symbol`] `name`.
/// If an identifier of the same `name` already exists,
/// then the provided declaration is compared against the existing
/// declaration---should
/// they be incompatible,
/// then the operation will fail;
/// otherwise,
/// the existing identifier will be returned.
/// A successful declaration will add an [`Object::Ident`] to the graph
/// and return an [`ObjectRef`] reference.
///
/// If an existing identifier is an extern (see
/// [`Asg::declare_extern`]),
/// then the declaration will be compared just the same,
/// but the identifier will be converted from an
/// [`Object::Extern`] into an [`Object::Ident`].
/// When this happens,
/// the extern is said to be _resolved_.
///
/// If a virtual identifier of type [`Object::IdentFragment`] is
/// overridden,
/// then its fragment is cleared
/// (it returns to an [`Object::Ident`])
/// to make way for the fragment of the override.
fn declare(
&mut self,
name: &'i Symbol<'i>,
kind: IdentKind,
src: Source<'i>,
) -> AsgResult<ObjectRef<Ix>>;
/// Declare an abstract identifier.
///
/// An _extern_ declaration declares an identifier the same as
/// [`Asg::declare`],
/// but instead as [`Object::Extern`].
/// Externs are identifiers that are expected to be defined somewhere
/// else ("externally"),
/// and are resolved at [link-time][crate::ld].
///
/// If a concrete identifier has already been declared (see
/// [`Asg::declare`]),
/// then the declarations will be compared and,
/// if compatible,
/// the identifier will be immediately _resolved_ and the object
/// on the graph will not be altered.
/// Resolution will otherwise fail in error.
fn declare_extern(
&mut self,
name: &'i Symbol<'i>,
expected_kind: IdentKind,
) -> AsgResult<ObjectRef<Ix>>;
/// Set the fragment associated with a concrete identifier.
///
/// This changes the type of the identifier from [`Object::Ident`]
/// into [`Object::IdentFragment`],
/// which is intended for use by the [linker][crate::ld].
///
/// If the identifier is not in a state that is permitted to receive a
/// fragment,
/// the operation fails with [`AsgError::BadFragmentDest`].
///
/// On success,
/// the same reference that was provided as `identi` is returned.
fn set_fragment(
&mut self,
identi: ObjectRef<Ix>,
text: FragmentText,
) -> AsgResult<ObjectRef<Ix>>;
/// Retrieve an object from the graph by [`ObjectRef`].
///
/// Since an [`ObjectRef`] should only be produced by an [`Asg`],
/// and since objects are never deleted from the graph,
/// this should never fail so long as references are not shared
/// between multiple graphs.
/// It is nevertheless wrapped in an [`Option`] just in case.
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>>;
/// Attempt to retrieve an identifier from the graph by name.
///
/// Since only identifiers carry a name,
/// this method cannot be used to retrieve all possible objects on the
/// graph---for
/// that, see [`Asg::get`].
///
/// Returns [`None`] if no identifier is known by `name`.
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>>;
/// Declare that `dep` is a dependency of `ident`.
///
/// An object must be declared as a dependency if its value must be
/// computed before computing the value of `ident`.
/// The [linker][crate::ld] will ensure this ordering.
///
/// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
/// be looked up by [`Symbol`] or if they may not yet have been
/// declared.
fn add_dep(&mut self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>);
/// Check whether `dep` is a dependency of `ident`.
///
/// The relationship is directional:
/// `ident` depending on `dep` does not imply the reverse.
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool;
/// Declare that `dep` is a dependency of `ident`,
/// regardless of whether they are known.
///
/// In contrast to [`add_dep`][Asg::add_dep],
/// this method will add the dependency even if one or both of `ident`
/// or `dep` have not yet been declared.
/// In such a case,
/// an [`Object::Missing`] will be added as a placeholder for the
/// missing identifier,
/// allowing the ASG to be built with partial information as
/// identifiers continue to be discovered.
///
/// References to both identifiers are returned in argument order.
fn add_dep_lookup(
&mut self,
ident: &'i Symbol<'i>,
dep: &'i Symbol<'i>,
) -> (ObjectRef<Ix>, ObjectRef<Ix>);
}
/// A [`Result`] with a hard-coded [`AsgError`] error type.
///
/// This is the result of every [`Asg`] operation that could potentially
/// fail in error.
/// Because the error type is fixed,
/// only the success type `T` needs to be named.
pub type AsgResult<T> = Result<T, AsgError>;
/// Reference to an [object][Object] stored within the [`Asg`].
///
/// Object references are integer offsets,
/// not pointers.
/// See the [module-level documentation][self] for more information.
///
/// This is a thin wrapper around petgraph's [`NodeIndex`];
/// the wrapped index is public and the two types convert into one
/// another via [`From`].
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub struct ObjectRef<Ix>(pub NodeIndex<Ix>);
impl<Ix> From<NodeIndex<Ix>> for ObjectRef<Ix>
where
Ix: IndexType,
{
fn from(index: NodeIndex<Ix>) -> Self {
Self(index)
}
}
impl<Ix> From<ObjectRef<Ix>> for NodeIndex<Ix>
where
Ix: IndexType,
{
fn from(objref: ObjectRef<Ix>) -> Self {
objref.0
}
}
/// There are currently no data stored on edges ("edge weights").
///
/// The unit type is used so that an edge records nothing beyond its
/// endpoints.
pub type AsgEdge = ();
/// Each node of the graph represents an object.
///
/// Enclosed in an [`Option`] to permit moving owned values out of the
/// graph
/// (for example, with [`Option::take`])
/// without having to clone them.
pub type Node<'i> = Option<Object<'i>>;
/// A failure produced by an ASG operation.
///
/// Holding a [`Symbol`] here would force a lifetime onto this type,
///   which is very inconvenient when chaining [`Result`],
///   so only owned values are stored.
/// The caller will know the problem values.
#[derive(Debug, PartialEq)]
pub enum AsgError {
    /// The identifier given is not in a state that may receive a
    ///   fragment;
    ///     the payload describes the problem.
    ///
    /// See [`Asg::set_fragment`] for more information.
    BadFragmentDest(String),
}

impl std::fmt::Display for AsgError {
    /// Render the error as a single human-readable line.
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        // There is only one variant, so the pattern is irrefutable; adding
        // a variant will make this fail to compile and force an update.
        let Self::BadFragmentDest(msg) = self;

        write!(fmt, "bad fragment destination: {}", msg)
    }
}

// No underlying cause is ever wrapped, so the trait's default `source`
// (which yields `None`) is exactly what is wanted.
impl std::error::Error for AsgError {}
#[cfg(test)]
mod test {
    use super::*;

    mod objref {
        use super::*;

        // An index must survive being wrapped in an `ObjectRef` and then
        // unwrapped again.
        #[test]
        fn to_from_nodeindex() {
            let raw = NodeIndex::<u32>::new(5);

            let wrapped: ObjectRef<u32> = raw.into();
            assert_eq!(raw, wrapped.0);

            let unwrapped: NodeIndex<u32> = wrapped.into();
            assert_eq!(raw, unwrapped);
        }
    }
}

View File

@ -0,0 +1,376 @@
// ASG identifiers
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Identifiers (a type of [object][super::object::Object]).
use crate::ir::legacyir::{SymAttrs, SymDtype, SymType};
use std::convert::TryFrom;
/// Types of identifiers.
///
/// Here, the term _calculation_ refers to a composable expression that
/// produces a numeric result.
///
/// These are derived from [`legacyir::SymType`][crate::ir::legacyir::SymType]
/// and will be generalized in the future.
///
/// Some kinds carry a [`Dim`] and/or a [`DataType`];
/// the rest carry no additional data.
#[derive(Debug, PartialEq, Eq)]
pub enum IdentKind {
/// Classification generator.
///
/// This has the same number of dimensions as its highest-dimension
/// predicate.
/// Every [`Class`][IdentKind::Class] has an associated generator.
Cgen(Dim),
/// Boolean classification.
///
/// This is an artifact of an ancient system.
/// The dimensions here refer to the dimensions of the associated
/// [`Cgen`][IdentKind::Cgen].
Class(Dim),
/// Constant value.
Const(Dim, DataType),
/// Re-usable encapsulated expression.
///
/// Functions are nothing more than expressions that can be re-used with
/// dynamic values at runtime.
/// See also [`Lparam`][IdentKind::Lparam].
Func(Dim, DataType),
/// Generating calculation.
///
/// Generators are associated with iterative expressions,
/// such as sums and products.
/// They always have a parent [`Rate`][IdentKind::Rate].
Gen(Dim, DataType),
/// Local (non-global) parameter.
///
/// Local parameters are lexically scoped to their parent expression:
/// - [`Func`][IdentKind::Func], where there exists one per defined
/// function parameter; and
/// - `let` expression bindings.
///
/// This is not to be confused with the global
/// [`Param`][IdentKind::Param].
Lparam(Dim, DataType),
/// Global parameter.
///
/// These parameters serve as inputs to the system.
/// Input values are bound using [`Map`][IdentKind::Map].
Param(Dim, DataType),
/// Scalar result of a named calculation.
///
/// The verb "rate" is historical,
/// since TAME was developed for insurance rating systems.
/// This represents a named expression that yields a scalar value.
///
/// This serves as a parent to [`Gen`][IdentKind::Gen].
Rate(DataType),
/// Template definition.
///
/// A template is used only at expansion-time and,
/// unlike most other things in the system,
/// has no runtime value.
Tpl,
/// User-defined data type.
///
/// The only types typically defined are enums and unions of enums.
/// The type itself has no runtime value,
/// but each of the enum variants has an associated value of type
/// [`DataType`].
Type(DataType),
/// Input map head (meta symbol generated by compiler for each input
/// map).
MapHead,
/// Input field→param mapping.
///
/// These may only map to [`Param`][IdentKind::Param].
/// The source data is arbitrary and provided at runtime.
Map,
/// Input map tail (meta symbol generated by compiler for each input
/// map).
MapTail,
/// Return map head (meta symbol generated by compiler for each return
/// map).
RetMapHead,
/// Return param→field mapping.
///
/// Return mappings export data to calling systems.
/// They can map back any globally defined numeric expression.
RetMap,
/// Return map tail (meta symbol generated by compiler for each return
/// map).
RetMapTail,
/// Arbitrary metadata.
///
/// This permits the definition of static key/value data that is
/// compiled into the final executable.
Meta,
/// Rating worksheet (generated by compiler for worksheet packages).
///
/// The worksheet exposes intermediate calculation values in a much more
/// concise form than that of the Summary Page.
Worksheet,
}
impl<'i> TryFrom<SymAttrs<'i>> for IdentKind {
type Error = &'static str;
/// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
///
/// Certain [`IdentKind`] require that certain attributes be present,
/// otherwise the conversion will fail.
fn try_from(attrs: SymAttrs<'i>) -> Result<Self, Self::Error> {
Self::try_from(&attrs)
}
}
impl<'i> TryFrom<&SymAttrs<'i>> for IdentKind {
type Error = &'static str;
/// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
///
/// Certain [`IdentKind`] require that certain attributes be present,
/// otherwise the conversion will fail.
fn try_from(attrs: &SymAttrs<'i>) -> Result<Self, Self::Error> {
let ty = attrs.ty.as_ref().ok_or("missing symbol type")?;
macro_rules! ident {
($to:expr) => {
Ok($to)
};
($to:expr, dim) => {
Ok($to(Dim(attrs.dim.ok_or("missing dim")?)))
};
($to:expr, dtype) => {
Ok($to(attrs.dtype.ok_or("missing dtype")?))
};
($to:expr, dim, dtype) => {
Ok($to(
Dim(attrs.dim.ok_or("missing dim")?),
attrs.dtype.ok_or("missing dtype")?,
))
};
}
match ty {
SymType::Cgen => ident!(Self::Cgen, dim),
SymType::Class => ident!(Self::Class, dim),
SymType::Const => ident!(Self::Const, dim, dtype),
SymType::Func => ident!(Self::Func, dim, dtype),
SymType::Gen => ident!(Self::Gen, dim, dtype),
SymType::Lparam => ident!(IdentKind::Lparam, dim, dtype),
SymType::Param => ident!(IdentKind::Param, dim, dtype),
SymType::Rate => ident!(IdentKind::Rate, dtype),
SymType::Tpl => ident!(IdentKind::Tpl),
SymType::Type => ident!(IdentKind::Type, dtype),
SymType::MapHead => ident!(IdentKind::MapHead),
SymType::Map => ident!(IdentKind::Map),
SymType::MapTail => ident!(IdentKind::MapTail),
SymType::RetMapHead => ident!(IdentKind::RetMapHead),
SymType::RetMap => ident!(IdentKind::RetMap),
SymType::RetMapTail => ident!(IdentKind::RetMapTail),
SymType::Meta => ident!(IdentKind::Meta),
SymType::Worksheet => ident!(IdentKind::Worksheet),
}
}
}
/// Identifier dimensions.
///
/// This determines the number of subscripts needed to access a scalar
///   value.
/// A value of `0` indicates a scalar;
///   a value of `1` indicates a vector;
///   a value of `2` indicates a matrix;
///   and a value of `n` indicates a multi-dimensional array of
///     depth `n`.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub struct Dim(u8);

/// Render the dimension as its decimal digit without allocating.
///
/// TODO: This will always be 0≤n≤9, so let's introduce a newtype for it.
///
/// Panics
/// ======
/// Will panic if the dimension is greater than `9`.
/// Nothing in this type restricts the wrapped `u8` to that range,
///   so whatever constructs a `Dim` is responsible for upholding it.
impl AsRef<str> for Dim {
    fn as_ref(&self) -> &str {
        match self.0 {
            0 => "0",
            1 => "1",
            2 => "2",
            3 => "3",
            4 => "4",
            5 => "5",
            6 => "6",
            7 => "7",
            8 => "8",
            9 => "9",
            // Name the offending value so that a violated invariant can
            // be diagnosed from the panic message alone.
            n => unreachable!("dimension out of range (expected 0..=9): {}", n),
        }
    }
}
/// Underlying datatype of identifier.
///
/// This is currently an alias for the legacy IR's `SymDtype`.
pub type DataType = SymDtype;
#[cfg(test)]
mod test {
use super::*;
use std::convert::TryInto;
#[test]
fn dim_to_str() {
// we'll just test high and low
let low: &str = Dim(0).as_ref();
let high: &str = Dim(9).as_ref();
assert_eq!("0", low);
assert_eq!("9", high);
}
// Generate one `#[test]` per symbol type, asserting that `SymAttrs` with
// type `$src` converts into the identifier kind `$dest`.
//
// The optional trailing `dim` and/or `dtype` tokens select which
// attributes the kind requires; for each one, the generated test also
// asserts that the conversion fails when that attribute is omitted.
macro_rules! test_kind {
// Kinds that carry no data.
($name:ident, $src:expr => $dest:expr) => {
#[test]
fn $name() {
assert_eq!(
Ok($dest),
SymAttrs {
ty: Some($src),
..Default::default()
}
.try_into()
);
}
};
// Kinds that carry only a dimension.
($name:ident, $src:expr => $dest:expr, dim) => {
#[test]
fn $name() {
let dim = 1;
assert_eq!(
Ok($dest(Dim(dim))),
SymAttrs {
ty: Some($src),
dim: Some(dim),
..Default::default()
}
.try_into()
);
// no dim
IdentKind::try_from(SymAttrs {
ty: Some($src),
..Default::default()
})
.expect_err("must fail when missing dim");
}
};
// Kinds that carry only a data type.
($name:ident, $src:expr => $dest:expr, dtype) => {
#[test]
fn $name() {
let dtype = SymDtype::Float;
assert_eq!(
Ok($dest(dtype)),
SymAttrs {
ty: Some($src),
dtype: Some(dtype),
..Default::default()
}
.try_into()
);
// no dtype
IdentKind::try_from(SymAttrs {
ty: Some($src),
..Default::default()
})
.expect_err("must fail when missing dtype");
}
};
// Kinds that carry both a dimension and a data type; each attribute is
// omitted in turn to check that either omission is an error.
($name:ident, $src:expr => $dest:expr, dim, dtype) => {
#[test]
fn $name() {
let dim = 1;
let dtype = SymDtype::Float;
assert_eq!(
Ok($dest(Dim(dim), dtype)),
SymAttrs {
ty: Some($src),
dim: Some(dim),
dtype: Some(dtype),
..Default::default()
}
.try_into()
);
// no dim
IdentKind::try_from(SymAttrs {
ty: Some($src),
dtype: Some(dtype),
..Default::default()
})
.expect_err("must fail when missing dim");
// no dtype
IdentKind::try_from(SymAttrs {
ty: Some($src),
dim: Some(dim),
..Default::default()
})
.expect_err("must fail when missing dtype");
}
};
}
// One generated test per symbol type.  `const` and `type` are keywords,
// so those test names are written as raw identifiers.
test_kind!(cgen, SymType::Cgen => IdentKind::Cgen, dim);
test_kind!(class, SymType::Class => IdentKind::Class, dim);
test_kind!(r#const, SymType::Const => IdentKind::Const, dim, dtype);
test_kind!(func, SymType::Func => IdentKind::Func, dim, dtype);
test_kind!(gen, SymType::Gen => IdentKind::Gen, dim, dtype);
test_kind!(lparam, SymType::Lparam => IdentKind::Lparam, dim, dtype);
test_kind!(param, SymType::Param => IdentKind::Param, dim, dtype);
test_kind!(rate, SymType::Rate => IdentKind::Rate, dtype);
test_kind!(tpl, SymType::Tpl => IdentKind::Tpl);
test_kind!(r#type, SymType::Type => IdentKind::Type, dtype);
test_kind!(maphead, SymType::MapHead => IdentKind::MapHead);
test_kind!(map, SymType::Map => IdentKind::Map);
test_kind!(maptail, SymType::MapTail => IdentKind::MapTail);
test_kind!(retmaphead, SymType::RetMapHead => IdentKind::RetMapHead);
test_kind!(retmap, SymType::RetMap => IdentKind::RetMap);
test_kind!(retmaptail, SymType::RetMapTail => IdentKind::RetMapTail);
test_kind!(meta, SymType::Meta => IdentKind::Meta);
test_kind!(worksheet, SymType::Worksheet => IdentKind::Worksheet);
}

View File

@ -0,0 +1,192 @@
// Abstract semantic graph (ASG) intermediate representation (IR)
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Abstract semantic graph.
//!
//! The [abstract semantic graph][asg] (ASG) is an IR representing the
//! relationship between objects using a directed [graph][].
//! An _object_ is an identifier or expression.
//!
//! Since TAME is a declarative language,
//! the ASG does not represent control flow;
//! instead, it represents the relationship between objects and their
//! dependencies.
//! Control flow is determined solely by the [linker][crate::ld] based on
//! these dependencies.
//!
//! See [`crate::global`] for available index sizes depending on context.
//! For example,
//! a linker may choose to use [`crate::global::ProgIdentSize`].
//!
//!
//! Graph Structure
//! ===============
//! Each node (vertex) in the graph represents an [object][Object],
//! such as an identifier or an expression.
//! Each directed edge `(A->B)` represents that `A` depends upon `B`.
//!
//! Graphs may contain cycles for recursive functions—that is,
//! TAME's ASG is _not_ a DAG.
//! Mutually recursive functions are therefore represented as
//! [strongly connected components][scc].
//!
//! [asg]: https://en.wikipedia.org/wiki/Abstract_semantic_graph
//! [graph]: https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)
//! [scc]: https://en.wikipedia.org/wiki/Strongly_connected_component
//!
//! Each object may have a number of valid states;
//! see [`Object`] for valid object states and transitions.
//!
//!
//! How To Use
//! ==========
//! A suitable concrete [`Asg`] implementation is provided by
//! [`DefaultAsg`].
//!
//! ```
//! use tamer::global;
//! use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, Source};
//! use tamer::sym::{Interner, DefaultInterner};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Be sure to choose size and initial capacities appropriate for your
//! // situation.
//! let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
//!
//! let interner = DefaultInterner::new();
//! let identa_sym = interner.intern("identa");
//! let identb_sym = interner.intern("identb");
//!
//! let identa = asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
//! let identb = asg.declare_extern(identb_sym, IdentKind::Meta)?;
//!
//! assert_eq!(
//! Some(&Object::Extern(identb_sym, IdentKind::Meta)),
//! asg.get(identb),
//! );
//!
//! // Dependencies can be declared even if an identifier is
//! // unresolved. This declares `(identa)->(identb)`.
//! asg.add_dep(identa, identb);
//! assert!(asg.has_dep(identa, identb));
//!
//! // TODO: extern resolution
//!
//! // Identifiers are indexed by symbol name.
//! assert_eq!(Some(identa), asg.lookup(identa_sym));
//! #
//! # Ok(()) // main
//! # }
//! ```
//!
//! Missing Identifiers
//! -------------------
//! Since identifiers in TAME can be defined in any order relative to their
//! dependencies within a source file,
//! it is often the case that a dependency will have to be added to the
//! graph before it is resolved.
//! For example,
//! [`Asg::add_dep_lookup`] will add an [`Object::Missing`] to the graph
//! if either identifier has not yet been declared.
//!
//! ```
//! # use tamer::global;
//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
//! # use tamer::sym::{Interner, DefaultInterner};
//! #
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
//! # let interner = DefaultInterner::new();
//! #
//! let identa_sym = interner.intern("identa");
//! let identb_sym = interner.intern("identb");
//! let (identa, identb) = asg.add_dep_lookup(identa_sym, identb_sym);
//!
//! assert_eq!(Some(&Object::Missing(identa_sym)), asg.get(identa));
//! assert_eq!(Some(&Object::Missing(identb_sym)), asg.get(identb));
//!
//! // The identifiers returned above are proper objects on the graph.
//! assert_eq!(Some(identa), asg.lookup(identa_sym));
//! assert_eq!(Some(identb), asg.lookup(identb_sym));
//!
//! // Once declared, the missing identifier changes state and dependencies
//! // are retained.
//! asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
//!
//! assert_eq!(
//! Some(&Object::Ident(identa_sym, IdentKind::Meta, Source::default())),
//! asg.get(identa),
//! );
//!
//! assert!(asg.has_dep(identa, identb));
//! #
//! # Ok(()) // main
//! # }
//! ```
//!
//! Fragments
//! ---------
//! A compiled fragment can be attached to any resolved identifier (see
//! [`Object::Ident`]) using [`Asg::set_fragment`].
//! Doing so changes the state of the identifier to [`Object::IdentFragment`],
//! and it is an error to attempt to overwrite that fragment once it is
//! set.
//!
//! ```
//! # use tamer::global;
//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
//! # use tamer::sym::{Interner, DefaultInterner};
//! #
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
//! # let interner = DefaultInterner::new();
//! #
//! // Fragments can be attached to resolved identifiers.
//! let ident = asg.declare(
//! interner.intern("ident"), IdentKind::Meta, Source::default()
//! )?;
//! asg.set_fragment(ident, FragmentText::from("test fragment"))?;
//!
//! assert_eq!(
//! Some(&Object::IdentFragment(
//! interner.intern("ident"),
//! IdentKind::Meta,
//! Source::default(),
//! FragmentText::from("test fragment"),
//! )),
//! asg.get(ident),
//! );
//!
//! // But overwriting will fail
//! let bad = asg.set_fragment(ident, FragmentText::from("overwrite"));
//! assert!(bad.is_err());
//! #
//! # Ok(()) // main
//! # }
//! ```
mod base;
mod graph;
mod ident;
mod object;
pub use graph::{Asg, AsgResult, ObjectRef};
pub use ident::{Dim, IdentKind};
pub use object::{FragmentText, Object, Source};
/// Default concrete ASG implementation.
///
/// This is an alias for `base::BaseAsg`,
/// forwarding both the lifetime `'i` and the index type `Ix` unchanged.
pub type DefaultAsg<'i, Ix> = base::BaseAsg<'i, Ix>;

View File

@ -0,0 +1,230 @@
// Objects represented on ASG
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Objects represented by the ASG.
//!
//! _This is a private module.
//! See [`super`] for available exports._
use super::ident::IdentKind;
use crate::ir::legacyir::SymAttrs;
use crate::sym::Symbol;
/// Type of object.
///
/// These types represent object states:
///
/// ```text
///     ,-> (Missing) -------.
///    /         \            \
///   /           v            v
/// ((Empty)) -> (Extern) -> ((Ident)) -> ((IdentFragment)).
///     \                        ^               /
///      \                      / \             /
///       `--------------------`   `-----------'
/// ```
///
/// The [`Empty`][Object::Empty] state is never directly accessible
/// through [`Asg`][super::Asg]'s public API,
/// as it represents the _absence_ of an object at that node within the
/// ASG.
#[derive(Debug, PartialEq)]
pub enum Object<'i> {
/// An identifier is expected to be defined but is not yet available.
///
/// This variant contains the symbol representing the name of the
/// expected identifier.
/// By defining an object as missing,
/// this allows the graph to be built incrementally as objects are
/// discovered.
///
/// Note that this is different than [`Empty`][Object::Empty].
Missing(&'i Symbol<'i>),
/// A resolved identifier.
///
/// This represents an identifier that has been declared with certain
/// type information.
/// The fields are the identifier's name, its kind, and its source
/// metadata.
Ident(&'i Symbol<'i>, IdentKind, Source<'i>),
/// An identifier that has not yet been resolved.
///
/// Externs are upgraded to [`Object::Ident`] once an identifier of
/// the same name is loaded.
/// It is an error if the loaded identifier does not have a compatible
/// [`IdentKind`].
Extern(&'i Symbol<'i>, IdentKind),
/// Identifier with associated text.
///
/// Code fragments are portions of the target language associated with
/// an identifier.
/// They are produced by the compiler and it is the job of the
/// [linker][crate::ld] to put them into the correct order for the
/// final executable.
IdentFragment(&'i Symbol<'i>, IdentKind, Source<'i>, FragmentText),
/// The empty node (default value for indexer).
///
/// This is not a valid state accessible via [`Asg`][super::Asg].
///
/// Note that this is different than [`Missing`][Object::Missing].
Empty,
}
/// Compiled fragment for identifier.
///
/// This represents the text associated with an identifier.
/// It is currently a plain owned [`String`].
pub type FragmentText = String;
/// Metadata about the source of an object.
///
/// This contains information from the symbol table that does not belong on
/// [`IdentKind`],
/// since that stores _type_ information.
///
/// The derived [`Default`] leaves every optional field as `None` and every
/// flag as `false`.
///
/// TODO: This does not currently store byte offsets within the source file
/// since the original XSLT-based compiler did not have that capability;
/// this will provide that information in the future.
#[derive(Debug, Default, PartialEq, Clone)]
pub struct Source<'i> {
/// Name of package containing reference to this object.
pub pkg_name: Option<&'i Symbol<'i>>,
/// Relative path to the source of this object,
/// if not present in the current package.
pub src: Option<&'i Symbol<'i>>,
/// The identifier from which this one is derived.
///
/// See [`IdentKind`] for more information on parents.
/// For example,
/// a [`IdentKind::Cgen`] always has a parent [`IdentKind::Class`].
pub parent: Option<&'i Symbol<'i>>,
/// Child identifier associated with this identifier.
///
/// For [`IdentKind::Class`],
/// this represents an associated [`IdentKind::Cgen`].
pub yields: Option<&'i Symbol<'i>>,
/// User-friendly identifier description.
///
/// This is used primarily by [`IdentKind::Class`] and
/// [`IdentKind::Gen`].
pub desc: Option<String>,
/// Whether this identifier was generated by the compiler.
///
/// A generated identifier is representative of an internal
/// implementation detail that should remain encapsulated from the
/// user and is subject to change over time.
///
/// Identifiers created by templates are not considered to be generated.
pub generated: bool,
/// Related identifiers.
///
/// These data represent a kluge created to add additional symbol
/// information in two different contexts:
///
/// - [`IdentKind::Map`] includes the name of the source field; and
/// - [`IdentKind::Func`] lists params in order (so that the compiler
/// knows application order).
///
/// TODO: We have `parent`, `yields`, and `from`.
/// We should begin to consolidate.
pub from: Option<Vec<&'i Symbol<'i>>>,
/// Whether identifier is virtual (can be overridden).
///
/// This feature adds complexity and will ideally be removed in the
/// future.
///
/// See also [`override`][Source::override_].
pub virtual_: bool,
/// Whether identifier overrides a virtual identifier.
///
/// This feature adds complexity and will ideally be removed in the
/// future.
///
/// See also [`virtual_`][Source::virtual_].
pub override_: bool,
}
impl<'i> From<SymAttrs<'i>> for Source<'i> {
/// Raise Legacy IR [`SymAttrs`].
///
/// This simply extracts a subset of fields from the source attributes.
fn from(attrs: SymAttrs<'i>) -> Self {
Source {
pkg_name: attrs.pkg_name,
src: attrs.src,
generated: attrs.generated,
parent: attrs.parent,
yields: attrs.yields,
desc: attrs.desc,
from: attrs.from,
virtual_: attrs.virtual_,
override_: attrs.override_,
}
}
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::sym::SymbolIndex;

    /// Every attribute that `Source` has a field for must survive the
    /// conversion from `SymAttrs` unchanged.
    #[test]
    fn source_from_sym_attrs() {
        let pkg_sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
        let src_sym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
        let parent_sym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
        let yields_sym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
        let from_sym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");

        let attrs = SymAttrs {
            pkg_name: Some(&pkg_sym),
            src: Some(&src_sym),
            generated: true,
            parent: Some(&parent_sym),
            yields: Some(&yields_sym),
            desc: Some("sym desc".to_string()),
            from: Some(vec![&from_sym]),
            virtual_: true,
            override_: true,
            ..Default::default()
        };

        // Build the expectation before `attrs` is consumed below.
        let expected = Source {
            pkg_name: Some(&pkg_sym),
            src: Some(&src_sym),
            generated: attrs.generated,
            parent: attrs.parent,
            yields: attrs.yields,
            desc: Some("sym desc".to_string()),
            from: Some(vec![&from_sym]),
            virtual_: true,
            override_: true,
        };

        assert_eq!(expected, Source::from(attrs));
    }
}

View File

@ -0,0 +1,347 @@
// Legacy IR
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Legacy IR faithful to the XSLT-based compiler.
//!
//! This represents the intermediate format (IR) used by the `xmlo` files
//! (see [`crate::obj::xmlo`]) originally produced by the XSLT-based
//! compiler.
//! It consists largely of metadata for object symbols.
//!
//! This IR should be converted into a higher-level IR quickly,
//! especially considering that it will be going away in the future.
use crate::sym::Symbol;
use std::convert::TryFrom;
use std::result::Result;
/// Toplevel package attributes.
///
/// The derived [`Default`] leaves every optional attribute as `None` and
/// [`program`][PackageAttrs::program] as `false`.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct PackageAttrs<'i> {
/// Unique package identifier.
///
/// The package name is derived from the filename relative to the
/// project root during compilation
/// (see [`relroot`][PackageAttrs::relroot]).
pub name: Option<&'i Symbol<'i>>,
/// Relative path from package to project root.
pub relroot: Option<String>,
/// Whether this package is a program.
///
/// A _program_ is a package intended to be linked into a final
/// executable.
/// Programs cannot be imported by other packages.
/// Non-program packages cannot be linked.
pub program: bool,
/// Symbol representing package eligibility.
///
/// A package is _eligible_ for computation if certain invariants are
/// met.
/// This symbol is responsible for including each of those invariants as
/// dependencies so that they are included at link-time.
pub elig: Option<&'i Symbol<'i>>,
}
/// Symbol attributes.
///
/// This is a subset of the attributes available on `preproc:sym` nodes;
/// more will be added as needed.
///
/// Not all symbols share the same set of attributes,
/// so this represents the union of all possible attribute sets.
///
/// Due to the number of possible attributes,
/// this is not an opaque type.
/// Consequently,
/// valid values should be enforced by Rust's type system.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct SymAttrs<'i> {
/// Relative path to the package that defined this symbol.
///
/// Object files store relative paths so that they are somewhat
/// portable—the
/// entire project root should be able to be relocated.
pub src: Option<&'i Symbol<'i>>,
/// Symbol type.
///
/// The type describes the purpose of the symbol and determines both how
/// it is compiled and its location in the final executable.
pub ty: Option<SymType>,
/// Number of dimensions.
///
/// This determines the number of subscripts needed to access a scalar
/// value.
/// A value of `0` indicates a scalar;
/// a value of `1` indicates a vector;
/// a value of `2` indicates a matrix;
/// and a value of `n` indicates a multi-dimensional array of
/// depth `n`.
pub dim: Option<u8>,
/// Type of underlying data.
///
/// This is not a primitive,
/// and mostly represents whether or not floating point computations
/// will take place.
pub dtype: Option<SymDtype>,
/// Whether the symbol's location will be determined at link-time.
///
/// Externs allow symbols to be referenced without having yet been given
/// a concrete definition,
/// provided that an eventual concrete definition matches the
/// provided declaration.
/// The linker (see [`crate::ld`]) is responsible for ensuring that the
/// extern is satisfied and properly located in the final executable.
pub extern_: bool,
/// Unique package identifier.
///
/// The name of a package is automatically derived from the package path
/// relative to the project root.
/// _Note that this is problematic if one wants to compile the equivalent
/// of shared libraries._
pub pkg_name: Option<&'i Symbol<'i>>,
/// The identifier from which this one is derived.
///
/// For example,
/// [`SymType::Cgen`] has a parent [`SymType::Class`] and
/// [`SymType::Gen`] has a parent [`SymType::Rate`].
pub parent: Option<&'i Symbol<'i>>,
/// Whether this identifier was generated by the compiler.
///
/// A generated identifier is representative of an internal
/// implementation detail that should remain encapsulated from the
/// user and is subject to change over time.
///
/// Identifiers created by templates are not considered to be generated.
pub generated: bool,
/// Child identifier associated with this identifier.
///
/// For [`SymType::Class`],
/// this represents an associated [`SymType::Cgen`].
pub yields: Option<&'i Symbol<'i>>,
/// User-friendly identifier description.
///
/// This is used primarily by [`SymType::Class`] and [`SymType::Gen`].
pub desc: Option<String>,
/// Related identifiers.
///
/// These data represent a kluge created to add additional symbol
/// information in two different contexts:
///
/// - [`SymType::Map`] includes the name of the source field; and
/// - [`SymType::Func`] lists params in order (so that the compiler
/// knows application order).
pub from: Option<Vec<&'i Symbol<'i>>>,
/// Whether symbol can be overridden.
///
/// See also [`override`][SymAttrs::override_].
pub virtual_: bool,
/// Whether symbol is an override of a virtual symbol.
///
/// See also [`virtual`][SymAttrs::virtual_].
pub override_: bool,
}
/// Legacy symbol types.
///
/// This enum represents all symbol types represented in the `xmlo` files.
/// They are overly specialized and will be deprecated in favor of more
/// generalized dependent types in later IRs.
#[derive(Debug, PartialEq, Eq)]
pub enum SymType {
    /// Classification generator (from `lv:classify/@yields`).
    Cgen,
    /// Classification (from `lv:classify/@as`).
    Class,
    /// Constant (from `lv:const/@name`).
    Const,
    /// Function (from `lv:function/@name`).
    Func,
    /// Generator (from `lv:rate/@generates`).
    Gen,
    /// Local function parameter (from `lv:function/lv:param/@name`) or let
    /// binding (from `lv:let/lv:values/lv:value/@name`).
    Lparam,
    /// Global parameter (from `lv:param/@name`).
    Param,
    /// Scalar calculation result (from `lv:rate/@yields`).
    Rate,
    /// Template (from `lv:template/@name`).
    Tpl,
    /// Typedef (from `lv:type/@name`).
    Type,
    /// Input map head (meta symbol generated by compiler for each input map).
    MapHead,
    /// Input field→param mapping (from `lvm:map`, `lvm:pass`).
    Map,
    /// Input map tail (meta symbol generated by compiler for each input map).
    MapTail,
    /// Return map head (meta symbol generated by compiler for each return map).
    RetMapHead,
    /// Return param→field mapping (from `lvm:map`, `lvm:pass`).
    RetMap,
    /// Return map tail (meta symbol generated by compiler for each return map).
    RetMapTail,
    /// Arbitrary metadata (from `lv:meta`).
    Meta,
    /// Rating worksheet (generated by compiler for worksheet packages).
    Worksheet,
}

impl TryFrom<&[u8]> for SymType {
    type Error = String;

    /// Determine symbol type from source `preproc:sym/@type`.
    ///
    /// This raises source `xmlo` data into this IR.
    /// See [`crate::obj::xmlo::reader`].
    ///
    /// # Errors
    /// Returns a message naming the offending value if `value` is not a
    ///   recognized symbol type.
    /// If the value is not valid UTF-8,
    ///   the placeholder `(invalid UTF8)` is reported in its place.
    fn try_from(value: &[u8]) -> Result<SymType, Self::Error> {
        match value {
            b"cgen" => Ok(SymType::Cgen),
            b"class" => Ok(SymType::Class),
            b"const" => Ok(SymType::Const),
            b"func" => Ok(SymType::Func),
            b"gen" => Ok(SymType::Gen),
            b"lparam" => Ok(SymType::Lparam),
            b"param" => Ok(SymType::Param),
            b"rate" => Ok(SymType::Rate),
            b"tpl" => Ok(SymType::Tpl),
            b"type" => Ok(SymType::Type),
            b"retmap:head" => Ok(SymType::RetMapHead),
            b"retmap" => Ok(SymType::RetMap),
            b"retmap:tail" => Ok(SymType::RetMapTail),
            b"map:head" => Ok(SymType::MapHead),
            b"map" => Ok(SymType::Map),
            b"map:tail" => Ok(SymType::MapTail),
            b"meta" => Ok(SymType::Meta),
            b"worksheet" => Ok(SymType::Worksheet),
            // Borrow the input for the message rather than copying it into
            // an owned `String` first; only `format!` allocates.
            _ => Err(format!(
                "unknown symbol type `{}`",
                std::str::from_utf8(value).unwrap_or("(invalid UTF8)")
            )),
        }
    }
}
/// Underlying datatype.
///
/// This is the type of scalar data stored within the given symbol.
///
/// *NB:* This was _not enforced_ by the XSLT-based compiler.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum SymDtype {
    /// {⊥,⊤} = {0,1} ⊂ ℤ
    Boolean,
    /// ℤ
    Integer,
    /// ℝ
    Float,
    /// ∅
    Empty,
}

impl AsRef<str> for SymDtype {
    /// Produce `xmlo`-compatible representation.
    ///
    /// The returned names are the same ones accepted by the
    ///   `TryFrom<&[u8]>` implementation.
    fn as_ref(&self) -> &str {
        match self {
            SymDtype::Boolean => "boolean",
            SymDtype::Integer => "integer",
            SymDtype::Float => "float",
            SymDtype::Empty => "empty",
        }
    }
}

impl TryFrom<&[u8]> for SymDtype {
    type Error = String;

    /// Determine data type from source `preproc:sym/@dtype`.
    ///
    /// This raises source `xmlo` data into this IR.
    /// See [`crate::obj::xmlo::reader`].
    ///
    /// # Errors
    /// Returns a message naming the offending value if `value` is not a
    ///   recognized data type.
    /// If the value is not valid UTF-8,
    ///   the placeholder `(invalid UTF8)` is reported in its place.
    fn try_from(value: &[u8]) -> Result<SymDtype, Self::Error> {
        match value {
            b"boolean" => Ok(SymDtype::Boolean),
            b"integer" => Ok(SymDtype::Integer),
            b"float" => Ok(SymDtype::Float),
            b"empty" => Ok(SymDtype::Empty),
            // Borrow the input for the message rather than copying it into
            // an owned `String` first; only `format!` allocates.
            _ => Err(format!(
                "unknown symbol dtype `{}`",
                std::str::from_utf8(value).unwrap_or("(invalid UTF8)")
            )),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // The full name→variant tables are intentionally not re-checked here,
    // since that would mean maintaining the mapping in two places;
    // exhaustive coverage is left to integration tests.

    #[test]
    fn symtype_from_u8() {
        let parsed = SymType::try_from(b"cgen" as &[u8]);

        assert_eq!(Ok(SymType::Cgen), parsed);
    }

    #[test]
    fn symtype_failure_from_unknown_u8() {
        let outcome = SymType::try_from(b"unknown" as &[u8]);

        // The message must mention the value that could not be parsed.
        if let Err(ref msg) = outcome {
            assert!(msg.contains("unknown"));
        } else {
            panic!("expected error: {:?}", outcome);
        }
    }

    #[test]
    fn symdtype_from_u8() {
        let parsed = SymDtype::try_from(b"integer" as &[u8]);

        assert_eq!(Ok(SymDtype::Integer), parsed);
    }

    #[test]
    fn symdtype_failure_from_unknown_u8() {
        let outcome = SymDtype::try_from(b"unknownd" as &[u8]);

        // The message must mention the value that could not be parsed.
        if let Err(ref msg) = outcome {
            assert!(msg.contains("unknownd"));
        } else {
            panic!("expected error: {:?}", outcome);
        }
    }

    #[test]
    fn symdtype_as_str() {
        let rendered: &str = SymDtype::Boolean.as_ref();

        assert_eq!("boolean", rendered);
    }
}

View File

@ -0,0 +1,62 @@
// Intermediate representations (IRs)
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Intermediate representations for TAME programs.
//!
//! [Intermediate representations][ir] (IRs) are data structures used to
//! represent source data in a manner most suitable for a particular phase
//! of compilation.
//! A single IR may be used by multiple compilation phases,
//! or by multiple systems (e.g. various compilers or [linkers][]).
//!
//! [ir]: https://en.wikipedia.org/wiki/Intermediate_representation
//! [linkers]: crate::ld
//!
//!
//! Implicit AST
//! ============
//! Each input language begins as an [abstract syntax tree][ast] (AST),
//! produced by the parser.
//! For TAME languages that are XML-based,
//! the production of the AST is handled by [`quick_xml`],
//! and is effectively the same as the source XML.
//! There is no explicit data structure to represent the AST of XML
//! sources.
//!
//! [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
//!
//!
//! Summary of IRs
//! ==============
//! There are currently two IRs:
//!
//! 1. **[Legacy IR](legacyir)** corresponds very closely to the structure
//! of [`xmlo` object files](super::obj::xmlo).
//! It contains a lot of cruft and will be replaced in the future with
//! a more suitable IR.
//! This stores very limited context for the information it provides,
//! so it must quickly translate it to a higher-level IR for further
//! processing before context is lost.
//! 2. The **[Abstract Semantic Graph (ASG)](asg)** is created from
//! lower-level IRs.
//! It stores relationships between identifiers and expressions within
//! a graph data structure,
//! and is capable of representing entire programs composed of many
//! different packages.
pub mod asg;
pub mod legacyir;

View File

@ -15,4 +15,96 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Combine [object files](crate::obj) into a final executable.
//!
//! Its user-facing binary is [`tameld`](../../tameld).
//!
//!
//! Background Information
//! ======================
//! A [linker][] is responsible for combining individually compiled
//! [object files](crate::obj) containing relocatable code into a final
//! executable.
//! This involves putting the compiled code fragments into the right order
//! and into the right place within the executable.
//!
//! [linker]: https://en.wikipedia.org/wiki/Linker_(computing)
//!
//! _See below for more information on why this linker currently produces
//! another intermediate format (`xmle`) rather than a final executable._
//!
//! The type of relocatable code depends on the _target_.
//! Currently, the only target is JavaScript.
//!
//!
//! Backwards-Compatibility With XSLT System
//! -------------------------------------
//! This linker is part of the TAMER (TAME in Rust) project,
//! which aims to incrementally rewrite TAME in Rust.
//! Consequently, it must be able to serve as a drop-in replacement for the
//! existing (XSLT) linker,
//! which takes as input `xmlo` files and produces as output an `xmle`
//! file.
//! *This is not efficient*,
//! and future versions will begin to migrate away from this strategy.
//!
//! The output `xmle` file can then be fed to a `standalone` command which
//! extracts the JavaScript fragment and places it into its own file.
//! Even when that is replaced
//! (when this just outputs a final JS file directly),
//! the `xmle` file is still needed for other purposes,
//! such as `summary` and `dote` generation.
//! Those too will eventually be linker targets.
//!
//!
//! Linking Process
//! ===============
//! The linker works in the following steps:
//!
//! 1. [Object files](crate::obj) are recursively read.
//! They are used in a streaming manner for the next step of the
//! process;
//! they do not persist in memory.
//! Only the required portions of the file are loaded.
//! See the [Legacy IR](crate::ir::legacyir) for more information.
//!
//! 2. This information is used to populate the [ASG].
//! Information is added to the graph as it is discovered during object
//! file loading,
//! so the graph initially contains edges to missing identifiers.
//! Expressions are _not_ added to the graph,
//! as they are not needed for linking.
//! Once all data are loaded,
//! the ASG contains relocatable code fragments for each identifier.
//!
//! 3. The ASG is [sorted topologically][topo-sort] so that dependencies
//! will be written to the executable file before identifiers that
//! depend on them.
//! Roots for the sort are specified by the return map.
//! _Identifiers that are not accessible from one of those roots will be
//! omitted from the executable output._
//!
//! 4. Relocatable code fragments are output into various sections in the
//! executable file.
//! This output file is currently `xmle`.
//! (**TODO**: Link to new `xmle` crate.)
//!
//! [ASG]: crate::ir::asg
//! [topo-sort]: https://en.wikipedia.org/wiki/Topological_sorting
//!
//! Steps 1 and 2 are performed at the same time:
//! object files are used to immediately populate the [ASG][].
//! Since the ASG contains only partial information,
//! it must perform other validations (such as extern resolution) during
//! this process;
//! see [crate::ir::asg] for more information.
//!
//! Because the topological sort only considers explicitly defined roots,
//! identifiers are only included in the final executable if they are
//! either a root or are a dependency of a root.
//! This makes it possible to create large reusable packages without
//! incurring a runtime cost for unused objects,
//! which is especially important since templates may expand into many
//! identifiers.
pub mod poc;

View File

@ -18,216 +18,44 @@
//! **This is a poorly-written proof of concept; do not use!** It has been
//! banished to its own file to try to make that more clear.
use fixedbitset::FixedBitSet;
use petgraph::graph::{DiGraph, EdgeIndex, Neighbors, NodeIndex};
use petgraph::visit::{DfsPostOrder, GraphBase, IntoNeighbors, Visitable};
use quick_xml::events::Event;
use quick_xml::Reader;
use std::collections::hash_map::{Entry, Iter};
use std::collections::{HashMap, HashSet};
use crate::global;
use crate::ir::asg::IdentKind;
use crate::ir::asg::{Asg, DefaultAsg, Object, ObjectRef, Source};
use crate::obj::xmle::writer::{Sections, XmleWriter};
use crate::obj::xmlo::reader::{XmloError, XmloEvent, XmloReader};
use crate::sym::{DefaultInterner, Interner, Symbol};
use fxhash::{FxHashMap, FxHashSet};
use petgraph::visit::DfsPostOrder;
use std::convert::TryInto;
use std::error::Error;
use std::fs;
use std::io::BufRead;
use std::ops::{Deref, Index};
use std::rc::Rc;
use std::io::BufReader;
use std::io::Cursor;
// The term "sym" is used throughout because it's easier to search for that
// in source code than "symbol", which is a generic term with many different
// meanings.
// if mutability is needed:
//#[derive(Debug)]
//struct SymRecord {
// data: SymData,
//
// // the idea is to keep the index encapsulated so that nothing else can
// // ever hold a reference to it, ensuring that it's freed when the node
// // is removed
// index: Rc<RefCell<Option<NodeIndex>>>,
//}
/// Data describing a symbol; at present this is only its name.
#[derive(Debug)]
struct SymData {
/// Symbol name as a reference-counted string.
name: Rc<str>,
}
/// Weight stored at each node of the dependency graph.
type DepGraphNode = SymEntry;
/// Weight stored on each edge of the dependency graph; edges carry no data.
type DepGraphEdge = ();
/// Directed graph of symbols together with an index from symbol name to
/// the corresponding node reference.
struct DepGraph {
/// Underlying directed graph holding the symbol entries.
graph: DiGraph<DepGraphNode, DepGraphEdge>,
// serves as both a string internment system and graph indexer
index: HashMap<Rc<str>, SymRef>,
// if removals are permitted:
//index: HashMap<Rc<str>, Weak<RefCell<Option<NodeIndex>>>>,
}
// This encapsulates the underlying Graph to enforce certain
// assumptions. For example, we do not permit removing nodes because that
// would invalidate the NodeIndex reference in the index, which would then
// require workarounds like the commented-out code above and below.
//
// While Petgraph's use of indexes to represent graph and edge references
// makes it easy to bypass the borrow checker, it does just that---it's no
// different than a pointer reference (albeit guaranteed to safely reference
// a node rather than an arbitrary memory location) that can change out from
// under you at any moment. As such, much of the planning that went into
// this was determining how to best mitigate that.
//
// The linker has certain needs that may differ as the compiler evolves, so
// it may be desirable to permit deletions in the future. In the meantime,
// if a node needs to be deleted, we can simply remove all edges from it and
// possibly mark it in a way that states it was removed.
//
// This graph uses a separate map to serve a dual role: a string internment
// system and an indexer by symbol name. This will have to evolve in the
// future as the graph ends up containing more stuff.
//
// This is currently called a dependency graph, since that's what we're
// using it for, but in the future the compiler will also use it as an IR,
// so this will likely be renamed.
impl DepGraph {
    /// Create an empty graph with an empty name index.
    fn new() -> Self {
        Self {
            // TODO: with_capacity
            graph: DiGraph::new(),
            index: HashMap::new(),
        }
    }

    /// Return the reference for `name`, adding it to the graph if needed.
    ///
    /// If `name` has not been seen before,
    ///   a [`SymEntry::MissingSym`] node is added for it and recorded in
    ///   the index;
    ///     otherwise the existing reference is returned and the graph is
    ///     left untouched.
    fn declare(&mut self, name: &str) -> SymRef {
        match self.index.entry(name.into()) {
            Entry::Occupied(o) => *o.get(),
            Entry::Vacant(v) => {
                // Share the interned key with the node rather than
                // allocating the name a second time.
                let entry = SymEntry::MissingSym {
                    name: Rc::clone(v.key()),
                };
                let index = SymRef(self.graph.add_node(entry));
                v.insert(index);
                index
            }
        }
    }

    /// Record that `symbol` depends on `dep` (edge `symbol -> dep`).
    // will not duplicate dependencies if they already exist
    fn declare_dep(&mut self, symbol: SymRef, dep: SymRef) {
        self.graph.update_edge(*symbol, *dep, ());
    }

    /// Look up the reference for `name`,
    ///   returning `None` if it has never been declared.
    fn lookup(&self, name: &str) -> Option<SymRef> {
        self.index.get(name).copied()
    }

    /// Iterate over every `(name, reference)` pair in the index.
    fn index_iter(&self) -> Iter<'_, Rc<str>, SymRef> {
        self.index.iter()
    }

    // POC when removals were permitted:
    //fn add_symbol(&mut self, sym: SymData) -> NodeIndex {
    // let name = Rc::clone(&sym.name);
    // let record = SymRecord { data: sym, index: Rc::new(RefCell::new(None)) };
    // let index = self.graph.add_node(record);
    // let index = Rc::downgrade(&self.graph[index].index);
    // self.graph[index].index.replace(Some(index));
    // self.index.insert(name, index);
    // index
    //}
}
// Use the same node and edge identifier types as the wrapped graph's
// indices.
impl GraphBase for DepGraph {
type NodeId = NodeIndex;
type EdgeId = EdgeIndex;
}
// Visitor-map handling is delegated entirely to the wrapped graph.
impl Visitable for DepGraph {
type Map = FixedBitSet;
/// Create a visitor map by asking the wrapped graph for one.
fn visit_map(&self) -> Self::Map {
self.graph.visit_map()
}
/// Reset `map` using the wrapped graph.
fn reset_map(&self, map: &mut Self::Map) {
self.graph.reset_map(map)
}
}
// Neighbor iteration is delegated entirely to the wrapped graph.
impl<'a> IntoNeighbors for &'a DepGraph {
type Neighbors = Neighbors<'a, DepGraphEdge>;
/// Iterate over the neighbors of `n` as reported by the wrapped graph.
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
self.graph.neighbors(n)
}
}
impl Index<SymRef> for DepGraph {
type Output = DepGraphNode;
fn index(&self, index: SymRef) -> &Self::Output {
&self.graph[*index]
}
}
// TODO: we may not want to allow this; using SymRef could be a means to
// guarantee that a lookup has occurred and that it actually exists. We
// don't need this if we set NodeId = SymRef in GraphBase, but that requires
// implementing other traits as well.
impl Index<NodeIndex> for DepGraph {
type Output = DepGraphNode;
/// Fetch the graph node at the raw node index `index`.
fn index(&self, index: NodeIndex) -> &Self::Output {
&self.graph[index]
}
}
/// Reference to a symbol's node in a [`DepGraph`],
/// wrapping the underlying node index.
#[derive(Debug, Clone, Copy, PartialEq)]
struct SymRef(NodeIndex);
impl From<SymRef> for NodeIndex {
    /// Unwrap a symbol reference into the node index it carries.
    fn from(symref: SymRef) -> Self {
        symref.0
    }
}
impl From<NodeIndex> for SymRef {
fn from(index: NodeIndex) -> Self {
Self(index)
}
}
/// Lets a `SymRef` be dereferenced (`*symref`) to its inner `NodeIndex`.
impl Deref for SymRef {
type Target = NodeIndex;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Data stored in a node of the dependency graph.
#[derive(Debug, PartialEq)]
enum SymEntry {
/// Entry that carries only the symbol's name.
// NOTE(review): presumably a placeholder for a symbol that has been
// referenced but not yet defined -- confirm against `declare`.
MissingSym { name: Rc<str> },
}
type LinkerAsg<'i> = DefaultAsg<'i, global::ProgIdentSize>;
type LinkerObjectRef = ObjectRef<global::ProgIdentSize>;
pub fn main() -> Result<(), Box<dyn Error>> {
let mut pkgs_seen = HashSet::<String>::new();
let mut fragments = HashMap::<String, String>::new();
let mut depgraph = DepGraph::new();
let mut pkgs_seen: FxHashSet<String> = Default::default();
let mut fragments: FxHashMap<&str, String> = Default::default();
let mut depgraph = LinkerAsg::with_capacity(65536, 65536);
let mut roots = Vec::new();
let interner = DefaultInterner::new();
let package_path = std::env::args().nth(1).expect("Missing argument");
let abs_path = fs::canonicalize(package_path).unwrap();
println!("WARNING: This is proof-of-concept; do not use!");
load_xmlo(
let (name, relroot) = load_xmlo(
&abs_path.to_str().unwrap().to_string(),
&mut pkgs_seen,
&mut fragments,
&mut depgraph,
)?;
&interner,
&mut roots,
)?
.expect("missing root package information");
// println!(
// "Graph {:?}",
@ -239,92 +67,152 @@ pub fn main() -> Result<(), Box<dyn Error>> {
// .collect::<Vec<_>>()
// );
let sorted = sort_deps(&depgraph);
roots.extend(
vec!["___yield", "___worksheet"]
.iter()
.map(|name| interner.intern(name))
.filter_map(|sym| depgraph.lookup(sym)),
);
println!("Sorted ({}): {:?}", sorted.len(), sorted);
let mut sorted = sort_deps(&depgraph, &roots);
//println!("Sorted ({}): {:?}", sorted.len(), sorted);
output_xmle(
&depgraph,
&interner,
&mut sorted,
name.expect("missing root package name"),
relroot.expect("missing root package relroot"),
)?;
Ok(())
}
fn load_xmlo<'a>(
fn load_xmlo<'a, 'i, I: Interner<'i>>(
path_str: &'a str,
pkgs_seen: &mut HashSet<String>,
fragments: &mut HashMap<String, String>,
depgraph: &mut DepGraph,
) -> Result<(), Box<dyn Error>> {
pkgs_seen: &mut FxHashSet<String>,
fragments: &mut FxHashMap<&'i str, String>,
depgraph: &mut LinkerAsg<'i>,
interner: &'i I,
roots: &mut Vec<LinkerObjectRef>,
) -> Result<Option<(Option<&'i Symbol<'i>>, Option<String>)>, Box<dyn Error>> {
let path = fs::canonicalize(path_str)?;
let path_str = path.to_str().unwrap();
let first = pkgs_seen.len() == 0;
if !pkgs_seen.insert(path_str.to_string()) {
return Ok(());
return Ok(None);
}
println!("processing {}", path_str);
//println!("processing {}", path_str);
let mut found = HashSet::<String>::new();
let mut found: FxHashSet<&str> = Default::default();
match Reader::from_file(&path) {
Ok(mut reader) => loop {
let mut buf = Vec::new();
let file = fs::File::open(&path)?;
let reader = BufReader::new(file);
let mut xmlo = XmloReader::new(reader, interner);
let mut elig = None;
// we know that the XML produced by Saxon is valid
reader.check_end_names(false);
let mut name: Option<&'i Symbol<'i>> = None;
let mut relroot: Option<String> = None;
match reader.read_event(&mut buf) {
Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
let mut attrs = ele.attributes();
let mut filtered =
attrs.with_checks(false).filter_map(Result::ok);
loop {
match xmlo.read_event() {
Ok(XmloEvent::Package(attrs)) => {
if first {
name = attrs.name;
relroot = attrs.relroot;
}
elig = attrs.elig;
}
match ele.name() {
b"preproc:sym-dep" => filtered
.find(|attr| attr.key == b"name")
.map(|attr| attr.value)
.and_then(|mut name| {
read_deps(&mut reader, depgraph, name.to_mut())
})
.ok_or("Missing name"),
Ok(XmloEvent::SymDeps(sym, deps)) => {
// TODO: API needs to expose whether a symbol is already
// known so that we can warn on them
b"preproc:sym" => {
filtered
.find(|attr| attr.key == b"src")
.map(|attr| attr.value.to_owned())
.and_then(|src| {
let path_str =
std::str::from_utf8(&src).unwrap();
found.insert(path_str.to_string());
Some(())
});
Ok(())
}
b"preproc:fragment" => filtered
.find(|attr| attr.key == b"id")
.map(|attr| String::from_utf8(attr.value.to_vec()))
.and_then(|id| {
let fragment = reader
.read_text(ele.name(), &mut Vec::new())
.unwrap_or("".to_string());
fragments.insert(id.unwrap(), fragment);
Some(())
})
.ok_or("Missing fragment id"),
_ => Ok(()),
// Maps should not pull in symbols since we may end up
// mapping to params that are never actually used
if !sym.starts_with(":map:") {
for dep_sym in deps {
depgraph.add_dep_lookup(sym, dep_sym);
}
}
Ok(Event::Eof) => break (),
Err(e) => {
panic!("Error at {}: {:?}", reader.buffer_position(), e);
}
_ => Ok(()),
}
.unwrap_or_else(|r| panic!("Parse error: {:?}", r));
buf.clear();
},
Err(e) => panic!("Error {:?}", e),
Ok(XmloEvent::SymDecl(sym, attrs)) => {
if let Some(sym_src) = attrs.src {
found.insert(sym_src);
} else if attrs.extern_ {
// TODO: externs (they're implicitly handled, without
// checks, by Missing)
// depgraph.declare_extern(sym, kind);
} else {
let owned = attrs.src.is_none();
let kind = (&attrs).try_into().map_err(|err| {
format!("sym `{}` attrs error: {}", sym, err)
});
let mut src: Source = attrs.into();
// Existing convention is to omit @src of local package
// (in this case, the program being linked)
if first {
src.pkg_name = None;
}
// TODO: should probably track these down in the XSLT linker...
match kind {
Ok(kindval) => {
// TODO: inefficient
let link_root = owned
&& (kindval == IdentKind::Meta
|| kindval == IdentKind::Map
|| kindval == IdentKind::RetMap);
let node = depgraph.declare(sym, kindval, src)?;
if link_root {
roots.push(node);
}
}
Err(e) => println!("{:?}; skipping...", e),
};
}
}
Ok(XmloEvent::Fragment(sym, text)) => {
let result = depgraph.set_fragment(
depgraph.lookup(sym).unwrap_or_else(|| {
panic!("missing symbol for fragment: {}", sym)
}),
text,
);
match result {
Ok(_) => (),
Err(e) => println!("{:?}; skipping...", e),
};
}
// We don't need to read any further than the end of the
// header (symtable, sym-deps, fragments)
Ok(XmloEvent::Eoh) => break,
Err(err @ XmloError::UnassociatedFragment) => {
println!("{:?}; skipping...", err);
}
err @ Err(_) => err.map(|_| ())?,
}
}
if let Some(elig_sym) = elig {
roots.push(depgraph.lookup(elig_sym).expect(
"internal error: package elig references nonexistant symbol",
));
}
let mut dir = path.clone();
@ -335,118 +223,131 @@ fn load_xmlo<'a>(
path_buf.push(relpath);
path_buf.set_extension("xmlo");
//println!("Trying {:?}", path_buf);
// println!("Trying {:?}", path_buf);
let path_abs = path_buf.canonicalize().unwrap();
let path = path_abs.to_str().unwrap();
load_xmlo(path, pkgs_seen, fragments, depgraph)?;
load_xmlo(path, pkgs_seen, fragments, depgraph, interner, roots)?;
}
Ok(())
}
fn read_deps<B>(
reader: &mut Reader<B>,
depgraph: &mut DepGraph,
name: &[u8],
) -> Option<()>
where
B: BufRead,
{
// TODO: API needs to expose whether a symbol is already known so that
// we can warn on them
// note: using from_utf8_unchecked here did _not_ improve performance
let sym_node = depgraph.declare(std::str::from_utf8(name).unwrap());
//println!("processing deps for {}", sym_name);
loop {
match reader.read_event(&mut Vec::new()) {
Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
let mut attrs = ele.attributes();
let mut filtered =
attrs.with_checks(false).filter_map(Result::ok);
filtered.find(|attr| attr.key == b"name").and_then(
|mut attr| {
let name = attr.value.to_mut();
let str = std::str::from_utf8(name).unwrap();
let dep_node = depgraph.declare(&str);
depgraph.declare_dep(sym_node, dep_node);
Some(())
},
);
//println!("{:?}", ele.attributes().collect::<Vec<_>>());
}
Ok(Event::Eof) | Ok(Event::End(_)) => break Some(()),
Err(e) => {
panic!("Error at {}: {:?}", reader.buffer_position(), e);
}
_ => (),
}
if first {
Ok(Some((name, relroot)))
} else {
Ok(None)
}
}
fn sort_deps(depgraph: &DepGraph) -> Vec<&SymEntry> {
fn sort_deps<'a, 'i>(
depgraph: &'a LinkerAsg<'i>,
roots: &Vec<LinkerObjectRef>,
) -> Sections<'a, 'i> {
// @type=meta, @preproc:elig-class-yields
// @type={ret}map{,:head,:tail}
let roots = discover_roots(depgraph);
let mut deps: Sections = Sections::new();
// This is technically a topological sort, but functions have
// cycles. Once we have more symbol metadata, we can filter them out
// and actually invoke toposort.
let mut dfs = DfsPostOrder::empty(&depgraph);
let mut sorted = Vec::new();
//println!("discovered roots: {:?}", roots);
// TODO: we'll be processing various roots separately
for index in roots {
dfs.stack.push(*index);
dfs.stack.push((*index).into());
}
// TODO: can we encapsulate NodeIndex?
while let Some(index) = dfs.next(&depgraph) {
sorted.push(&depgraph[index]);
let ident = depgraph.get(index).unwrap();
match ident {
Object::Ident(_, kind, _)
| Object::IdentFragment(_, kind, _, _) => match kind {
IdentKind::Meta => deps.meta.push_body(ident),
IdentKind::Worksheet => deps.worksheet.push_body(ident),
IdentKind::Param(_, _) => deps.params.push_body(ident),
IdentKind::Type(_) => deps.types.push_body(ident),
IdentKind::Func(_, _) => deps.funcs.push_body(ident),
IdentKind::MapHead | IdentKind::Map | IdentKind::MapTail => {
deps.map.push_body(ident)
}
IdentKind::RetMapHead
| IdentKind::RetMap
| IdentKind::RetMapTail => deps.retmap.push_body(ident),
_ => deps.rater.push_body(ident),
},
_ => panic!("unexpected node: {:?}", ident),
}
}
sorted
deps
}
fn discover_roots(depgraph: &DepGraph) -> Vec<SymRef> {
// TODO: filter_map
let mut map_syms = depgraph
.index_iter()
.filter(|(key, _)| {
key.starts_with(":map:") || key.starts_with(":retmap:")
})
.map(|(_, value)| *value)
.collect::<Vec<_>>();
/// Fetch the object that `depgraph` holds for the identifier `name`.
///
/// `name` is interned with `interner` and the resulting symbol is looked
/// up in `depgraph`.
///
/// # Panics
///
/// Panics with `missing identifier: <name>` when the lookup yields
/// nothing, and with `Could not get interner value` when the graph has no
/// object for the reference that was found.
fn get_interner_value<'a, 'i, I: Interner<'i>>(
    depgraph: &'a LinkerAsg<'i>,
    interner: &'i I,
    name: &str,
) -> &'a Object<'i> {
    let symbol = interner.intern(name);
    let object_ref = match depgraph.lookup(symbol) {
        Some(found) => found,
        None => panic!("missing identifier: {}", name),
    };

    depgraph
        .get(object_ref)
        .expect("Could not get interner value")
}
let mut roots = vec!["___yield", "___worksheet"]
.iter()
.filter_map(|sym| depgraph.lookup(sym))
.collect::<Vec<_>>();
fn output_xmle<'a, 'i, I: Interner<'i>>(
depgraph: &'a LinkerAsg<'i>,
interner: &'i I,
sorted: &mut Sections<'a, 'i>,
name: &'i Symbol<'i>,
relroot: String,
) -> Result<(), Box<dyn Error>> {
if !sorted.map.is_empty() {
sorted.map.push_head(get_interner_value(
depgraph,
interner,
&String::from(":map:___head"),
));
sorted.map.push_tail(get_interner_value(
depgraph,
interner,
&String::from(":map:___tail"),
));
}
roots.append(&mut map_syms);
if !sorted.retmap.is_empty() {
sorted.retmap.push_head(get_interner_value(
depgraph,
interner,
&String::from(":retmap:___head"),
));
sorted.retmap.push_tail(get_interner_value(
depgraph,
interner,
&String::from(":retmap:___tail"),
));
}
//println!(
// "found roots: {:?}",
// roots
// .iter()
// .map(|index| &depgraph.graph[*index])
// .collect::<Vec<_>>()
//);
let writer = Cursor::new(Vec::new());
let mut xmle_writer = XmleWriter::new(writer);
xmle_writer
.write(&sorted, name, &relroot)
.expect("Could not write xmle output");
roots
print!(
"{}",
String::from_utf8(xmle_writer.into_inner().into_inner())?
);
Ok(())
}
#[cfg(test)]
mod tests {
mod test {
#[test]
fn placeholder() {}
}

View File

@ -15,4 +15,13 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! An incremental rewrite of TAME in Rust.
pub mod global;
pub mod ir;
pub mod ld;
pub mod obj;
pub mod sym;
#[cfg(test)]
pub mod test;

View File

@ -0,0 +1,35 @@
// Object files
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Object file construction and processing.
//!
//! An _[object file][]_ contains relocatable compiled code, symbol tables,
//! and other information produced by the compiler.
//! It is the responsibility of the [linker](super::ld) to construct a final
//! executable from these files.
//!
//! [object file]: https://en.wikipedia.org/wiki/Object_file
//!
//! The only object file currently supported by TAMER is the [`xmlo`]
//! format,
//! produced by the XSLT compiler.
//! It will likely be replaced with [ELF] object files in the future.
//!
//! [ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
pub mod xmle;
pub mod xmlo;

View File

@ -0,0 +1,69 @@
// xmle object files
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! `xmle` file construction and processing.
//!
//! This file format exists for compatibility with the old compiler
//! written in XSLT; it will be removed in the future.
//!
//!
//! `xmle` Files
//! ===================
//! An `xmle` file is produced by the linker for each program it links.
//! The format is XML because the original compiler was written in XSLT.
//!
//! The general structure of an `xmle` file consists of different sections:
//! - map
//! - return map
//! - statics
//! - rater
//!
//! For example (with some extra information omitted):
//!
//! ```xml
//! <package xmlns="http://www.lovullo.com/rater"
//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
//! xmlns:l="http://www.lovullo.com/rater/linker"
//! title="suppliers/tax"
//! program="true"
//! name="suppliers/tax"
//! __rootpath="../">
//! <l:dep>
//! <preproc:sym type="func"
//! dim="0"
//! dtype="float"
//! name="min"
//! src="../rater/core/numeric/minmax"
//! desc="Return the lesser value"/>
//! </l:dep>
//! <l:map-from>
//! <l:from name="latest_operation_hour"/>
//! </l:map-from>
//! <l:map-exec>
//! function( input, callback ) {)
//! </l:map-exec>
//! <l:retmap-exec>
//! function( input, callback ) {)
//! </l:retmap-exec>
//! <l:static>
//! function func_min( args , min1, min2) {return min1;}
//! </l:static>
//! <l:exec>consts[&apos;CMP_OP_EQ&apos;] = 1;</l:exec>
//! </package>
//! ```
pub mod writer;

View File

@ -0,0 +1,47 @@
// Object file writer
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! xmle file writer.
//!
//! This defines a lower-level event-based `XmleWriter` similar to that of
//! `quick_xml`, where the events are a slightly higher-level abstraction
//! over the types of nodes present in the file.
//!
//! For more information on xmle files, see the [parent module][`super`].
//!
//! The example below is incomplete, but shows the general usage.
//!
//! ```
//! use tamer::obj::xmle::writer::{Sections, XmleWriter};
//! use tamer::sym::{DefaultInterner, Interner, Symbol};
//! use std::io::Cursor;
//!
//! let interner = DefaultInterner::new();
//! let name = interner.intern(&String::from("foo"));
//!
//! let sections = Sections::new();
//! let writer = Cursor::new(Vec::new());
//! let mut xmle_writer = XmleWriter::new(writer);
//! xmle_writer.write(&sections, name, &String::from(""));
//! ```
mod writer;
mod xmle;
pub use writer::{Result, Section, Sections, Writer, WriterError};
pub use xmle::XmleWriter;

View File

@ -0,0 +1,320 @@
// xmle object file writer
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use crate::ir::asg::Object;
use crate::sym::Symbol;
use quick_xml::Error as XmlError;
use std::io::{Error as IoError, Write};
use std::result;
use std::str::Utf8Error;
type ObjectRef<'a, 'i> = &'a Object<'i>;
pub type Result<T = ()> = result::Result<T, WriterError>;
pub type ObjectVec<'a, 'i> = Vec<ObjectRef<'a, 'i>>;
/// A wrapper around a `Write` object
///
/// This is used to take the [`Sections`] and write out the xmle files.
pub trait Writer<W: Write> {
/// Write `sections` as the contents of a package identified by `name`.
///
/// `relroot` is the package's relative root path.  Implementations
/// report failures as a [`WriterError`].
fn write(
&mut self,
sections: &Sections,
name: Symbol,
relroot: &str,
) -> Result<()>
where
Self: Sized;
}
/// A Section that needs to be written to the buffer
///
/// Most sections will only need a `body`, but some include `head` and `tail`
/// information.  Rather than dealing with those differently, each `Section`
/// will have a `head` and `tail` that are empty by default.
#[derive(Clone, Default)]
pub struct Section<'a, 'i> {
// Objects yielded before the body when the section is iterated.
head: ObjectVec<'a, 'i>,
// The section's main objects.
body: ObjectVec<'a, 'i>,
// Objects yielded after the body when the section is iterated.
tail: ObjectVec<'a, 'i>,
}
impl<'a, 'i> Section<'a, 'i> {
    /// Create an empty `Section`.
    ///
    /// This is equivalent to [`Section::default`].
    ///
    /// ```
    /// use tamer::obj::xmle::writer::Section;
    ///
    /// let section = Section::new();
    /// assert!(section.is_empty());
    /// ```
    pub fn new() -> Self {
        Self::default()
    }

    /// The total number of objects in the `Section`
    /// (head, body and tail combined).
    pub fn len(&self) -> usize {
        self.head.len() + self.body.len() + self.tail.len()
    }

    /// Check if the `Section` is empty, i.e. its head, body and tail hold
    /// no objects at all.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Push an `Object` into a `Section`'s head
    pub fn push_head(&mut self, obj: ObjectRef<'a, 'i>) {
        // `obj` is already a reference; store it as-is.
        self.head.push(obj)
    }

    /// Push an `Object` into a `Section`'s body
    pub fn push_body(&mut self, obj: ObjectRef<'a, 'i>) {
        self.body.push(obj)
    }

    /// Push an `Object` into a `Section`'s tail
    pub fn push_tail(&mut self, obj: ObjectRef<'a, 'i>) {
        self.tail.push(obj)
    }

    /// Merge the parts of a `Section` into one [`SectionIterator`]
    ///
    /// The `Section` internals need to be iterated as a group, so this
    /// returns a [`SectionIterator`] that yields the head objects first,
    /// then the body, then the tail.
    ///
    /// ```
    /// use tamer::obj::xmle::writer::Section;
    /// use tamer::ir::asg::Object;
    ///
    /// let mut section = Section::new();
    /// let obj = Object::Empty;
    /// let expect = vec![&obj, &obj, &obj];
    ///
    /// section.push_head(&obj);
    /// section.push_body(&obj);
    /// section.push_tail(&obj);
    ///
    /// let collection: Vec<_> = section.iter().collect();
    /// assert_eq!(expect, collection);
    /// ```
    pub fn iter(&self) -> SectionIterator {
        SectionIterator {
            inner: Box::new(
                self.head
                    .iter()
                    .chain(self.body.iter())
                    .chain(self.tail.iter())
                    .copied(),
            ),
        }
    }
}
/// Wrapper for an Iterator
///
/// This allows us to iterate over all parts of a [`Section`].
pub struct SectionIterator<'a, 'i> {
// Boxed so that the concrete iterator type does not appear in the
// struct's type.
inner: Box<dyn Iterator<Item = &'a Object<'i>> + 'a>,
}
/// Iteration is delegated entirely to the boxed inner iterator.
impl<'a, 'i> Iterator for SectionIterator<'a, 'i> {
    type Item = &'a Object<'i>;

    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Forward the inner iterator's bounds.
    ///
    /// Without this the default `(0, None)` would be reported, which
    /// prevents consumers such as `collect` from reserving space up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
/// Sections that need to be written to a buffer
///
/// All the properties are public [`Section`] objects and will be accessed
/// directly by the [`Writer`].
// One `Section` per group of identifiers; all start out empty.
#[derive(Default)]
pub struct Sections<'a, 'i> {
pub map: Section<'a, 'i>,
pub retmap: Section<'a, 'i>,
pub meta: Section<'a, 'i>,
pub worksheet: Section<'a, 'i>,
pub params: Section<'a, 'i>,
pub types: Section<'a, 'i>,
pub funcs: Section<'a, 'i>,
pub rater: Section<'a, 'i>,
}
impl<'a, 'i> Sections<'a, 'i> {
/// Constructor for Sections
///
/// ```
/// use tamer::obj::xmle::writer::Sections;
///
/// let sections = Sections::new();
/// ```
pub fn new() -> Self {
Self {
map: Section::new(),
retmap: Section::new(),
meta: Section::new(),
worksheet: Section::new(),
params: Section::new(),
types: Section::new(),
funcs: Section::new(),
rater: Section::new(),
}
}
}
/// Error implementations for the writer
#[derive(Debug)]
pub enum WriterError {
/// An I/O error.
Io(IoError),
/// A UTF-8 decoding error.
Utf8(Utf8Error),
/// An error reported by `quick_xml`.
XmlError(XmlError),
/// An object that was expected to carry a fragment did not; the string
/// holds the message.
ExpectedFragment(String),
}
impl From<IoError> for WriterError {
fn from(err: IoError) -> Self {
WriterError::Io(err)
}
}
impl From<Utf8Error> for WriterError {
fn from(err: Utf8Error) -> Self {
WriterError::Utf8(err)
}
}
impl From<XmlError> for WriterError {
fn from(err: XmlError) -> Self {
WriterError::XmlError(err)
}
}
// Unit tests for `Section`: construction, pushing into each part, length,
// emptiness and iteration order.
#[cfg(test)]
mod test {
use super::*;
// A freshly constructed section has nothing in any of its parts.
#[test]
fn section_empty() {
let section = Section::new();
assert!(section.head.is_empty());
assert!(section.body.is_empty());
assert!(section.tail.is_empty());
}
// `push_head` stores the object in `head`.
#[test]
fn section_head() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.head.is_empty());
section.push_head(&obj);
assert_eq!(Some(&&obj), section.head.get(0));
}
// `push_body` stores the object in `body`.
#[test]
fn section_body() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.body.is_empty());
section.push_body(&obj);
let body = section.body;
assert_eq!(Some(&&obj), body.get(0));
}
// `push_tail` stores the object in `tail`.
#[test]
fn section_tail() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.tail.is_empty());
section.push_tail(&obj);
assert_eq!(Some(&&obj), section.tail.get(0));
}
// `len` counts the objects of all three parts.
#[test]
fn section_len() {
let mut section = Section::new();
let obj = Object::Empty;
assert_eq!(0, section.len());
section.push_head(&obj);
assert_eq!(1, section.len());
section.push_body(&obj);
assert_eq!(2, section.len());
section.push_tail(&obj);
assert_eq!(3, section.len());
}
// A section with only a head entry is not empty.
#[test]
fn section_is_empty_head() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.is_empty());
section.push_head(&obj);
assert!(!section.is_empty());
}
// A section with only a body entry is not empty.
#[test]
fn section_is_empty_body() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.is_empty());
section.push_body(&obj);
assert!(!section.is_empty());
}
// A section with only a tail entry is not empty.
#[test]
fn section_is_empty_tail() {
let mut section = Section::new();
let obj = Object::Empty;
assert!(section.is_empty());
section.push_tail(&obj);
assert!(!section.is_empty());
}
// Iterating yields the entries of head, body and tail.
#[test]
fn section_iterator() {
let mut section = Section::new();
let obj = Object::Empty;
let expect = vec![&obj, &obj, &obj];
section.push_head(&obj);
section.push_body(&obj);
section.push_tail(&obj);
let collection: Vec<_> = section.iter().collect();
assert_eq!(expect, collection);
}
}

View File

@ -0,0 +1,749 @@
// Concrete xmle writer
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::writer::{Result, SectionIterator, Sections, WriterError};
use crate::ir::asg::{IdentKind, Object};
use crate::sym::Symbol;
use fxhash::FxHashSet;
#[cfg(test)]
use mock::MockXmlWriter as XmlWriter;
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
#[cfg(not(test))]
use quick_xml::Writer as XmlWriter;
use std::io::Write;
/// Responsible for writing to the xmle files
pub struct XmleWriter<W: Write> {
// Underlying XML event writer; under `cfg(test)` this name resolves to
// the mock writer instead of `quick_xml`'s.
writer: XmlWriter<W>,
}
impl<W: Write> XmleWriter<W> {
/// Build an `XmleWriter` that writes into `write`.
///
/// The underlying XML writer is configured to indent with two spaces.
///
/// ```
/// use std::io::Cursor;
/// use tamer::obj::xmle::writer::XmleWriter;
///
/// let writer = Cursor::new(Vec::new());
/// let xmle_writer = XmleWriter::new(writer);
/// ```
pub fn new(write: W) -> Self {
    Self {
        writer: XmlWriter::new_with_indent(write, b' ', 2),
    }
}
/// Consume the `XmleWriter` and return the inner `Write` object
///
/// ```
/// use std::io::Cursor;
/// use tamer::obj::xmle::writer::XmleWriter;
///
/// let writer = Cursor::new(Vec::new());
/// let xmle_writer = XmleWriter::new(writer);
/// assert!(xmle_writer.into_inner().into_inner().is_empty());
/// ```
pub fn into_inner(self) -> W {
self.writer.into_inner()
}
/// Write a complete xmle document.
///
/// The pre-ordered [`Sections`] are emitted inside a `package` element,
/// in this order: `l:dep`, `l:map-from`, `l:map-exec`, `l:retmap-exec`,
/// `l:static` and `l:exec`.
///
/// ```
/// use std::io::Cursor;
/// use tamer::obj::xmle::writer::{Sections, XmleWriter};
/// use tamer::sym::{DefaultInterner, Interner};
///
/// let interner = DefaultInterner::new();
/// let name = interner.intern("foo");
/// let sections = Sections::new();
///
/// let mut xmle_writer = XmleWriter::new(Cursor::new(Vec::new()));
/// xmle_writer.write(&sections, name, "").unwrap();
///
/// let buf = xmle_writer.into_inner().into_inner();
/// assert!(!buf.is_empty(), "something was written to the buffer");
/// ```
pub fn write(
    &mut self,
    sections: &Sections,
    name: &Symbol,
    relroot: &str,
) -> Result {
    self.write_start_package(name, relroot)?;

    self.write_element(b"l:dep", |w| w.write_sections(sections, relroot))?;

    // This was not in the original linker, but we need to be able to
    // convey this information for `standalones` (which has received
    // some logic from the old linker for the time being).
    self.write_element(b"l:map-from", |w| w.write_froms(sections))?;

    self.write_element(b"l:map-exec", |w| {
        w.write_section(sections.map.iter())
    })?;
    self.write_element(b"l:retmap-exec", |w| {
        w.write_section(sections.retmap.iter())
    })?;

    // The static section concatenates several sections, in this order.
    self.write_element(b"l:static", |w| {
        w.write_section(sections.meta.iter())?
            .write_section(sections.worksheet.iter())?
            .write_section(sections.params.iter())?
            .write_section(sections.types.iter())?
            .write_section(sections.funcs.iter())
    })?;

    self.write_element(b"l:exec", |w| {
        w.write_section(sections.rater.iter())
    })?;

    self.write_end_tag(b"package")?;

    Ok(())
}
/// Write one element: start tag, body, end tag.
///
/// `callback` receives the writer and is responsible for producing the
/// element's body.  Any error from the tags or from the callback is
/// returned immediately.
#[inline]
fn write_element<F>(
    &mut self,
    name: &[u8],
    callback: F,
) -> Result<&mut XmleWriter<W>>
where
    F: FnOnce(&mut Self) -> Result<&mut XmleWriter<W>>,
{
    self.write_start_tag(name)?;
    callback(self)?;
    self.write_end_tag(name)
}
/// Open the `package` element
///
/// The `package` element's opening tag needs attributes, so it cannot use
/// `write_start_tag` directly.
///
/// `name` is written as both the `title` and `name` attributes and
/// `relroot` as `__rootpath`; `program` is always `"true"`.
fn write_start_package(
&mut self,
name: &Symbol,
relroot: &str,
) -> Result<&mut XmleWriter<W>> {
let root =
BytesStart::owned_name(b"package".to_vec()).with_attributes(vec![
// Namespace declarations used by the elements written below.
("xmlns", "http://www.lovullo.com/rater"),
("xmlns:preproc", "http://www.lovullo.com/rater/preproc"),
("xmlns:l", "http://www.lovullo.com/rater/linker"),
("title", &name), // TODO
("program", "true"),
("name", &name),
("__rootpath", &relroot),
]);
// Only the start tag is written here; `write` closes the element.
self.writer.write_event(Event::Start(root))?;
Ok(self)
}
/// Emit the opening tag `<name>` (without attributes).
fn write_start_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
    let open = Event::Start(BytesStart::borrowed_name(name));
    self.writer.write_event(open)?;

    Ok(self)
}
/// Emit the closing tag `</name>`.
fn write_end_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
    let close = Event::End(BytesEnd::borrowed(name));
    self.writer.write_event(close)?;

    Ok(self)
}
/// Write all [`Sections`]
///
/// Every identifier found in the [`Sections`] is written out using the
/// `writer` object as an empty `preproc:sym` element whose attributes
/// describe the identifier's kind, name and source information.
///
/// # Panics
///
/// Panics if a section contains an object that is neither an `Ident` nor
/// an `IdentFragment`.
fn write_sections(
&mut self,
sections: &Sections,
relroot: &str,
) -> Result<&mut XmleWriter<W>> {
// Symbols are emitted section by section, in this fixed order.
let all = sections
.meta
.iter()
.chain(sections.map.iter())
.chain(sections.retmap.iter())
.chain(sections.worksheet.iter())
.chain(sections.params.iter())
.chain(sections.types.iter())
.chain(sections.funcs.iter())
.chain(sections.rater.iter());
for ident in all {
match ident {
Object::Ident(sym, kind, src)
| Object::IdentFragment(sym, kind, src, _) => {
let name: &str = sym;
// this'll be formalized more sanely
// Kind-specific attributes: always `type`, plus `dim` and/or
// `dtype` for the kinds that carry them.
let mut attrs = match kind {
IdentKind::Cgen(dim) => {
vec![("type", "cgen"), ("dim", dim.as_ref())]
}
IdentKind::Class(dim) => {
vec![("type", "class"), ("dim", dim.as_ref())]
}
IdentKind::Const(dim, dtype) => vec![
("type", "const"),
("dim", dim.as_ref()),
("dtype", dtype.as_ref()),
],
IdentKind::Func(dim, dtype) => vec![
("type", "func"),
("dim", dim.as_ref()),
("dtype", dtype.as_ref()),
],
IdentKind::Gen(dim, dtype) => vec![
("type", "gen"),
("dim", dim.as_ref()),
("dtype", dtype.as_ref()),
],
IdentKind::Lparam(dim, dtype) => vec![
("type", "lparam"),
("dim", dim.as_ref()),
("dtype", dtype.as_ref()),
],
IdentKind::Param(dim, dtype) => vec![
("type", "param"),
("dim", dim.as_ref()),
("dtype", dtype.as_ref()),
],
IdentKind::Rate(dtype) => {
vec![("type", "rate"), ("dtype", dtype.as_ref())]
}
IdentKind::Tpl => vec![("type", "tpl")],
IdentKind::Type(dtype) => {
vec![("type", "type"), ("dtype", dtype.as_ref())]
}
IdentKind::MapHead => vec![("type", "map:head")],
IdentKind::Map => vec![("type", "map")],
IdentKind::MapTail => vec![("type", "map:tail")],
IdentKind::RetMapHead => vec![("type", "retmap:head")],
IdentKind::RetMap => vec![("type", "retmap")],
IdentKind::RetMapTail => vec![("type", "retmap:tail")],
IdentKind::Meta => vec![("type", "meta")],
IdentKind::Worksheet => vec![("type", "worksheet")],
};
attrs.push(("name", name));
// The remaining attributes are only written when the source
// information provides them.
if src.generated {
attrs.push(("preproc:generated", "true"));
}
// Declared before the `if let` so that the formatted path
// outlives the borrow stored in `attrs`.
let srcpath: String;
if let Some(pkg_name) = src.pkg_name {
srcpath = format!("{}{}", relroot, pkg_name);
attrs.push(("src", &srcpath));
}
if let Some(parent) = src.parent {
attrs.push(("parent", parent));
}
if let Some(yields) = src.yields {
attrs.push(("yields", yields));
}
if let Some(desc) = &src.desc {
attrs.push(("desc", &desc));
}
let sym = BytesStart::owned_name(b"preproc:sym".to_vec())
.with_attributes(attrs);
self.writer.write_event(Event::Empty(sym))?;
}
_ => unreachable!("filtered out during sorting"),
}
}
Ok(self)
}
/// Write the `from` entries of the map section.
///
/// The `from` values found in the source information of every object in
/// `sections.map` are gathered into a set, and one empty `l:from`
/// element carrying a `name` attribute is written per distinct value.
///
/// # Panics
///
/// Panics if the map section contains an object that is neither an
/// `Ident` nor an `IdentFragment`.
fn write_froms(
    &mut self,
    sections: &Sections,
) -> Result<&mut XmleWriter<W>> {
    let mut unique_froms: FxHashSet<&str> = Default::default();

    for ident in sections.map.iter() {
        let src = match ident {
            Object::Ident(_, _, src)
            | Object::IdentFragment(_, _, src, _) => src,
            _ => unreachable!("filtered out during sorting"),
        };

        if let Some(froms) = &src.from {
            for from in froms {
                unique_froms.insert(from);
            }
        }
    }

    for name in unique_froms {
        let element = BytesStart::borrowed_name(b"l:from")
            .with_attributes(vec![("name", name)]);
        self.writer.write_event(Event::Empty(element))?;
    }

    Ok(self)
}
/// Write the fragments of a `Section`.
///
/// Each object produced by `idents` that carries a fragment has that
/// fragment written as a text node using the `writer`'s `write_event`.
///
/// # Errors
///
/// Returns [`WriterError::ExpectedFragment`] for an object without a
/// fragment, unless it is a `Cgen`, `Gen` or `Lparam` identifier.
fn write_section(
    &mut self,
    idents: SectionIterator,
) -> Result<&mut XmleWriter<W>> {
    for ident in idents {
        let frag = match ident {
            // Cgen, Gen, and Lparam are not expected to have fragments,
            // so they are skipped rather than reported as an error.
            Object::Ident(_, IdentKind::Cgen(_), _)
            | Object::Ident(_, IdentKind::Gen(_, _), _)
            | Object::Ident(_, IdentKind::Lparam(_, _), _) => continue,

            Object::IdentFragment(_, _, _, frag) => frag,

            obj => {
                return Err(WriterError::ExpectedFragment(format!(
                    "fragment expected: {:?}",
                    obj
                )));
            }
        };

        self.writer
            .write_event(Event::Text(BytesText::from_plain_str(frag)))?;
    }

    Ok(self)
}
}
#[cfg(test)]
mod mock {
    use super::*;

    /// Test double for the XML writer.
    ///
    /// Nothing is ever written to `inner`;
    ///   every event is instead handed to `write_callback` so that a test
    ///   can inspect it.
    pub struct MockXmlWriter<W: Write> {
        /// Wrapped value, only ever handed back by `into_inner`.
        inner: W,

        /// Invoked with each event passed to `write_event`.
        ///
        /// This must be set before `write_event` is called.
        pub write_callback: Option<Box<dyn for<'a> Fn(&Event<'a>) -> Result>>,
    }

    impl<W: Write> MockXmlWriter<W> {
        /// Wrap `inner` without any callback set.
        pub fn new(inner: W) -> Self {
            MockXmlWriter {
                inner,
                write_callback: None,
            }
        }

        /// Same as [`MockXmlWriter::new`];
        ///   the two indentation arguments are ignored.
        pub fn new_with_indent(inner: W, _: u8, _: u8) -> Self {
            Self::new(inner)
        }

        /// Hand `event` to the callback,
        ///   propagating its error or otherwise reporting `0`.
        ///
        /// Panics if no callback has been set.
        pub fn write_event<'a, E: AsRef<Event<'a>>>(
            &mut self,
            event: E,
        ) -> Result<usize> {
            let callback = self
                .write_callback
                .as_ref()
                .expect("missing mock write_callback");

            callback(event.as_ref())?;

            Ok(0)
        }

        /// Consume the mock and return the wrapped value.
        pub fn into_inner(self) -> W {
            self.inner
        }
    }
}
#[cfg(test)]
mod test {
use super::*;
use crate::ir::asg::Dim;
use crate::ir::asg::Source;
use crate::ir::legacyir::SymAttrs;
use crate::obj::xmle::writer::Section;
use crate::sym::{Symbol, SymbolIndex};
use std::str;
type Sut<W> = XmleWriter<W>;
#[test]
fn writer_uses_inner_buffer() -> Result {
    // The buffer given at construction must be handed back untouched.
    let given = vec![1, 2, 3];
    let sut = Sut::new(given.clone());

    assert_eq!(given, sut.into_inner());

    Ok(())
}
#[test]
fn write_start_package() -> Result {
    let mut sut = Sut::new(vec![]);

    // A single `package` start tag with seven attributes is expected.
    sut.writer.write_callback = Some(Box::new(|event| {
        let start = match event {
            Event::Start(start) => start,
            _ => panic!("did not match expected event"),
        };

        if str::from_utf8(start.name()) != Ok("package") {
            panic!("unreachable");
        }

        assert_eq!(7, start.attributes().count());
        Ok(())
    }));

    let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
    let relroot = String::new();

    sut.write_start_package(&sym, &relroot)?;

    Ok(())
}
#[test]
fn write_start_tag() -> Result {
    let mut sut = Sut::new(vec![]);

    // A single `l:dep` start tag without attributes is expected.
    sut.writer.write_callback = Some(Box::new(|event| {
        let start = match event {
            Event::Start(start) => start,
            _ => panic!("did not match expected event"),
        };

        if str::from_utf8(start.name()) != Ok("l:dep") {
            panic!("unreachable");
        }

        assert_eq!(0, start.attributes().count());
        Ok(())
    }));

    sut.write_start_tag(b"l:dep")?;

    Ok(())
}
#[test]
fn write_end_tag() -> Result {
    let mut sut = Sut::new(vec![]);

    // A single `package` end tag is expected.
    sut.writer.write_callback = Some(Box::new(|event| {
        if let Event::End(end) = event {
            let name = str::from_utf8(end.name())?;
            assert_eq!("package", name);
            Ok(())
        } else {
            panic!("did not match expected event")
        }
    }));

    sut.write_end_tag(b"package")?;

    Ok(())
}
#[test]
fn write_section() -> Result {
    let mut sut = Sut::new(vec![]);

    // A fragment must be written as a text event.
    sut.writer.write_callback = Some(Box::new(|event| {
        if let Event::Text(_) = event {
            Ok(())
        } else {
            panic!("did not trigger event")
        }
    }));

    let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
    let frag_obj = Object::IdentFragment(
        &sym,
        IdentKind::Meta,
        Source::default(),
        String::new(),
    );

    let mut section = Section::new();
    section.push_body(&frag_obj);

    sut.write_section(section.iter())?;

    Ok(())
}
#[test]
fn write_section_ignores_other_kinds() -> Result {
    let mut sut = Sut::new(vec![]);

    // A `Cgen` identifier has no fragment and must produce no event.
    sut.writer.write_callback = Some(Box::new(|_| {
        panic!("callback should not have been called")
    }));

    let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
    let cgen_obj = Object::Ident(
        &sym,
        IdentKind::Cgen(Dim::default()),
        Source::default(),
    );

    let mut section = Section::new();
    section.push_body(&cgen_obj);

    sut.write_section(section.iter())?;

    Ok(())
}
#[test]
fn write_section_catch_missing() -> Result {
    let mut sut = Sut::new(vec![]);

    // The write must fail before any event reaches the writer.
    sut.writer.write_callback = Some(Box::new(|_| {
        panic!("callback should not have been called")
    }));

    let empty_obj = Object::Empty;

    let mut section = Section::new();
    section.push_body(&empty_obj);

    let result = sut.write_section(section.iter());

    if let Err(WriterError::ExpectedFragment(_)) = result {
        return Ok(());
    }

    panic!("expected Err")
}
#[test]
fn write_sections() -> Result {
    let mut sut = Sut::new(vec![]);

    sut.writer.write_callback = Some(Box::new(|event| {
        let sym_ele = match event {
            Event::Empty(ele) => ele,
            _ => panic!("unexpected event"),
        };

        assert_eq!("preproc:sym", str::from_utf8(sym_ele.name())?);

        // Attributes, in the order that they are expected to be written.
        let expected = [("type", "worksheet"), ("name", "random_symbol")];

        let mut attributes = sym_ele.attributes();
        assert_eq!(expected.len(), attributes.clone().count());

        for (key, value) in expected.iter() {
            let attr = attributes.next().expect("Expects attributes")?;
            assert_eq!(*key, str::from_utf8(attr.key)?);
            assert_eq!(*value, str::from_utf8(&attr.value)?);
        }

        Ok(())
    }));

    let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
    let ident =
        Object::Ident(&sym, IdentKind::Worksheet, Source::default());

    let mut sections = Sections::new();
    sections.map.push_body(&ident);

    let relroot = String::new();
    sut.write_sections(&sections, &relroot)?;

    Ok(())
}
#[test]
fn write_sections_with_sources() -> Result {
    let mut sut = Sut::new(vec![]);

    sut.writer.write_callback = Some(Box::new(|event| {
        let sym_ele = match event {
            Event::Empty(ele) => ele,
            _ => panic!("unexpected event"),
        };

        assert_eq!("preproc:sym", str::from_utf8(sym_ele.name())?);

        // Attributes, in the order that they are expected to be written.
        let expected = [
            ("type", "worksheet"),
            ("name", "name"),
            ("preproc:generated", "true"),
            ("src", "rootname"),
            ("parent", "parent"),
            ("yields", "yields"),
            ("desc", "sym desc"),
        ];

        let mut attributes = sym_ele.attributes();
        assert_eq!(expected.len(), attributes.clone().count());

        for (key, value) in expected.iter() {
            let attr = attributes.next().expect("Expects attributes")?;
            assert_eq!(*key, str::from_utf8(attr.key)?);
            assert_eq!(*value, str::from_utf8(&attr.value)?);
        }

        Ok(())
    }));

    let nsym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
    let ssym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
    let psym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
    let ysym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
    let fsym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");

    let attrs = SymAttrs {
        pkg_name: Some(&nsym),
        src: Some(&ssym),
        generated: true,
        parent: Some(&psym),
        yields: Some(&ysym),
        desc: Some(String::from("sym desc")),
        from: Some(vec![&fsym]),
        virtual_: true,
        ..Default::default()
    };

    let ident = Object::Ident(&nsym, IdentKind::Worksheet, attrs.into());

    let mut sections = Sections::new();
    sections.map.push_body(&ident);

    let relroot = String::from("root");
    sut.write_sections(&sections, &relroot)?;

    Ok(())
}
#[test]
fn write_froms() -> Result {
    let mut sut = Sut::new(vec![]);

    // A single `l:from` naming the `from` symbol is expected.
    sut.writer.write_callback = Some(Box::new(|event| {
        let from_ele = match event {
            Event::Empty(ele) => ele,
            _ => panic!("unexpected event"),
        };

        assert_eq!("l:from", str::from_utf8(from_ele.name())?);

        let mut attributes = from_ele.attributes();
        assert_eq!(1, attributes.clone().count());

        let attr = attributes.next().expect("Expects attributes")?;
        assert_eq!("name", str::from_utf8(attr.key)?);
        assert_eq!("dest symbol", str::from_utf8(&attr.value)?);

        Ok(())
    }));

    let src_sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "source symbol");
    let dest_sym = Symbol::new_dummy(SymbolIndex::from_u32(2), "dest symbol");

    let mut src = Source::default();
    src.from = Some(vec![&dest_sym]);

    let ident = Object::Ident(&src_sym, IdentKind::Worksheet, src);

    let mut sections = Sections::new();
    sections.map.push_body(&ident);

    sut.write_froms(&sections)?;

    Ok(())
}
#[test]
fn write_froms_no_from_no_write() -> Result {
    let mut sut = Sut::new(vec![]);

    // Without any `from`, nothing at all may be written.
    sut.writer.write_callback =
        Some(Box::new(|_| panic!("unexpected write")));

    let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
    let ident =
        Object::Ident(&sym, IdentKind::Worksheet, Source::default());

    let mut sections = Sections::new();
    sections.map.push_body(&ident);

    sut.write_froms(&sections)?;

    Ok(())
}
#[test]
fn write_element() -> Result {
    let mut sut = Sut::new(vec![]);

    // Only the start and end tags of `foo` are expected.
    sut.writer.write_callback = Some(Box::new(|event| match event {
        Event::Start(start) => {
            if str::from_utf8(start.name()) != Ok("foo") {
                panic!("unreachable");
            }

            assert_eq!(0, start.attributes().count());
            Ok(())
        }

        Event::End(end) => {
            if str::from_utf8(end.name()) != Ok("foo") {
                panic!("unreachable");
            }

            Ok(())
        }

        _ => panic!("did not match expected event"),
    }));

    sut.write_element(b"foo", |writer| Ok(writer))?;

    Ok(())
}
}

View File

@ -0,0 +1,75 @@
// xmlo object files
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! `xmlo` object file construction and processing.
//!
//! This object file format exists for compatibility with the old compiler
//! written in XSLT;
//! it will be removed in the future.
//!
//!
//! `xmlo` Object Files
//! ===================
//! An `xmlo` object file is produced by the compiler for each source file.
//! It is a terribly inefficient object format and will be eliminated in the
//! future.
//! The format is XML because the original compiler was written in XSLT.
//!
//! The general structure of an `xmlo` file consists of:
//! - Package metadata as attributes on the root node;
//! - A symbol table along with symbol metadata;
//! - Symbol dependencies (as [adjacency lists][]);
//! - Compiled JavaScript fragments for each applicable symbol; and
//! - Expanded source XML.
//!
//! [adjacency lists]: https://en.wikipedia.org/wiki/Adjacency_list
//!
//! For example (with some extra information omitted):
//!
//! ```xml
//! <package xmlns="http://www.lovullo.com/rater"
//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
//! title="Example Package"
//! name="example/package"
//! __rootpath="../"
//! preproc:elig-class-yields="isEligexamplepackage">
//! <!-- Symbol table -->
//! <preproc:symtable>
//! <preproc:sym name=":class:some-sym" type="class" ... />
//! <!-- ... -->
//! </preproc:symtable>
//!
//! <!-- Dependency graph (adjacency lists) -->
//! <preproc:sym-deps>
//! <preproc:sym-dep name=":class:some-sym">
//! <preproc:sym-ref name="someOtherSym" />
//! <!-- ... -->
//! </preproc:sym-dep>
//! </preproc:sym-deps>
//!
//! <!-- Compiled JS fragments -->
//! <preproc:fragments>
//! <preproc:fragment id=":class:some-sym">
//! classes['some-sym'] = '...generated JS code...';
//! </preproc:fragment>
//! </preproc:fragments>
//!
//! <!-- Expanded src -->
//! </package>
//! ```
pub mod reader;

File diff suppressed because it is too large Load Diff

826
tamer/src/sym.rs 100644
View File

@ -0,0 +1,826 @@
// String internment
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! String internment system.
//!
//! Interned strings are represented by [`Symbol`],
//! created by an [`Interner`]:
//!
//! - [`ArenaInterner`] - Intern pool backed by an [arena][] for fast
//! and stable allocation.
//! - [`DefaultInterner`] - The currently recommended intern pool
//! configuration for symbol interning.
//! - [`FxArenaInterner`] - Intern pool backed by an [arena][] using the
//! [Fx Hash][fxhash] hashing algorithm.
//!
//! Interners return symbols by reference which allows for `O(1)` comparison
//! by pointer.
//!
//! [arena]: bumpalo
//!
//! ```
//! use tamer::sym::{Interner, DefaultInterner, Symbol, SymbolIndex};
//!
//! // Inputs to be interned
//! let a = "foo";
//! let b = &"foo".to_string();
//! let c = "foobar";
//! let d = &c[0..3];
//!
//! // Interners employ interior mutability and so do not need to be
//! // declared `mut`
//! let interner = DefaultInterner::new();
//!
//! let (ia, ib, ic, id) = (
//! interner.intern(a),
//! interner.intern(b),
//! interner.intern(c),
//! interner.intern(d),
//! );
//!
//! assert_eq!(ia, ib);
//! assert_eq!(ia, id);
//! assert_eq!(ib, id);
//! assert_ne!(ia, ic);
//!
//! // All interns can be cloned and clones are eq
//! assert_eq!(*ia, ia.clone());
//!
//! // Only "foo" and "foobar" are interned
//! assert_eq!(2, interner.len());
//! assert!(interner.contains("foo"));
//! assert!(interner.contains("foobar"));
//! assert!(!interner.contains("something else"));
//!
//! // Each symbol has an associated, densely-packed integer value
//! // that can be used for indexing
//! assert_eq!(SymbolIndex::from_u32(1), ia.index());
//! assert_eq!(SymbolIndex::from_u32(1), ib.index());
//! assert_eq!(SymbolIndex::from_u32(2), ic.index());
//! assert_eq!(SymbolIndex::from_u32(1), id.index());
//! ```
//!
//! What Is String Interning?
//! =========================
//! _[String interning][]_ is a process by which a single copy of a string
//! is stored immutably in memory as part of a _pool_.
//! When the same string is later encountered,
//! a reference to the string in the pool is used rather than allocating a
//! new string.
//! Interned strings are typically referred to as "symbols" or "atoms".
//!
//! String comparison then amounts to comparing pointers (`O(1)`)
//! rather than having to scan the string (`O(n)`).
//! There is, however, a hashing cost of interning strings,
//! as well as looking up strings in the intern pool.
//!
//! [string interning]: https://en.wikipedia.org/wiki/String_interning
//!
//!
//! Internment Mechanism
//! ====================
//! The current [`DefaultInterner`] is [`FxArenaInterner`],
//! which is an [arena][]-allocated intern pool mapped by the
//! [Fx Hash][fxhash] hash function:
//!
//! 1. Strings are compared against the existing intern pool using a
//! [`HashMap`].
//! 2. If a string has not yet been interned:
//! - The string is copied into the arena-backed pool;
//! - A new [`Symbol`] is allocated adjacent to it in the arena holding
//! a string slice referencing the arena-allocated string; and
//! - The symbol is stored as the value in the [`HashMap`] for that key.
//! 3. Otherwise, a reference to the existing [`Symbol`] is returned.
//!
//! Since the arena provides a stable location in memory,
//! and all symbols are immutable,
//! [`ArenaInterner`] is able to safely return any number of references to
//! a single [`Symbol`],
//! bound to the lifetime of the arena itself.
//! Since the [`Symbol`] contains the string slice,
//! it also acts as a [smart pointer] for the interned string itself,
//! allowing [`Symbol`] to be used in any context where `&str` is
//! expected.
//! Dropping a [`Symbol`] does _not_ affect the underlying arena-allocated
//! data.
//!
//! [smart pointer]: https://doc.rust-lang.org/book/ch15-00-smart-pointers.html
//!
//! Each symbol also has an associated integer index value
//! (see [`Symbol::index`]),
//! which provides a dense range of values suitable for use in vectors
//! as an alternative to [`HashMap`] for mapping symbol data.
//!
//! Since a reference to the same [`Symbol`] is returned for each
//! [`Interner::intern`] and [`Interner::intern_soft`] call,
//! symbols can be compared by pointer in `O(1)` time.
//! Symbols also implement [`Copy`],
//! and will still compare equal to other symbols referencing the same
//! interned value by comparing the underlying string slice pointers.
//!
//! This implementation was heavily motivated by [Rustc's own internment
//! system][rustc-intern],
//! but differs in significant ways:
//!
//! - This implementation stores string references in [`Symbol`] rather
//! than relying on a global singleton [`Interner`];
//! - Consequently, associates the lifetime of interned strings with that
//! of the underlying arena rather than casting to `&'static`;
//! - Retrieves symbol values by pointer reference without requiring use
//! of [`Interner`] or a locking mechanism; and
//! - Stores [`Symbol`] objects in the arena rather than within a vector
//! indexed by [`SymbolIndex`].
//!
//!
//! Name Mangling
//! =============
//! Interners do not perform [name mangling][].
//! For future consideration,
//! see [RFC 2603][rfc-2603] and the [Itanium ABI][itanium-abi].
//!
//! [name mangling]: https://en.wikipedia.org/wiki/Name_mangling
//! [rfc-2603]: https://rust-lang.github.io/rfcs/2603-symbol-name-mangling-v2.html
//! [itanium-abi]: http://refspecs.linuxbase.org/cxxabi-1.86.html#mangling
//!
//!
//! Related Work and Further Reading
//! ================================
//! String interning is often tightly coupled with symbols (in the generic
//! sense),
//! sometimes called atoms.
//! Symbols can often be either interned,
//! and therefore compared for equivalency,
//! or _uninterned_,
//! which makes them unique even to symbols of the same name.
//! Interning may also be done automatically by a language for performance.
//! Languages listed below that allow for explicit interning may also
//! perform automatic interning
//! (for example, `'symbol` in Lisp and `lowercase_vars` as atoms in
//! Erlang).
//!
//! | Language | Interned | Uninterned |
//! | -------- | -------- | ---------- |
//! | [Erlang][] | [`list_to_atom`][edt] | _(None)_ |
//! | [GNU Emacs Lisp][] | [`intern`][es], [`intern-soft`][es] | [`make-symbol`][es], [`gensym`][es] |
//! | [GNU Guile][] | [`string->symbol`][gs], [`gensym`][gs] | [`make-symbol`][gu] |
//! | [JavaScript][] | [`Symbol.for`][js] | [`Symbol`][js] |
//! | [Java][] | [`intern`][jvs] | _(None)_ |
//! | [Lua][] | _(Automatic for string performance)_ | _(None)_ |
//! | [MIT/GNU Scheme][] | [`intern`][ms], [`intern-soft`][ms], [`string->symbol`][ms] | [`string->uninterned-symbol`][ms], [`generate-uninterned-symbol`][ms] |
//! | [PHP][] | _(Automatic for string [performance][pp])_ | _(None)_ |
//! | [Python][] | [`sys.intern`][pys] | _(None)_ |
//! | [R6RS Scheme][] | [`string->symbol`][r6s] | _(None)_ |
//! | [Racket][] | [`string->symbol`][rs], [`string->unreadable-symbol`][rs] | [`string->uninterned-symbol`][rs], [`gensym`][rs] |
//!
//! [gnu guile]: https://www.gnu.org/software/guile/
//! [gs]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Primitives.html#Symbol-Primitives
//! [gu]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Uninterned.html#Symbol-Uninterned
//! [gnu emacs lisp]: https://www.gnu.org/software/emacs/
//! [es]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Creating-Symbols.html
//! [racket]: https://racket-lang.org/
//! [rs]: https://docs.racket-lang.org/reference/symbols.html
//! [r6rs scheme]: http://www.r6rs.org/
//! [r6s]: http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-14.html
//! [mit/gnu scheme]: https://www.gnu.org/software/mit-scheme/
//! [ms]: https://www.gnu.org/software/mit-scheme/documentation/mit-scheme-ref/Symbols.html
//! [javascript]: https://developer.mozilla.org/en-US/docs/Web/JavaScript
//! [js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol
//! [java]: http://openjdk.java.net/
//! [jvs]: https://cr.openjdk.java.net/~iris/se/12/latestSpec/api/java.base/java/lang/String.html#intern()
//! [php]: https://www.php.net/
//! [pp]: https://wiki.php.net/rfc/performanceimprovements
//! [erlang]: https://erlang.org/
//! [edt]: http://erlang.org/doc/reference_manual/data_types.html
//! [lua]: https://www.lua.org/
//! [python]: https://www.python.org/
//! [pys]: https://docs.python.org/3/library/sys.html
//!
//! More information:
//! - Wikipedia entry on [string interning][].
//! - The [flyweight pattern][] in object-oriented programming is a type
//! of interning.
//! - [RFC 1845][rfc-1845] gives an example of string interning using
//! `Rc<str>`.
//! - Emacs directly exposes the intern pool at runtime as
//! [`obarray`][es].
//! - [`string-cache`][rust-string-cache] is a string interning system
//! for Rust developed by Mozilla for Servo.
//! - [`string-interner`][rust-string-interner] is another string
//! interning library for Rust.
//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
//! [arena allocator][rustc-arena] and avoids `Rc` by representing
//! symbols as integer values and converting them to strings using a
//! global pool and unsafe rust to cast to a `static` slice.
//! - Rustc identifies symbols by integer value encapsulated within a
//! `Symbol`.
//! - Rustc's [`newtype_index!` macro][rustc-nt] uses
//! [`global::NonZeroProgSymSize`] so that [`Option`] uses no
//! additional space (see [pull request `53315`][rustc-nt-pr]).
//! - Differences between TAMER and Rustc's implementations are outlined
//! above.
//!
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
//! [rust-string-cache]: https://github.com/servo/string-cache
//! [rust-string-interner]: https://github.com/robbepop/string-interner
//! [rfc-1845]: https://rust-lang.github.io/rfcs/1845-shared-from-slice.html
//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
//! [rustc-nt]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/macro.newtype_index.html
//! [rustc-nt-pr]: https://github.com/rust-lang/rust/pull/53315
//!
//! The hash function chosen for this module is [Fx Hash][fxhash].
//!
//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
//! function,
//! but [now uses Fx Hash][rustc-fx].
//! This was extracted into the [`fxhash`][fxhash] crate,
//! which is used by TAMER.
//! - TAMER originally used FNV,
//! but benchmarks showed that Fx Hash was more performant.
//! - Benchmarks for other hash functions,
//! including FNV,
//! can be found at the [`hash-rs`][hash-rs] project.
//!
//! [fnv]: https://doc.servo.org/fnv/
//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
//! [hash-rs]: https://github.com/Gankra/hash-rs
use crate::global;
use bumpalo::Bump;
use fxhash::FxBuildHasher;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::convert::TryInto;
use std::fmt;
use std::hash::BuildHasher;
use std::ops::Deref;
/// Unique symbol identifier.
///
/// _Do not construct this value yourself;_
/// use an [`Interner`].
///
/// This newtype helps to prevent other indexes from being used where a
/// symbol index is expected.
/// Note, however, that it provides no defense against mixing symbol indexes
/// between multiple [`Interner`]s.
///
/// The index `0` is never valid because of [`global::NonZeroProgSymSize`],
/// which allows us to have `Option<SymbolIndex>` at no space cost.
///
/// Equality is derived and therefore compares the wrapped integer values.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SymbolIndex(global::NonZeroProgSymSize);
impl From<SymbolIndex> for usize {
fn from(value: SymbolIndex) -> usize {
value.0.get().try_into().unwrap()
}
}
impl SymbolIndex {
/// Construct index from a non-zero `u32` value.
///
/// Panics
/// ------
/// Will panic if `n == 0`.
pub fn from_u32(n: u32) -> SymbolIndex {
SymbolIndex(global::NonZeroProgSymSize::new(n).unwrap())
}
/// Construct index from an unchecked non-zero `u32` value.
///
/// This does not verify that `n > 0` and so must only be used in
/// contexts where this invariant is guaranteed to hold.
/// Unlike [`from_u32`](SymbolIndex::from_u32),
/// this never panics.
unsafe fn from_u32_unchecked(n: u32) -> SymbolIndex {
SymbolIndex(global::NonZeroProgSymSize::new_unchecked(n))
}
}
/// Interned string.
///
/// A reference to this symbol is returned each time the same string is
/// interned with the same [`Interner`];
/// as such,
/// symbols can be compared for equality by pointer;
/// the underlying symbol id need not be used.
/// Equality is implemented by hand rather than derived
/// (see the `PartialEq` implementation for this type).
///
/// Each symbol is identified by a unique integer
/// (see [`index`](Symbol::index)).
/// The use of integers creates a more dense range of values than pointers,
/// which allows callers to use a plain [`Vec`] as a map instead of
/// something far more expensive like
/// [`HashSet`](std::collections::HashSet);
/// this is especially beneficial for portions of the system that make
/// use of nearly all interned symbols,
/// like the ASG.
///
/// The symbol also stores a string slice referencing the interned string
/// itself,
/// whose lifetime is that of the [`Interner`]'s underlying data store.
/// Dereferencing the symbol will expose the underlying slice.
#[derive(Copy, Clone, Debug)]
pub struct Symbol<'i> {
/// Unique index of this symbol (see [`Symbol::index`]).
index: SymbolIndex,
/// Slice referencing the interned string.
str: &'i str,
}
impl<'i> Symbol<'i> {
/// Construct a new interned value.
///
/// _This must only be done by an [`Interner`]._
/// As such,
/// this function is not public.
///
/// For test builds (when `cfg(test)`),
/// `new_dummy` is available to create symbols for tests.
#[inline]
fn new(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
Self { index, str }
}
/// Retrieve unique symbol index.
///
/// This is a densely-packed identifier that can be used as an index for
/// mapping.
/// See [`SymbolIndex`] for more information.
#[inline]
pub fn index(&self) -> SymbolIndex {
self.index
}
/// Construct a new interned value _for testing_.
///
/// This is a public version of [`Symbol::new`] available for test
/// builds.
/// This separate name is meant to strongly imply that you should not be
/// doing this otherwise.
#[cfg(test)]
#[inline(always)]
pub fn new_dummy(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
Self::new(index, str)
}
}
impl<'i> PartialEq for Symbol<'i> {
    /// Compare symbols by address rather than by string content.
    ///
    /// Two symbols are equal if they are the very same object in memory,
    ///   or if their string slices begin at the same address
    ///     (as is the case for a copy of a symbol).
    fn eq(&self, other: &Self) -> bool {
        if std::ptr::eq(self, other) {
            return true;
        }

        self.str.as_ptr() == other.str.as_ptr()
    }
}
// `Eq` has no methods of its own;
// the comparison itself is provided by the `PartialEq` implementation for
// `Symbol`.
impl<'i> Eq for Symbol<'i> {}
impl<'i> Deref for Symbol<'i> {
    type Target = str;

    /// Expose the interned string slice.
    ///
    /// This is what allows a symbol to be used where a string is
    ///   expected.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.str
    }
}
impl<'i> fmt::Display for Symbol<'i> {
    /// Display the underlying interned string.
    ///
    /// Since symbols contain pointers to their interned slices,
    ///   we effectively get this for free.
    ///
    /// Output is produced using [`fmt::Formatter::pad`] so that any width,
    ///   fill, alignment, and precision requested by the caller
    ///     (for example `{:>8}`)
    ///   is applied to the symbol just as it would be to a `&str`.
    /// Writing through `write!(f, "{}", ...)` would silently discard those
    ///   flags.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.str)
    }
}
/// Create, store, compare, and retrieve [`Symbol`] values.
///
/// An interner accepts string slices and produces [`Symbol`]s.
/// Interning the same string again yields a reference to the same
///   [`Symbol`],
///     so symbols can be cheaply compared for equality by pointer.
/// The location of a symbol in memory is fixed for the lifetime of the
///   interner.
///
/// To find out whether a value has already been interned,
///   see [`intern_soft`](Interner::intern_soft) and
///   [`contains`](Interner::contains).
///
/// See the [module-level documentation](self) for an example.
pub trait Interner<'i> {
    /// Intern the string slice `value`,
    ///   or return its existing [`Symbol`].
    ///
    /// A string that has already been interned yields a reference to its
    ///   existing [`Symbol`];
    ///     otherwise the string is interned and a new [`Symbol`] is
    ///     created for it.
    ///
    /// The returned symbol lives as long as the underlying intern pool.
    ///
    /// To look up a symbol _without_ interning,
    ///   see [`intern_soft`](Interner::intern_soft).
    fn intern(&'i self, value: &str) -> &'i Symbol<'i>;

    /// Retrieve the existing intern for the string slice `value`,
    ///   if any.
    ///
    /// Unlike [`intern`](Interner::intern),
    ///   a string that has not yet been interned is _not_ interned by
    ///   this call.
    fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>>;

    /// Whether `value` has already been interned.
    fn contains(&self, value: &str) -> bool;

    /// Number of interned strings.
    ///
    /// The count grows each time a unique string is interned;
    ///   interning an already-interned string leaves it unchanged.
    fn len(&self) -> usize;

    /// Intern a slice of bytes that is assumed to be UTF-8,
    ///   or return its existing [`Symbol`].
    ///
    /// Safety
    /// ======
    /// This function is unsafe because it uses
    ///   [`std::str::from_utf8_unchecked`];
    ///     `value` is not checked to be valid UTF-8.
    /// It is provided for convenience when interning from trusted binary
    ///   data
    ///     (such as [object files][]).
    ///
    /// [object files]: crate::obj
    unsafe fn intern_utf8_unchecked(&'i self, value: &[u8]) -> &'i Symbol<'i> {
        let value_str = std::str::from_utf8_unchecked(value);

        self.intern(value_str)
    }
}
/// An interner backed by an [arena](bumpalo).
///
/// Since interns exist until the interner itself is freed,
/// an arena is a much more efficient and appropriate memory allocation
/// strategy.
/// This further provides a stable location in memory for symbol data.
///
/// The type parameter `S` selects the hasher used by the map of strings to
/// symbols.
///
/// For the recommended configuration,
/// see [`DefaultInterner`].
///
/// See the [module-level documentation](self) for examples and more
/// information on how to use this interner.
pub struct ArenaInterner<'i, S>
where
S: BuildHasher + Default,
{
/// String and [`Symbol`] storage.
arena: Bump,
/// Next available symbol index.
///
/// This must always be ≥1
/// (it is initialized to `1` when the interner is created).
/// It is not defined as `NonZeroProgSymSize` because
/// `intern` enforces the invariant.
next_index: Cell<global::ProgSymSize>,
/// Map of interned strings to their respective [`Symbol`].
///
/// Both strings and symbols are allocated within `arena`.
map: RefCell<HashMap<&'i str, &'i Symbol<'i>, S>>,
}
impl<'i, S> ArenaInterner<'i, S>
where
    S: BuildHasher + Default,
{
    /// Create an interner with no initial capacity.
    ///
    /// Prefer [`with_capacity`](ArenaInterner::with_capacity) when
    ///   possible.
    #[inline]
    pub fn new() -> Self {
        Self::with_capacity(0)
    }

    /// Create an interner whose underlying [`HashMap`] has the given
    ///   initial capacity.
    ///
    /// The given `capacity` has no effect on arena allocation.
    /// It matters only for the map of strings to symbols,
    ///   which will reallocate and re-hash its contents once its
    ///   capacity is exceeded.
    /// See benchmarks.
    ///
    /// If reallocation is a major concern,
    ///   a [consistent hashing algorithm][consistent] could be
    ///   considered,
    ///     but the implementation will still incur the cost of copying
    ///     the [`HashMap`]'s contents to a new location in memory.
    ///
    /// [consistent]: https://en.wikipedia.org/wiki/Consistent_hashing
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let map = HashMap::with_capacity_and_hasher(capacity, S::default());

        Self {
            arena: Bump::new(),
            // Index `0` is never valid, so counting begins at 1.
            next_index: Cell::new(1),
            map: RefCell::new(map),
        }
    }
}
impl<'i, S> Interner<'i> for ArenaInterner<'i, S>
where
    S: BuildHasher + Default,
{
    /// Intern `value`, returning the symbol that represents it.
    ///
    /// If `value` has already been interned,
    /// the existing symbol is returned and nothing is allocated.
    /// Otherwise the string is copied into the arena,
    /// a new [`Symbol`] is allocated alongside it with the next
    /// available index,
    /// and the pair is recorded in the map.
    ///
    /// # Panics
    /// Panics if the symbol index space is exhausted,
    /// rather than allowing the index to wrap around to zero
    /// (which would violate the non-zero invariant relied upon below).
    /// Also panics if the internal map is already borrowed.
    fn intern(&'i self, value: &str) -> &'i Symbol<'i> {
        let mut map = self.map.borrow_mut();

        if let Some(&sym) = map.get(value) {
            return sym;
        }

        let next_index = self.next_index.get();

        // Compute the following index _before_ touching any state so that
        // exhaustion is reported without leaving a half-interned symbol
        // behind.  An unchecked `+ 1` would wrap to 0 in release builds.
        let following_index = next_index
            .checked_add(1)
            .expect("interner symbol index space exhausted");

        // Next_index should always be initialized to at least 1.
        debug_assert!(next_index != 0);

        // SAFETY: `next_index` starts at 1 and is only ever advanced using
        // the checked addition above, so it can never be zero.
        let id = unsafe { SymbolIndex::from_u32_unchecked(next_index) };

        // Copy string slice into the arena.
        //
        // SAFETY: the bytes are copied verbatim from `value`, which is a
        // `&str` and therefore valid UTF-8.  The pointer round-trip only
        // re-labels the borrow as `'i`; the data lives in `self.arena`,
        // and `self` is itself borrowed for `'i`.
        let clone: &'i str = unsafe {
            &*(std::str::from_utf8_unchecked(
                self.arena.alloc_slice_clone(value.as_bytes()),
            ) as *const str)
        };

        // Symbols are also stored within the arena, adjacent to the
        // string.  This ensures that both have stable locations in memory.
        let sym: &'i Symbol<'i> = self.arena.alloc(Symbol::new(id, clone));

        map.insert(clone, sym);
        self.next_index.set(following_index);

        sym
    }

    /// Look up `value` without interning it.
    ///
    /// Returns the existing symbol if `value` has been interned,
    /// otherwise [`None`].
    #[inline]
    fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>> {
        self.map.borrow().get(value).copied()
    }

    /// Whether `value` has already been interned.
    #[inline]
    fn contains(&self, value: &str) -> bool {
        self.map.borrow().contains_key(value)
    }

    /// Number of distinct strings interned so far.
    #[inline]
    fn len(&self) -> usize {
        self.map.borrow().len()
    }
}
/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// This is an [`ArenaInterner`] whose map is hashed using
/// `FxBuildHasher`.
///
/// _This is currently the hash function used by [`DefaultInterner`]._
///
/// If denial of service is not a concern,
/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
///
/// See intern benchmarks for a comparison.
pub type FxArenaInterner<'i> = ArenaInterner<'i, FxBuildHasher>;
/// Recommended [`Interner`] and configuration.
///
/// This is currently an alias for [`FxArenaInterner`].
///
/// The choice of this default relies on the assumption that
/// denial-of-service attacks against the hash function are not a
/// concern.
///
/// For more information on the hashing algorithm,
/// see [`FxArenaInterner`].
pub type DefaultInterner<'i> = FxArenaInterner<'i>;
#[cfg(test)]
mod test {
use super::*;
mod symbol {
use super::*;
/// `Option<Symbol>` should have no space cost.
#[test]
fn symbol_index_option_no_cost() {
use std::mem::size_of;
assert_eq!(
size_of::<Option<Symbol>>(),
size_of::<Symbol>(),
"Option<Symbol> should be the same size as Symbol"
);
}
/// A symbol is equal to itself when compared by reference.
#[test]
fn self_compares_eq() {
let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
assert_eq!(&sym, &sym);
}
/// A copy of a symbol is equal to the symbol it was copied from.
#[test]
fn copy_compares_equal() {
let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
let cpy = sym;
assert_eq!(sym, cpy);
}
// Integer values are for convenience, not identity.  They cannot be
// used as a unique identifier across different interners.
#[test]
fn same_index_different_slices_compare_unequal() {
let a = Symbol::new(SymbolIndex::from_u32(1), "a");
let b = Symbol::new(SymbolIndex::from_u32(1), "b");
assert_ne!(a, b);
}
// As mentioned above, ids are _not_ the identity of the symbol.  If
// two values point to the same location in memory, they are assumed
// to have come from the same interner, and should therefore have
// the same index.  A mismatch like the one below should never happen
// unless symbols are being created without the use of interners,
// which is unsupported.
//
// This test is a cautionary tale.
#[test]
fn different_index_same_slices_compare_equal() {
let slice = "str";
let a = Symbol::new(SymbolIndex::from_u32(1), slice);
let b = Symbol::new(SymbolIndex::from_u32(2), slice);
assert_eq!(a, b);
}
/// An explicit `clone` is equal to the original symbol.
#[test]
fn cloned_symbols_compare_equal() {
let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
assert_eq!(sym, sym.clone());
}
// &Symbol can be used where string slices are expected (this won't
// compile otherwise).
#[test]
fn ref_can_be_used_as_string_slice() {
let slice = "str";
let sym_slice: &str = &Symbol::new(SymbolIndex::from_u32(1), slice);
assert_eq!(slice, sym_slice);
}
// For use when we can guarantee proper ids.
#[test]
fn can_create_index_unchecked() {
assert_eq!(SymbolIndex::from_u32(1), unsafe {
SymbolIndex::from_u32_unchecked(1)
});
}
/// The index given at construction is returned by `index()`.
#[test]
fn can_retrieve_symbol_index() {
let index = SymbolIndex::from_u32(1);
assert_eq!(index, Symbol::new(index, "").index());
}
/// Formatting a symbol with `{}` yields its interned string.
#[test]
fn displays_as_interned_value() {
let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
assert_eq!(format!("{}", sym), sym.str);
}
}
mod interner {
    use super::*;

    /// System under test: the recommended interner configuration.
    type Sut<'i> = DefaultInterner<'i>;

    /// Equal strings intern to equal symbols regardless of whether they
    /// originate from a literal or an owned `String`;
    /// differing strings intern to unequal symbols.
    #[test]
    fn recognizes_equal_strings() {
        let a = "foo";
        let b = a.to_string();
        let c = "bar";
        let d = c.to_string();

        let sut = Sut::new();

        let (ia, ib, ic, id) =
            (sut.intern(a), sut.intern(&b), sut.intern(c), sut.intern(&d));

        assert_eq!(ia, ib);
        assert_eq!(&ia, &ib);
        assert_eq!(*ia, *ib);

        assert_eq!(ic, id);
        assert_eq!(&ic, &id);
        assert_eq!(*ic, *id);

        assert_ne!(ia, ic);
        assert_ne!(&ia, &ic);
        assert_ne!(*ia, *ic);
    }

    /// Indexes begin at 1 and advance only when a new string is interned.
    #[test]
    fn symbol_id_increases_with_each_new_intern() {
        let sut = Sut::new();

        // Remember that identifiers begin at 1
        assert_eq!(
            SymbolIndex::from_u32(1),
            sut.intern("foo").index(),
            "First index should be 1"
        );

        assert_eq!(
            SymbolIndex::from_u32(1),
            sut.intern("foo").index(),
            "Index should not increment for already-interned symbols"
        );

        assert_eq!(
            SymbolIndex::from_u32(2),
            sut.intern("bar").index(),
            "Index should increment for new symbols"
        );
    }

    /// `len` counts distinct interned strings.
    #[test]
    fn length_increases_with_each_new_intern() {
        let sut = Sut::new();

        assert_eq!(0, sut.len(), "invalid empty len");

        sut.intern("foo");
        assert_eq!(1, sut.len(), "increment len");

        // duplicate
        sut.intern("foo");
        assert_eq!(1, sut.len(), "do not increment len on duplicates");

        sut.intern("bar");
        assert_eq!(2, sut.len(), "increment len (2)");
    }

    /// `contains` reports whether a string has been interned.
    #[test]
    fn can_check_whether_string_is_interned() {
        let sut = Sut::new();

        assert!(!sut.contains("foo"), "recognize missing value");
        sut.intern("foo");
        assert!(sut.contains("foo"), "recognize interned value");
    }

    /// `intern_soft` looks up a symbol without creating one.
    #[test]
    fn intern_soft() {
        let sut = Sut::new();

        assert_eq!(None, sut.intern_soft("foo"));

        let foo = sut.intern("foo");
        assert_eq!(Some(foo), sut.intern_soft("foo"));
    }

    /// The requested capacity is passed on to the underlying map.
    #[test]
    fn new_with_capacity() {
        let n = 512;
        let sut = Sut::with_capacity(n);

        // note that this is not publicly available
        assert!(sut.map.borrow().capacity() >= n);
    }

    /// Interning raw (trusted) UTF-8 bytes yields the same symbol as
    /// interning the equivalent string slice.
    #[test]
    fn intern_utf8_unchecked() {
        let sut = Sut::new();

        let a = sut.intern("foo");
        let b = unsafe { sut.intern_utf8_unchecked(b"foo") };

        assert_eq!(a, b);
    }
}
}

View File

@ -0,0 +1,18 @@
// Mocks, stubs, and other stuff for testing
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
pub mod quick_xml;

View File

@ -0,0 +1,125 @@
// quick_xml mocks
//
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use quick_xml::Result as XmlResult;
use std::borrow::Cow;
use std::cell::Cell;
/// Mock XML reader event.
///
/// NOTE(review): the variants presumably mirror the subset of
/// `quick_xml`'s event type that the code under test matches on — confirm
/// against the consumers of this mock.
pub enum MockXmlEvent<'a> {
/// Opening tag of an element.
Start(MockBytesStart<'a>),
/// Closing tag of an element.
End(MockBytesEnd<'a>),
/// Self-closing element; carries the same payload type as `Start`.
Empty(MockBytesStart<'a>),
/// Text content.
// NOTE(review): `dead_code` is allowed here presumably because not every
// build constructs this variant — confirm whether it is still needed.
#[allow(dead_code)]
Text(MockBytesText<'a>),
}
/// Mock of the payload carried by a start (or empty) element event.
pub struct MockBytesStart<'a> {
    /// Raw element name.
    name: &'a [u8],

    /// Attributes to hand out from [`MockBytesStart::attributes`].
    ///
    /// Stored in a [`Cell`] so that they can be moved out through a shared
    /// reference.
    attrs: Cell<Option<MockAttributes<'a>>>,
}

impl<'a> MockBytesStart<'a> {
    /// Create a mock start tag with the given `name` and,
    /// optionally, the attributes it should yield.
    pub fn new(name: &'a [u8], attrs: Option<MockAttributes<'a>>) -> Self {
        let attrs = Cell::new(attrs);

        Self { name, attrs }
    }

    /// Raw element name as given at construction.
    pub fn name(&self) -> &[u8] {
        self.name
    }

    /// Take the attributes out of this mock.
    ///
    /// # Panics
    /// Panics if no attributes were provided at construction,
    /// or if they have already been taken by a previous call
    /// (taking leaves `None` behind).
    pub fn attributes(&self) -> MockAttributes {
        let taken = self.attrs.take();

        taken.expect("missing mock attributes")
    }
}
/// Mock of the payload carried by an end-element event.
pub struct MockBytesEnd<'a> {
    /// Raw element name.
    name: Cow<'a, [u8]>,
}

impl<'a> MockBytesEnd<'a> {
    /// Create a mock end tag that borrows the given `name`.
    pub fn new(name: &'a [u8]) -> Self {
        let name = Cow::Borrowed(name);

        Self { name }
    }

    /// Raw element name as given at construction.
    pub fn name(&self) -> &[u8] {
        // Deref coercion takes `&Cow<[u8]>` to `&[u8]`.
        &self.name
    }
}
/// Mock of the payload carried by a text event.
pub struct MockBytesText<'a> {
    /// Raw text content.
    ///
    /// Nothing in this block reads the field, hence the lint allowance.
    #[allow(dead_code)]
    content: Cow<'a, [u8]>,
}

impl<'a> MockBytesText<'a> {
    /// Create mock text that borrows the given `content`.
    pub fn new(content: &'a [u8]) -> Self {
        let content = Cow::Borrowed(content);

        Self { content }
    }
}
/// Mock attribute iterator.
pub struct MockAttributes<'a> {
    /// Attributes still to be yielded.
    attrs: Vec<MockAttribute<'a>>,

    /// Last value given to [`MockAttributes::with_checks`],
    /// or `None` if it was never called.
    with_checks: Option<bool>,
}

impl<'a> MockAttributes<'a> {
    /// Create an iterator over `attrs` with checks not yet configured.
    pub fn new(attrs: Vec<MockAttribute<'a>>) -> Self {
        let with_checks = None;

        Self { attrs, with_checks }
    }

    /// Record whether checks were requested, returning `self` for chaining.
    ///
    /// Iteration panics unless this has been called with `false`.
    pub fn with_checks(&mut self, val: bool) -> &mut Self {
        self.with_checks = Some(val);
        self
    }
}

impl<'a> Iterator for MockAttributes<'a> {
    type Item = XmlResult<MockAttribute<'a>>;

    /// Yield the next attribute, always wrapped in `Ok`.
    ///
    /// Attributes are popped from the end of the vector,
    /// so they are yielded in the reverse of the order given to `new`.
    ///
    /// # Panics
    /// Panics unless `with_checks(false)` was called beforehand.
    fn next(&mut self) -> Option<Self::Item> {
        // We read output from Saxon, which will always be valid
        match self.with_checks {
            Some(false) => {}
            _ => panic!("MockAttributes expected with_checks false"),
        }

        self.attrs.pop().map(Ok)
    }
}
/// Mock of a single element attribute.
pub struct MockAttribute<'a> {
    /// Raw attribute name.
    pub key: &'a [u8],

    /// Raw attribute value.
    pub value: Cow<'a, [u8]>,
}

impl<'a> MockAttribute<'a> {
    /// Create a mock attribute borrowing both `key` and `value`.
    pub fn new(key: &'a [u8], value: &'a [u8]) -> Self {
        let value = Cow::Borrowed(value);

        Self { key, value }
    }
}