TAMER linker (still partly proof-of-concept)
We will continue to finalize this as we go. It is currently used in production, both for performance and because it fixes a bug in the XSLT-based linker.master
commit
777494a602
|
@ -1,4 +1,4 @@
|
|||
# This number is incremented for every linker change to force rebuilding
|
||||
# of xmle files.
|
||||
0
|
||||
4
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# This number is incremented for every compiler change to force rebuilding
|
||||
# of xmlo files.
|
||||
0
|
||||
2
|
||||
|
||||
|
|
|
@ -132,7 +132,8 @@ standalones: $(dest_standalone)
|
|||
strip: $(dest_standalone_strip) ui/package.strip.js
|
||||
%.xmle: %.xmlo $(path_tame)/.rev-xmle
|
||||
$(TAME_TS)
|
||||
$(TAME) link $< $@
|
||||
@echo "WARNING: using WIP proof-of-concept linker!"
|
||||
set -o pipefail; $(path_tame)/tamer/target/release/tameld $< | awk '/^<package/{p=1};p' > $@
|
||||
%.js: %.xmle
|
||||
$(TAME_TS)
|
||||
$(TAME) standalone $< $@
|
||||
|
|
|
@ -31,21 +31,42 @@
|
|||
|
||||
<template mode="preproc:compile-fragments" priority="9"
|
||||
match="lv:package">
|
||||
<copy>
|
||||
<sequence select="@*" />
|
||||
|
||||
<apply-templates select="*" mode="preproc:compile-fragments-root" />
|
||||
</copy>
|
||||
</template>
|
||||
|
||||
|
||||
<template mode="preproc:compile-fragments-root" priority="1"
|
||||
match="node()">
|
||||
<sequence select="." />
|
||||
</template>
|
||||
|
||||
|
||||
<!-- Position fragments directly after dependencies. This allows TAMER to
|
||||
halt processing early on, rather than having to read the rest of the
|
||||
file (fragments used to be placed at the end). -->
|
||||
<template mode="preproc:compile-fragments-root" priority="5"
|
||||
match="preproc:sym-deps">
|
||||
<sequence select="." />
|
||||
|
||||
<variable name="package" as="element( lv:package )"
|
||||
select="parent::lv:package" />
|
||||
|
||||
<variable name="symtable-map" as="map( xs:string, element( preproc:sym ) )"
|
||||
select="map:merge(
|
||||
for $sym in preproc:symtable/preproc:sym
|
||||
for $sym in $package/preproc:symtable/preproc:sym
|
||||
return map{ string( $sym/@name ) : $sym } )" />
|
||||
|
||||
<copy>
|
||||
<sequence select="@*, *" />
|
||||
|
||||
<preproc:fragments>
|
||||
<apply-templates mode="preproc:compile-fragments">
|
||||
<with-param name="symtable-map" select="$symtable-map"
|
||||
tunnel="yes" />
|
||||
</apply-templates>
|
||||
</preproc:fragments>
|
||||
</copy>
|
||||
<preproc:fragments>
|
||||
<apply-templates select="$package/*"
|
||||
mode="preproc:compile-fragments">
|
||||
<with-param name="symtable-map" select="$symtable-map"
|
||||
tunnel="yes" />
|
||||
</apply-templates>
|
||||
</preproc:fragments>
|
||||
</template>
|
||||
|
||||
|
||||
|
@ -117,6 +138,11 @@
|
|||
<apply-templates mode="preproc:compile-fragments" />
|
||||
</template>
|
||||
|
||||
<!-- Do not compile these as consts -->
|
||||
<template mode="preproc:compile-fragments" priority="9"
|
||||
match="lv:meta/lv:prop/lv:const">
|
||||
<!-- ignore -->
|
||||
</template>
|
||||
|
||||
<template mode="preproc:compile-fragments" priority="5"
|
||||
match="lv:meta/lv:prop">
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
|
||||
@return compiled JS
|
||||
-->
|
||||
<template match="lv:package" mode="compiler:entry">
|
||||
<template name="compiler:entry">
|
||||
<!-- enclose everything in a self-executing function to sandbox our data -->
|
||||
<text>( function() { </text>
|
||||
<!-- to store debug information for equations (we have to put this out here
|
||||
|
@ -76,7 +76,7 @@
|
|||
</template>
|
||||
|
||||
|
||||
<template match="lv:package" mode="compiler:entry-rater">
|
||||
<template name="compiler:entry-rater">
|
||||
<!-- the rater itself -->
|
||||
<value-of select="$compiler:nl" />
|
||||
<text>function rater( arglist, _canterm ) {</text>
|
||||
|
@ -107,16 +107,13 @@
|
|||
<text>/**@expose*/var genclasses = {};</text>
|
||||
</template>
|
||||
|
||||
<template match="lv:package" mode="compiler:entry-classifier">
|
||||
<template name="compiler:classifier">
|
||||
<!-- allow classification of any arbitrary dataset -->
|
||||
<value-of select="$compiler:nl" />
|
||||
<text>rater.classify = function( args, _canterm ) {</text>
|
||||
return rater( args, _canterm ).classes;
|
||||
<text> };</text>
|
||||
</template>
|
||||
|
||||
<template match="lv:package" mode="compiler:exit-classifier">
|
||||
<!-- TODO: make sure fromMap has actually been compiled -->
|
||||
<text>rater.classify.fromMap = function( args_base, _canterm ) { </text>
|
||||
<text>var ret = {}; </text>
|
||||
<text>rater.fromMap( args_base, function( args ) {</text>
|
||||
|
@ -136,8 +133,10 @@
|
|||
<text> }; </text>
|
||||
</template>
|
||||
|
||||
<template match="lv:package" mode="compiler:exit-rater">
|
||||
<template name="compiler:exit-rater">
|
||||
<param name="name" as="xs:string "/>
|
||||
<param name="symbols" as="element( preproc:sym )*" />
|
||||
<param name="mapfrom" as="element()*" />
|
||||
|
||||
<value-of select="$compiler:nl" />
|
||||
<text>return { </text>
|
||||
|
@ -152,7 +151,7 @@
|
|||
|
||||
<!-- make the name of the supplier available -->
|
||||
<text>/**@expose*/rater.supplier = '</text>
|
||||
<value-of select="substring-after( @name, '/' )" />
|
||||
<value-of select="substring-after( $name, '/' )" />
|
||||
<text>'; </text>
|
||||
|
||||
<text>/**@expose*/rater.meta = meta;</text>
|
||||
|
@ -174,20 +173,8 @@
|
|||
$symbols[ @type='class' ] )" />
|
||||
<text> }; </text>
|
||||
|
||||
<variable name="mapfrom" select="
|
||||
preproc:symtable/preproc:sym[
|
||||
@type='map'
|
||||
]/preproc:from[
|
||||
not(
|
||||
@name = parent::preproc:sym
|
||||
/preceding-sibling::preproc:sym[
|
||||
@type='map'
|
||||
]/preproc:from/@name
|
||||
)
|
||||
]
|
||||
" />
|
||||
|
||||
<!-- mapped fields (external names) -->
|
||||
<value-of select="$compiler:nl" />
|
||||
<text>/**@expose*/rater.knownFields = {</text>
|
||||
<for-each select="$mapfrom">
|
||||
<if test="position() > 1">
|
||||
|
@ -221,6 +208,7 @@
|
|||
<text>'</text>
|
||||
<value-of select="substring-after( @name, ':class:' )" />
|
||||
<text>':'</text>
|
||||
<!-- yields -->
|
||||
<value-of select="@yields" />
|
||||
<text>'</text>
|
||||
</for-each>
|
||||
|
@ -239,7 +227,7 @@
|
|||
<value-of select="substring-after( @name, ':class:' )" />
|
||||
<text>':'</text>
|
||||
<!-- todo: escape -->
|
||||
<value-of select="translate( @desc, "'", '' )" />
|
||||
<value-of select="translate( normalize-space(@desc), "'", '' )" />
|
||||
<text>'</text>
|
||||
</for-each>
|
||||
</function>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -123,7 +123,8 @@
|
|||
<!-- final result with compiled fragments -->
|
||||
<lv:package>
|
||||
<sequence select="$pkg-with-symtable/@*,
|
||||
$pkg-with-symtable/node()" />
|
||||
$pkg-with-symtable/preproc:sym-deps/preceding-sibling::*,
|
||||
$pkg-with-symtable/preproc:sym-deps" />
|
||||
|
||||
<preproc:fragments>
|
||||
<!-- special fragment to be output as the head -->
|
||||
|
@ -148,6 +149,8 @@
|
|||
<text>};</text>
|
||||
</preproc:fragment>
|
||||
</preproc:fragments>
|
||||
|
||||
<sequence select="$pkg-with-symtable/preproc:sym-deps/following-sibling::*" />
|
||||
</lv:package>
|
||||
</template>
|
||||
|
||||
|
@ -197,7 +200,8 @@
|
|||
<!-- final result with compiled fragments -->
|
||||
<lv:package>
|
||||
<sequence select="$pkg-with-symtable/@*,
|
||||
$pkg-with-symtable/node()" />
|
||||
$pkg-with-symtable/preproc:sym-deps/preceding-sibling::*,
|
||||
$pkg-with-symtable/preproc:sym-deps" />
|
||||
|
||||
<preproc:fragments>
|
||||
<!-- special fragment to be output as the head -->
|
||||
|
@ -222,6 +226,8 @@
|
|||
<text>};</text>
|
||||
</preproc:fragment>
|
||||
</preproc:fragments>
|
||||
|
||||
<sequence select="$pkg-with-symtable/preproc:sym-deps/following-sibling::*" />
|
||||
</lv:package>
|
||||
</template>
|
||||
|
||||
|
|
|
@ -80,7 +80,8 @@
|
|||
$displays )" />
|
||||
|
||||
<lv:package name="{@name}"
|
||||
lvmc:type="worksheet">
|
||||
__rootpath="{$__relroot}"
|
||||
lvmc:type="worksheet">
|
||||
<!-- we provide one special symbol -->
|
||||
<preproc:symtable>
|
||||
<preproc:sym name="___worksheet"
|
||||
|
|
|
@ -66,13 +66,27 @@
|
|||
<text>[depgen] *determining symbol dependencies...</text>
|
||||
</message>
|
||||
|
||||
<apply-templates select="preproc:symtable" mode="preproc:depgen" />
|
||||
|
||||
<sequence select="*" />
|
||||
<apply-templates select="*" mode="preproc:depgen-root" />
|
||||
</copy>
|
||||
</template>
|
||||
|
||||
|
||||
<template mode="preproc:depgen-root" priority="1"
|
||||
match="node()">
|
||||
<sequence select="." />
|
||||
</template>
|
||||
|
||||
|
||||
<template mode="preproc:depgen-root" priority="5"
|
||||
match="preproc:symtable">
|
||||
<!-- Place symbol table _before_ dependencies. This simplifies
|
||||
streaming processing in TAMER. -->
|
||||
<sequence select="." />
|
||||
|
||||
<apply-templates select="." mode="preproc:depgen" />
|
||||
</template>
|
||||
|
||||
|
||||
<template match="preproc:symtable" mode="preproc:depgen" priority="9">
|
||||
<variable name="symtable" select="." />
|
||||
|
||||
|
|
|
@ -514,10 +514,18 @@
|
|||
<!-- overridden; we're obsolete :( -->
|
||||
</when>
|
||||
|
||||
<!-- if we've gotten this far, then the override is good; clear it -->
|
||||
<!-- if we've gotten this far, then the override is good; clear it
|
||||
so as not to trigger override errors -->
|
||||
<when test="@override='true'">
|
||||
<copy>
|
||||
<sequence select="@*[ not( name()='override' ) ], *" />
|
||||
<sequence select="@*[ not( name()='override' ) ]" />
|
||||
|
||||
<!-- mark this has having been overridden for the linker (see
|
||||
TAMER; we'll hopefully be getting rid of overrides in the
|
||||
future) -->
|
||||
<attribute name="isoverride" select="'true'" />
|
||||
|
||||
<sequence select="*" />
|
||||
</copy>
|
||||
</when>
|
||||
|
||||
|
|
|
@ -36,17 +36,17 @@
|
|||
xmlns:preproc="http://www.lovullo.com/rater/preproc">
|
||||
|
||||
|
||||
<output
|
||||
indent="yes"
|
||||
omit-xml-declaration="yes"
|
||||
/>
|
||||
<output method="text" />
|
||||
|
||||
<include href="include/dslc-base.xsl" />
|
||||
|
||||
<!-- compiler -> JS -->
|
||||
<include href="compiler/linker.xsl" />
|
||||
<include href="compiler/map.xsl" />
|
||||
<include href="compiler/js.xsl" />
|
||||
<include href="include/depgen.xsl" />
|
||||
<include href="include/preproc/symtable.xsl" />
|
||||
<include href="include/util.xsl" />
|
||||
|
||||
|
||||
<!-- path to program XML -->
|
||||
<param name="path-program-ui" />
|
||||
|
@ -54,9 +54,22 @@
|
|||
<template match="/" priority="5">
|
||||
<!-- the rater itself -->
|
||||
<text>var rater = </text>
|
||||
<!-- (moved from linker during TAMER POC linker) -->
|
||||
<call-template name="compiler:entry" />
|
||||
<call-template name="compiler:classifier" />
|
||||
<value-of disable-output-escaping="yes" select="/lv:package/l:static/text()" />
|
||||
<call-template name="compiler:entry-rater" />
|
||||
<value-of disable-output-escaping="yes" select="/lv:package/l:exec/text()" />
|
||||
<text>; </text>
|
||||
|
||||
<!--(moved from linker during TAMER POC linker) -->
|
||||
<call-template name="compiler:exit-rater">
|
||||
<with-param name="name" select="/*/@name" />
|
||||
<with-param name="symbols" select="/*/l:dep/preproc:sym" />
|
||||
<with-param name="mapfrom" select="/*/l:map-from/l:from" />
|
||||
</call-template>
|
||||
<call-template name="compiler:static" />
|
||||
|
||||
<!-- maps may or may not exist -->
|
||||
<variable name="map" select="/lv:package/l:map-exec" />
|
||||
<variable name="retmap" select="/lv:package/l:retmap-exec" />
|
||||
|
@ -64,7 +77,7 @@
|
|||
<!-- store a reference to the mapper in rater.fromMap() -->
|
||||
<text>rater.fromMap = </text>
|
||||
<choose>
|
||||
<when test="$map">
|
||||
<when test="/lv:package/l:dep/preproc:sym[@type='map'][1]">
|
||||
<value-of disable-output-escaping="yes" select="$map/text()" />
|
||||
</when>
|
||||
|
||||
|
@ -79,7 +92,7 @@
|
|||
<!-- return map -->
|
||||
<text>rater._retmap = </text>
|
||||
<choose>
|
||||
<when test="$retmap">
|
||||
<when test="/lv:package/l:dep/preproc:sym[@type='retmap'][1]">
|
||||
<value-of disable-output-escaping="yes" select="$retmap/text()" />
|
||||
</when>
|
||||
|
||||
|
|
|
@ -1,10 +1,28 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "2.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.2.1"
|
||||
|
@ -36,13 +54,18 @@ dependencies = [
|
|||
name = "tamer"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
|
||||
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
|
||||
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
|
||||
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
|
||||
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
|
||||
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
|
||||
|
|
|
@ -17,8 +17,15 @@ opt-level = 3
|
|||
[profile.release]
|
||||
lto = true
|
||||
|
||||
[profile.bench]
|
||||
# We want our benchmarks to be representative of how well TAME will perform
|
||||
# in a release.
|
||||
lto = true
|
||||
|
||||
[dependencies]
|
||||
quick-xml = ">= 0.17.0"
|
||||
petgraph = ">= 0.4.13"
|
||||
bumpalo = ">= 2.6.0"
|
||||
# used by petgraph
|
||||
fixedbitset = ">= 0.1"
|
||||
fxhash = ">= 0.2.1"
|
||||
petgraph = ">= 0.4.13"
|
||||
quick-xml = ">= 0.17.0"
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
.PHONY: all
|
||||
.PHONY: all fix fmt check-fmt bench
|
||||
|
||||
CARGO_BUILD_FLAGS=@CARGO_BUILD_FLAGS@
|
||||
|
||||
|
@ -27,10 +27,23 @@ all:
|
|||
|
||||
doc: html
|
||||
html-am:
|
||||
@CARGO@ doc
|
||||
@CARGO@ test --doc
|
||||
@CARGO@ @CARGO_DOC_FLAGS@ doc --document-private-items
|
||||
|
||||
# note that 'cargo check' is something else; see 'cargo --help'
|
||||
test: check
|
||||
check-am:
|
||||
check-am: check-fmt
|
||||
@CARGO@ test
|
||||
|
||||
check-fmt:
|
||||
@CARGO@ fmt -- --check
|
||||
|
||||
bench:
|
||||
@CARGO@ @CARGO_BENCH_FLAGS@ bench
|
||||
|
||||
fix: fmt
|
||||
fmt:
|
||||
@CARGO@ fmt
|
||||
|
||||
clean-am:
|
||||
@CARGO@ clean
|
||||
|
|
|
@ -47,3 +47,51 @@ $ ./configure CARGO_BUILD_FLAGS=--release && make
|
|||
$ ./configure && make
|
||||
$ ./configure CARGO_BUILD_FLAGS=--release && make CARGO_BUILD_FLAGS=
|
||||
```
|
||||
|
||||
|
||||
## Hacking
|
||||
This section contains advice for those developing TAMER.
|
||||
|
||||
|
||||
### Running Tests
|
||||
Developers should be using test-driven development (TDD). `make check` will
|
||||
run all necessary tests.
|
||||
|
||||
|
||||
### Code Format
|
||||
Rust provides `rustfmt` that can automatically format code for you. This
|
||||
project mandates its use and therefore eliminates personal preference in
|
||||
code style (for better or worse).
|
||||
|
||||
Formatting checks are run during `make check` and, on failure, will output
|
||||
the diff that would be applied if you ran `make fmt` (or `make fix`); this
|
||||
will run `cargo fmt` for you (and will use the binaries configured via
|
||||
`configure`).
|
||||
|
||||
Since developers should be doing test-driven development (TDD) and therefore
|
||||
should be running `make check` frequently, the hope is that frequent
|
||||
feedback on formatting issues will allow developers to quickly adjust their
|
||||
habits to avoid triggering formatting errors at all.
|
||||
|
||||
If you want to automatically fix formatting errors and then run tests:
|
||||
|
||||
```sh
|
||||
$ make fmt check
|
||||
```
|
||||
|
||||
|
||||
## Benchmarking
|
||||
Benchmarks serve two purposes: external integration tests (which are subject
|
||||
to module visibility constraints) and actual benchmarking. To run
|
||||
benchmarks, invoke `make bench`.
|
||||
|
||||
Note that link-time optimizations (LTO) are performed on the binary for
|
||||
benchmarking so that its performance reflects release builds that will be
|
||||
used in production.
|
||||
|
||||
The `configure` script will automatically detect whether the `test` feature
|
||||
is unstable (as it was as of the time of writing) and, if so, will
|
||||
automatically fall back to invoking nightly (by running `cargo +nightly
|
||||
bench`).
|
||||
|
||||
If you do not have nightly, run you install it via `rustup install nightly`.
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
// String internment benchmarks and baselines
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// Note that the baseline tests have a _suffix_ rather than a prefix so that
|
||||
// they are still grouped with the associated test in the output, since it's
|
||||
// sorted lexically by function name.
|
||||
|
||||
#![feature(test)]
|
||||
|
||||
extern crate tamer;
|
||||
extern crate test;
|
||||
|
||||
use std::rc::Rc;
|
||||
use tamer::sym::*;
|
||||
use test::Bencher;
|
||||
|
||||
fn gen_strs(n: usize) -> Vec<String> {
|
||||
(0..n)
|
||||
.map(|n| n.to_string() + "foobarbazquuxlongsymbol")
|
||||
.collect()
|
||||
}
|
||||
|
||||
mod interner {
|
||||
use super::*;
|
||||
use std::collections::hash_map::RandomState;
|
||||
use std::collections::HashSet;
|
||||
use std::hash::BuildHasher;
|
||||
|
||||
pub struct HashSetSut<S = RandomState>
|
||||
where
|
||||
S: BuildHasher,
|
||||
{
|
||||
pub map: HashSet<Rc<str>, S>,
|
||||
}
|
||||
|
||||
impl<S> HashSetSut<S>
|
||||
where
|
||||
S: BuildHasher + Default,
|
||||
{
|
||||
#[inline]
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
map: HashSet::with_hasher(Default::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn intern(&mut self, value: &str) -> Rc<str> {
|
||||
if !self.map.contains(value) {
|
||||
self.map.insert(value.into());
|
||||
}
|
||||
|
||||
self.map.get(value).unwrap().clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// This is our baseline with a raw Rc<str>.
|
||||
#[bench]
|
||||
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
||||
let strs = gen_strs(1000);
|
||||
|
||||
bench.iter(|| {
|
||||
let mut sut = HashSetSut::<RandomState>::new();
|
||||
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_all_new_1000(bench: &mut Bencher) {
|
||||
let strs = gen_strs(1000);
|
||||
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<RandomState>::new();
|
||||
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
/// This is our baseline with a raw Rc<str>.
|
||||
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
let mut sut = HashSetSut::<RandomState>::new();
|
||||
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_one_new_1000(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<RandomState>::new();
|
||||
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
mod fx {
|
||||
use super::*;
|
||||
use fxhash::FxBuildHasher;
|
||||
|
||||
/// This is our baseline with a raw Rc<str>.
|
||||
#[bench]
|
||||
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
||||
let strs = gen_strs(1000);
|
||||
bench.iter(|| {
|
||||
let mut sut = HashSetSut::<FxBuildHasher>::new();
|
||||
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_all_new_1000(bench: &mut Bencher) {
|
||||
let strs = gen_strs(1000);
|
||||
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<FxBuildHasher>::new();
|
||||
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
/// This is our baseline with a raw Rc<str>.
|
||||
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
|
||||
map: HashSet::with_hasher(Default::default()),
|
||||
};
|
||||
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_one_new_1000(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<FxBuildHasher>::new();
|
||||
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_one_new_1000_utf8_unchecked(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<FxBuildHasher>::new();
|
||||
(0..1000)
|
||||
.map(|_| unsafe { sut.intern_utf8_unchecked(b"first") })
|
||||
.for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
/// Since Fx is the best-performing, let's build upon it to demonstrate
|
||||
/// the benefits of with_capacity
|
||||
#[bench]
|
||||
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
|
||||
let n = 1000;
|
||||
let strs = gen_strs(n);
|
||||
|
||||
bench.iter(|| {
|
||||
let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n);
|
||||
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
// Feature check for `test`
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As of the time of writing, this feature is unstable and can only be
|
||||
// enabled in nightly. This file is intended to be used in the `configure`
|
||||
// script to determine whether a nightly version of Rust must be used to
|
||||
// invoke benchmarks.
|
||||
#![feature(test)]
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
// Feature check for `test`
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// As of the time of writing, this feature is unstable and can only be
|
||||
// enabled in nightly. This file is intended to be used in the `configure`
|
||||
// script to determine whether a nightly version of Rust must be used to
|
||||
// build documentation.
|
||||
#![feature(intra_rustdoc_links)]
|
||||
|
|
@ -43,18 +43,57 @@ AC_CHECK_PROGS(CARGO, [cargo])
|
|||
|
||||
test -n "$CARGO" || AC_MSG_ERROR([cargo not found])
|
||||
|
||||
rustc_ver_req=1.39.0
|
||||
rustc_ver_req=1.41.0
|
||||
|
||||
AC_CHECK_PROGS(RUSTC, [rustc])
|
||||
AC_MSG_CHECKING([rustc version >= $rustc_ver_req])
|
||||
rustc_version=$("$RUSTC" --version | cut -d' ' -f2)
|
||||
AX_COMPARE_VERSION([$rustc_version], [ge], [$rustc_ver_req],
|
||||
[AC_MSG_RESULT([yes ($rustc_version)])],
|
||||
[AC_MSG_ERROR([no ($rustc_version)])])
|
||||
[AC_MSG_RESULT([no ($rustc_version)])
|
||||
AC_MSG_ERROR([If using rustup, run `rustup update'])])
|
||||
|
||||
AC_ARG_VAR([CARGO_BUILD_FLAGS],
|
||||
[Flags to be passed to `cargo build' when invoked via Make])
|
||||
|
||||
# The `intra_rustdoc_links` feature is required for building
|
||||
# documentation. If unavailable, then it's still an unstable feature and
|
||||
# we'll need to use nightly. We don't check for nightly here, though---if
|
||||
# it's missing, then cargo will tell the user what to do.
|
||||
AC_MSG_CHECKING([`intra_rustdoc_links_check` feature support])
|
||||
AS_IF(["$RUSTC" --crate-type lib build_aux/intra_rustdoc_links_check.rs &>/dev/null],
|
||||
[AC_MSG_RESULT(available)],
|
||||
[AC_MSG_RESULT([no (nightly required)])
|
||||
AC_SUBST([CARGO_DOC_FLAGS], [+nightly])])
|
||||
|
||||
# The `test` feature is required for benchmarking. If unavailable, then
|
||||
# it's still an unstable feature and we'll need to use nightly. We don't
|
||||
# check for nightly here, though---if it's missing, then cargo will tell the
|
||||
# user what to do.
|
||||
AC_MSG_CHECKING([`test` feature support])
|
||||
AS_IF(["$RUSTC" --crate-type lib build_aux/bench_check.rs &>/dev/null],
|
||||
[AC_MSG_RESULT(available)],
|
||||
[AC_MSG_RESULT([no (nightly required)])
|
||||
AC_SUBST([CARGO_BENCH_FLAGS], [+nightly])])
|
||||
|
||||
# Cargo commands may be available but not necessarily installed for the
|
||||
# active toolchain. Let's check that.
|
||||
AC_MSG_CHECKING([whether cargo-fmt is available for active toolchain])
|
||||
AS_IF([cargo fmt --help &>/dev/null],
|
||||
[AC_MSG_RESULT(yes)],
|
||||
[AC_MSG_RESULT(no)
|
||||
cargo fmt --help # run again so user can see output
|
||||
AC_MSG_ERROR([missing cargo-fmt for active toolchain])])
|
||||
|
||||
# Cargo commands may be available but not necessarily installed for the
|
||||
# active toolchain. Let's check that.
|
||||
AC_MSG_CHECKING([whether cargo-doc is available for toolchain])
|
||||
AS_IF([cargo $CARGO_DOC_FLAGS doc --help &>/dev/null],
|
||||
[AC_MSG_RESULT(yes)],
|
||||
[AC_MSG_RESULT(no)
|
||||
cargo $CARGO_DOC_FLAGS doc --help # run again so user can see output
|
||||
AC_MSG_ERROR([missing cargo-doc for toolchain])])
|
||||
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
|
||||
AC_OUTPUT
|
||||
|
|
|
@ -19,19 +19,8 @@
|
|||
//! utility. Its job is to take each of the compiled object files and
|
||||
//! produce a final executable.
|
||||
//!
|
||||
//! # Backwards-Compatibility (XSLT System)
|
||||
//! This linker is part of the TAMER (TAME in Rust) project, which aims to
|
||||
//! incrementally rewrite TAME in Rust. Consequently, it must be able to
|
||||
//! serve as a drop-in replacement for the existing (XSLT) linker, which
|
||||
//! takes as input `xmlo` files and produces as output an `xmle` file. This
|
||||
//! is not efficient, and future versions will begin to migrate away from
|
||||
//! this strategy.
|
||||
//!
|
||||
//! The output `xmle` file is then fed to a `standalone` command which
|
||||
//! extracts the JavaScript fragment and places it into its own file. Even
|
||||
//! when that is replaced (when this just outputs a final JS file directly),
|
||||
//! the `xmle` file is still needed for other purposes, such as `summary`
|
||||
//! and `dote` generation.
|
||||
//! For more information about the linker,
|
||||
//! see the [`tamer::ld`] module.
|
||||
|
||||
extern crate tamer;
|
||||
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
// Global constants across the entirety of TAMER
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! System-wide static configuration.
|
||||
//!
|
||||
//! This module provides a system-wide configuration.
|
||||
//! Subsystems should reference these values rather than defining their own
|
||||
//! and risk incompatibilities or maintenance issues as requirements
|
||||
//! change.
|
||||
//!
|
||||
//! By convention,
|
||||
//! import this entire module rather than individual members and reference
|
||||
//! them as `global::foo` to emphasize their nature and risk.
|
||||
|
||||
use std::num;
|
||||
|
||||
/// A size capable of representing every interned string in a package.
|
||||
pub type PkgSymSize = u16;
|
||||
|
||||
/// A non-zero equivalent of [`PkgSymSize`];
|
||||
pub type NonZeroPkgSymSize = num::NonZeroU16;
|
||||
|
||||
/// A size capable of representing every interned string in a program.
|
||||
pub type ProgSymSize = u32;
|
||||
|
||||
/// A non-zero equivalent of [`ProgSymSize`];
|
||||
pub type NonZeroProgSymSize = num::NonZeroU32;
|
||||
|
||||
/// A size capable of representing indexes of each individual identifier
|
||||
/// within a single package.
|
||||
///
|
||||
/// Note that,
|
||||
/// since TAME is a metalanguage and can easily expand into a great
|
||||
/// deal of code,
|
||||
/// this must accommodate far more than the user's expectations
|
||||
/// working within the provided level of abstraction.
|
||||
///
|
||||
/// This must be ≥ [`PkgSymSize`].
|
||||
pub type PkgIdentSize = u16;
|
||||
|
||||
/// A size capable of representing every individual identifier and
|
||||
/// expression within a single package.
|
||||
///
|
||||
/// Note that,
|
||||
/// since TAME is a metalanguage and can easily expand into a great
|
||||
/// deal of code,
|
||||
/// this must accommodate far more than the user's expectations
|
||||
/// working within the provided level of abstraction.
|
||||
pub type PkgIdentExprSize = u32;
|
||||
|
||||
/// A size capable of representing the union of every identifier of every
|
||||
/// package used by an entire program.
|
||||
///
|
||||
/// This must be ≥ [`ProgSymSize`].
|
||||
pub type ProgIdentSize = u32;
|
||||
|
||||
/// A size capable of representing the union of every identifier and every
|
||||
/// expression of every package used by an entire program.
|
||||
///
|
||||
/// Note that,
|
||||
/// since TAME is a metalanguage and can easily expand into a great
|
||||
/// deal of code,
|
||||
/// this must accommodate far more than the user's expectations
|
||||
/// working within the provided level of abstraction.
|
||||
///
|
||||
/// This must be ≥ [`ProgSymSize`].
|
||||
pub type ProgIdentExprSize = u32;
|
|
@ -0,0 +1,652 @@
|
|||
// Concrete ASG
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Base concrete [`Asg`] implementation.
|
||||
|
||||
use super::graph::{Asg, AsgEdge, AsgError, AsgResult, Node, ObjectRef};
|
||||
use super::ident::IdentKind;
|
||||
use super::object::{FragmentText, Object, Source};
|
||||
use crate::sym::Symbol;
|
||||
use fixedbitset::FixedBitSet;
|
||||
use petgraph::graph::{
|
||||
DiGraph, EdgeIndex, Graph, IndexType, Neighbors, NodeIndex,
|
||||
};
|
||||
use petgraph::visit::{GraphBase, IntoNeighbors, Visitable};
|
||||
|
||||
/// Concrete ASG.
|
||||
///
|
||||
/// This implementation is currently based on [`petgraph`].
|
||||
///
|
||||
/// Identifiers are cached by name for `O(1)` lookup.
|
||||
/// Since [`SymbolIndex`][crate::sym::SymbolIndex] is used for this purpose,
|
||||
/// the index may contain more entries than nodes and may contain gaps.
|
||||
///
|
||||
/// For more information,
|
||||
/// see [`Asg`].
|
||||
pub struct BaseAsg<'i, Ix: IndexType> {
|
||||
/// Directed graph on which objects are stored.
|
||||
graph: DiGraph<Node<'i>, AsgEdge, Ix>,
|
||||
|
||||
/// Map of [`SymbolIndex`][crate::sym::SymbolIndex] to node indexes.
|
||||
///
|
||||
/// This allows for `O(1)` lookup of identifiers in the graph.
|
||||
/// Note that,
|
||||
/// while we store [`NodeIndex`] internally,
|
||||
/// the public API encapsulates it within an [`ObjectRef`].
|
||||
index: Vec<NodeIndex<Ix>>,
|
||||
|
||||
/// Empty node indicating that no object exists for a given index.
|
||||
empty_node: NodeIndex<Ix>,
|
||||
}
|
||||
|
||||
impl<'i, Ix> BaseAsg<'i, Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
/// Create an ASG with the provided initial capacity.
|
||||
///
|
||||
/// The value for `objects` will be used as the capacity for the nodes
|
||||
/// in the graph,
|
||||
/// as well as the initial index capacity.
|
||||
/// The value for `edges` may be more difficult to consider,
|
||||
/// since edges are used to represent various relationships between
|
||||
/// different types of objects,
|
||||
/// but it's safe to say that each object will have at least one
|
||||
/// edge to another object.
|
||||
///
|
||||
/// A basic `new` method is not provided to ensure that callers consider
|
||||
/// capacity during construction,
|
||||
/// since graphs can get quite large.
|
||||
pub fn with_capacity(objects: usize, edges: usize) -> Self {
|
||||
let mut graph = Graph::with_capacity(objects, edges);
|
||||
let mut index = Vec::with_capacity(objects);
|
||||
|
||||
// Exhaust the first index to be used as a placeholder.
|
||||
let empty_node = graph.add_node(Some(Object::Empty));
|
||||
index.push(empty_node);
|
||||
|
||||
Self {
|
||||
graph,
|
||||
index,
|
||||
empty_node,
|
||||
}
|
||||
}
|
||||
|
||||
/// Index the provided symbol `name` as representing the identifier `node`.
|
||||
///
|
||||
/// This index permits `O(1)` identifier lookups.
|
||||
///
|
||||
/// After an identifier is indexed it is not expected to be reassigned
|
||||
/// to another node.
|
||||
/// Debug builds contain an assertion that will panic in this instance.
|
||||
///
|
||||
/// Panics
|
||||
/// ======
|
||||
/// Will panic if unable to allocate more space for the index.
|
||||
fn index_identifier(&mut self, name: &'i Symbol<'i>, node: NodeIndex<Ix>) {
|
||||
let i: usize = name.index().into();
|
||||
|
||||
if i >= self.index.len() {
|
||||
// If this is ever a problem we can fall back to usize max and
|
||||
// re-compare before panicing
|
||||
let new_size = (i + 1)
|
||||
.checked_next_power_of_two()
|
||||
.expect("internal error: cannot allocate space for ASG index");
|
||||
|
||||
self.index.resize(new_size, self.empty_node);
|
||||
}
|
||||
|
||||
// We should never overwrite indexes
|
||||
debug_assert!(self.index[i] == self.empty_node);
|
||||
|
||||
self.index[i] = node;
|
||||
}
|
||||
|
||||
/// Lookup `ident` or add an [`Object::Missing`] to the graph and
|
||||
/// return a reference to it.
|
||||
#[inline]
|
||||
fn lookup_or_missing(&mut self, ident: &'i Symbol<'i>) -> ObjectRef<Ix> {
|
||||
self.lookup(ident).unwrap_or_else(|| {
|
||||
let index = self.graph.add_node(Some(Object::Missing(ident)));
|
||||
|
||||
self.index_identifier(ident, index);
|
||||
ObjectRef(index)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i, Ix> Asg<'i, Ix> for BaseAsg<'i, Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
fn declare(
|
||||
&mut self,
|
||||
name: &'i Symbol<'i>,
|
||||
kind: IdentKind,
|
||||
src: Source<'i>,
|
||||
) -> AsgResult<ObjectRef<Ix>> {
|
||||
// TODO: src check
|
||||
if let Some(existing) = self.lookup(name) {
|
||||
let node = self.graph.node_weight_mut(existing.0).unwrap();
|
||||
|
||||
match node {
|
||||
Some(Object::Missing(_)) => {
|
||||
node.replace(Object::Ident(name, kind, src));
|
||||
return Ok(existing);
|
||||
}
|
||||
// TODO: no override-override
|
||||
Some(Object::Ident(_, _, orig_src))
|
||||
if orig_src.virtual_ && src.override_ =>
|
||||
{
|
||||
*orig_src = src;
|
||||
return Ok(existing);
|
||||
}
|
||||
// TODO: no override-override
|
||||
Some(Object::IdentFragment(_, _, orig_src, _))
|
||||
if orig_src.virtual_ && src.override_ =>
|
||||
{
|
||||
// clears fragment, which is no longer applicable
|
||||
node.replace(Object::Ident(name, kind, src));
|
||||
return Ok(existing);
|
||||
}
|
||||
_ => return Ok(existing),
|
||||
}
|
||||
}
|
||||
|
||||
let node = self.graph.add_node(Some(Object::Ident(name, kind, src)));
|
||||
|
||||
self.index_identifier(name, node);
|
||||
|
||||
Ok(ObjectRef(node))
|
||||
}
|
||||
|
||||
fn declare_extern(
|
||||
&mut self,
|
||||
name: &'i Symbol<'i>,
|
||||
expected_kind: IdentKind,
|
||||
) -> AsgResult<ObjectRef<Ix>> {
|
||||
// TODO: resolution!
|
||||
let node = self
|
||||
.graph
|
||||
.add_node(Some(Object::Extern(name, expected_kind)));
|
||||
|
||||
self.index_identifier(name, node);
|
||||
|
||||
Ok(ObjectRef(node))
|
||||
}
|
||||
|
||||
fn set_fragment(
|
||||
&mut self,
|
||||
identi: ObjectRef<Ix>,
|
||||
text: FragmentText,
|
||||
) -> AsgResult<ObjectRef<Ix>> {
|
||||
// This should _never_ happen as long as you're only using ObjectRef
|
||||
// values produced by these methods.
|
||||
let node = self
|
||||
.graph
|
||||
.node_weight_mut(identi.0)
|
||||
.expect("internal error: BaseAsg::set_fragment bogus identi");
|
||||
|
||||
// This should also never happen, since we immediately repopulate
|
||||
// the node below.
|
||||
let ty = node
|
||||
.take()
|
||||
.expect("internal error: BaseAsg::set_fragment missing Node data");
|
||||
|
||||
let result = match ty {
|
||||
Object::Ident(sym, kind, src) => {
|
||||
Ok(Object::IdentFragment(sym, kind, src, text))
|
||||
}
|
||||
_ => {
|
||||
let err = Err(AsgError::BadFragmentDest(format!(
|
||||
"identifier is not a Object::Ident): {:?}",
|
||||
ty,
|
||||
)));
|
||||
|
||||
node.replace(ty);
|
||||
err
|
||||
}
|
||||
}?;
|
||||
|
||||
node.replace(result);
|
||||
|
||||
Ok(identi)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>> {
|
||||
self.graph.node_weight(index.into().0).map(|node| {
|
||||
node.as_ref()
|
||||
.expect("internal error: BaseAsg::get missing Node data")
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>> {
|
||||
let i: usize = name.index().into();
|
||||
|
||||
self.index
|
||||
.get(i)
|
||||
.filter(|ni| ni.index() > 0)
|
||||
.map(|ni| ObjectRef(*ni))
|
||||
}
|
||||
|
||||
fn add_dep(&mut self, identi: ObjectRef<Ix>, depi: ObjectRef<Ix>) {
|
||||
self.graph.update_edge(identi.0, depi.0, Default::default());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool {
|
||||
self.graph.contains_edge(ident.0, dep.0)
|
||||
}
|
||||
|
||||
fn add_dep_lookup(
|
||||
&mut self,
|
||||
ident: &'i Symbol<'i>,
|
||||
dep: &'i Symbol<'i>,
|
||||
) -> (ObjectRef<Ix>, ObjectRef<Ix>) {
|
||||
let identi = self.lookup_or_missing(ident);
|
||||
let depi = self.lookup_or_missing(dep);
|
||||
|
||||
self.graph.update_edge(identi.0, depi.0, Default::default());
|
||||
|
||||
(identi, depi)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: encapsulate Petgraph API (N.B. this is untested!)
|
||||
impl<'i, Ix> Visitable for BaseAsg<'i, Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
type Map = FixedBitSet;
|
||||
|
||||
fn visit_map(&self) -> Self::Map {
|
||||
self.graph.visit_map()
|
||||
}
|
||||
|
||||
fn reset_map(&self, map: &mut Self::Map) {
|
||||
self.graph.reset_map(map)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i, Ix> GraphBase for BaseAsg<'i, Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
type NodeId = NodeIndex<Ix>;
|
||||
type EdgeId = EdgeIndex<Ix>;
|
||||
}
|
||||
|
||||
impl<'a, 'i, Ix> IntoNeighbors for &'a BaseAsg<'i, Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
type Neighbors = Neighbors<'a, AsgEdge, Ix>;
|
||||
|
||||
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
|
||||
self.graph.neighbors(n)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::sym::SymbolIndex;
|
||||
|
||||
type Sut<'i> = BaseAsg<'i, u8>;
|
||||
|
||||
#[test]
|
||||
fn create_with_capacity() {
|
||||
let node_capacity = 100;
|
||||
let edge_capacity = 300;
|
||||
let sut = Sut::with_capacity(node_capacity, edge_capacity);
|
||||
|
||||
// breaks encapsulation to introspect; the behavior is
|
||||
// transparent to callers (aside from performance
|
||||
// characteristics)
|
||||
let (nc, ec) = sut.graph.capacity();
|
||||
assert!(nc >= node_capacity);
|
||||
assert!(ec >= edge_capacity);
|
||||
assert!(sut.index.capacity() >= node_capacity);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn declare_new_unique_idents() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
// NB: The index ordering is important! We first use a larger
|
||||
// index to create a gap, and then use an index within that gap
|
||||
// to ensure that it's not considered an already-defined
|
||||
// identifier.
|
||||
let syma = Symbol::new_dummy(SymbolIndex::from_u32(5), "syma");
|
||||
let symb = Symbol::new_dummy(SymbolIndex::from_u32(1), "symab");
|
||||
|
||||
let nodea = sut.declare(
|
||||
&syma,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
desc: Some("a".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
|
||||
let nodeb = sut.declare(
|
||||
&symb,
|
||||
IdentKind::Worksheet,
|
||||
Source {
|
||||
desc: Some("b".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
|
||||
assert_ne!(nodea, nodeb);
|
||||
|
||||
assert_eq!(
|
||||
Some(&Object::Ident(
|
||||
&syma,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
desc: Some("a".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
)),
|
||||
sut.get(nodea),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Some(&Object::Ident(
|
||||
&symb,
|
||||
IdentKind::Worksheet,
|
||||
Source {
|
||||
desc: Some("b".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
)),
|
||||
sut.get(nodeb),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lookup_by_symbol() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "lookup");
|
||||
let node = sut.declare(
|
||||
&sym,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
generated: true,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
|
||||
assert_eq!(Some(node), sut.lookup(&sym));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn declare_extern() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "extern");
|
||||
let node = sut.declare_extern(&sym, IdentKind::Meta)?;
|
||||
|
||||
assert_eq!(Some(&Object::Extern(&sym, IdentKind::Meta)), sut.get(node),);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: incompatible
|
||||
#[test]
|
||||
fn declare_returns_existing_compatible() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "symdup");
|
||||
let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;
|
||||
|
||||
// Same declaration a second time
|
||||
let redeclare =
|
||||
sut.declare(&sym, IdentKind::Meta, Source::default())?;
|
||||
|
||||
assert_eq!(node, redeclare);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: incompatible
|
||||
#[test]
|
||||
fn declare_override_virtual_ident() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
|
||||
let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
|
||||
|
||||
let virt_node = sut.declare(
|
||||
&sym,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
virtual_: true,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
|
||||
let over_src = Source {
|
||||
override_: true,
|
||||
src: Some(&over_src),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;
|
||||
|
||||
assert_eq!(virt_node, over_node);
|
||||
|
||||
assert_eq!(
|
||||
sut.get(over_node),
|
||||
Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO: incompatible
|
||||
#[test]
|
||||
fn declare_override_virtual_ident_fragment() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "virtual");
|
||||
let over_src = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
|
||||
|
||||
let virt_node = sut.declare(
|
||||
&sym,
|
||||
IdentKind::Meta,
|
||||
Source {
|
||||
virtual_: true,
|
||||
..Default::default()
|
||||
},
|
||||
)?;
|
||||
|
||||
sut.set_fragment(virt_node, FragmentText::from("remove me"))?;
|
||||
|
||||
let over_src = Source {
|
||||
override_: true,
|
||||
src: Some(&over_src),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let over_node = sut.declare(&sym, IdentKind::Meta, over_src.clone())?;
|
||||
|
||||
assert_eq!(virt_node, over_node);
|
||||
|
||||
// The act of overriding the node should have cleared any existing
|
||||
// fragment, making way for a new fragment to take its place as soon
|
||||
// as it is discovered. (So, back to an Object::Ident.)
|
||||
assert_eq!(
|
||||
sut.get(over_node),
|
||||
Some(&Object::Ident(&sym, IdentKind::Meta, over_src,))
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_fragment_to_ident() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "tofrag");
|
||||
let src = Source {
|
||||
generated: true,
|
||||
..Default::default()
|
||||
};
|
||||
let node = sut.declare(&sym, IdentKind::Meta, src.clone())?;
|
||||
|
||||
let fragment = "a fragment".to_string();
|
||||
let node_with_frag = sut.set_fragment(node, fragment.clone())?;
|
||||
|
||||
// Attaching a fragment should _replace_ the node, not create a
|
||||
// new one
|
||||
assert_eq!(
|
||||
node, node_with_frag,
|
||||
"fragment node does not match original node"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Some(&Object::IdentFragment(&sym, IdentKind::Meta, src, fragment)),
|
||||
sut.get(node)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_fragment_to_fragment_fails() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let node = sut.declare(&sym, IdentKind::Meta, Source::default())?;
|
||||
let fragment = "orig fragment".to_string();
|
||||
|
||||
sut.set_fragment(node, fragment.clone())?;
|
||||
|
||||
// Since it's already a fragment, this should fail.
|
||||
let err = sut
|
||||
.set_fragment(node, "replacement".to_string())
|
||||
.expect_err("Expected failure");
|
||||
|
||||
match err {
|
||||
AsgError::BadFragmentDest(str) if str.contains("sym") => (),
|
||||
_ => panic!("expected AsgError::BadFragmentDest: {:?}", err),
|
||||
}
|
||||
|
||||
// Make sure we didn't leave the node in an inconsistent state
|
||||
assert_eq!(
|
||||
Some(&Object::IdentFragment(
|
||||
&sym,
|
||||
IdentKind::Meta,
|
||||
Default::default(),
|
||||
fragment
|
||||
)),
|
||||
sut.get(node)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_ident_dep_to_ident() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let dep = Symbol::new_dummy(SymbolIndex::from_u32(1), "dep");
|
||||
|
||||
let symnode = sut.declare(&sym, IdentKind::Meta, Source::default())?;
|
||||
let depnode = sut.declare(&dep, IdentKind::Meta, Source::default())?;
|
||||
|
||||
sut.add_dep(symnode, depnode);
|
||||
assert!(sut.has_dep(symnode, depnode));
|
||||
|
||||
// sanity check if we re-add a dep
|
||||
sut.add_dep(symnode, depnode);
|
||||
assert!(sut.has_dep(symnode, depnode));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// same as above test
|
||||
#[test]
|
||||
fn add_dep_lookup_existing() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
|
||||
|
||||
let _ = sut.declare(&sym, IdentKind::Meta, Source::default())?;
|
||||
let _ = sut.declare(&dep, IdentKind::Meta, Source::default())?;
|
||||
|
||||
let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
|
||||
assert!(sut.has_dep(symnode, depnode));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_dep_lookup_missing() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
|
||||
|
||||
// both of these are missing
|
||||
let (symnode, depnode) = sut.add_dep_lookup(&sym, &dep);
|
||||
assert!(sut.has_dep(symnode, depnode));
|
||||
|
||||
assert_eq!(Some(&Object::Missing(&sym)), sut.get(symnode));
|
||||
assert_eq!(Some(&Object::Missing(&dep)), sut.get(depnode));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn declare_return_missing_symbol() -> AsgResult<()> {
|
||||
let mut sut = Sut::with_capacity(0, 0);
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let dep = Symbol::new_dummy(SymbolIndex::from_u32(2), "dep");
|
||||
|
||||
// both of these are missing, see add_dep_lookup_missing
|
||||
let (symnode, _) = sut.add_dep_lookup(&sym, &dep);
|
||||
|
||||
let src = Source {
|
||||
desc: Some("Tamer is NOT lamer.".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Check with a declared value
|
||||
let declared = sut.declare(&sym, IdentKind::Meta, src.clone())?;
|
||||
|
||||
assert_eq!(symnode, declared);
|
||||
|
||||
assert_eq!(
|
||||
Some(&Object::Ident(&sym, IdentKind::Meta, src)),
|
||||
sut.get(declared),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,249 @@
|
|||
// Graph abstraction
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Abstract graph as the basis for concrete ASGs.
|
||||
|
||||
use super::ident::IdentKind;
|
||||
use super::object::{FragmentText, Object, Source};
|
||||
use crate::sym::Symbol;
|
||||
use petgraph::graph::{IndexType, NodeIndex};
|
||||
use std::result::Result;
|
||||
|
||||
/// An abstract semantic graph of [objects][Object].
|
||||
///
|
||||
/// This IR focuses on the definition and manipulation of objects and their
|
||||
/// dependencies.
|
||||
/// See [`Object`] for a summary of valid object state transitions.
|
||||
///
|
||||
/// Objects are never deleted from the graph,
|
||||
/// so [`ObjectRef`]s will remain valid for the lifetime of the ASG.
|
||||
///
|
||||
/// For more information,
|
||||
/// see the [module-level documentation][self].
|
||||
pub trait Asg<'i, Ix: IndexType> {
|
||||
/// Declare a concrete identifier.
|
||||
///
|
||||
/// An identifier declaration is similar to a declaration in a header
|
||||
/// file in a language like C,
|
||||
/// describing the structure of the identifier.
|
||||
/// Once declared,
|
||||
/// this information cannot be changed.
|
||||
///
|
||||
/// Identifiers are uniquely identified by a [`Symbol`] `name`.
|
||||
/// If an identifier of the same `name` already exists,
|
||||
/// then the provided declaration is compared against the existing
|
||||
/// declaration---should
|
||||
/// they be incompatible,
|
||||
/// then the operation will fail;
|
||||
/// otherwise,
|
||||
/// the existing identifier will be returned.
|
||||
/// A successful declaration will add a [`Object::Ident`] to the graph
|
||||
/// and return an [`ObjectRef`] reference.
|
||||
///
|
||||
/// If an existing identifier is an extern (see
|
||||
/// [`Asg::declare_extern`]),
|
||||
/// then the declaration will be compared just the same,
|
||||
/// but the identifier will be converted from a
|
||||
/// [`Object::Extern`] into a [`Object::Ident`].
|
||||
/// When this happens,
|
||||
/// the extern is said to be _resolved_.
|
||||
///
|
||||
/// If a virtual identifier of type [`Object::IdentFragment`] is
|
||||
/// overridden,
|
||||
/// then its fragment is cleared
|
||||
/// (it returns to a [`Object::Ident`])
|
||||
/// to make way for the fragment of the override.
|
||||
fn declare(
|
||||
&mut self,
|
||||
name: &'i Symbol<'i>,
|
||||
kind: IdentKind,
|
||||
src: Source<'i>,
|
||||
) -> AsgResult<ObjectRef<Ix>>;
|
||||
|
||||
/// Declare an abstract identifier.
|
||||
///
|
||||
/// An _extern_ declaration declares an identifier the same as
|
||||
/// [`Asg::declare`],
|
||||
/// but instead as [`Object::Extern`].
|
||||
/// Externs are identifiers that are expected to be defined somewhere
|
||||
/// else ("externally"),
|
||||
/// and are resolved at [link-time][crate::ld].
|
||||
///
|
||||
/// If a concrete identifier has already been declared (see
|
||||
/// [`Asg::declare`]),
|
||||
/// then the declarations will be compared and,
|
||||
/// if compatible,
|
||||
/// the identifier will be immediately _resolved_ and the object
|
||||
/// on the graph will not be altered.
|
||||
/// Resolution will otherwise fail in error.
|
||||
fn declare_extern(
|
||||
&mut self,
|
||||
name: &'i Symbol<'i>,
|
||||
expected_kind: IdentKind,
|
||||
) -> AsgResult<ObjectRef<Ix>>;
|
||||
|
||||
/// Set the fragment associated with a concrete identifier.
|
||||
///
|
||||
/// This changes the type of the identifier from [`Object::Ident`]
|
||||
/// into [`Object::IdentFragment`],
|
||||
/// which is intended for use by the [linker][crate::ld].
|
||||
fn set_fragment(
|
||||
&mut self,
|
||||
identi: ObjectRef<Ix>,
|
||||
text: FragmentText,
|
||||
) -> AsgResult<ObjectRef<Ix>>;
|
||||
|
||||
/// Retrieve an object from the graph by [`ObjectRef`].
|
||||
///
|
||||
/// Since an [`ObjectRef`] should only be produced by an [`Asg`],
|
||||
/// and since objects are never deleted from the graph,
|
||||
/// this should never fail so long as references are not shared
|
||||
/// between multiple graphs.
|
||||
/// It is nevertheless wrapped in an [`Option`] just in case.
|
||||
fn get<I: Into<ObjectRef<Ix>>>(&self, index: I) -> Option<&Object<'i>>;
|
||||
|
||||
/// Attempt to retrieve an identifier from the graph by name.
|
||||
///
|
||||
/// Since only identifiers carry a name,
|
||||
/// this method cannot be used to retrieve all possible objects on the
|
||||
/// graph---for
|
||||
/// that, see [`Asg::get`].
|
||||
fn lookup(&self, name: &'i Symbol<'i>) -> Option<ObjectRef<Ix>>;
|
||||
|
||||
/// Declare that `dep` is a dependency of `ident`.
|
||||
///
|
||||
/// An object must be declared as a dependency if its value must be
|
||||
/// computed before computing the value of `ident`.
|
||||
/// The [linker][crate::ld] will ensure this ordering.
|
||||
///
|
||||
/// See [`add_dep_lookup`][Asg::add_dep_lookup] if identifiers have to
|
||||
/// be looked up by [`Symbol`] or if they may not yet have been
|
||||
/// declared.
|
||||
fn add_dep(&mut self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>);
|
||||
|
||||
/// Check whether `dep` is a dependency of `ident`.
|
||||
fn has_dep(&self, ident: ObjectRef<Ix>, dep: ObjectRef<Ix>) -> bool;
|
||||
|
||||
/// Declare that `dep` is a dependency of `ident`,
|
||||
/// regardless of whether they are known.
|
||||
///
|
||||
/// In contrast to [`add_dep`][Asg::add_dep],
|
||||
/// this method will add the dependency even if one or both of `ident`
|
||||
/// or `dep` have not yet been declared.
|
||||
/// In such a case,
|
||||
/// an [`Object::Missing`] will be added as a placeholder for the
|
||||
/// missing identifier,
|
||||
/// allowing the ASG to be built with partial information as
|
||||
/// identifiers continue to be discovered.
|
||||
///
|
||||
/// References to both identifiers are returned in argument order.
|
||||
fn add_dep_lookup(
|
||||
&mut self,
|
||||
ident: &'i Symbol<'i>,
|
||||
dep: &'i Symbol<'i>,
|
||||
) -> (ObjectRef<Ix>, ObjectRef<Ix>);
|
||||
}
|
||||
|
||||
/// A [`Result`] with a hard-coded [`AsgError`] error type.
|
||||
///
|
||||
/// This is the result of every [`Asg`] operation that could potentially
|
||||
/// fail in error.
|
||||
pub type AsgResult<T> = Result<T, AsgError>;
|
||||
|
||||
/// Reference to an [object][Object] stored within the [`Asg`].
|
||||
///
|
||||
/// Object references are integer offsets,
|
||||
/// not pointers.
|
||||
/// See the [module-level documentation][self] for more information.
|
||||
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
|
||||
pub struct ObjectRef<Ix>(pub NodeIndex<Ix>);
|
||||
|
||||
impl<Ix> From<NodeIndex<Ix>> for ObjectRef<Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
fn from(index: NodeIndex<Ix>) -> Self {
|
||||
Self(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Ix> From<ObjectRef<Ix>> for NodeIndex<Ix>
|
||||
where
|
||||
Ix: IndexType,
|
||||
{
|
||||
fn from(objref: ObjectRef<Ix>) -> Self {
|
||||
objref.0
|
||||
}
|
||||
}
|
||||
|
||||
/// There are currently no data stored on edges ("edge weights").
|
||||
pub type AsgEdge = ();
|
||||
|
||||
/// Each node of the graph represents an object.
|
||||
///
|
||||
/// Enclosed in an [`Option`] to permit moving owned values out of the
|
||||
/// graph.
|
||||
pub type Node<'i> = Option<Object<'i>>;
|
||||
|
||||
/// An error from an ASG operation.
|
||||
///
|
||||
/// Storing [`Symbol`] would require that this have a lifetime,
|
||||
/// which is very inconvenient when chaining [`Result`],
|
||||
/// so this stores only owned values.
|
||||
/// The caller will know the problem values.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum AsgError {
|
||||
/// The provided identifier is not in a state that is permitted to
|
||||
/// receive a fragment.
|
||||
///
|
||||
/// See [`Asg::set_fragment`] for more information.
|
||||
BadFragmentDest(String),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for AsgError {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::BadFragmentDest(msg) => {
|
||||
write!(fmt, "bad fragment destination: {}", msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for AsgError {
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
mod objref {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn to_from_nodeindex() {
|
||||
let index = NodeIndex::<u32>::new(5);
|
||||
let objref: ObjectRef<u32> = ObjectRef::from(index);
|
||||
|
||||
assert_eq!(index, objref.0);
|
||||
assert_eq!(index, objref.into());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,376 @@
|
|||
// ASG identifiers
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Identifiers (a type of [object][super::object::Object]).
|
||||
|
||||
use crate::ir::legacyir::{SymAttrs, SymDtype, SymType};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
/// Types of identifiers.
|
||||
///
|
||||
/// Here, the term _calculation_ refers to a composable expression that
|
||||
/// produces a numeric result.
|
||||
///
|
||||
/// These are derived from [`legacyir::SymType`][crate::ir::legacyir::SymType]
|
||||
/// and will be generalized in the future.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum IdentKind {
|
||||
/// Classification generator.
|
||||
///
|
||||
/// This has the same number of dimensions as its highest-dimension
|
||||
/// predicate.
|
||||
/// Every [`Class`][IdentKind::Class] has an associated generator.
|
||||
Cgen(Dim),
|
||||
|
||||
/// Boolean classification.
|
||||
///
|
||||
/// This is an artifact of an ancient system.
|
||||
/// The dimensions here refers to the dimensions of the associated
|
||||
/// [`Cgen`][IdentKind::Cgen].
|
||||
Class(Dim),
|
||||
|
||||
/// Constant value.
|
||||
Const(Dim, DataType),
|
||||
|
||||
/// Re-usable encapsulated expression.
|
||||
///
|
||||
/// Functions are nothing more than expressions that can be re-used with
|
||||
/// dynamic values at runtime.
|
||||
/// See also [`Lparam`][IdentKind::Lparam].
|
||||
Func(Dim, DataType),
|
||||
|
||||
/// Generating calculation.
|
||||
///
|
||||
/// Generators are associated with iterative expressions,
|
||||
/// such as sums and products.
|
||||
/// They always have a parent [`Rate`][IdentKind::Rate].
|
||||
Gen(Dim, DataType),
|
||||
|
||||
/// Local (non-global) parameter.
|
||||
///
|
||||
/// Local parameters are lexically scoped to their parent expression:
|
||||
/// - [`Func`][IdentKind::Func], where there exists one per defined
|
||||
/// function parameter; and
|
||||
/// - `let` expression bindings.
|
||||
///
|
||||
/// This is not to be confused with the global
|
||||
/// [`Param`][IdentKind::Param].
|
||||
Lparam(Dim, DataType),
|
||||
|
||||
/// Global parameter.
|
||||
///
|
||||
/// These parameters serve as inputs to the system.
|
||||
/// Input values are bound using [`Map`][IdentKind::Map].
|
||||
Param(Dim, DataType),
|
||||
|
||||
/// Scalar result of a named calculation.
|
||||
///
|
||||
/// The verb "rate" is historical,
|
||||
/// since TAME was developed for insurance rating systems.
|
||||
/// This represents a named expression that yields a scalar value.
|
||||
///
|
||||
/// This serves as a parent to [`Gen`][IdentKind::Gen].
|
||||
Rate(DataType),
|
||||
|
||||
/// Template definition.
|
||||
///
|
||||
/// A template is used only at expansion-time and,
|
||||
/// unlike most other things in the system,
|
||||
/// have no runtime value.
|
||||
Tpl,
|
||||
|
||||
/// User-defined data type.
|
||||
///
|
||||
/// The only types typically defined are enums and unions of enums.
|
||||
/// The type itself has no runtime value,
|
||||
/// but each of the enum variants have an associated value of type
|
||||
/// [`DataType`].
|
||||
Type(DataType),
|
||||
|
||||
/// Input map head (meta identifier generated by compiler for each input
|
||||
/// map).
|
||||
MapHead,
|
||||
|
||||
/// Input field→param mapping.
|
||||
///
|
||||
/// These may only map to [`Param`][IdentKind::Param].
|
||||
/// The source data is arbitrary and provided at runtime.
|
||||
Map,
|
||||
|
||||
/// Input map tail (meta symbol generated by compiler for each input
|
||||
/// map).
|
||||
MapTail,
|
||||
|
||||
/// Return map head (meta symbol generated by compiler for each return
|
||||
/// map).
|
||||
RetMapHead,
|
||||
|
||||
/// Return param→field mapping.
|
||||
///
|
||||
/// Return mappings export data to calling systems.
|
||||
/// They can map back any globally defined numeric expression.
|
||||
RetMap,
|
||||
|
||||
/// Return map tail (meta symbol generated by compiler for each return
|
||||
/// map).
|
||||
RetMapTail,
|
||||
|
||||
/// Arbitrary metadata.
|
||||
///
|
||||
/// This permits the definition of static key/value data that is
|
||||
/// compiled into the final executable.
|
||||
Meta,
|
||||
|
||||
/// Rating worksheet (generated by compiler for worksheet packages).
|
||||
///
|
||||
/// The worksheet exposes intermediate calculation values in a much more
|
||||
/// concise form than that of the Summary Page.
|
||||
Worksheet,
|
||||
}
|
||||
|
||||
impl<'i> TryFrom<SymAttrs<'i>> for IdentKind {
|
||||
type Error = &'static str;
|
||||
|
||||
/// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
|
||||
///
|
||||
/// Certain [`IdentKind`] require that certain attributes be present,
|
||||
/// otherwise the conversion will fail.
|
||||
fn try_from(attrs: SymAttrs<'i>) -> Result<Self, Self::Error> {
|
||||
Self::try_from(&attrs)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i> TryFrom<&SymAttrs<'i>> for IdentKind {
|
||||
type Error = &'static str;
|
||||
|
||||
/// Attempt to raise [`SymAttrs`] into an [`IdentKind`].
|
||||
///
|
||||
/// Certain [`IdentKind`] require that certain attributes be present,
|
||||
/// otherwise the conversion will fail.
|
||||
fn try_from(attrs: &SymAttrs<'i>) -> Result<Self, Self::Error> {
|
||||
let ty = attrs.ty.as_ref().ok_or("missing symbol type")?;
|
||||
|
||||
macro_rules! ident {
|
||||
($to:expr) => {
|
||||
Ok($to)
|
||||
};
|
||||
($to:expr, dim) => {
|
||||
Ok($to(Dim(attrs.dim.ok_or("missing dim")?)))
|
||||
};
|
||||
($to:expr, dtype) => {
|
||||
Ok($to(attrs.dtype.ok_or("missing dtype")?))
|
||||
};
|
||||
($to:expr, dim, dtype) => {
|
||||
Ok($to(
|
||||
Dim(attrs.dim.ok_or("missing dim")?),
|
||||
attrs.dtype.ok_or("missing dtype")?,
|
||||
))
|
||||
};
|
||||
}
|
||||
|
||||
match ty {
|
||||
SymType::Cgen => ident!(Self::Cgen, dim),
|
||||
SymType::Class => ident!(Self::Class, dim),
|
||||
SymType::Const => ident!(Self::Const, dim, dtype),
|
||||
SymType::Func => ident!(Self::Func, dim, dtype),
|
||||
SymType::Gen => ident!(Self::Gen, dim, dtype),
|
||||
SymType::Lparam => ident!(IdentKind::Lparam, dim, dtype),
|
||||
SymType::Param => ident!(IdentKind::Param, dim, dtype),
|
||||
SymType::Rate => ident!(IdentKind::Rate, dtype),
|
||||
SymType::Tpl => ident!(IdentKind::Tpl),
|
||||
SymType::Type => ident!(IdentKind::Type, dtype),
|
||||
SymType::MapHead => ident!(IdentKind::MapHead),
|
||||
SymType::Map => ident!(IdentKind::Map),
|
||||
SymType::MapTail => ident!(IdentKind::MapTail),
|
||||
SymType::RetMapHead => ident!(IdentKind::RetMapHead),
|
||||
SymType::RetMap => ident!(IdentKind::RetMap),
|
||||
SymType::RetMapTail => ident!(IdentKind::RetMapTail),
|
||||
SymType::Meta => ident!(IdentKind::Meta),
|
||||
SymType::Worksheet => ident!(IdentKind::Worksheet),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Identifier dimensions.
|
||||
///
|
||||
/// This determines the number of subscripts needed to access a scalar
|
||||
/// value.
|
||||
/// A value of `0` indicates a scalar;
|
||||
/// a value of `1` indicates a vector;
|
||||
/// a value of `2` indicates a matrix;
|
||||
/// and a value of `n` indicates a multi-dimensional array of
|
||||
/// depth `n`.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
|
||||
pub struct Dim(u8);
|
||||
|
||||
/// Underlying datatype of identifier.
|
||||
///
|
||||
/// TODO: This will always be 0≤n≤9, so let's introduce a newtype for it.
|
||||
impl AsRef<str> for Dim {
|
||||
fn as_ref(&self) -> &str {
|
||||
match self.0 {
|
||||
0 => &"0",
|
||||
1 => &"1",
|
||||
2 => &"2",
|
||||
3 => &"3",
|
||||
4 => &"4",
|
||||
5 => &"5",
|
||||
6 => &"6",
|
||||
7 => &"7",
|
||||
8 => &"8",
|
||||
9 => &"9",
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Underlying datatype of identifier.
|
||||
pub type DataType = SymDtype;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use std::convert::TryInto;
|
||||
|
||||
#[test]
|
||||
fn dim_to_str() {
|
||||
// we'll just test high and low
|
||||
let low: &str = Dim(0).as_ref();
|
||||
let high: &str = Dim(9).as_ref();
|
||||
|
||||
assert_eq!("0", low);
|
||||
assert_eq!("9", high);
|
||||
}
|
||||
|
||||
macro_rules! test_kind {
|
||||
($name:ident, $src:expr => $dest:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
assert_eq!(
|
||||
Ok($dest),
|
||||
SymAttrs {
|
||||
ty: Some($src),
|
||||
..Default::default()
|
||||
}
|
||||
.try_into()
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
($name:ident, $src:expr => $dest:expr, dim) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let dim = 1;
|
||||
|
||||
assert_eq!(
|
||||
Ok($dest(Dim(dim))),
|
||||
SymAttrs {
|
||||
ty: Some($src),
|
||||
dim: Some(dim),
|
||||
..Default::default()
|
||||
}
|
||||
.try_into()
|
||||
);
|
||||
|
||||
// no dim
|
||||
IdentKind::try_from(SymAttrs {
|
||||
ty: Some($src),
|
||||
..Default::default()
|
||||
})
|
||||
.expect_err("must fail when missing dim");
|
||||
}
|
||||
};
|
||||
|
||||
($name:ident, $src:expr => $dest:expr, dtype) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let dtype = SymDtype::Float;
|
||||
|
||||
assert_eq!(
|
||||
Ok($dest(dtype)),
|
||||
SymAttrs {
|
||||
ty: Some($src),
|
||||
dtype: Some(dtype),
|
||||
..Default::default()
|
||||
}
|
||||
.try_into()
|
||||
);
|
||||
|
||||
// no dtype
|
||||
IdentKind::try_from(SymAttrs {
|
||||
ty: Some($src),
|
||||
..Default::default()
|
||||
})
|
||||
.expect_err("must fail when missing dtype");
|
||||
}
|
||||
};
|
||||
|
||||
($name:ident, $src:expr => $dest:expr, dim, dtype) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let dim = 1;
|
||||
let dtype = SymDtype::Float;
|
||||
|
||||
assert_eq!(
|
||||
Ok($dest(Dim(dim), dtype)),
|
||||
SymAttrs {
|
||||
ty: Some($src),
|
||||
dim: Some(dim),
|
||||
dtype: Some(dtype),
|
||||
..Default::default()
|
||||
}
|
||||
.try_into()
|
||||
);
|
||||
|
||||
// no dim
|
||||
IdentKind::try_from(SymAttrs {
|
||||
ty: Some($src),
|
||||
dtype: Some(dtype),
|
||||
..Default::default()
|
||||
})
|
||||
.expect_err("must fail when missing dim");
|
||||
|
||||
// no dtype
|
||||
IdentKind::try_from(SymAttrs {
|
||||
ty: Some($src),
|
||||
dim: Some(dim),
|
||||
..Default::default()
|
||||
})
|
||||
.expect_err("must fail when missing dtype");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_kind!(cgen, SymType::Cgen => IdentKind::Cgen, dim);
|
||||
test_kind!(class, SymType::Class => IdentKind::Class, dim);
|
||||
test_kind!(r#const, SymType::Const => IdentKind::Const, dim, dtype);
|
||||
test_kind!(func, SymType::Func => IdentKind::Func, dim, dtype);
|
||||
test_kind!(gen, SymType::Gen => IdentKind::Gen, dim, dtype);
|
||||
test_kind!(lparam, SymType::Lparam => IdentKind::Lparam, dim, dtype);
|
||||
test_kind!(param, SymType::Param => IdentKind::Param, dim, dtype);
|
||||
test_kind!(rate, SymType::Rate => IdentKind::Rate, dtype);
|
||||
test_kind!(tpl, SymType::Tpl => IdentKind::Tpl);
|
||||
test_kind!(r#type, SymType::Type => IdentKind::Type, dtype);
|
||||
test_kind!(maphead, SymType::MapHead => IdentKind::MapHead);
|
||||
test_kind!(map, SymType::Map => IdentKind::Map);
|
||||
test_kind!(maptail, SymType::MapTail => IdentKind::MapTail);
|
||||
test_kind!(retmaphead, SymType::RetMapHead => IdentKind::RetMapHead);
|
||||
test_kind!(retmap, SymType::RetMap => IdentKind::RetMap);
|
||||
test_kind!(retmaptail, SymType::RetMapTail => IdentKind::RetMapTail);
|
||||
test_kind!(meta, SymType::Meta => IdentKind::Meta);
|
||||
test_kind!(worksheet, SymType::Worksheet => IdentKind::Worksheet);
|
||||
}
|
|
@ -0,0 +1,192 @@
|
|||
// Abstract semantic graph (ASG) intermediate representation (IR)
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Abstract semantic graph.
|
||||
//!
|
||||
//! The [abstract semantic graph][asg] (ASG) is an IR representing the
|
||||
//! relationship between objects using a directed [graph][].
|
||||
//! An _object_ is an identifier or expression.
|
||||
//!
|
||||
//! Since TAME is a declarative language,
|
||||
//! the ASG does not represent control flow;
|
||||
//! instead, it represents the relationship between objects and their
|
||||
//! dependencies.
|
||||
//! Control flow is determined solely by the [linker][crate::ld] based on
|
||||
//! these dependencies.
|
||||
//!
|
||||
//! See [`crate::global`] for available index sizes depending on context.
|
||||
//! For example,
|
||||
//! a linker may choose to use [`crate::global::ProgIdentSize`];
|
||||
//!
|
||||
//!
|
||||
//! Graph Structure
|
||||
//! ===============
|
||||
//! Each node (vector) in the graph represents an [object][Object],
|
||||
//! such as an identifier or an expression.
|
||||
//! Each directed edge `(A->B)` represents that `A` depends upon `B`.
|
||||
//!
|
||||
//! Graphs may contain cycles for recursive functions—that is,
|
||||
//! TAME's ASG is _not_ a DAG.
|
||||
//! Mutually recursive functions are therefore represented as
|
||||
//! [strongly connected components][scc].
|
||||
//!
|
||||
//! [asg]: https://en.wikipedia.org/wiki/Abstract_semantic_graph
|
||||
//! [graph]: https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)
|
||||
//! [scc]: https://en.wikipedia.org/wiki/Strongly_connected_component
|
||||
//!
|
||||
//! Each object may have a number of valid states;
|
||||
//! see [`Object`] for valid object states and transitions.
|
||||
//!
|
||||
//!
|
||||
//! How To Use
|
||||
//! ==========
|
||||
//! A suitable concrete [`Asg`] implementation is provided by
|
||||
//! [`DefaultAsg`].
|
||||
//!
|
||||
//! ```
|
||||
//! use tamer::global;
|
||||
//! use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, Source};
|
||||
//! use tamer::sym::{Interner, DefaultInterner};
|
||||
//!
|
||||
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! // Be sure to choose size and initial capacities appropriate for your
|
||||
//! // situation.
|
||||
//! let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
|
||||
//!
|
||||
//! let interner = DefaultInterner::new();
|
||||
//! let identa_sym = interner.intern("identa");
|
||||
//! let identb_sym = interner.intern("identb");
|
||||
//!
|
||||
//! let identa = asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
|
||||
//! let identb = asg.declare_extern(identb_sym, IdentKind::Meta)?;
|
||||
//!
|
||||
//! assert_eq!(
|
||||
//! Some(&Object::Extern(identb_sym, IdentKind::Meta)),
|
||||
//! asg.get(identb),
|
||||
//! );
|
||||
//!
|
||||
//! // Dependencies can be declared even if an identifier is
|
||||
//! // unresolved. This declares `(identa)->(identb)`.
|
||||
//! asg.add_dep(identa, identb);
|
||||
//! assert!(asg.has_dep(identa, identb));
|
||||
//!
|
||||
//! // TODO: extern resolution
|
||||
//!
|
||||
//! // Identifiers are indexed by symbol name.
|
||||
//! assert_eq!(Some(identa), asg.lookup(identa_sym));
|
||||
//! #
|
||||
//! # Ok(()) // main
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! Missing Identifiers
|
||||
//! -------------------
|
||||
//! Since identifiers in TAME can be defined in any order relative to their
|
||||
//! dependencies within a source file,
|
||||
//! it is often the case that a dependency will have to be added to the
|
||||
//! graph before it is resolved.
|
||||
//! For example,
|
||||
//! [`Asg::add_dep_lookup`] will add an [`Object::Missing`] to the graph
|
||||
//! if either identifier has not yet been declared.
|
||||
//!
|
||||
//! ```
|
||||
//! # use tamer::global;
|
||||
//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
|
||||
//! # use tamer::sym::{Interner, DefaultInterner};
|
||||
//! #
|
||||
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
|
||||
//! # let interner = DefaultInterner::new();
|
||||
//! #
|
||||
//! let identa_sym = interner.intern("identa");
|
||||
//! let identb_sym = interner.intern("identb");
|
||||
//! let (identa, identb) = asg.add_dep_lookup(identa_sym, identb_sym);
|
||||
//!
|
||||
//! assert_eq!(Some(&Object::Missing(identa_sym)), asg.get(identa));
|
||||
//! assert_eq!(Some(&Object::Missing(identb_sym)), asg.get(identb));
|
||||
//!
|
||||
//! // The identifiers returned above are proper objects on the graph.
|
||||
//! assert_eq!(Some(identa), asg.lookup(identa_sym));
|
||||
//! assert_eq!(Some(identb), asg.lookup(identb_sym));
|
||||
//!
|
||||
//! // Once declared, the missing identifier changes state and dependencies
|
||||
//! // are retained.
|
||||
//! asg.declare(identa_sym, IdentKind::Meta, Source::default())?;
|
||||
//!
|
||||
//! assert_eq!(
|
||||
//! Some(&Object::Ident(identa_sym, IdentKind::Meta, Source::default())),
|
||||
//! asg.get(identa),
|
||||
//! );
|
||||
//!
|
||||
//! assert!(asg.has_dep(identa, identb));
|
||||
//! #
|
||||
//! # Ok(()) // main
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! Fragments
|
||||
//! ---------
|
||||
//! A compiled fragment can be attached to any resolved identifier (see
|
||||
//! [`Object::Ident`]) using [`Asg::set_fragment`].
|
||||
//! Doing so changes the state of the identifier to [`Object::IdentFragment`],
|
||||
//! and it is an error to attempt to overwrite that fragment once it is
|
||||
//! set.
|
||||
//!
|
||||
//! ```
|
||||
//! # use tamer::global;
|
||||
//! # use tamer::ir::asg::{Asg, DefaultAsg, IdentKind, Object, FragmentText, Source};
|
||||
//! # use tamer::sym::{Interner, DefaultInterner};
|
||||
//! #
|
||||
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! # let mut asg = DefaultAsg::<global::PkgIdentSize>::with_capacity(1024, 1024);
|
||||
//! # let interner = DefaultInterner::new();
|
||||
//! #
|
||||
//! // Fragments can be attached to resolved identifiers.
|
||||
//! let ident = asg.declare(
|
||||
//! interner.intern("ident"), IdentKind::Meta, Source::default()
|
||||
//! )?;
|
||||
//! asg.set_fragment(ident, FragmentText::from("test fragment"))?;
|
||||
//!
|
||||
//! assert_eq!(
|
||||
//! Some(&Object::IdentFragment(
|
||||
//! interner.intern("ident"),
|
||||
//! IdentKind::Meta,
|
||||
//! Source::default(),
|
||||
//! FragmentText::from("test fragment"),
|
||||
//! )),
|
||||
//! asg.get(ident),
|
||||
//! );
|
||||
//!
|
||||
//! // But overwriting will fail
|
||||
//! let bad = asg.set_fragment(ident, FragmentText::from("overwrite"));
|
||||
//! assert!(bad.is_err());
|
||||
//! #
|
||||
//! # Ok(()) // main
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
mod base;
|
||||
mod graph;
|
||||
mod ident;
|
||||
mod object;
|
||||
|
||||
pub use graph::{Asg, AsgResult, ObjectRef};
|
||||
pub use ident::{Dim, IdentKind};
|
||||
pub use object::{FragmentText, Object, Source};
|
||||
|
||||
/// Default concrete ASG implementation.
|
||||
pub type DefaultAsg<'i, Ix> = base::BaseAsg<'i, Ix>;
|
|
@ -0,0 +1,230 @@
|
|||
// Objects represented on ASG
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Objects represented by the ASG.
|
||||
//!
|
||||
//! _This is a private module.
|
||||
//! See [`super`] for available exports._
|
||||
|
||||
use super::ident::IdentKind;
|
||||
use crate::ir::legacyir::SymAttrs;
|
||||
use crate::sym::Symbol;
|
||||
|
||||
/// Type of object.
|
||||
///
|
||||
/// These types represent object states:
|
||||
///
|
||||
/// ```text
|
||||
/// ,-> (Missing) -------.
|
||||
/// / \ \
|
||||
/// / v v
|
||||
/// ((Empty)) -> (Extern) -> ((Ident)) -> ((IdentFragment)).
|
||||
/// \ ^ /
|
||||
/// \ / \ /
|
||||
/// `--------------------` `-----------'
|
||||
/// ```
|
||||
///
|
||||
/// The [`Empty`][Object::Empty] state is never directly accessable
|
||||
/// through [`Asg`][super::Asg]'s public API,
|
||||
/// as it represents the _absence_ of an object at that node within the
|
||||
/// ASG.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Object<'i> {
|
||||
/// An identifier is expected to be defined but is not yet available.
|
||||
///
|
||||
/// This variant contains the symbol representing the name of the
|
||||
/// expected identifier.
|
||||
/// By defining an object as missing,
|
||||
/// this allows the graph to be built incrementally as objects are
|
||||
/// discovered.
|
||||
///
|
||||
/// Note that this is different than [`Empty`][Object::Empty].
|
||||
Missing(&'i Symbol<'i>),
|
||||
|
||||
/// A resolved identifier.
|
||||
///
|
||||
/// This represents an identifier that has been declared with certain
|
||||
/// type information.
|
||||
Ident(&'i Symbol<'i>, IdentKind, Source<'i>),
|
||||
|
||||
/// An identifier that has not yet been resolved.
|
||||
///
|
||||
/// Externs are upgraded to [`Object::Ident`] once an identifier of
|
||||
/// the same name is loaded.
|
||||
/// It is an error if the loaded identifier does not have a compatible
|
||||
/// [`IdentKind`].
|
||||
Extern(&'i Symbol<'i>, IdentKind),
|
||||
|
||||
/// Identifier with associated text.
|
||||
///
|
||||
/// Code fragments are portions of the target language associated with
|
||||
/// an identifier.
|
||||
/// They are produced by the compiler and it is the job of the
|
||||
/// [linker][crate::ld] to put them into the correct order for the
|
||||
/// final executable.
|
||||
IdentFragment(&'i Symbol<'i>, IdentKind, Source<'i>, FragmentText),
|
||||
|
||||
/// The empty node (default value for indexer).
|
||||
///
|
||||
/// This is not a valid state accessible via [`Asg`][super::Asg].
|
||||
///
|
||||
/// Note that this is different than [`Missing`][Object::Missing].
|
||||
Empty,
|
||||
}
|
||||
|
||||
/// Compiled fragment for identifier.
|
||||
///
|
||||
/// This represents the text associated with an identifier.
|
||||
pub type FragmentText = String;
|
||||
|
||||
/// Metadata about the source of an object.
|
||||
///
|
||||
/// This contains information from the symbol table that does not belong on
|
||||
/// [`IdentKind`],
|
||||
/// since that stores _type_ information.
|
||||
///
|
||||
/// TODO: This does not currently store byte offsets within the source file
|
||||
/// since the original XSLT-based compiler did not have that capability;
|
||||
/// this will provide that information in the future.
|
||||
#[derive(Debug, Default, PartialEq, Clone)]
|
||||
pub struct Source<'i> {
|
||||
/// Name of package containing reference to this object.
|
||||
pub pkg_name: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// Relative path to the source of this object,
|
||||
/// if not present in the current package.
|
||||
pub src: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// The identifier from which this one is derived.
|
||||
///
|
||||
/// See [`IdentKind`] for more information on parents.
|
||||
/// For example,
|
||||
/// a [`IdentKind::Cgen`] always has a parent [`IdentKind::Class`].
|
||||
pub parent: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// Child identifier associated with this identifier.
|
||||
///
|
||||
/// For [`IdentKind::Class`],
|
||||
/// this represents an associated [`IdentKind::Cgen`].
|
||||
pub yields: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// User-friendly identifier description.
|
||||
///
|
||||
/// This is used primarily by [`IdentKind::Class`] and
|
||||
/// [`IdentKind::Gen`].
|
||||
pub desc: Option<String>,
|
||||
|
||||
/// Whether this identifier was generated by the compiler.
|
||||
///
|
||||
/// A generated identifier is representative of an internal
|
||||
/// implementation detail that should remain encapsulated from the
|
||||
/// user and is subject to change over time.
|
||||
///
|
||||
/// Identifiers created by templates are not considered to be generated.
|
||||
pub generated: bool,
|
||||
|
||||
/// Related identifiers.
|
||||
///
|
||||
/// These data represent a kluge created to add additional symbol
|
||||
/// information in two different contexts:
|
||||
///
|
||||
/// - [`IdentKind::Map`] includes the name of the source field; and
|
||||
/// - [`IdentKind::Func`] lists params in order (so that the compiler
|
||||
/// knows application order).
|
||||
///
|
||||
/// TODO: We have `parent`, `yields`, and `from`.
|
||||
/// We should begin to consolodate.
|
||||
pub from: Option<Vec<&'i Symbol<'i>>>,
|
||||
|
||||
/// Whether identifier is virtual (can be overridden).
|
||||
///
|
||||
/// This feature adds complexity and will ideally be removed in the
|
||||
/// future.
|
||||
///
|
||||
/// See also [`override`][Source::override_].
|
||||
pub virtual_: bool,
|
||||
|
||||
/// Whether identifier overrides a virtual identifier.
|
||||
///
|
||||
/// This feature adds complexity and will ideally be removed in the
|
||||
/// future.
|
||||
///
|
||||
/// See also [`virtual_`][Source::virtual_].
|
||||
pub override_: bool,
|
||||
}
|
||||
|
||||
impl<'i> From<SymAttrs<'i>> for Source<'i> {
|
||||
/// Raise Legacy IR [`SymAttrs`].
|
||||
///
|
||||
/// This simply extracts a subset of fields from the source attributes.
|
||||
fn from(attrs: SymAttrs<'i>) -> Self {
|
||||
Source {
|
||||
pkg_name: attrs.pkg_name,
|
||||
src: attrs.src,
|
||||
generated: attrs.generated,
|
||||
parent: attrs.parent,
|
||||
yields: attrs.yields,
|
||||
desc: attrs.desc,
|
||||
from: attrs.from,
|
||||
virtual_: attrs.virtual_,
|
||||
override_: attrs.override_,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::sym::SymbolIndex;
|
||||
|
||||
#[test]
|
||||
fn source_from_sym_attrs() {
|
||||
let nsym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
|
||||
let ssym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
|
||||
let psym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
|
||||
let ysym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
|
||||
let fsym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");
|
||||
|
||||
let attrs = SymAttrs {
|
||||
pkg_name: Some(&nsym),
|
||||
src: Some(&ssym),
|
||||
generated: true,
|
||||
parent: Some(&psym),
|
||||
yields: Some(&ysym),
|
||||
desc: Some("sym desc".to_string()),
|
||||
from: Some(vec![&fsym]),
|
||||
virtual_: true,
|
||||
override_: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
Source {
|
||||
pkg_name: Some(&nsym),
|
||||
src: Some(&ssym),
|
||||
generated: attrs.generated,
|
||||
parent: attrs.parent,
|
||||
yields: attrs.yields,
|
||||
desc: Some("sym desc".to_string()),
|
||||
from: Some(vec![&fsym]),
|
||||
virtual_: true,
|
||||
override_: true,
|
||||
},
|
||||
attrs.into(),
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,347 @@
|
|||
// Legacy IR
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Legacy IR faithful to the XSLT-based compiler.
|
||||
//!
|
||||
//! This represents the intermediate format (IR) used by the `xmlo` files
|
||||
//! (see [`crate::obj::xmlo`]) originally produced by the XSLT-based
|
||||
//! compiler.
|
||||
//! It consists largely of metadata for object symbols.
|
||||
//!
|
||||
//! This IR should be converted into a higher-level IR quickly,
|
||||
//! especially considering that it will be going away in the future.
|
||||
|
||||
use crate::sym::Symbol;
|
||||
use std::convert::TryFrom;
|
||||
use std::result::Result;
|
||||
|
||||
/// Toplevel package attributes.
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub struct PackageAttrs<'i> {
|
||||
/// Unique package identifier.
|
||||
///
|
||||
/// The package name is derived from the filename relative to the
|
||||
/// project root during compilation (see `relroot`).
|
||||
pub name: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// Relative path from package to project root.
|
||||
pub relroot: Option<String>,
|
||||
|
||||
/// Whether this package is a program.
|
||||
///
|
||||
/// A _program_ is a package intended to be linked into a final
|
||||
/// executable.
|
||||
/// Programs cannot be imported by other packages.
|
||||
/// Non-program packages cannot be linked.
|
||||
pub program: bool,
|
||||
|
||||
/// Symbol representing package eligibility.
|
||||
///
|
||||
/// A package is _eligible_ for computation if certain invariants are
|
||||
/// met.
|
||||
/// This symbol is responsible for including each of those invariants as
|
||||
/// dependencies so that they are included at link-time.
|
||||
pub elig: Option<&'i Symbol<'i>>,
|
||||
}
|
||||
|
||||
/// Symbol attributes.
|
||||
///
|
||||
/// This is a subset of all available attributes available on the
|
||||
/// `preproc:sym` nodes;
|
||||
/// more will be added as needed.
|
||||
///
|
||||
/// Not all symbols share the same set of attributes,
|
||||
/// so this represents the union of all possible attribute sets.
|
||||
///
|
||||
/// Due to the number of possible attributes,
|
||||
/// this is not an opaque type.
|
||||
/// Consequently,
|
||||
/// valid values should be enforced by the Rust's type system.
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub struct SymAttrs<'i> {
|
||||
/// Relative path to the package that defined this symbol.
|
||||
///
|
||||
/// Object files store relative paths so that they are somewhat
|
||||
/// portable—the
|
||||
/// entire project root should be able to be relocated.
|
||||
pub src: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// Symbol type.
|
||||
///
|
||||
/// The type describes the purpose of the symbol and determines both how
|
||||
/// it is compiled and its location in the final executable.
|
||||
pub ty: Option<SymType>,
|
||||
|
||||
/// Number of dimensions.
|
||||
///
|
||||
/// This determines the number of subscripts needed to access a scalar
|
||||
/// value.
|
||||
/// A value of `0` indicates a scalar;
|
||||
/// a value of `1` indicates a vector;
|
||||
/// a value of `2` indicates a matrix;
|
||||
/// and a value of `n` indicates a multi-dimensional array of
|
||||
/// depth `n`.
|
||||
pub dim: Option<u8>,
|
||||
|
||||
/// Type of underlying data.
|
||||
///
|
||||
/// This is not a primitive,
|
||||
/// and mostly represents whether or not floating point computations
|
||||
/// will take place.
|
||||
pub dtype: Option<SymDtype>,
|
||||
|
||||
/// Whether the symbol's location will be determined at link-time.
|
||||
///
|
||||
/// Externs allow symbols to be referenced without having yet been given
|
||||
/// a concrete definition,
|
||||
/// provided that an eventual concrete definition matches the
|
||||
/// provided declaration.
|
||||
/// The linker (see [`crate::ld`]) is responsible for ensuring that the
|
||||
/// extern is satisfied and properly located in the final executable.
|
||||
pub extern_: bool,
|
||||
|
||||
/// Unique package identifier.
|
||||
///
|
||||
/// The name of a package is automatically derived from the package path
|
||||
/// relative to the project root.
|
||||
/// _Note that this is problematic if one wants to compile the equivalent
|
||||
/// of shared libraries._
|
||||
pub pkg_name: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// The identifier from which this one is derived.
|
||||
///
|
||||
/// For example,
|
||||
/// [`SymType::Cgen`] has a parent [`SymType::Class`] and
|
||||
/// [`SymType::Gen`] has a parent [`SymType::Rate`].
|
||||
pub parent: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// Whether this identifier was generated by the compiler.
|
||||
///
|
||||
/// A generated identifier is representative of an internal
|
||||
/// implementation detail that should remain encapsulated from the
|
||||
/// user and is subject to change over time.
|
||||
///
|
||||
/// Identifiers created by templates are not considered to be generated.
|
||||
pub generated: bool,
|
||||
|
||||
/// Child identifier associated with this identifier.
|
||||
///
|
||||
/// For [`SymType::Class`],
|
||||
/// this represents an associated [`SymType::Cgen`].
|
||||
pub yields: Option<&'i Symbol<'i>>,
|
||||
|
||||
/// User-friendly identifier description.
|
||||
///
|
||||
/// This is used primarily by [`SymType::Class`] and [`SymType::Gen`].
|
||||
pub desc: Option<String>,
|
||||
|
||||
/// Related identifiers.
|
||||
///
|
||||
/// These data represent a kluge created to add additional symbol
|
||||
/// information in two different contexts:
|
||||
///
|
||||
/// - [`SymType::Map`] includes the name of the source field; and
|
||||
/// - [`SymType::Func`] lists params in order (so that the compiler
|
||||
/// knows application order).
|
||||
pub from: Option<Vec<&'i Symbol<'i>>>,
|
||||
|
||||
/// Whether symbol can be overridden.
|
||||
///
|
||||
/// See also [`override`][SymAttrs::override_].
|
||||
pub virtual_: bool,
|
||||
|
||||
/// Whether symbol is an override of a virtual symbol.
|
||||
///
|
||||
/// See also [`virtual`][SymAttrs::virtual_].
|
||||
pub override_: bool,
|
||||
}
|
||||
|
||||
/// Legacy symbol types.
|
||||
///
|
||||
/// This enum represents all symbol types represented in the `xmlo` files.
|
||||
/// They are overly specialized and will be deprecated in favor of more
|
||||
/// generalized dependent types in later IRs.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum SymType {
|
||||
/// Classification generator (from `lv:classify/@yields`).
|
||||
Cgen,
|
||||
/// Classification (from `lv:classify/@as`).
|
||||
Class,
|
||||
/// Constant (from `lv:const/@name`).
|
||||
Const,
|
||||
/// Function (from `lv:function/@name`).
|
||||
Func,
|
||||
/// Generator (from `lv:rate/@generates`).
|
||||
Gen,
|
||||
/// Local function parameter (from `lv:function/lv:param/@name`) or let
|
||||
/// binding (from `lv:let/lv:values/lv:value/@name`).
|
||||
Lparam,
|
||||
/// Global parameter (from `lv:param/@name`).
|
||||
Param,
|
||||
/// Scalar calculation result (from `lv:rate/@yields`).
|
||||
Rate,
|
||||
/// Template (from `lv:template/@name`).
|
||||
Tpl,
|
||||
/// Typedef (from `lv:type/@name`).
|
||||
Type,
|
||||
/// Input map head (meta symbol generated by compiler for each input map).
|
||||
MapHead,
|
||||
/// Input field→param mapping (from `lvm:map`, `lvm:pass`).
|
||||
Map,
|
||||
/// Input map tail (meta symbol generated by compiler for each input map).
|
||||
MapTail,
|
||||
/// Return map head (meta symbol generated by compiler for each return map).
|
||||
RetMapHead,
|
||||
/// Return param→field mapping (from `lvm:map`, `lvm:pass`).
|
||||
RetMap,
|
||||
/// Return map tail (meta symbol generated by compiler for each return map).
|
||||
RetMapTail,
|
||||
/// Arbitrary metadata (from `lv:meta`).
|
||||
Meta,
|
||||
/// Rating worksheet (generated by compiler for worksheet packages).
|
||||
Worksheet,
|
||||
}
|
||||
|
||||
impl TryFrom<&[u8]> for SymType {
|
||||
type Error = String;
|
||||
|
||||
/// Determine symbol type from source `preproc:sym/@type`.
|
||||
///
|
||||
/// This raises source `xmlo` data into this IR.
|
||||
/// See [`crate::obj::xmlo::reader`].
|
||||
fn try_from(value: &[u8]) -> Result<SymType, Self::Error> {
|
||||
match value {
|
||||
b"cgen" => Ok(SymType::Cgen),
|
||||
b"class" => Ok(SymType::Class),
|
||||
b"const" => Ok(SymType::Const),
|
||||
b"func" => Ok(SymType::Func),
|
||||
b"gen" => Ok(SymType::Gen),
|
||||
b"lparam" => Ok(SymType::Lparam),
|
||||
b"param" => Ok(SymType::Param),
|
||||
b"rate" => Ok(SymType::Rate),
|
||||
b"tpl" => Ok(SymType::Tpl),
|
||||
b"type" => Ok(SymType::Type),
|
||||
b"retmap:head" => Ok(SymType::RetMapHead),
|
||||
b"retmap" => Ok(SymType::RetMap),
|
||||
b"retmap:tail" => Ok(SymType::RetMapTail),
|
||||
b"map:head" => Ok(SymType::MapHead),
|
||||
b"map" => Ok(SymType::Map),
|
||||
b"map:tail" => Ok(SymType::MapTail),
|
||||
b"meta" => Ok(SymType::Meta),
|
||||
b"worksheet" => Ok(SymType::Worksheet),
|
||||
_ => Err(format!(
|
||||
"unknown symbol type `{}`",
|
||||
String::from_utf8(value.to_vec())
|
||||
.unwrap_or("(invalid UTF8)".into())
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Underlying datatype.
|
||||
///
|
||||
/// This is the type of scalar data stored within the given symbol.
|
||||
///
|
||||
/// *NB:* This was _not enforced_ by the XSLT-based compiler.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum SymDtype {
|
||||
/// {⊥,⊤} = {0,1} ⊂ ℤ
|
||||
Boolean,
|
||||
/// ℤ
|
||||
Integer,
|
||||
/// ℝ
|
||||
Float,
|
||||
/// ∅
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl AsRef<str> for SymDtype {
|
||||
/// Produce `xmlo`-compatible representation.
|
||||
fn as_ref(&self) -> &str {
|
||||
match self {
|
||||
SymDtype::Boolean => &"boolean",
|
||||
SymDtype::Integer => &"integer",
|
||||
SymDtype::Float => &"float",
|
||||
SymDtype::Empty => &"empty",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&[u8]> for SymDtype {
|
||||
type Error = String;
|
||||
|
||||
/// Determine data type from source `preproc:sym/@dtype`.
|
||||
///
|
||||
/// This raises source `xmlo` data into this IR.
|
||||
/// See [`crate::obj::xmlo::reader`].
|
||||
fn try_from(value: &[u8]) -> Result<SymDtype, Self::Error> {
|
||||
match value {
|
||||
b"boolean" => Ok(SymDtype::Boolean),
|
||||
b"integer" => Ok(SymDtype::Integer),
|
||||
b"float" => Ok(SymDtype::Float),
|
||||
b"empty" => Ok(SymDtype::Empty),
|
||||
_ => Err(format!(
|
||||
"unknown symbol dtype `{}`",
|
||||
String::from_utf8(value.to_vec())
|
||||
.unwrap_or("(invalid UTF8)".into())
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
// We're not going to check every possible value here since we'd be
|
||||
// maintaining the mapping in two places; we can leave that to
|
||||
// integration tests.
|
||||
#[test]
|
||||
fn symtype_from_u8() {
|
||||
assert_eq!(Ok(SymType::Cgen), SymType::try_from(b"cgen" as &[u8]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn symtype_failure_from_unknown_u8() {
|
||||
match SymType::try_from(b"unknown" as &[u8]) {
|
||||
Err(s) => assert!(s.contains("unknown")),
|
||||
bad => panic!("expected error: {:?}", bad),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn symdtype_from_u8() {
|
||||
assert_eq!(
|
||||
Ok(SymDtype::Integer),
|
||||
SymDtype::try_from(b"integer" as &[u8])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn symdtype_failure_from_unknown_u8() {
|
||||
match SymDtype::try_from(b"unknownd" as &[u8]) {
|
||||
Err(s) => assert!(s.contains("unknownd")),
|
||||
bad => panic!("expected error: {:?}", bad),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn symdtype_as_str() {
|
||||
let boolean: &str = SymDtype::Boolean.as_ref();
|
||||
assert_eq!("boolean", boolean);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
// Intermediate representations (IRs)
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Intermediate representations for TAME programs.
|
||||
//!
|
||||
//! [Intermediate representations][ir] (IRs) are data structures used to
|
||||
//! represent source data in a manner most suitable for a particular phase
|
||||
//! of compilation.
|
||||
//! A single IR may be used by multiple compilation phases,
|
||||
//! or by multiple systems (e.g. various compilers or [linkers][]).
|
||||
//!
|
||||
//! [ir]: https://en.wikipedia.org/wiki/Intermediate_representation
|
||||
//! [linkers]: crate::ld
|
||||
//!
|
||||
//!
|
||||
//! Implicit AST
|
||||
//! ============
|
||||
//! Each input language begins as an [abstract syntax tree][ast] (AST),
|
||||
//! produced by the parser.
|
||||
//! For TAME languages that are XML-based,
|
||||
//! the production of the AST is handled by [`quick_xml`],
|
||||
//! and is effectively the same as the source XML.
|
||||
//! There is no explicit data structure to represent the AST of XML
|
||||
//! sources.
|
||||
//!
|
||||
//! [ast]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
|
||||
//!
|
||||
//!
|
||||
//! Summary of IRs
|
||||
//! ==============
|
||||
//! There are currently two IRs:
|
||||
//!
|
||||
//! 1. **[Legacy IR](legacyir)** corresponds very closely to the structure
|
||||
//! of [`xmlo` object files](super::obj::xmlo).
|
||||
//! It contains a lot of cruft and will be replaced in the future with
|
||||
//! a more suitable IR.
|
||||
//! This stores very limited context for the information it provides,
|
||||
//! so it must quickly translate it to a higher-level IR for further
|
||||
//! processing before context is lost.
|
||||
//! 2. The **[Abstract Semantic Graph (ASG)](asg)** is created from
|
||||
//! lower-level IRs.
|
||||
//! It stores relationships between identifiers and expressions within
|
||||
//! a graph data structure,
|
||||
//! and is capable of representing entire programs composed of many
|
||||
//! different packages.
|
||||
|
||||
pub mod asg;
|
||||
pub mod legacyir;
|
|
@ -15,4 +15,96 @@
|
|||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Combine [object files](crate::obj) into a final executable.
|
||||
//!
|
||||
//! It's user-facing binary is [`tameld`](../../tameld).
|
||||
//!
|
||||
//!
|
||||
//! Background Information
|
||||
//! ======================
|
||||
//! A [linker][] is responsible for combining individually compiled
|
||||
//! [object files](crate::obj) containing relocatable code into a final
|
||||
//! executable.
|
||||
//! This involves putting the compiled code fragments into the right order
|
||||
//! and into the right place within the executable.
|
||||
//!
|
||||
//! [linker]: https://en.wikipedia.org/wiki/Linker_(computing)
|
||||
//!
|
||||
//! _See below for more information on why this linker currently produces
|
||||
//! another intermediate format (`xmle`) rather than a final executable._
|
||||
//!
|
||||
//! The type of relocatable code depends on the _target_.
|
||||
//! Currently, the only target is JavaScript.
|
||||
//!
|
||||
//!
|
||||
//! Backwards-Compatibility With XSLT System
|
||||
//! -------------------------------------
|
||||
//! This linker is part of the TAMER (TAME in Rust) project,
|
||||
//! which aims to incrementally rewrite TAME in Rust.
|
||||
//! Consequently, it must be able to serve as a drop-in replacement for the
|
||||
//! existing (XSLT) linker,
|
||||
//! which takes as input `xmlo` files and produces as output an `xmle`
|
||||
//! file.
|
||||
//! *This is not efficient*,
|
||||
//! and future versions will begin to migrate away from this strategy.
|
||||
//!
|
||||
//! The output `xmle` file can then be fed to a `standalone` command which
|
||||
//! extracts the JavaScript fragment and places it into its own file.
|
||||
//! Even when that is replaced
|
||||
//! (when this just outputs a final JS file directly),
|
||||
//! the `xmle` file is still needed for other purposes,
|
||||
//! such as `summary` and `dote` generation.
|
||||
//! Those too will eventually be linker targets.
|
||||
//!
|
||||
//!
|
||||
//! Linking Process
|
||||
//! ===============
|
||||
//! The linker works in the following steps:
|
||||
//!
|
||||
//! 1. [Object files](crate::obj) are recursively read.
|
||||
//! They are used in a streaming manner for the next step of the
|
||||
//! process;
|
||||
//! they do not persist in memory.
|
||||
//! Only the required portions of the file are loaded.
|
||||
//! See the [Legacy IR](crate::ir::legacyir) for more information.
|
||||
//!
|
||||
//! 2. This information is used to populate the [ASG].
|
||||
//! Information is added to the graph as it is discovered during object
|
||||
//! file loading,
|
||||
//! so the graph initially contains edges to missing identifiers.
|
||||
//! Expressions are _not_ added to the graph,
|
||||
//! as they are not needed for linking.
|
||||
//! Once all data are loaded,
|
||||
//! the ASG contains relocatable code fragments for each identifier.
|
||||
//!
|
||||
//! 3. The ASG is [sorted topologically][topo-sort] so that dependencies
|
||||
//! will be written to the executable file before identifiers that
|
||||
//! depend on them.
|
||||
//! Roots for the sort are specified by the return map.
|
||||
//! _Identifiers that are not accessable from one of those roots will be
|
||||
//! omitted from the executable output._
|
||||
//!
|
||||
//! 4. Relocatable code fragments are output into various sections in the
|
||||
//! executable file.
|
||||
//! This output file is currently `xmle`.
|
||||
//! (**TODO**: Link to new `xmle` crate.)
|
||||
//!
|
||||
//! [ASG]: crate::ir::asg
|
||||
//! [topo-sort]: https://en.wikipedia.org/wiki/Topological_sorting
|
||||
//!
|
||||
//! Steps 1 and 2 are performed at the same time:
|
||||
//! object files are used to immediately populate the [ASG][].
|
||||
//! Since the ASG contains only partial information,
|
||||
//! it must perform other validations (such as extern resolution) during
|
||||
//! this process;
|
||||
//! see [crate::ir::asg] for more information.
|
||||
//!
|
||||
//! Because the topological sort only considered explicitly defined roots,
|
||||
//! identifiers are only included in the final executable if they are
|
||||
//! either a root or are a dependency of a root.
|
||||
//! This makes it possible to create large reusable packages without
|
||||
//! incurring a runtime cost for unused objects,
|
||||
//! which is especially important since templates may expand into many
|
||||
//! identifiers.
|
||||
|
||||
pub mod poc;
|
||||
|
|
|
@ -18,216 +18,44 @@
|
|||
//! **This is a poorly-written proof of concept; do not use!** It has been
|
||||
//! banished to its own file to try to make that more clear.
|
||||
|
||||
use fixedbitset::FixedBitSet;
|
||||
use petgraph::graph::{DiGraph, EdgeIndex, Neighbors, NodeIndex};
|
||||
use petgraph::visit::{DfsPostOrder, GraphBase, IntoNeighbors, Visitable};
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
use std::collections::hash_map::{Entry, Iter};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use crate::global;
|
||||
use crate::ir::asg::IdentKind;
|
||||
use crate::ir::asg::{Asg, DefaultAsg, Object, ObjectRef, Source};
|
||||
use crate::obj::xmle::writer::{Sections, XmleWriter};
|
||||
use crate::obj::xmlo::reader::{XmloError, XmloEvent, XmloReader};
|
||||
use crate::sym::{DefaultInterner, Interner, Symbol};
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use petgraph::visit::DfsPostOrder;
|
||||
use std::convert::TryInto;
|
||||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::io::BufRead;
|
||||
use std::ops::{Deref, Index};
|
||||
use std::rc::Rc;
|
||||
use std::io::BufReader;
|
||||
use std::io::Cursor;
|
||||
|
||||
// The term "sym" is used throughout because it's easier to search for that
|
||||
// in source code than "symbol", which is a generic term with many different
|
||||
// meanings.
|
||||
|
||||
// if mutability is needed:
|
||||
//#[derive(Debug)]
|
||||
//struct SymRecord {
|
||||
// data: SymData,
|
||||
//
|
||||
// // the idea is to keep the index encapsulated so that nothing else can
|
||||
// // ever hold a reference to it, ensuring that it's freed when the node
|
||||
// // is removed
|
||||
// index: Rc<RefCell<Option<NodeIndex>>>,
|
||||
//}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SymData {
|
||||
name: Rc<str>,
|
||||
}
|
||||
|
||||
type DepGraphNode = SymEntry;
|
||||
type DepGraphEdge = ();
|
||||
|
||||
struct DepGraph {
|
||||
graph: DiGraph<DepGraphNode, DepGraphEdge>,
|
||||
|
||||
// serves as both a string internment system and graph indexer
|
||||
index: HashMap<Rc<str>, SymRef>,
|
||||
// if removals are permitted:
|
||||
//index: HashMap<Rc<str>, Weak<RefCell<Option<NodeIndex>>>>,
|
||||
}
|
||||
|
||||
// This encapsulates the underlying Graph to enforce certain
|
||||
// assumptions. For example, we do not permit removing nodes because that
|
||||
// would invalidate the NodeIndex reference in the index, which would then
|
||||
// require workarounds like the commented-out code above and below.
|
||||
//
|
||||
// While Petgraph's use of indexes to represent graph and edge references
|
||||
// makes it easy to bypass the borrow checker, it does just that---it's no
|
||||
// different than a pointer reference (albeit guaranteed to safely reference
|
||||
// a node rather than an arbitrary memory location) that can change out from
|
||||
// under you at any moment. As such, much of the planning that went into
|
||||
// this was determining how to best mitigate that.
|
||||
//
|
||||
// The linker has certain needs that may differ as the compiler evolves, so
|
||||
// it may be desirable to permit deletions in the future. In the meantime,
|
||||
// if a node needs to be deleted, we can simply remove all edges from it and
|
||||
// possibly mark it in a way that states it was removed.
|
||||
//
|
||||
// This graph uses a separate map to serve a dual role: a string internment
|
||||
// system and an indexer by symbol name. This will have to evolve in the
|
||||
// future as the graph ends up containing more stuff.
|
||||
//
|
||||
// This is currently called a dependency graph, since that's what we're
|
||||
// using it for, but in the future the compiler will also use it as an IR,
|
||||
// so this will likely be renamed.
|
||||
impl DepGraph {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
// TODO: with_capacity
|
||||
graph: DiGraph::new(),
|
||||
index: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn declare(&mut self, name: &str) -> SymRef {
|
||||
match self.index.entry(name.into()) {
|
||||
Entry::Occupied(o) => *o.get(),
|
||||
Entry::Vacant(v) => {
|
||||
let entry = SymEntry::MissingSym {
|
||||
name: Rc::clone(v.key()),
|
||||
};
|
||||
|
||||
let index = SymRef(self.graph.add_node(entry));
|
||||
v.insert(index);
|
||||
|
||||
index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// will not duplicate dependencies if they already exist
|
||||
fn declare_dep(&mut self, symbol: SymRef, dep: SymRef) -> () {
|
||||
self.graph.update_edge(*symbol, *dep, ());
|
||||
}
|
||||
|
||||
fn lookup(&self, name: &str) -> Option<SymRef> {
|
||||
self.index.get(name.into()).map(|index| *index)
|
||||
}
|
||||
|
||||
fn index_iter(&self) -> Iter<Rc<str>, SymRef> {
|
||||
self.index.iter()
|
||||
}
|
||||
|
||||
// POC when removals were permitted:
|
||||
//fn add_symbol(&mut self, sym: SymData) -> NodeIndex {
|
||||
// let name = Rc::clone(&sym.name);
|
||||
// let record = SymRecord { data: sym, index: Rc::new(RefCell::new(None)) };
|
||||
// let index = self.graph.add_node(record);
|
||||
|
||||
// let index = Rc::downgrade(&self.graph[index].index);
|
||||
// self.graph[index].index.replace(Some(index));
|
||||
|
||||
// self.index.insert(name, index);
|
||||
// index
|
||||
//}
|
||||
}
|
||||
|
||||
impl GraphBase for DepGraph {
|
||||
type NodeId = NodeIndex;
|
||||
type EdgeId = EdgeIndex;
|
||||
}
|
||||
|
||||
impl Visitable for DepGraph {
|
||||
type Map = FixedBitSet;
|
||||
|
||||
fn visit_map(&self) -> Self::Map {
|
||||
self.graph.visit_map()
|
||||
}
|
||||
|
||||
fn reset_map(&self, map: &mut Self::Map) {
|
||||
self.graph.reset_map(map)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoNeighbors for &'a DepGraph {
|
||||
type Neighbors = Neighbors<'a, DepGraphEdge>;
|
||||
|
||||
fn neighbors(self, n: Self::NodeId) -> Self::Neighbors {
|
||||
self.graph.neighbors(n)
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<SymRef> for DepGraph {
|
||||
type Output = DepGraphNode;
|
||||
|
||||
fn index(&self, index: SymRef) -> &Self::Output {
|
||||
&self.graph[*index]
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we may not to allow this; using SymRef could be a means to
|
||||
// guarantee that a lookup has occurred and that it actually exists. We
|
||||
// don't need this if we set NodeId = SymRef in GraphBase, but that requires
|
||||
// implementing other traits as well.
|
||||
impl Index<NodeIndex> for DepGraph {
|
||||
type Output = DepGraphNode;
|
||||
|
||||
fn index(&self, index: NodeIndex) -> &Self::Output {
|
||||
&self.graph[index]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
struct SymRef(NodeIndex);
|
||||
|
||||
impl From<SymRef> for NodeIndex {
|
||||
fn from(symref: SymRef) -> Self {
|
||||
*symref
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NodeIndex> for SymRef {
|
||||
fn from(index: NodeIndex) -> Self {
|
||||
Self(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for SymRef {
|
||||
type Target = NodeIndex;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum SymEntry {
|
||||
MissingSym { name: Rc<str> },
|
||||
}
|
||||
type LinkerAsg<'i> = DefaultAsg<'i, global::ProgIdentSize>;
|
||||
type LinkerObjectRef = ObjectRef<global::ProgIdentSize>;
|
||||
|
||||
pub fn main() -> Result<(), Box<dyn Error>> {
|
||||
let mut pkgs_seen = HashSet::<String>::new();
|
||||
let mut fragments = HashMap::<String, String>::new();
|
||||
let mut depgraph = DepGraph::new();
|
||||
let mut pkgs_seen: FxHashSet<String> = Default::default();
|
||||
let mut fragments: FxHashMap<&str, String> = Default::default();
|
||||
let mut depgraph = LinkerAsg::with_capacity(65536, 65536);
|
||||
let mut roots = Vec::new();
|
||||
let interner = DefaultInterner::new();
|
||||
|
||||
let package_path = std::env::args().nth(1).expect("Missing argument");
|
||||
let abs_path = fs::canonicalize(package_path).unwrap();
|
||||
|
||||
println!("WARNING: This is proof-of-concept; do not use!");
|
||||
|
||||
load_xmlo(
|
||||
let (name, relroot) = load_xmlo(
|
||||
&abs_path.to_str().unwrap().to_string(),
|
||||
&mut pkgs_seen,
|
||||
&mut fragments,
|
||||
&mut depgraph,
|
||||
)?;
|
||||
&interner,
|
||||
&mut roots,
|
||||
)?
|
||||
.expect("missing root package information");
|
||||
|
||||
// println!(
|
||||
// "Graph {:?}",
|
||||
|
@ -239,92 +67,152 @@ pub fn main() -> Result<(), Box<dyn Error>> {
|
|||
// .collect::<Vec<_>>()
|
||||
// );
|
||||
|
||||
let sorted = sort_deps(&depgraph);
|
||||
roots.extend(
|
||||
vec!["___yield", "___worksheet"]
|
||||
.iter()
|
||||
.map(|name| interner.intern(name))
|
||||
.filter_map(|sym| depgraph.lookup(sym)),
|
||||
);
|
||||
|
||||
println!("Sorted ({}): {:?}", sorted.len(), sorted);
|
||||
let mut sorted = sort_deps(&depgraph, &roots);
|
||||
|
||||
//println!("Sorted ({}): {:?}", sorted.len(), sorted);
|
||||
|
||||
output_xmle(
|
||||
&depgraph,
|
||||
&interner,
|
||||
&mut sorted,
|
||||
name.expect("missing root package name"),
|
||||
relroot.expect("missing root package relroot"),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_xmlo<'a>(
|
||||
fn load_xmlo<'a, 'i, I: Interner<'i>>(
|
||||
path_str: &'a str,
|
||||
pkgs_seen: &mut HashSet<String>,
|
||||
fragments: &mut HashMap<String, String>,
|
||||
depgraph: &mut DepGraph,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
pkgs_seen: &mut FxHashSet<String>,
|
||||
fragments: &mut FxHashMap<&'i str, String>,
|
||||
depgraph: &mut LinkerAsg<'i>,
|
||||
interner: &'i I,
|
||||
roots: &mut Vec<LinkerObjectRef>,
|
||||
) -> Result<Option<(Option<&'i Symbol<'i>>, Option<String>)>, Box<dyn Error>> {
|
||||
let path = fs::canonicalize(path_str)?;
|
||||
let path_str = path.to_str().unwrap();
|
||||
|
||||
let first = pkgs_seen.len() == 0;
|
||||
|
||||
if !pkgs_seen.insert(path_str.to_string()) {
|
||||
return Ok(());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
println!("processing {}", path_str);
|
||||
//println!("processing {}", path_str);
|
||||
|
||||
let mut found = HashSet::<String>::new();
|
||||
let mut found: FxHashSet<&str> = Default::default();
|
||||
|
||||
match Reader::from_file(&path) {
|
||||
Ok(mut reader) => loop {
|
||||
let mut buf = Vec::new();
|
||||
let file = fs::File::open(&path)?;
|
||||
let reader = BufReader::new(file);
|
||||
let mut xmlo = XmloReader::new(reader, interner);
|
||||
let mut elig = None;
|
||||
|
||||
// we know that the XML produced by Saxon is valid
|
||||
reader.check_end_names(false);
|
||||
let mut name: Option<&'i Symbol<'i>> = None;
|
||||
let mut relroot: Option<String> = None;
|
||||
|
||||
match reader.read_event(&mut buf) {
|
||||
Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
|
||||
let mut attrs = ele.attributes();
|
||||
let mut filtered =
|
||||
attrs.with_checks(false).filter_map(Result::ok);
|
||||
loop {
|
||||
match xmlo.read_event() {
|
||||
Ok(XmloEvent::Package(attrs)) => {
|
||||
if first {
|
||||
name = attrs.name;
|
||||
relroot = attrs.relroot;
|
||||
}
|
||||
elig = attrs.elig;
|
||||
}
|
||||
|
||||
match ele.name() {
|
||||
b"preproc:sym-dep" => filtered
|
||||
.find(|attr| attr.key == b"name")
|
||||
.map(|attr| attr.value)
|
||||
.and_then(|mut name| {
|
||||
read_deps(&mut reader, depgraph, name.to_mut())
|
||||
})
|
||||
.ok_or("Missing name"),
|
||||
Ok(XmloEvent::SymDeps(sym, deps)) => {
|
||||
// TODO: API needs to expose whether a symbol is already
|
||||
// known so that we can warn on them
|
||||
|
||||
b"preproc:sym" => {
|
||||
filtered
|
||||
.find(|attr| attr.key == b"src")
|
||||
.map(|attr| attr.value.to_owned())
|
||||
.and_then(|src| {
|
||||
let path_str =
|
||||
std::str::from_utf8(&src).unwrap();
|
||||
|
||||
found.insert(path_str.to_string());
|
||||
Some(())
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
b"preproc:fragment" => filtered
|
||||
.find(|attr| attr.key == b"id")
|
||||
.map(|attr| String::from_utf8(attr.value.to_vec()))
|
||||
.and_then(|id| {
|
||||
let fragment = reader
|
||||
.read_text(ele.name(), &mut Vec::new())
|
||||
.unwrap_or("".to_string());
|
||||
|
||||
fragments.insert(id.unwrap(), fragment);
|
||||
Some(())
|
||||
})
|
||||
.ok_or("Missing fragment id"),
|
||||
_ => Ok(()),
|
||||
// Maps should not pull in symbols since we may end up
|
||||
// mapping to params that are never actually used
|
||||
if !sym.starts_with(":map:") {
|
||||
for dep_sym in deps {
|
||||
depgraph.add_dep_lookup(sym, dep_sym);
|
||||
}
|
||||
}
|
||||
Ok(Event::Eof) => break (),
|
||||
Err(e) => {
|
||||
panic!("Error at {}: {:?}", reader.buffer_position(), e);
|
||||
}
|
||||
_ => Ok(()),
|
||||
}
|
||||
.unwrap_or_else(|r| panic!("Parse error: {:?}", r));
|
||||
|
||||
buf.clear();
|
||||
},
|
||||
Err(e) => panic!("Error {:?}", e),
|
||||
Ok(XmloEvent::SymDecl(sym, attrs)) => {
|
||||
if let Some(sym_src) = attrs.src {
|
||||
found.insert(sym_src);
|
||||
} else if attrs.extern_ {
|
||||
// TODO: externs (they're implicitly handled, without
|
||||
// checks, by Missing)
|
||||
// depgraph.declare_extern(sym, kind);
|
||||
} else {
|
||||
let owned = attrs.src.is_none();
|
||||
|
||||
let kind = (&attrs).try_into().map_err(|err| {
|
||||
format!("sym `{}` attrs error: {}", sym, err)
|
||||
});
|
||||
|
||||
let mut src: Source = attrs.into();
|
||||
|
||||
// Existing convention is to omit @src of local package
|
||||
// (in this case, the program being linked)
|
||||
if first {
|
||||
src.pkg_name = None;
|
||||
}
|
||||
|
||||
// TODO: should probably track these down in the XSLT linker...
|
||||
match kind {
|
||||
Ok(kindval) => {
|
||||
// TODO: inefficient
|
||||
let link_root = owned
|
||||
&& (kindval == IdentKind::Meta
|
||||
|| kindval == IdentKind::Map
|
||||
|| kindval == IdentKind::RetMap);
|
||||
|
||||
let node = depgraph.declare(sym, kindval, src)?;
|
||||
|
||||
if link_root {
|
||||
roots.push(node);
|
||||
}
|
||||
}
|
||||
Err(e) => println!("{:?}; skipping...", e),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(XmloEvent::Fragment(sym, text)) => {
|
||||
let result = depgraph.set_fragment(
|
||||
depgraph.lookup(sym).unwrap_or_else(|| {
|
||||
panic!("missing symbol for fragment: {}", sym)
|
||||
}),
|
||||
text,
|
||||
);
|
||||
|
||||
match result {
|
||||
Ok(_) => (),
|
||||
Err(e) => println!("{:?}; skipping...", e),
|
||||
};
|
||||
}
|
||||
|
||||
// We don't need to read any further than the end of the
|
||||
// header (symtable, sym-deps, fragments)
|
||||
Ok(XmloEvent::Eoh) => break,
|
||||
|
||||
Err(err @ XmloError::UnassociatedFragment) => {
|
||||
println!("{:?}; skipping...", err);
|
||||
}
|
||||
|
||||
err @ Err(_) => err.map(|_| ())?,
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(elig_sym) = elig {
|
||||
roots.push(depgraph.lookup(elig_sym).expect(
|
||||
"internal error: package elig references nonexistant symbol",
|
||||
));
|
||||
}
|
||||
|
||||
let mut dir = path.clone();
|
||||
|
@ -335,118 +223,131 @@ fn load_xmlo<'a>(
|
|||
path_buf.push(relpath);
|
||||
path_buf.set_extension("xmlo");
|
||||
|
||||
//println!("Trying {:?}", path_buf);
|
||||
// println!("Trying {:?}", path_buf);
|
||||
let path_abs = path_buf.canonicalize().unwrap();
|
||||
let path = path_abs.to_str().unwrap();
|
||||
|
||||
load_xmlo(path, pkgs_seen, fragments, depgraph)?;
|
||||
load_xmlo(path, pkgs_seen, fragments, depgraph, interner, roots)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_deps<B>(
|
||||
reader: &mut Reader<B>,
|
||||
depgraph: &mut DepGraph,
|
||||
name: &[u8],
|
||||
) -> Option<()>
|
||||
where
|
||||
B: BufRead,
|
||||
{
|
||||
// TODO: API needs to expose whether a symbol is already known so that
|
||||
// we can warn on them
|
||||
// note: using from_utf8_unchecked here did _not_ improve performance
|
||||
let sym_node = depgraph.declare(std::str::from_utf8(name).unwrap());
|
||||
|
||||
//println!("processing deps for {}", sym_name);
|
||||
|
||||
loop {
|
||||
match reader.read_event(&mut Vec::new()) {
|
||||
Ok(Event::Start(ele)) | Ok(Event::Empty(ele)) => {
|
||||
let mut attrs = ele.attributes();
|
||||
let mut filtered =
|
||||
attrs.with_checks(false).filter_map(Result::ok);
|
||||
|
||||
filtered.find(|attr| attr.key == b"name").and_then(
|
||||
|mut attr| {
|
||||
let name = attr.value.to_mut();
|
||||
let str = std::str::from_utf8(name).unwrap();
|
||||
|
||||
let dep_node = depgraph.declare(&str);
|
||||
depgraph.declare_dep(sym_node, dep_node);
|
||||
|
||||
Some(())
|
||||
},
|
||||
);
|
||||
|
||||
//println!("{:?}", ele.attributes().collect::<Vec<_>>());
|
||||
}
|
||||
|
||||
Ok(Event::Eof) | Ok(Event::End(_)) => break Some(()),
|
||||
|
||||
Err(e) => {
|
||||
panic!("Error at {}: {:?}", reader.buffer_position(), e);
|
||||
}
|
||||
|
||||
_ => (),
|
||||
}
|
||||
if first {
|
||||
Ok(Some((name, relroot)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn sort_deps(depgraph: &DepGraph) -> Vec<&SymEntry> {
|
||||
fn sort_deps<'a, 'i>(
|
||||
depgraph: &'a LinkerAsg<'i>,
|
||||
roots: &Vec<LinkerObjectRef>,
|
||||
) -> Sections<'a, 'i> {
|
||||
// @type=meta, @preproc:elig-class-yields
|
||||
// @type={ret}map{,:head,:tail}
|
||||
|
||||
let roots = discover_roots(depgraph);
|
||||
let mut deps: Sections = Sections::new();
|
||||
|
||||
// This is technically a topological sort, but functions have
|
||||
// cycles. Once we have more symbol metadata, we can filter them out
|
||||
// and actually invoke toposort.
|
||||
let mut dfs = DfsPostOrder::empty(&depgraph);
|
||||
let mut sorted = Vec::new();
|
||||
|
||||
//println!("discovered roots: {:?}", roots);
|
||||
|
||||
// TODO: we'll be processing various roots separately
|
||||
for index in roots {
|
||||
dfs.stack.push(*index);
|
||||
dfs.stack.push((*index).into());
|
||||
}
|
||||
|
||||
// TODO: can we encapsulate NodeIndex?
|
||||
while let Some(index) = dfs.next(&depgraph) {
|
||||
sorted.push(&depgraph[index]);
|
||||
let ident = depgraph.get(index).unwrap();
|
||||
|
||||
match ident {
|
||||
Object::Ident(_, kind, _)
|
||||
| Object::IdentFragment(_, kind, _, _) => match kind {
|
||||
IdentKind::Meta => deps.meta.push_body(ident),
|
||||
IdentKind::Worksheet => deps.worksheet.push_body(ident),
|
||||
IdentKind::Param(_, _) => deps.params.push_body(ident),
|
||||
IdentKind::Type(_) => deps.types.push_body(ident),
|
||||
IdentKind::Func(_, _) => deps.funcs.push_body(ident),
|
||||
IdentKind::MapHead | IdentKind::Map | IdentKind::MapTail => {
|
||||
deps.map.push_body(ident)
|
||||
}
|
||||
IdentKind::RetMapHead
|
||||
| IdentKind::RetMap
|
||||
| IdentKind::RetMapTail => deps.retmap.push_body(ident),
|
||||
_ => deps.rater.push_body(ident),
|
||||
},
|
||||
_ => panic!("unexpected node: {:?}", ident),
|
||||
}
|
||||
}
|
||||
|
||||
sorted
|
||||
deps
|
||||
}
|
||||
|
||||
fn discover_roots(depgraph: &DepGraph) -> Vec<SymRef> {
|
||||
// TODO: filter_map
|
||||
let mut map_syms = depgraph
|
||||
.index_iter()
|
||||
.filter(|(key, _)| {
|
||||
key.starts_with(":map:") || key.starts_with(":retmap:")
|
||||
})
|
||||
.map(|(_, value)| *value)
|
||||
.collect::<Vec<_>>();
|
||||
fn get_interner_value<'a, 'i, I: Interner<'i>>(
|
||||
depgraph: &'a LinkerAsg<'i>,
|
||||
interner: &'i I,
|
||||
name: &str,
|
||||
) -> &'a Object<'i> {
|
||||
depgraph
|
||||
.get(
|
||||
depgraph
|
||||
.lookup(interner.intern(name))
|
||||
.unwrap_or_else(|| panic!("missing identifier: {}", name)),
|
||||
)
|
||||
.expect("Could not get interner value")
|
||||
}
|
||||
|
||||
let mut roots = vec!["___yield", "___worksheet"]
|
||||
.iter()
|
||||
.filter_map(|sym| depgraph.lookup(sym))
|
||||
.collect::<Vec<_>>();
|
||||
fn output_xmle<'a, 'i, I: Interner<'i>>(
|
||||
depgraph: &'a LinkerAsg<'i>,
|
||||
interner: &'i I,
|
||||
sorted: &mut Sections<'a, 'i>,
|
||||
name: &'i Symbol<'i>,
|
||||
relroot: String,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
if !sorted.map.is_empty() {
|
||||
sorted.map.push_head(get_interner_value(
|
||||
depgraph,
|
||||
interner,
|
||||
&String::from(":map:___head"),
|
||||
));
|
||||
sorted.map.push_tail(get_interner_value(
|
||||
depgraph,
|
||||
interner,
|
||||
&String::from(":map:___tail"),
|
||||
));
|
||||
}
|
||||
|
||||
roots.append(&mut map_syms);
|
||||
if !sorted.retmap.is_empty() {
|
||||
sorted.retmap.push_head(get_interner_value(
|
||||
depgraph,
|
||||
interner,
|
||||
&String::from(":retmap:___head"),
|
||||
));
|
||||
sorted.retmap.push_tail(get_interner_value(
|
||||
depgraph,
|
||||
interner,
|
||||
&String::from(":retmap:___tail"),
|
||||
));
|
||||
}
|
||||
|
||||
//println!(
|
||||
// "found roots: {:?}",
|
||||
// roots
|
||||
// .iter()
|
||||
// .map(|index| &depgraph.graph[*index])
|
||||
// .collect::<Vec<_>>()
|
||||
//);
|
||||
let writer = Cursor::new(Vec::new());
|
||||
let mut xmle_writer = XmleWriter::new(writer);
|
||||
xmle_writer
|
||||
.write(&sorted, name, &relroot)
|
||||
.expect("Could not write xmle output");
|
||||
|
||||
roots
|
||||
print!(
|
||||
"{}",
|
||||
String::from_utf8(xmle_writer.into_inner().into_inner())?
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
mod test {
|
||||
#[test]
|
||||
fn placeholder() {}
|
||||
}
|
||||
|
|
|
@ -15,4 +15,13 @@
|
|||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! An incremental rewrite of TAME in Rust.
|
||||
|
||||
pub mod global;
|
||||
pub mod ir;
|
||||
pub mod ld;
|
||||
pub mod obj;
|
||||
pub mod sym;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test;
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
// Object files
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Object file construction and processing.
|
||||
//!
|
||||
//! An _[object file][]_ contains relocatable compiled code, symbol tables,
|
||||
//! and other information produced by the compiler.
|
||||
//! It is the responsibility of the [linker](super::ld) to construct a final
|
||||
//! executable from these files.
|
||||
//!
|
||||
//! [object file]: https://en.wikipedia.org/wiki/Object_file
|
||||
//!
|
||||
//! The only object file currently supported by TAMER is the [`xmlo`]
|
||||
//! format,
|
||||
//! produced by the XSLT compiler.
|
||||
//! It will likely be replaced with [ELF] object files in the future.
|
||||
//!
|
||||
//! [ELF]: https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
|
||||
|
||||
pub mod xmle;
|
||||
pub mod xmlo;
|
|
@ -0,0 +1,69 @@
|
|||
// xmle object files
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! `xmle` file construction and processing.
|
||||
//!
|
||||
//! This file format exists for compatibility with the old compiler
|
||||
//! written in XSLT; it will be removed in the future.
|
||||
//!
|
||||
//!
|
||||
//! `xmle` Files
|
||||
//! ===================
|
||||
//! An `xmle` file is produced by the for each source file.
|
||||
//! The format is XML because the original compiler was written in XSLT.
|
||||
//!
|
||||
//! The general structure of an `xmle` file consists of different sections:
|
||||
//! - map
|
||||
//! - return map
|
||||
//! - statics
|
||||
//! - rater
|
||||
//!
|
||||
//! For example (with some extra information omitted):
|
||||
//!
|
||||
//! ```xml
|
||||
//! <package xmlns="http://www.lovullo.com/rater"
|
||||
//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
|
||||
//! xmlns:l="http://www.lovullo.com/rater/linker"
|
||||
//! title="suppliers/tax"
|
||||
//! program="true"
|
||||
//! name="suppliers/tax"
|
||||
//! __rootpath="../">
|
||||
//! <l:dep>
|
||||
//! <preproc:sym type="func"
|
||||
//! dim="0"
|
||||
//! dtype="float"
|
||||
//! name="min"
|
||||
//! src="../rater/core/numeric/minmax"
|
||||
//! desc="Return the lesser value"/>
|
||||
//! </l:dep>
|
||||
//! <l:map-from>
|
||||
//! <l:from name="latest_operation_hour"/>
|
||||
//! </l:map-from>
|
||||
//! <l:map-exec>
|
||||
//! function( input, callback ) {)
|
||||
//! </l:map-exec>
|
||||
//! <l:retmap-exec>
|
||||
//! function( input, callback ) {)
|
||||
//! </l:retmap-exec>
|
||||
//! <l:static>
|
||||
//! function func_min( args , min1, min2) {return min1;}
|
||||
//! </l:static>
|
||||
//! <l:exec>consts['CMP_OP_EQ'] = 1;</l:exec>
|
||||
//! </package>
|
||||
//! ```
|
||||
|
||||
pub mod writer;
|
|
@ -0,0 +1,47 @@
|
|||
// Object file writer
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! xmle file writer.
|
||||
//!
|
||||
//! This defines a lower-level event-based `XmleWriter` similar to that of
|
||||
//! `quick_xml`, where the events are a slightly higher-level abstraction
|
||||
//! over the types of nodes present in the file.
|
||||
//!
|
||||
//! For more information on xmle files, see the [parent crate][`super`].
|
||||
//!
|
||||
//! The example below is incomplete, but shows the general usage.
|
||||
//!
|
||||
//! ```
|
||||
//! use tamer::obj::xmle::writer::{Sections, XmleWriter};
|
||||
//! use tamer::sym::{DefaultInterner, Interner, Symbol};
|
||||
//! use std::io::Cursor;
|
||||
//!
|
||||
//! let interner = DefaultInterner::new();
|
||||
//! let name = interner.intern(&String::from("foo"));
|
||||
//!
|
||||
//! let sections = Sections::new();
|
||||
//! let writer = Cursor::new(Vec::new());
|
||||
//! let mut xmle_writer = XmleWriter::new(writer);
|
||||
//! xmle_writer.write(§ions, name, &String::from(""));
|
||||
//! ```
|
||||
|
||||
mod writer;
|
||||
mod xmle;
|
||||
|
||||
pub use writer::{Result, Section, Sections, Writer, WriterError};
|
||||
|
||||
pub use xmle::XmleWriter;
|
|
@ -0,0 +1,320 @@
|
|||
// xmle object file writer
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::ir::asg::Object;
|
||||
use crate::sym::Symbol;
|
||||
use quick_xml::Error as XmlError;
|
||||
use std::io::{Error as IoError, Write};
|
||||
use std::result;
|
||||
use std::str::Utf8Error;
|
||||
|
||||
type ObjectRef<'a, 'i> = &'a Object<'i>;
|
||||
pub type Result<T = ()> = result::Result<T, WriterError>;
|
||||
pub type ObjectVec<'a, 'i> = Vec<ObjectRef<'a, 'i>>;
|
||||
|
||||
/// A wrapper around a `Write` object
|
||||
///
|
||||
/// This is used to take the [`Sections`] and write out the xmle files.
|
||||
pub trait Writer<W: Write> {
|
||||
fn write(
|
||||
&mut self,
|
||||
sections: &Sections,
|
||||
name: Symbol,
|
||||
relroot: &str,
|
||||
) -> Result<()>
|
||||
where
|
||||
Self: Sized;
|
||||
}
|
||||
|
||||
/// A Section that needs to be written to the buffer
|
||||
///
|
||||
/// Most sections will only need a `body`, but some innlude `head` and `tail`
|
||||
/// information. Rather than dealing with those differently, each `Section`
|
||||
/// will have a `head` and `tail` that are empty by default.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Section<'a, 'i> {
|
||||
head: ObjectVec<'a, 'i>,
|
||||
body: ObjectVec<'a, 'i>,
|
||||
tail: ObjectVec<'a, 'i>,
|
||||
}
|
||||
|
||||
impl<'a, 'i> Section<'a, 'i> {
|
||||
/// Constructor for Sections
|
||||
///
|
||||
/// ```
|
||||
/// use tamer::obj::xmle::writer::Section;
|
||||
///
|
||||
/// let section = Section::new();
|
||||
/// ```
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
head: Vec::new(),
|
||||
body: Vec::new(),
|
||||
tail: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// The length of the `Section`
|
||||
pub fn len(&self) -> usize {
|
||||
self.head.len() + self.body.len() + self.tail.len()
|
||||
}
|
||||
|
||||
/// Check if the `Section` is empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Push an `Object` into a `Section`'s head
|
||||
pub fn push_head(&mut self, obj: ObjectRef<'a, 'i>) {
|
||||
self.head.push(&obj)
|
||||
}
|
||||
|
||||
/// Push an `Object` into a `Section`'s body
|
||||
pub fn push_body(&mut self, obj: ObjectRef<'a, 'i>) {
|
||||
self.body.push(&obj)
|
||||
}
|
||||
|
||||
/// Push an `Object` into a `Section`'s tail
|
||||
pub fn push_tail(&mut self, obj: ObjectRef<'a, 'i>) {
|
||||
self.tail.push(&obj)
|
||||
}
|
||||
|
||||
/// Merge the parts of a `Section` into one [`SectionIterator`]
|
||||
///
|
||||
/// The `Section` internals need to be iterated as a group so we needed to
|
||||
/// create a custom iterator, [`SectionIterator`] to do this for us. This
|
||||
/// method allows us to access the iterator.
|
||||
///
|
||||
/// ```
|
||||
/// use tamer::obj::xmle::writer::Section;
|
||||
/// use tamer::ir::asg::Object;
|
||||
///
|
||||
/// let mut section = Section::new();
|
||||
/// let obj = Object::Empty;
|
||||
/// let expect = vec![&obj, &obj, &obj];
|
||||
///
|
||||
/// section.push_head(&obj);
|
||||
/// section.push_body(&obj);
|
||||
/// section.push_tail(&obj);
|
||||
/// let section_iter = section.iter();
|
||||
///
|
||||
/// for object in section_iter {
|
||||
/// assert_eq!(&obj, object);
|
||||
/// }
|
||||
/// ```
|
||||
pub fn iter(&self) -> SectionIterator {
|
||||
SectionIterator {
|
||||
inner: Box::new(
|
||||
self.head
|
||||
.iter()
|
||||
.chain(self.body.iter())
|
||||
.chain(self.tail.iter())
|
||||
.copied(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for an Iterator
|
||||
///
|
||||
/// This allows us to iterate over all parts of a [`Section`].
|
||||
pub struct SectionIterator<'a, 'i> {
|
||||
inner: Box<dyn Iterator<Item = &'a Object<'i>> + 'a>,
|
||||
}
|
||||
|
||||
impl<'a, 'i> Iterator for SectionIterator<'a, 'i> {
|
||||
type Item = &'a Object<'i>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inner.next()
|
||||
}
|
||||
}
|
||||
|
||||
/// Sections that need to be written to a buffer
|
||||
///
|
||||
/// All the properties are public [`Section`] objects and will be accessed
|
||||
/// directly by the [`Writer`].
|
||||
#[derive(Default)]
|
||||
pub struct Sections<'a, 'i> {
|
||||
pub map: Section<'a, 'i>,
|
||||
pub retmap: Section<'a, 'i>,
|
||||
pub meta: Section<'a, 'i>,
|
||||
pub worksheet: Section<'a, 'i>,
|
||||
pub params: Section<'a, 'i>,
|
||||
pub types: Section<'a, 'i>,
|
||||
pub funcs: Section<'a, 'i>,
|
||||
pub rater: Section<'a, 'i>,
|
||||
}
|
||||
|
||||
impl<'a, 'i> Sections<'a, 'i> {
|
||||
/// Constructor for Sections
|
||||
///
|
||||
/// ```
|
||||
/// use tamer::obj::xmle::writer::Sections;
|
||||
///
|
||||
/// let sections = Sections::new();
|
||||
/// ```
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: Section::new(),
|
||||
retmap: Section::new(),
|
||||
meta: Section::new(),
|
||||
worksheet: Section::new(),
|
||||
params: Section::new(),
|
||||
types: Section::new(),
|
||||
funcs: Section::new(),
|
||||
rater: Section::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error implementations for the writer
|
||||
#[derive(Debug)]
|
||||
pub enum WriterError {
|
||||
Io(IoError),
|
||||
Utf8(Utf8Error),
|
||||
XmlError(XmlError),
|
||||
ExpectedFragment(String),
|
||||
}
|
||||
|
||||
impl From<IoError> for WriterError {
|
||||
fn from(err: IoError) -> Self {
|
||||
WriterError::Io(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Utf8Error> for WriterError {
|
||||
fn from(err: Utf8Error) -> Self {
|
||||
WriterError::Utf8(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<XmlError> for WriterError {
|
||||
fn from(err: XmlError) -> Self {
|
||||
WriterError::XmlError(err)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn section_empty() {
|
||||
let section = Section::new();
|
||||
|
||||
assert!(section.head.is_empty());
|
||||
assert!(section.body.is_empty());
|
||||
assert!(section.tail.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_head() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.head.is_empty());
|
||||
|
||||
section.push_head(&obj);
|
||||
|
||||
assert_eq!(Some(&&obj), section.head.get(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_body() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.body.is_empty());
|
||||
|
||||
section.push_body(&obj);
|
||||
|
||||
let body = section.body;
|
||||
assert_eq!(Some(&&obj), body.get(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_tail() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.tail.is_empty());
|
||||
|
||||
section.push_tail(&obj);
|
||||
|
||||
assert_eq!(Some(&&obj), section.tail.get(0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_len() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert_eq!(0, section.len());
|
||||
section.push_head(&obj);
|
||||
assert_eq!(1, section.len());
|
||||
section.push_body(&obj);
|
||||
assert_eq!(2, section.len());
|
||||
section.push_tail(&obj);
|
||||
assert_eq!(3, section.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_is_empty_head() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.is_empty());
|
||||
section.push_head(&obj);
|
||||
assert!(!section.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_is_empty_body() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.is_empty());
|
||||
section.push_body(&obj);
|
||||
assert!(!section.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_is_empty_tail() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
|
||||
assert!(section.is_empty());
|
||||
section.push_tail(&obj);
|
||||
assert!(!section.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn section_iterator() {
|
||||
let mut section = Section::new();
|
||||
let obj = Object::Empty;
|
||||
let expect = vec![&obj, &obj, &obj];
|
||||
|
||||
section.push_head(&obj);
|
||||
section.push_body(&obj);
|
||||
section.push_tail(&obj);
|
||||
|
||||
let collection: Vec<_> = section.iter().collect();
|
||||
|
||||
assert_eq!(expect, collection);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,749 @@
|
|||
// Concrete xmle writer
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::writer::{Result, SectionIterator, Sections, WriterError};
|
||||
use crate::ir::asg::{IdentKind, Object};
|
||||
use crate::sym::Symbol;
|
||||
use fxhash::FxHashSet;
|
||||
#[cfg(test)]
|
||||
use mock::MockXmlWriter as XmlWriter;
|
||||
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
|
||||
#[cfg(not(test))]
|
||||
use quick_xml::Writer as XmlWriter;
|
||||
use std::io::Write;
|
||||
|
||||
/// Responsible for writing to the xmle files
|
||||
pub struct XmleWriter<W: Write> {
|
||||
writer: XmlWriter<W>,
|
||||
}
|
||||
|
||||
impl<W: Write> XmleWriter<W> {
|
||||
/// Create a new instance of `XmleWriter`
|
||||
/// ```
|
||||
/// use std::io::Cursor;
|
||||
/// use tamer::obj::xmle::writer::XmleWriter;
|
||||
///
|
||||
/// let writer = Cursor::new(Vec::new());
|
||||
/// let xmle_writer = XmleWriter::new(writer);
|
||||
/// ```
|
||||
pub fn new(write: W) -> Self {
|
||||
let writer = XmlWriter::new_with_indent(write, b' ', 2);
|
||||
|
||||
Self { writer }
|
||||
}
|
||||
|
||||
/// Consume the `XmleWriter` and return the inner `Write` object
|
||||
///
|
||||
/// ```
|
||||
/// use std::io::Cursor;
|
||||
/// use tamer::obj::xmle::writer::XmleWriter;
|
||||
///
|
||||
/// let writer = Cursor::new(Vec::new());
|
||||
/// let xmle_writer = XmleWriter::new(writer);
|
||||
/// assert!(xmle_writer.into_inner().into_inner().is_empty());
|
||||
/// ```
|
||||
pub fn into_inner(self) -> W {
|
||||
self.writer.into_inner()
|
||||
}
|
||||
|
||||
/// Write xmle
|
||||
///
|
||||
/// Goes through each of the pre-ordered [`Sections`] and writes to the
|
||||
/// buffer.
|
||||
///
|
||||
/// ```
|
||||
/// use std::io::Cursor;
|
||||
/// use tamer::obj::xmle::writer::{Sections, XmleWriter};
|
||||
/// use tamer::sym::{Symbol, SymbolIndex};
|
||||
/// use tamer::sym::{DefaultInterner, Interner};
|
||||
///
|
||||
/// let writer = Cursor::new(Vec::new());
|
||||
/// let mut xmle_writer = XmleWriter::new(writer);
|
||||
/// let sections = Sections::new();
|
||||
/// let a = "foo";
|
||||
/// let interner = DefaultInterner::new();
|
||||
/// let name = interner.intern(&a);
|
||||
/// xmle_writer.write(
|
||||
/// §ions,
|
||||
/// &name,
|
||||
/// &String::from(""),
|
||||
/// );
|
||||
/// let buf = xmle_writer.into_inner().into_inner();
|
||||
/// assert!(!buf.is_empty(), "something was written to the buffer");
|
||||
/// ```
|
||||
pub fn write(
|
||||
&mut self,
|
||||
sections: &Sections,
|
||||
name: &Symbol,
|
||||
relroot: &str,
|
||||
) -> Result {
|
||||
self.write_start_package(name, &relroot)?
|
||||
.write_element(b"l:dep", |writer| {
|
||||
writer.write_sections(§ions, &relroot)
|
||||
})?
|
||||
// This was not in the original linker, but we need to be able to
|
||||
// convey this information for `standalones` (which has received
|
||||
// some logic from the old linker for the time being).
|
||||
.write_element(b"l:map-from", |writer| {
|
||||
writer.write_froms(§ions)
|
||||
})?
|
||||
.write_element(b"l:map-exec", |writer| {
|
||||
writer.write_section(sections.map.iter())
|
||||
})?
|
||||
.write_element(b"l:retmap-exec", |writer| {
|
||||
writer.write_section(sections.retmap.iter())
|
||||
})?
|
||||
.write_element(b"l:static", |writer| {
|
||||
writer
|
||||
.write_section(sections.meta.iter())?
|
||||
.write_section(sections.worksheet.iter())?
|
||||
.write_section(sections.params.iter())?
|
||||
.write_section(sections.types.iter())?
|
||||
.write_section(sections.funcs.iter())
|
||||
})?
|
||||
.write_element(b"l:exec", |writer| {
|
||||
writer.write_section(sections.rater.iter())
|
||||
})?
|
||||
.write_end_tag(b"package")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write an element
|
||||
///
|
||||
/// This writes the opening tag, the content, and the closing tag for a
|
||||
/// given element. The callback is what will write the element's body.
|
||||
#[inline]
|
||||
fn write_element<F>(
|
||||
&mut self,
|
||||
name: &[u8],
|
||||
callback: F,
|
||||
) -> Result<&mut XmleWriter<W>>
|
||||
where
|
||||
F: FnOnce(&mut Self) -> Result<&mut XmleWriter<W>>,
|
||||
{
|
||||
self.write_start_tag(name)?;
|
||||
(callback)(self)?;
|
||||
self.write_end_tag(name)?;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Open the `package` element
|
||||
///
|
||||
/// The `package` element's opening tag needs attributes, so it cannot use
|
||||
/// `write_start_tag` directly.
|
||||
fn write_start_package(
|
||||
&mut self,
|
||||
name: &Symbol,
|
||||
relroot: &str,
|
||||
) -> Result<&mut XmleWriter<W>> {
|
||||
let root =
|
||||
BytesStart::owned_name(b"package".to_vec()).with_attributes(vec![
|
||||
("xmlns", "http://www.lovullo.com/rater"),
|
||||
("xmlns:preproc", "http://www.lovullo.com/rater/preproc"),
|
||||
("xmlns:l", "http://www.lovullo.com/rater/linker"),
|
||||
("title", &name), // TODO
|
||||
("program", "true"),
|
||||
("name", &name),
|
||||
("__rootpath", &relroot),
|
||||
]);
|
||||
|
||||
self.writer.write_event(Event::Start(root))?;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Open an element's tag
|
||||
fn write_start_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
|
||||
self.writer
|
||||
.write_event(Event::Start(BytesStart::borrowed_name(name)))?;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Close an element's tag
|
||||
fn write_end_tag(&mut self, name: &[u8]) -> Result<&mut XmleWriter<W>> {
|
||||
self.writer
|
||||
.write_event(Event::End(BytesEnd::borrowed(name)))?;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Write all [`Sections`]
|
||||
///
|
||||
/// All the [`Sections`] found need to be written out using the `writer`
|
||||
/// object.
|
||||
fn write_sections(
|
||||
&mut self,
|
||||
sections: &Sections,
|
||||
relroot: &str,
|
||||
) -> Result<&mut XmleWriter<W>> {
|
||||
let all = sections
|
||||
.meta
|
||||
.iter()
|
||||
.chain(sections.map.iter())
|
||||
.chain(sections.retmap.iter())
|
||||
.chain(sections.worksheet.iter())
|
||||
.chain(sections.params.iter())
|
||||
.chain(sections.types.iter())
|
||||
.chain(sections.funcs.iter())
|
||||
.chain(sections.rater.iter());
|
||||
|
||||
for ident in all {
|
||||
match ident {
|
||||
Object::Ident(sym, kind, src)
|
||||
| Object::IdentFragment(sym, kind, src, _) => {
|
||||
let name: &str = sym;
|
||||
|
||||
// this'll be formalized more sanely
|
||||
let mut attrs = match kind {
|
||||
IdentKind::Cgen(dim) => {
|
||||
vec![("type", "cgen"), ("dim", dim.as_ref())]
|
||||
}
|
||||
IdentKind::Class(dim) => {
|
||||
vec![("type", "class"), ("dim", dim.as_ref())]
|
||||
}
|
||||
IdentKind::Const(dim, dtype) => vec![
|
||||
("type", "const"),
|
||||
("dim", dim.as_ref()),
|
||||
("dtype", dtype.as_ref()),
|
||||
],
|
||||
IdentKind::Func(dim, dtype) => vec![
|
||||
("type", "func"),
|
||||
("dim", dim.as_ref()),
|
||||
("dtype", dtype.as_ref()),
|
||||
],
|
||||
IdentKind::Gen(dim, dtype) => vec![
|
||||
("type", "gen"),
|
||||
("dim", dim.as_ref()),
|
||||
("dtype", dtype.as_ref()),
|
||||
],
|
||||
IdentKind::Lparam(dim, dtype) => vec![
|
||||
("type", "lparam"),
|
||||
("dim", dim.as_ref()),
|
||||
("dtype", dtype.as_ref()),
|
||||
],
|
||||
IdentKind::Param(dim, dtype) => vec![
|
||||
("type", "param"),
|
||||
("dim", dim.as_ref()),
|
||||
("dtype", dtype.as_ref()),
|
||||
],
|
||||
IdentKind::Rate(dtype) => {
|
||||
vec![("type", "rate"), ("dtype", dtype.as_ref())]
|
||||
}
|
||||
IdentKind::Tpl => vec![("type", "tpl")],
|
||||
IdentKind::Type(dtype) => {
|
||||
vec![("type", "type"), ("dtype", dtype.as_ref())]
|
||||
}
|
||||
IdentKind::MapHead => vec![("type", "map:head")],
|
||||
IdentKind::Map => vec![("type", "map")],
|
||||
IdentKind::MapTail => vec![("type", "map:tail")],
|
||||
IdentKind::RetMapHead => vec![("type", "retmap:head")],
|
||||
IdentKind::RetMap => vec![("type", "retmap")],
|
||||
IdentKind::RetMapTail => vec![("type", "retmap:tail")],
|
||||
IdentKind::Meta => vec![("type", "meta")],
|
||||
IdentKind::Worksheet => vec![("type", "worksheet")],
|
||||
};
|
||||
|
||||
attrs.push(("name", name));
|
||||
|
||||
if src.generated {
|
||||
attrs.push(("preproc:generated", "true"));
|
||||
}
|
||||
|
||||
let srcpath: String;
|
||||
if let Some(pkg_name) = src.pkg_name {
|
||||
srcpath = format!("{}{}", relroot, pkg_name);
|
||||
attrs.push(("src", &srcpath));
|
||||
}
|
||||
if let Some(parent) = src.parent {
|
||||
attrs.push(("parent", parent));
|
||||
}
|
||||
if let Some(yields) = src.yields {
|
||||
attrs.push(("yields", yields));
|
||||
}
|
||||
if let Some(desc) = &src.desc {
|
||||
attrs.push(("desc", &desc));
|
||||
}
|
||||
|
||||
let sym = BytesStart::owned_name(b"preproc:sym".to_vec())
|
||||
.with_attributes(attrs);
|
||||
|
||||
self.writer.write_event(Event::Empty(sym))?;
|
||||
}
|
||||
_ => unreachable!("filtered out during sorting"),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Write the source `from`
|
||||
///
|
||||
/// If a `map` object has a `from` attribute in its source, we need to
|
||||
/// write them using the `writer`'s `write_event`.
|
||||
fn write_froms(
|
||||
&mut self,
|
||||
sections: &Sections,
|
||||
) -> Result<&mut XmleWriter<W>> {
|
||||
let mut map_froms: FxHashSet<&str> = Default::default();
|
||||
|
||||
let map_iter = sections.map.iter();
|
||||
|
||||
for map_ident in map_iter {
|
||||
match map_ident {
|
||||
Object::Ident(_, _, src)
|
||||
| Object::IdentFragment(_, _, src, _) => {
|
||||
if let Some(froms) = &src.from {
|
||||
for from in froms {
|
||||
map_froms.insert(from);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!("filtered out during sorting"),
|
||||
}
|
||||
}
|
||||
|
||||
for from in map_froms {
|
||||
let name: &str = from;
|
||||
|
||||
self.writer.write_event(Event::Empty(
|
||||
BytesStart::borrowed_name(b"l:from")
|
||||
.with_attributes(vec![("name", name)]),
|
||||
))?;
|
||||
}
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Write a ['Section`]
|
||||
///
|
||||
/// Iterates through the parts of a `Section` and writes them using the
|
||||
/// `writer`'s 'write_event`.
|
||||
fn write_section(
|
||||
&mut self,
|
||||
idents: SectionIterator,
|
||||
) -> Result<&mut XmleWriter<W>> {
|
||||
for ident in idents {
|
||||
match ident {
|
||||
Object::IdentFragment(_, _, _, frag) => {
|
||||
self.writer.write_event(Event::Text(
|
||||
BytesText::from_plain_str(frag),
|
||||
))?;
|
||||
}
|
||||
// Cgen, Gen, and Lparam are not expected to be present, so we
|
||||
// can ignore them when we determeing when to return an Err.
|
||||
Object::Ident(_, IdentKind::Cgen(_), _)
|
||||
| Object::Ident(_, IdentKind::Gen(_, _), _)
|
||||
| Object::Ident(_, IdentKind::Lparam(_, _), _) => (),
|
||||
obj => {
|
||||
return Err(WriterError::ExpectedFragment(format!(
|
||||
"fragment expected: {:?}",
|
||||
obj
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod mock {
|
||||
use super::*;
|
||||
|
||||
pub struct MockXmlWriter<W: Write> {
|
||||
inner: W,
|
||||
pub write_callback: Option<Box<dyn for<'a> Fn(&Event<'a>) -> Result>>,
|
||||
}
|
||||
|
||||
impl<W: Write> MockXmlWriter<W> {
|
||||
pub fn new(inner: W) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
write_callback: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_indent(inner: W, _: u8, _: u8) -> Self {
|
||||
Self::new(inner)
|
||||
}
|
||||
|
||||
pub fn write_event<'a, E: AsRef<Event<'a>>>(
|
||||
&mut self,
|
||||
event: E,
|
||||
) -> Result<usize> {
|
||||
(self
|
||||
.write_callback
|
||||
.as_ref()
|
||||
.expect("missing mock write_callback"))(
|
||||
event.as_ref()
|
||||
)?;
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> W {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::ir::asg::Dim;
|
||||
use crate::ir::asg::Source;
|
||||
use crate::ir::legacyir::SymAttrs;
|
||||
use crate::obj::xmle::writer::Section;
|
||||
use crate::sym::{Symbol, SymbolIndex};
|
||||
use std::str;
|
||||
|
||||
type Sut<W> = XmleWriter<W>;
|
||||
|
||||
#[test]
|
||||
fn writer_uses_inner_buffer() -> Result {
|
||||
let expected = vec![1, 2, 3];
|
||||
let buf = expected.clone();
|
||||
|
||||
let sut = Sut::new(buf);
|
||||
|
||||
assert_eq!(expected, sut.into_inner());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_start_package() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Start(bytes_start) => {
|
||||
let name = str::from_utf8(bytes_start.name());
|
||||
match name {
|
||||
Ok("package") => {
|
||||
let attributes = bytes_start.attributes();
|
||||
assert_eq!(7, attributes.count());
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unreachable"),
|
||||
}
|
||||
}
|
||||
_ => panic!("did not match expected event"),
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
|
||||
sut.write_start_package(&sym, &String::from(""))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_start_tag() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Start(bytes_start) => {
|
||||
let name = str::from_utf8(bytes_start.name());
|
||||
match name {
|
||||
Ok("l:dep") => {
|
||||
let attributes = bytes_start.attributes();
|
||||
assert_eq!(0, attributes.count());
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unreachable"),
|
||||
}
|
||||
}
|
||||
_ => panic!("did not match expected event"),
|
||||
}));
|
||||
|
||||
sut.write_start_tag(b"l:dep")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_end_tag() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::End(bytes_end) => {
|
||||
let name = str::from_utf8(bytes_end.name());
|
||||
assert_eq!("package", name?);
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("did not match expected event"),
|
||||
}));
|
||||
|
||||
sut.write_end_tag(b"package")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_section() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Text(_) => (Ok(())),
|
||||
_ => panic!("did not trigger event"),
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let obj = Object::IdentFragment(
|
||||
&sym,
|
||||
IdentKind::Meta,
|
||||
Source::default(),
|
||||
String::from(""),
|
||||
);
|
||||
|
||||
let mut section = Section::new();
|
||||
section.push_body(&obj);
|
||||
sut.write_section(section.iter())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_section_ignores_other_kinds() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|_| {
|
||||
panic!("callback should not have been called");
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "sym");
|
||||
let obj = Object::Ident(
|
||||
&sym,
|
||||
IdentKind::Cgen(Dim::default()),
|
||||
Source::default(),
|
||||
);
|
||||
|
||||
let mut section = Section::new();
|
||||
section.push_body(&obj);
|
||||
sut.write_section(section.iter())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_section_catch_missing() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
sut.writer.write_callback = Some(Box::new(|_| {
|
||||
panic!("callback should not have been called");
|
||||
}));
|
||||
|
||||
let obj = Object::Empty;
|
||||
|
||||
let mut section = Section::new();
|
||||
section.push_body(&obj);
|
||||
let result = sut.write_section(section.iter());
|
||||
|
||||
match result {
|
||||
Err(WriterError::ExpectedFragment(_)) => {}
|
||||
_ => panic!("expected Err"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_sections() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Empty(bytes_start) => {
|
||||
let name = str::from_utf8(bytes_start.name())?;
|
||||
assert_eq!("preproc:sym", name);
|
||||
let mut attributes = bytes_start.attributes();
|
||||
assert_eq!(2, attributes.clone().count());
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("type", str::from_utf8(attr.key)?);
|
||||
assert_eq!("worksheet", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("name", str::from_utf8(attr.key)?);
|
||||
assert_eq!("random_symbol", str::from_utf8(&attr.value)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unexpected event"),
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
|
||||
let object =
|
||||
Object::Ident(&sym, IdentKind::Worksheet, Source::default());
|
||||
let mut sections = Sections::new();
|
||||
sections.map.push_body(&object);
|
||||
sut.write_sections(§ions, &String::from(""))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_sections_with_sources() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Empty(bytes_start) => {
|
||||
let name = str::from_utf8(bytes_start.name())?;
|
||||
assert_eq!("preproc:sym", name);
|
||||
|
||||
let mut attributes = bytes_start.attributes();
|
||||
assert_eq!(7, attributes.clone().count());
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("type", str::from_utf8(attr.key)?);
|
||||
assert_eq!("worksheet", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("name", str::from_utf8(attr.key)?);
|
||||
assert_eq!("name", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("preproc:generated", str::from_utf8(attr.key)?);
|
||||
assert_eq!("true", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("src", str::from_utf8(attr.key)?);
|
||||
assert_eq!("rootname", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("parent", str::from_utf8(attr.key)?);
|
||||
assert_eq!("parent", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("yields", str::from_utf8(attr.key)?);
|
||||
assert_eq!("yields", str::from_utf8(&attr.value)?);
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("desc", str::from_utf8(attr.key)?);
|
||||
assert_eq!("sym desc", str::from_utf8(&attr.value)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unexpected event"),
|
||||
}));
|
||||
|
||||
let nsym = Symbol::new_dummy(SymbolIndex::from_u32(1), "name");
|
||||
let ssym = Symbol::new_dummy(SymbolIndex::from_u32(2), "src");
|
||||
let psym = Symbol::new_dummy(SymbolIndex::from_u32(3), "parent");
|
||||
let ysym = Symbol::new_dummy(SymbolIndex::from_u32(4), "yields");
|
||||
let fsym = Symbol::new_dummy(SymbolIndex::from_u32(5), "from");
|
||||
|
||||
let attrs = SymAttrs {
|
||||
pkg_name: Some(&nsym),
|
||||
src: Some(&ssym),
|
||||
generated: true,
|
||||
parent: Some(&psym),
|
||||
yields: Some(&ysym),
|
||||
desc: Some("sym desc".to_string()),
|
||||
from: Some(vec![&fsym]),
|
||||
virtual_: true,
|
||||
..Default::default()
|
||||
};
|
||||
let object = Object::Ident(&nsym, IdentKind::Worksheet, attrs.into());
|
||||
let mut sections = Sections::new();
|
||||
sections.map.push_body(&object);
|
||||
sut.write_sections(§ions, &String::from("root"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_froms() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Empty(bytes_start) => {
|
||||
let name = str::from_utf8(bytes_start.name())?;
|
||||
assert_eq!("l:from", name);
|
||||
|
||||
let mut attributes = bytes_start.attributes();
|
||||
assert_eq!(1, attributes.clone().count());
|
||||
|
||||
let attr = attributes.next().expect("Expects attributes")?;
|
||||
assert_eq!("name", str::from_utf8(attr.key)?);
|
||||
assert_eq!("dest symbol", str::from_utf8(&attr.value)?);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unexpected event"),
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "source symbol");
|
||||
let symb = Symbol::new_dummy(SymbolIndex::from_u32(2), "dest symbol");
|
||||
|
||||
let mut src = Source::default();
|
||||
src.from = Some(vec![&symb]);
|
||||
let object = Object::Ident(&sym, IdentKind::Worksheet, src);
|
||||
let mut sections = Sections::new();
|
||||
sections.map.push_body(&object);
|
||||
sut.write_froms(§ions)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_froms_no_from_no_write() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
_ => panic!("unexpected write"),
|
||||
}));
|
||||
|
||||
let sym = Symbol::new_dummy(SymbolIndex::from_u32(1), "random_symbol");
|
||||
|
||||
let object =
|
||||
Object::Ident(&sym, IdentKind::Worksheet, Source::default());
|
||||
let mut sections = Sections::new();
|
||||
sections.map.push_body(&object);
|
||||
sut.write_froms(§ions)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn write_element() -> Result {
|
||||
let mut sut = Sut::new(vec![]);
|
||||
|
||||
sut.writer.write_callback = Some(Box::new(|event| match event {
|
||||
Event::Start(bytes) => {
|
||||
let name = str::from_utf8(bytes.name());
|
||||
match name {
|
||||
Ok("foo") => {
|
||||
let attributes = bytes.attributes();
|
||||
assert_eq!(0, attributes.count());
|
||||
Ok(())
|
||||
}
|
||||
_ => panic!("unreachable"),
|
||||
}
|
||||
}
|
||||
Event::End(bytes) => {
|
||||
let name = str::from_utf8(bytes.name());
|
||||
match name {
|
||||
Ok("foo") => Ok(()),
|
||||
_ => panic!("unreachable"),
|
||||
}
|
||||
}
|
||||
_ => panic!("did not match expected event"),
|
||||
}));
|
||||
|
||||
sut.write_element(b"foo", |writer| Ok(writer))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
// xmlo object files
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! `xmlo` object file construction and processing.
|
||||
//!
|
||||
//! This object file format exists for compatibility with the old compiler
|
||||
//! written in XSLT;
|
||||
//! it will be removed in the future.
|
||||
//!
|
||||
//!
|
||||
//! `xmlo` Object Files
|
||||
//! ===================
|
||||
//! An `xmlo` object file is produced by the for each source file.
|
||||
//! It is a terribly inefficient object format and will be eliminated in the
|
||||
//! future.
|
||||
//! The format is XML because the original compiler was written in XSLT.
|
||||
//!
|
||||
//! The general structure of an `xmlo` file consists of:
|
||||
//! - Package metadata as attributes on the root node;
|
||||
//! - A symbol table along with symbol metadata;
|
||||
//! - Symbol dependencies (as [adjacency lists][]);
|
||||
//! - Compiled JavaScript fragments for each applicable symbol; and
|
||||
//! - Expanded source XML.
|
||||
//!
|
||||
//! [adjacency lists]: https://en.wikipedia.org/wiki/Adjacency_list
|
||||
//!
|
||||
//! For example (with some extra information omitted):
|
||||
//!
|
||||
//! ```xml
|
||||
//! <package xmlns="http://www.lovullo.com/rater"
|
||||
//! xmlns:preproc="http://www.lovullo.com/rater/preproc"
|
||||
//! title="Example Package"
|
||||
//! name="example/package"
|
||||
//! __rootpath="../"
|
||||
//! preproc:elig-class-yields="isEligexamplepackage">
|
||||
//! <!-- Symbol table -->
|
||||
//! <preproc:symtable>
|
||||
//! <preproc:sym name=":class:some-sym" type="class" ... />
|
||||
//! <!-- ... -->
|
||||
//! </preproc:symtable>
|
||||
//!
|
||||
//! <!-- Dependency graph (adjacency lists) -->
|
||||
//! <preproc:sym-deps>
|
||||
//! <preproc:sym-dep name=":class:some-sym">
|
||||
//! <preproc:sym-ref name="someOtherSym" />
|
||||
//! <!-- ... -->
|
||||
//! </preproc:sym-dep>
|
||||
//! </preproc:sym-deps>
|
||||
//!
|
||||
//! <!-- Compiled JS fragments -->
|
||||
//! <preproc:fragments>
|
||||
//! <preproc:fragment id=":class:some-sym">
|
||||
//! classes['some-sym'] = '...generated JS code...';
|
||||
//! </preproc:fragment>
|
||||
//! </preproc:fragments>
|
||||
//!
|
||||
//! <!-- Expanded src -->
|
||||
//! </package>
|
||||
//! ```
|
||||
|
||||
pub mod reader;
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,826 @@
|
|||
// String internment
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! String internment system.
|
||||
//!
|
||||
//! Interned strings are represented by [`Symbol`],
|
||||
//! created by an [`Interner`]:
|
||||
//~
|
||||
//! - [`ArenaInterner`] - Intern pool backed by an [arena][] for fast
|
||||
//! and stable allocation.
|
||||
//! - [`DefaultInterner`] - The currently recommended intern pool
|
||||
//! configuration for symbol interning.
|
||||
//! - [`FxArenaInterner`] - Intern pool backed by an [arena][] using the
|
||||
//! [Fx Hash][fxhash] hashing algorithm.
|
||||
//!
|
||||
//! Interners return symbols by reference which allows for `O(1)` comparison
|
||||
//! by pointer.
|
||||
//!
|
||||
//! [arena]: bumpalo
|
||||
//!
|
||||
//! ```
|
||||
//! use tamer::sym::{Interner, DefaultInterner, Symbol, SymbolIndex};
|
||||
//!
|
||||
//! // Inputs to be interned
|
||||
//! let a = "foo";
|
||||
//! let b = &"foo".to_string();
|
||||
//! let c = "foobar";
|
||||
//! let d = &c[0..3];
|
||||
//!
|
||||
//! // Interners employ interior mutability and so do not need to be
|
||||
//! // declared `mut`
|
||||
//! let interner = DefaultInterner::new();
|
||||
//!
|
||||
//! let (ia, ib, ic, id) = (
|
||||
//! interner.intern(a),
|
||||
//! interner.intern(b),
|
||||
//! interner.intern(c),
|
||||
//! interner.intern(d),
|
||||
//! );
|
||||
//!
|
||||
//! assert_eq!(ia, ib);
|
||||
//! assert_eq!(ia, id);
|
||||
//! assert_eq!(ib, id);
|
||||
//! assert_ne!(ia, ic);
|
||||
//!
|
||||
//! // All interns can be cloned and clones are eq
|
||||
//! assert_eq!(*ia, ia.clone());
|
||||
//!
|
||||
//! // Only "foo" and "foobar" are interned
|
||||
//! assert_eq!(2, interner.len());
|
||||
//! assert!(interner.contains("foo"));
|
||||
//! assert!(interner.contains("foobar"));
|
||||
//! assert!(!interner.contains("something else"));
|
||||
//!
|
||||
//! // Each symbol has an associated, densely-packed integer value
|
||||
//! // that can be used for indexing
|
||||
//! assert_eq!(SymbolIndex::from_u32(1), ia.index());
|
||||
//! assert_eq!(SymbolIndex::from_u32(1), ib.index());
|
||||
//! assert_eq!(SymbolIndex::from_u32(2), ic.index());
|
||||
//! assert_eq!(SymbolIndex::from_u32(1), id.index());
|
||||
//! ```
|
||||
//!
|
||||
//! What Is String Interning?
|
||||
//! =========================
|
||||
//! _[String interning][]_ is a process by which a single copy of a string
|
||||
//! is stored immutably in memory as part of a _pool_.
|
||||
//! When the same string is later encountered,
|
||||
//! a reference to the string in the pool is used rather than allocating a
|
||||
//! new string.
|
||||
//! Interned strings are typically referred to as "symbols" or "atoms".
|
||||
//!
|
||||
//! String comparison then amounts to comparing pointers (`O(1)`)
|
||||
//! rather than having to scan the string (`O(n)`).
|
||||
//! There is, however, a hashing cost of interning strings,
|
||||
//! as well as looking up strings in the intern pool.
|
||||
//!
|
||||
//! [string interning]: https://en.wikipedia.org/wiki/String_interning
|
||||
//!
|
||||
//!
|
||||
//! Internment Mechanism
|
||||
//! ====================
|
||||
//! The current [`DefaultInterner`] is [`FxArenaInterner`],
|
||||
//! which is an [arena][]-allocated intern pool mapped by the
|
||||
//! [Fx Hash][fxhash] hash function:
|
||||
//!
|
||||
//! 1. Strings are compared against the existing intern pool using a
|
||||
//! [`HashMap`].
|
||||
//! 2. If a string has not yet been interned:
|
||||
//! - The string is copied into the arena-backed pool;
|
||||
//! - A new [`Symbol`] is allocated adjacent to it in the arena holding
|
||||
//! a string slice referencing the arena-allocated string; and
|
||||
//! - The symbol is stored as the value in the [`HashMap`] for that key.
|
||||
//! 3. Otherwise, a reference to the existing [`Symbol`] is returned.
|
||||
//!
|
||||
//! Since the arena provides a stable location in memory,
|
||||
//! and all symbols are immutable,
|
||||
//! [`ArenaInterner`] is able to safely return any number of references to
|
||||
//! a single [`Symbol`],
|
||||
//! bound to the lifetime of the arena itself.
|
||||
//! Since the [`Symbol`] contains the string slice,
|
||||
//! it also acts as a [smart pointer] for the interned string itself,
|
||||
//! allowing [`Symbol`] to be used in any context where `&str` is
|
||||
//! expected.
|
||||
//! Dropping a [`Symbol`] does _not_ affect the underlying arena-allocated
|
||||
//! data.
|
||||
//!
|
||||
//! [smart pointer]: https://doc.rust-lang.org/book/ch15-00-smart-pointers.html
|
||||
//!
|
||||
//! Each symbol also has an associated integer index value
|
||||
//! (see [`Symbol::index`]),
|
||||
//! which provides a dense range of values suitable for use in vectors
|
||||
//! as an alternative to [`HashMap`] for mapping symbol data.
|
||||
//!
|
||||
//! Since a reference to the same [`Symbol`] is returned for each
|
||||
//! [`Interner::intern`] and [`Interner::intern_soft`] call,
|
||||
//! symbols can be compared by pointer in `O(1)` time.
|
||||
//! Symbols also implement [`Copy`],
|
||||
//! and will still compare equal to other symbols referencing the same
|
||||
//! interned value by comparing the underlying string slice pointers.
|
||||
//!
|
||||
//! This implementation was heavily motivated by [Rustc's own internment
|
||||
//! system][rustc-intern],
|
||||
//! but differs in significant ways:
|
||||
//!
|
||||
//! - This implementation stores string references in [`Symbol`] rather
|
||||
//! than relying on a global singleton [`Interner`];
|
||||
//! - Consequently, associates the lifetime of interned strings with that
|
||||
//! of the underlying arena rather than casting to `&'static`;
|
||||
//! - Retrieves symbol values by pointer reference without requiring use
|
||||
//! of [`Interner`] or a locking mechanism; and
|
||||
//! - Stores [`Symbol`] objects in the arena rather than within a vector
|
||||
//! indexed by [`SymbolIndex`].
|
||||
//!
|
||||
//!
|
||||
//! Name Mangling
|
||||
//! =============
|
||||
//! Interners do not perform [name mangling][].
|
||||
//! For future consideration,
|
||||
//! see [RFC 2603][rfc-2603] and the [Itanium ABI][itanium-abi].
|
||||
//!
|
||||
//! [name mangling]: https://en.wikipedia.org/wiki/Name_mangling
|
||||
//! [rfc-2603]: https://rust-lang.github.io/rfcs/2603-symbol-name-mangling-v2.html
|
||||
//! [itanium-abi]: http://refspecs.linuxbase.org/cxxabi-1.86.html#mangling
|
||||
//!
|
||||
//!
|
||||
//! Related Work and Further Reading
|
||||
//! ================================
|
||||
//! String interning is often tightly coupled with symbols (in the generic
|
||||
//! sense),
|
||||
//! sometimes called atoms.
|
||||
//! Symbols can often be either interned,
|
||||
//! and therefore compared for equivalency,
|
||||
//! or _uninterned_,
|
||||
//! which makes them unique even to symbols of the same name.
|
||||
//! Interning may also be done automatically by a language for performance.
|
||||
//! Languages listed below that allow for explicit interning may also
|
||||
//! perform automatic interning as well
|
||||
//! (for example, `'symbol` in Lisp and `lowercase_vars` as atoms in
|
||||
//! Erlang).
|
||||
//!
|
||||
//! | Language | Interned | Uninterned |
|
||||
//! | -------- | -------- | ---------- |
|
||||
//! | [Erlang][] | [`list_to_atom`][edt] | _(None)_ |
|
||||
//! | [GNU Emacs Lisp][] | [`intern`][es], [`intern-soft`][es] | [`make-symbol`][es], [`gensym`][es] |
|
||||
//! | [GNU Guile][] | [`string->symbol`][gs], [`gensym`][gs] | [`make-symbol`][gu] |
|
||||
//! | [JavaScript][] | [`Symbol.for`][js] | [`Symbol`][js] |
|
||||
//! | [Java][] | [`intern`][jvs] | _(None)_ |
|
||||
//! | [Lua][] | _(Automatic for string performance)_ | _(None)_ |
|
||||
//! | [MIT/GNU Scheme][] | [`intern`][ms], [`intern-soft`][ms], [`string->symbol`][ms] | [`string->uninterned-symbol`][ms], [`generate-uninterned-symbol`][ms] |
|
||||
//! | [PHP][] | _(Automatic for string [performance][pp])_ | _(None)_ |
|
||||
//! | [Python][] | [`sys.intern`][pys] | _(None)_ |
|
||||
//! | [R6RS Scheme][] | [`string->symbol`][r6s] | _(None)_ |
|
||||
//! | [Racket][] | [`string->symbol`][rs], [`string->unreadable-symbol`][rs] | [`string->uninterned-symbol`][rs], [`gensym`][rs] |
|
||||
//!
|
||||
//! [gnu guile]: https://www.gnu.org/software/guile/
|
||||
//! [gs]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Primitives.html#Symbol-Primitives
|
||||
//! [gu]: https://www.gnu.org/software/guile/manual/html_node/Symbol-Uninterned.html#Symbol-Uninterned
|
||||
//! [gnu emacs lisp]: https://www.gnu.org/software/emacs/
|
||||
//! [es]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Creating-Symbols.html
|
||||
//! [racket]: https://racket-lang.org/
|
||||
//! [rs]: https://docs.racket-lang.org/reference/symbols.html
|
||||
//! [r6rs scheme]: http://www.r6rs.org/
|
||||
//! [r6s]: http://www.r6rs.org/final/html/r6rs/r6rs-Z-H-14.html
|
||||
//! [mit/gnu scheme]: https://www.gnu.org/software/mit-scheme/
|
||||
//! [ms]: https://www.gnu.org/software/mit-scheme/documentation/mit-scheme-ref/Symbols.html
|
||||
//! [javascript]: https://developer.mozilla.org/en-US/docs/Web/JavaScript
|
||||
//! [js]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol
|
||||
//! [java]: http://openjdk.java.net/
|
||||
//! [jvs]: https://cr.openjdk.java.net/~iris/se/12/latestSpec/api/java.base/java/lang/String.html#intern()
|
||||
//! [php]: https://www.php.net/
|
||||
//! [pp]: https://wiki.php.net/rfc/performanceimprovements
|
||||
//! [erlang]: https://erlang.org/
|
||||
//! [edt]: http://erlang.org/doc/reference_manual/data_types.html
|
||||
//! [lua]: https://www.lua.org/
|
||||
//! [python]: https://www.python.org/
|
||||
//! [pys]: https://docs.python.org/3/library/sys.html
|
||||
//!
|
||||
//! More information:
|
||||
//! - Wikipedia entry on [string interning][].
|
||||
//! - The [flyweight pattern][] in object-oriented programming is a type
|
||||
//! of interning.
|
||||
//! - [RFC 1845][rfc-1845] gives an example of string interning using
|
||||
//! `Rc<str>`.
|
||||
//! - Emacs directly exposes the intern pool at runtime as
|
||||
//! [`obarray`][es].
|
||||
//! - [`string-cache`][rust-string-cache] is a string interning system
|
||||
//! for Rust developed by Mozilla for Servo.
|
||||
//! - [`string-interner`][rust-string-interner] is another string
|
||||
//! interning library for Rust.
|
||||
//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
|
||||
//! [arena allocator][rustc-arena] and avoids `Rc` by representing
|
||||
//! symbols as integer values and converting them to strings using a
|
||||
//! global pool and unsafe rust to cast to a `static` slice.
|
||||
//! - Rustc identifies symbols by integer value encapsulated within a
|
||||
//! `Symbol`.
|
||||
//! - Rustc's [`newtype_index!` macro][rustc-nt] uses
|
||||
//! [`global::NonZeroProgSymSize`] so that [`Option`] uses no
|
||||
//! additional space (see [pull request `53315`][rustc-nt-pr]).
|
||||
//! - Differences between TAMER and Rustc's implementations are outlined
|
||||
//! above.
|
||||
//!
|
||||
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
|
||||
//! [rust-string-cache]: https://github.com/servo/string-cache
|
||||
//! [rust-string-interner]: https://github.com/robbepop/string-interner
|
||||
//! [rfc-1845]: https://rust-lang.github.io/rfcs/1845-shared-from-slice.html
|
||||
//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
|
||||
//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
|
||||
//! [rustc-nt]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/macro.newtype_index.html
|
||||
//! [rustc-nt-pr]: https://github.com/rust-lang/rust/pull/53315
|
||||
//!
|
||||
//! The hash function chosen for this module is [Fx Hash][fxhash].
|
||||
//!
|
||||
//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
|
||||
//! function,
|
||||
//! but [now uses Fx Hash][rustc-fx].
|
||||
//! This was extracted into the [`fxhash`][fxhash] crate,
|
||||
//! which is used by TAMER.
|
||||
//! - TAMER originally used FNV,
|
||||
//! but benchmarks showed that Fx Hash was more performant.
|
||||
//! - Benchmarks for other hash functions,
|
||||
//! including FNV,
|
||||
//! can be found at the [`hash-rs`][hash-rs] project.
|
||||
//!
|
||||
//! [fnv]: https://doc.servo.org/fnv/
|
||||
//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
|
||||
//! [hash-rs]: https://github.com/Gankra/hash-rs
|
||||
|
||||
use crate::global;
|
||||
use bumpalo::Bump;
|
||||
use fxhash::FxBuildHasher;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::fmt;
|
||||
use std::hash::BuildHasher;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// Unique symbol identifier.
|
||||
///
|
||||
/// _Do not construct this value yourself;_
|
||||
/// use an [`Interner`].
|
||||
///
|
||||
/// This newtype helps to prevent other indexes from being used where a
|
||||
/// symbol index is expected.
|
||||
/// Note, however, that it provides no defense against mixing symbol indexes
|
||||
/// between multiple [`Interner`]s.
|
||||
///
|
||||
/// The index `0` is never valid because of [`global::NonZeroProgSymSize`],
|
||||
/// which allows us to have `Option<SymbolIndex>` at no space cost.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub struct SymbolIndex(global::NonZeroProgSymSize);
|
||||
|
||||
impl From<SymbolIndex> for usize {
|
||||
fn from(value: SymbolIndex) -> usize {
|
||||
value.0.get().try_into().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolIndex {
|
||||
/// Construct index from a non-zero `u32` value.
|
||||
///
|
||||
/// Panics
|
||||
/// ------
|
||||
/// Will panic if `n == 0`.
|
||||
pub fn from_u32(n: u32) -> SymbolIndex {
|
||||
SymbolIndex(global::NonZeroProgSymSize::new(n).unwrap())
|
||||
}
|
||||
|
||||
/// Construct index from an unchecked non-zero `u32` value.
|
||||
///
|
||||
/// This does not verify that `n > 0` and so must only be used in
|
||||
/// contexts where this invariant is guaranteed to hold.
|
||||
/// Unlike [`from_u32`](SymbolIndex::from_u32),
|
||||
/// this never panics.
|
||||
unsafe fn from_u32_unchecked(n: u32) -> SymbolIndex {
|
||||
SymbolIndex(global::NonZeroProgSymSize::new_unchecked(n))
|
||||
}
|
||||
}
|
||||
|
||||
/// Interned string.
|
||||
///
|
||||
/// A reference to this symbol is returned each time the same string is
|
||||
/// interned with the same [`Interner`];
|
||||
/// as such,
|
||||
/// symbols can be compared for equality by pointer;
|
||||
/// the underlying symbol id need not be used.
|
||||
///
|
||||
/// Each symbol is identified by a unique integer
|
||||
/// (see [`index`](Symbol::index)).
|
||||
/// The use of integers creates a more dense range of values than pointers,
|
||||
/// which allows callers to use a plain [`Vec`] as a map instead of
|
||||
/// something far more expensive like
|
||||
/// [`HashSet`](std::collections::HashSet);
|
||||
/// this is especially beneficial for portions of the system that make
|
||||
/// use of nearly all interned symbols,
|
||||
/// like the ASG.
|
||||
///
|
||||
/// The symbol also stores a string slice referencing the interned string
|
||||
/// itself,
|
||||
/// whose lifetime is that of the [`Interner`]'s underlying data store.
|
||||
/// Dereferencing the symbol will expose the underlying slice.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct Symbol<'i> {
|
||||
index: SymbolIndex,
|
||||
str: &'i str,
|
||||
}
|
||||
|
||||
impl<'i> Symbol<'i> {
|
||||
/// Construct a new interned value.
|
||||
///
|
||||
/// _This must only be done by an [`Interner`]._
|
||||
/// As such,
|
||||
/// this function is not public.
|
||||
///
|
||||
/// For test builds (when `cfg(test)`),
|
||||
/// `new_dummy` is available to create symbols for tests.
|
||||
#[inline]
|
||||
fn new(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
|
||||
Self { index, str }
|
||||
}
|
||||
|
||||
/// Retrieve unique symbol index.
|
||||
///
|
||||
/// This is a densely-packed identifier that can be used as an index for
|
||||
/// mapping.
|
||||
/// See [`SymbolIndex`] for more information.
|
||||
#[inline]
|
||||
pub fn index(&self) -> SymbolIndex {
|
||||
self.index
|
||||
}
|
||||
|
||||
/// Construct a new interned value _for testing_.
|
||||
///
|
||||
/// This is a public version of [`Symbol::new`] available for test
|
||||
/// builds.
|
||||
/// This separate name is meant to strongly imply that you should not be
|
||||
/// doing this otherwise.
|
||||
#[cfg(test)]
|
||||
#[inline(always)]
|
||||
pub fn new_dummy(index: SymbolIndex, str: &'i str) -> Symbol<'i> {
|
||||
Self::new(index, str)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i> PartialEq for Symbol<'i> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
std::ptr::eq(self as *const _, other as *const _)
|
||||
|| std::ptr::eq(self.str.as_ptr(), other.str.as_ptr())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i> Eq for Symbol<'i> {}
|
||||
|
||||
impl<'i> Deref for Symbol<'i> {
|
||||
type Target = str;
|
||||
|
||||
/// Dereference to interned string slice.
|
||||
///
|
||||
/// This allows for symbols to be used where strings are expected.
|
||||
#[inline]
|
||||
fn deref(&self) -> &str {
|
||||
self.str
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i> fmt::Display for Symbol<'i> {
|
||||
/// Display name of underlying string.
|
||||
///
|
||||
/// Since symbols contain pointers to their interned slices,
|
||||
/// we effectively get this for free.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.str)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create, store, compare, and retrieve [`Symbol`] values.
|
||||
///
|
||||
/// Interners accept string slices and produce values of type [`Symbol`].
|
||||
/// A reference to the same [`Symbol`] will always be returned for a given
|
||||
/// string,
|
||||
/// allowing symbols to be compared for equality cheaply by comparing
|
||||
/// pointers.
|
||||
/// Symbol locations in memory are fixed for the lifetime of the interner.
|
||||
///
|
||||
/// If you care whether a value has been interned yet or not,
|
||||
/// see [`intern_soft`][Interner::intern_soft`] and
|
||||
/// [`contains`](Interner::contains).
|
||||
///
|
||||
/// See the [module-level documentation](self) for an example.
|
||||
pub trait Interner<'i> {
|
||||
/// Intern a string slice or return an existing [`Symbol`].
|
||||
///
|
||||
/// If the provided string has already been interned,
|
||||
/// then a reference to the existing [`Symbol`] will be returned.
|
||||
/// Otherwise,
|
||||
/// the string will be interned and a new [`Symbol`] created.
|
||||
///
|
||||
/// The lifetime of the returned symbol is bound to the lifetime of the
|
||||
/// underlying intern pool.
|
||||
///
|
||||
/// To retrieve an existing symbol _without_ interning,
|
||||
/// see [`intern_soft`](Interner::intern_soft).
|
||||
fn intern(&'i self, value: &str) -> &'i Symbol<'i>;
|
||||
|
||||
/// Retrieve an existing intern for the string slice `s`.
|
||||
///
|
||||
/// Unlike [`intern`](Interner::intern),
|
||||
/// this will _not_ intern the string if it has not already been
|
||||
/// interned.
|
||||
fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>>;
|
||||
|
||||
/// Determine whether the given value has already been interned.
|
||||
fn contains(&self, value: &str) -> bool;
|
||||
|
||||
/// Number of interned strings.
|
||||
///
|
||||
/// This count will increase each time a unique string is interned.
|
||||
/// It does not increase when a string is already interned.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Intern an assumed-UTF8 slice of bytes or return an existing
|
||||
/// [`Symbol`].
|
||||
///
|
||||
/// Safety
|
||||
/// ======
|
||||
/// This function is unsafe because it uses
|
||||
/// [`std::str::from_utf8_unchecked`].
|
||||
/// It is provided for convenience when interning from trusted binary
|
||||
/// data
|
||||
/// (such as [object files][]).
|
||||
///
|
||||
/// [object files]: crate::obj
|
||||
unsafe fn intern_utf8_unchecked(&'i self, value: &[u8]) -> &'i Symbol<'i> {
|
||||
self.intern(std::str::from_utf8_unchecked(value))
|
||||
}
|
||||
}
|
||||
|
||||
/// An interner backed by an [arena](bumpalo).
|
||||
///
|
||||
/// Since interns exist until the interner itself is freed,
|
||||
/// an arena is a much more efficient and appropriate memory allocation
|
||||
/// strategy.
|
||||
/// This further provides a stable location in memory for symbol data.
|
||||
///
|
||||
/// For the recommended configuration,
|
||||
/// see [`DefaultInterner`].
|
||||
///
|
||||
/// See the [module-level documentation](self) for examples and more
|
||||
/// information on how to use this interner.
|
||||
pub struct ArenaInterner<'i, S>
|
||||
where
|
||||
S: BuildHasher + Default,
|
||||
{
|
||||
/// String and [`Symbol`] storage.
|
||||
arena: Bump,
|
||||
|
||||
/// Next available symbol index.
|
||||
///
|
||||
/// This must always be ≥1.
|
||||
/// It is not defined as `NonZeroProgSymSize` because
|
||||
/// `intern` enforces the invariant.
|
||||
next_index: Cell<global::ProgSymSize>,
|
||||
|
||||
/// Map of interned strings to their respective [`Symbol`].
|
||||
///
|
||||
/// Both strings and symbols are allocated within `arena`.
|
||||
map: RefCell<HashMap<&'i str, &'i Symbol<'i>, S>>,
|
||||
}
|
||||
|
||||
impl<'i, S> ArenaInterner<'i, S>
|
||||
where
|
||||
S: BuildHasher + Default,
|
||||
{
|
||||
/// Initialize a new interner with no initial capacity.
|
||||
///
|
||||
/// Prefer [`with_capacity`](ArenaInterner::with_capacity) when possible.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self::with_capacity(0)
|
||||
}
|
||||
|
||||
/// Initialize a new interner with an initial capacity for the
|
||||
/// underlying [`HashMap`].
|
||||
///
|
||||
/// The given `capacity` has no affect on arena allocation.
|
||||
/// Specifying initial capacity is important only for the map of strings
|
||||
/// to symbols because it will reallocate and re-hash its contents
|
||||
/// once capacity is exceeded.
|
||||
/// See benchmarks.
|
||||
///
|
||||
/// If reallocation is a major concern,
|
||||
/// a [consistent hashing algorithm][consistent] could be considered,
|
||||
/// but the implementation will still incur the cost of copying
|
||||
/// the [`HashMap`]'s contents to a new location in memory.
|
||||
///
|
||||
/// [consistent]: https://en.wikipedia.org/wiki/Consistent_hashing
|
||||
#[inline]
|
||||
pub fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
arena: Bump::new(),
|
||||
next_index: Cell::new(1),
|
||||
map: RefCell::new(HashMap::with_capacity_and_hasher(
|
||||
capacity,
|
||||
Default::default(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'i, S> Interner<'i> for ArenaInterner<'i, S>
|
||||
where
|
||||
S: BuildHasher + Default,
|
||||
{
|
||||
fn intern(&'i self, value: &str) -> &'i Symbol<'i> {
|
||||
let mut map = self.map.borrow_mut();
|
||||
|
||||
if let Some(sym) = map.get(value) {
|
||||
return sym;
|
||||
}
|
||||
|
||||
let next_index = self.next_index.get();
|
||||
|
||||
// Next_index should always be initialized to at least 1.
|
||||
debug_assert!(next_index != 0);
|
||||
let id = unsafe { SymbolIndex::from_u32_unchecked(next_index) };
|
||||
|
||||
// Copy string slice into the arena.
|
||||
let clone: &'i str = unsafe {
|
||||
&*(std::str::from_utf8_unchecked(
|
||||
self.arena.alloc_slice_clone(value.as_bytes()),
|
||||
) as *const str)
|
||||
};
|
||||
|
||||
// Symbols are also stored within the arena, adjacent to the
|
||||
// string. This ensures that both have stable locations in memory.
|
||||
let sym: &'i Symbol<'i> = self.arena.alloc(Symbol::new(id, &clone));
|
||||
|
||||
map.insert(clone, sym);
|
||||
self.next_index.set(next_index + 1);
|
||||
|
||||
sym
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn intern_soft(&'i self, value: &str) -> Option<&'i Symbol<'i>> {
|
||||
self.map.borrow().get(value).map(|sym| *sym)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn contains(&self, value: &str) -> bool {
|
||||
self.map.borrow().contains_key(value)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn len(&self) -> usize {
|
||||
self.map.borrow().len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Interner using the [Fx Hash][fxhash] hashing function.
|
||||
///
|
||||
/// _This is currently the hash function used by [`DefaultInterner`]._
|
||||
///
|
||||
/// If denial of service is not a concern,
|
||||
/// then this will outperform the default
|
||||
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
|
||||
/// (which uses SipHash at the time of writing).
|
||||
///
|
||||
/// See intern benchmarks for a comparison.
|
||||
pub type FxArenaInterner<'i> = ArenaInterner<'i, FxBuildHasher>;
|
||||
|
||||
/// Recommended [`Interner`] and configuration.
|
||||
///
|
||||
/// The choice of this default relies on the assumption that
|
||||
/// denial-of-service attacks against the hash function are not a
|
||||
/// concern.
|
||||
///
|
||||
/// For more information on the hashing algorithm,
|
||||
/// see [`FxArenaInterner`].
|
||||
pub type DefaultInterner<'i> = FxArenaInterner<'i>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
mod symbol {
|
||||
use super::*;
|
||||
|
||||
/// Option<Symbol> should have no space cost.
|
||||
#[test]
|
||||
fn symbol_index_option_no_cost() {
|
||||
use std::mem::size_of;
|
||||
|
||||
assert_eq!(
|
||||
size_of::<Option<Symbol>>(),
|
||||
size_of::<Symbol>(),
|
||||
"Option<Symbol> should be the same size as Symbol"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn self_compares_eq() {
|
||||
let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
|
||||
|
||||
assert_eq!(&sym, &sym);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn copy_compares_equal() {
|
||||
let sym = Symbol::new(SymbolIndex::from_u32(1), "str");
|
||||
let cpy = sym;
|
||||
|
||||
assert_eq!(sym, cpy);
|
||||
}
|
||||
|
||||
// Integer values are for convenience, not identity. They cannot be
|
||||
// used as a unique identifier across different interners.
|
||||
#[test]
|
||||
fn same_index_different_slices_compare_unequal() {
|
||||
let a = Symbol::new(SymbolIndex::from_u32(1), "a");
|
||||
let b = Symbol::new(SymbolIndex::from_u32(1), "b");
|
||||
|
||||
assert_ne!(a, b);
|
||||
}
|
||||
|
||||
// As mentioned above, ids are _not_ the identity of the symbol. If
|
||||
// two values point to the same location in memory, they are assumed
|
||||
// to have come from the same interner, and should therefore have
|
||||
// the same index this should never happen unless symbols are
|
||||
// being created without the use of interners, which is unsupported.
|
||||
//
|
||||
// This test is a cautionary tale.
|
||||
#[test]
|
||||
fn different_index_same_slices_compare_equal() {
|
||||
let slice = "str";
|
||||
|
||||
let a = Symbol::new(SymbolIndex::from_u32(1), slice);
|
||||
let b = Symbol::new(SymbolIndex::from_u32(2), slice);
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cloned_symbols_compare_equal() {
|
||||
let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
|
||||
|
||||
assert_eq!(sym, sym.clone());
|
||||
}
|
||||
|
||||
// &Symbol can be used where string slices are expected (this won't
|
||||
// compile otherwise).
|
||||
#[test]
|
||||
fn ref_can_be_used_as_string_slice() {
|
||||
let slice = "str";
|
||||
let sym_slice: &str = &Symbol::new(SymbolIndex::from_u32(1), slice);
|
||||
|
||||
assert_eq!(slice, sym_slice);
|
||||
}
|
||||
|
||||
// For use when we can guarantee proper ids.
|
||||
#[test]
|
||||
fn can_create_index_unchecked() {
|
||||
assert_eq!(SymbolIndex::from_u32(1), unsafe {
|
||||
SymbolIndex::from_u32_unchecked(1)
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_retrieve_symbol_index() {
|
||||
let index = SymbolIndex::from_u32(1);
|
||||
|
||||
assert_eq!(index, Symbol::new(index, "").index());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn displays_as_interned_value() {
|
||||
let sym = Symbol::new(SymbolIndex::from_u32(1), "foo");
|
||||
|
||||
assert_eq!(format!("{}", sym), sym.str);
|
||||
}
|
||||
}
|
||||
|
||||
mod interner {
|
||||
use super::*;
|
||||
|
||||
type Sut<'i> = DefaultInterner<'i>;
|
||||
|
||||
#[test]
|
||||
fn recognizes_equal_strings() {
|
||||
let a = "foo";
|
||||
let b = a.to_string();
|
||||
let c = "bar";
|
||||
let d = c.to_string();
|
||||
|
||||
let sut = Sut::new();
|
||||
|
||||
let (ia, ib, ic, id) =
|
||||
(sut.intern(a), sut.intern(&b), sut.intern(c), sut.intern(&d));
|
||||
|
||||
assert_eq!(ia, ib);
|
||||
assert_eq!(&ia, &ib);
|
||||
assert_eq!(*ia, *ib);
|
||||
|
||||
assert_eq!(ic, id);
|
||||
assert_eq!(&ic, &id);
|
||||
assert_eq!(*ic, *id);
|
||||
|
||||
assert_ne!(ia, ic);
|
||||
assert_ne!(&ia, &ic);
|
||||
assert_ne!(*ia, *ic);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn symbol_id_increases_with_each_new_intern() {
|
||||
let sut = Sut::new();
|
||||
|
||||
// Remember that identifiers begin at 1
|
||||
assert_eq!(
|
||||
SymbolIndex::from_u32(1),
|
||||
sut.intern("foo").index(),
|
||||
"First index should be 1"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
SymbolIndex::from_u32(1),
|
||||
sut.intern("foo").index(),
|
||||
"Index should not increment for already-interned symbols"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
SymbolIndex::from_u32(2),
|
||||
sut.intern("bar").index(),
|
||||
"Index should increment for new symbols"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn length_increases_with_each_new_intern() {
|
||||
let sut = Sut::new();
|
||||
|
||||
assert_eq!(0, sut.len(), "invalid empty len");
|
||||
|
||||
sut.intern("foo");
|
||||
assert_eq!(1, sut.len(), "increment len");
|
||||
|
||||
// duplicate
|
||||
sut.intern("foo");
|
||||
assert_eq!(1, sut.len(), "do not increment len on duplicates");
|
||||
|
||||
sut.intern("bar");
|
||||
assert_eq!(2, sut.len(), "increment len (2)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_check_wither_string_is_interned() {
|
||||
let sut = Sut::new();
|
||||
|
||||
assert!(!sut.contains("foo"), "recognize missing value");
|
||||
sut.intern("foo");
|
||||
assert!(sut.contains("foo"), "recognize interned value");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn intern_soft() {
|
||||
let sut = Sut::new();
|
||||
|
||||
assert_eq!(None, sut.intern_soft("foo"));
|
||||
|
||||
let foo = sut.intern("foo");
|
||||
assert_eq!(Some(foo), sut.intern_soft("foo"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_with_capacity() {
|
||||
let n = 512;
|
||||
let sut = Sut::with_capacity(n);
|
||||
|
||||
// note that this is not publicly available
|
||||
assert!(sut.map.borrow().capacity() >= n);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn intern_utf8_unchecked() {
|
||||
let sut = Sut::new();
|
||||
|
||||
let a = sut.intern("foo");
|
||||
let b = unsafe { sut.intern_utf8_unchecked(b"foo") };
|
||||
|
||||
assert_eq!(a, b);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
// Mocks, stubs, and other stuff for testing
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
pub mod quick_xml;
|
|
@ -0,0 +1,125 @@
|
|||
// quick_xml mocks
|
||||
//
|
||||
// Copyright (C) 2014-2019 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use quick_xml::Result as XmlResult;
|
||||
use std::borrow::Cow;
|
||||
use std::cell::Cell;
|
||||
|
||||
pub enum MockXmlEvent<'a> {
|
||||
Start(MockBytesStart<'a>),
|
||||
End(MockBytesEnd<'a>),
|
||||
Empty(MockBytesStart<'a>),
|
||||
#[allow(dead_code)]
|
||||
Text(MockBytesText<'a>),
|
||||
}
|
||||
|
||||
pub struct MockBytesStart<'a> {
|
||||
name: &'a [u8],
|
||||
attrs: Cell<Option<MockAttributes<'a>>>,
|
||||
}
|
||||
|
||||
impl<'a> MockBytesStart<'a> {
|
||||
pub fn new(name: &'a [u8], attrs: Option<MockAttributes<'a>>) -> Self {
|
||||
Self {
|
||||
name,
|
||||
attrs: Cell::new(attrs),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &[u8] {
|
||||
self.name
|
||||
}
|
||||
|
||||
pub fn attributes(&self) -> MockAttributes {
|
||||
self.attrs.take().expect("missing mock attributes")
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MockBytesEnd<'a> {
|
||||
name: Cow<'a, [u8]>,
|
||||
}
|
||||
|
||||
impl<'a> MockBytesEnd<'a> {
|
||||
pub fn new(name: &'a [u8]) -> Self {
|
||||
Self {
|
||||
name: Cow::Borrowed(name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &[u8] {
|
||||
&*self.name
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MockBytesText<'a> {
|
||||
#[allow(dead_code)]
|
||||
content: Cow<'a, [u8]>,
|
||||
}
|
||||
|
||||
impl<'a> MockBytesText<'a> {
|
||||
pub fn new(content: &'a [u8]) -> Self {
|
||||
Self {
|
||||
content: Cow::Borrowed(content),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MockAttributes<'a> {
|
||||
attrs: Vec<MockAttribute<'a>>,
|
||||
with_checks: Option<bool>,
|
||||
}
|
||||
|
||||
impl<'a> MockAttributes<'a> {
|
||||
pub fn new(attrs: Vec<MockAttribute<'a>>) -> Self {
|
||||
Self {
|
||||
attrs,
|
||||
with_checks: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_checks(&mut self, val: bool) -> &mut Self {
|
||||
self.with_checks = Some(val);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for MockAttributes<'a> {
|
||||
type Item = XmlResult<MockAttribute<'a>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// We read output from Saxon, which will always be valid
|
||||
if self.with_checks != Some(false) {
|
||||
panic!("MockAttributes expected with_checks false")
|
||||
}
|
||||
|
||||
self.attrs.pop().map(|attr| Ok(attr))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MockAttribute<'a> {
|
||||
pub key: &'a [u8],
|
||||
pub value: Cow<'a, [u8]>,
|
||||
}
|
||||
|
||||
impl<'a> MockAttribute<'a> {
|
||||
pub fn new(key: &'a [u8], value: &'a [u8]) -> Self {
|
||||
Self {
|
||||
key,
|
||||
value: Cow::Borrowed(&value[..]),
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue