tamer::sym: FNV => Fx Hash

For strings of any notable length, Fx Hash outperforms FNV.  Rustc also
moved to this hash function and noticed performance
improvements.  Fortunately, as was accounted for in the design, this was a
trivial switch.

Here are some benchmarks to back up that claim:

test hash_set::fnv::with_all_new_1000                 ... bench:     133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity   ... bench:      82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench:     162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000                 ... bench:      37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench:      18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000                  ... bench:      85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity    ... bench:      59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline  ... bench:      98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000                  ... bench:      42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline  ... bench:      15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000                      ... bench:     137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline      ... bench:     163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000                      ... bench:      59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline      ... bench:      37,986 ns/iter (+/- 771)
master
Mike Gerwitz 2019-12-10 15:32:25 -05:00
parent 0d2bb5de59
commit 176d099fb6
4 changed files with 86 additions and 59 deletions

17
tamer/Cargo.lock generated
View File

@ -1,14 +1,22 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fixedbitset"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fnv"
version = "1.0.6"
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
@ -42,14 +50,15 @@ name = "tamer"
version = "0.0.0"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-xml 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "86d4de0081402f5e88cdac65c8dcdcc73118c1a7a465e2a05f0da05843a8ea33"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"

View File

@ -23,7 +23,7 @@ lto = true
lto = true
[dependencies]
fnv = ">= 1.0.3"
fxhash = ">= 0.2.1"
petgraph = ">= 0.4.13"
quick-xml = ">= 0.17.0"
# used by petgraph

View File

@ -159,16 +159,16 @@ mod hash_set {
});
}
mod fnv {
mod fx {
use super::*;
use ::fnv::FnvBuildHasher;
use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
let mut sut = HashSetSut::<FnvBuildHasher>::new();
let mut sut = HashSetSut::<FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@ -178,8 +178,7 @@ mod hash_set {
let strs = gen_strs(1000);
bench.iter(|| {
let mut sut =
HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@ -188,7 +187,7 @@ mod hash_set {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
let mut sut: HashSetSut<FnvBuildHasher> = HashSetSut {
let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
map: HashSet::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@ -198,13 +197,12 @@ mod hash_set {
#[bench]
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let mut sut =
HashSetInterner::<SymbolRc, FnvBuildHasher>::new();
let mut sut = HashSetInterner::<SymbolRc, FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
/// Since FNV is the best-performing, let's build upon it to demonstrate
/// Since Fx is the best-performing, let's build upon it to demonstrate
/// the benefits of with_capacity
#[bench]
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
@ -213,7 +211,7 @@ mod hash_set {
bench.iter(|| {
let mut sut =
HashSetInterner::<SymbolRc, FnvBuildHasher>::with_capacity(
HashSetInterner::<SymbolRc, FxBuildHasher>::with_capacity(
n,
);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
@ -298,16 +296,16 @@ mod hash_map {
});
}
mod fnv {
mod fx {
use super::*;
use ::fnv::FnvBuildHasher;
use fxhash::FxBuildHasher;
/// This is our baseline with a raw Rc<str>.
#[bench]
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
let strs = gen_strs(1000);
bench.iter(|| {
let mut sut = HashMapSut::<(), FnvBuildHasher>::new();
let mut sut = HashMapSut::<(), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@ -318,7 +316,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@ -327,7 +325,7 @@ mod hash_map {
/// This is our baseline with a raw Rc<str>.
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
bench.iter(|| {
let mut sut: HashMapSut<(), FnvBuildHasher> = HashMapSut {
let mut sut: HashMapSut<(), FxBuildHasher> = HashMapSut {
map: HashMap::with_hasher(Default::default()),
};
(0..1000).map(|_| sut.intern("first")).for_each(drop);
@ -338,7 +336,7 @@ mod hash_map {
fn with_one_new_1000(bench: &mut Bencher) {
bench.iter(|| {
let mut sut =
HashMapInterner::<SymbolRc, (), FnvBuildHasher>::new();
HashMapInterner::<SymbolRc, (), FxBuildHasher>::new();
(0..1000).map(|_| sut.intern("first")).for_each(drop);
});
}
@ -352,7 +350,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
HashMapInterner::<SymbolRc, (), FnvBuildHasher>::with_capacity(n);
HashMapInterner::<SymbolRc, (), FxBuildHasher>::with_capacity(n);
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
});
}
@ -363,7 +361,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter().map(|s| sut.intern_meta(&s, 0)).for_each(drop);
});
}
@ -374,7 +372,7 @@ mod hash_map {
bench.iter(|| {
let mut sut =
HashMapInterner::<SymbolRc, u8, FnvBuildHasher>::new();
HashMapInterner::<SymbolRc, u8, FxBuildHasher>::new();
strs.iter()
.map(|s| {
sut.intern(&s);

View File

@ -30,14 +30,14 @@
//! - [`HashSetInterner`] - Intern pool backed by a [`HashSet`].
//! - [`DefaultSetInterner`] - The currently recommended intern pool
//! configuration for symbol interning without metadata.
//! - [`FnvHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
//! Fowler-Noll-Vo hashing algorithm.
//! - [`FxHashSetInterner`] - Intern pool backed by a [`HashSet`] using the
//! [Fx Hash][fxhash] hashing algorithm.
//! - [`HashMapInterner`] - Intern pool backed by a [`HashMap`] to provide
//! associated metadata.
//! - [`DefaultMapInterner`] - The currently recommended intern pool
//! configuration for symbol interning with metadata.
//! - [`FnvHashMapInterner`] - Intern pool backed by a [`HashMap`] using
//! the Fowler-Noll-Vo hashing algorithm.
//! - [`FxHashMapInterner`] - Intern pool backed by a [`HashMap`] using
//! the [Fx Hash][fxhash] hashing algorithm.
//!
//! When basic interning is required,
//! use a set-based interner.
@ -297,12 +297,42 @@
//! for Rust developed by Mozilla for Servo.
//! - [`string-interner`][rust-string-interner] is another string
//! interning library for Rust.
//! - [Rustc interns strings as `Symbol`s][rustc-intern] using an
//! [arena allocator][rustc-arena] and avoids `Rc` by representing
//! symbols as integer values and converting them to strings using a
//! global pool and unsafe rust to cast to a `static` slice.
//! - This was the direction TAMER was initially going to take,
//! but the simplicity of `Rc` was chosen over unsafe rust until
//! such an optimization is needed and its benefits demonstrable.
//! - Rustc identifies symbols by integer value encapsulated within a
//! `Symbol`.
//! TAMER does this at the level of the ASG instead,
//! treating the interner as a separate system.
//!
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
//! [rust-string-cache]: https://github.com/servo/string-cache
//! [rust-string-interner]: https://github.com/robbepop/string-interner
//! [rustc-intern]: https://doc.rust-lang.org/nightly/nightly-rustc/syntax/ast/struct.Name.html
//! [rustc-arena]: https://doc.rust-lang.org/nightly/nightly-rustc/arena/index.html
//!
//! The hash function chosen for this module is [Fx Hash][fxhash].
//!
//! - Rustc previously used the [Fowler-Noll-Vo (FNV)][fnv] hash
//! function,
//! but [now uses Fx Hash][rustc-fx].
//! This was extracted into the [`fxhash`][fxhash] crate,
//! which is used by TAMER.
//! - TAMER originally used FNV,
//! but benchmarks showed that Fx Hash was more performant.
//! - Benchmarks for other hash functions,
//! including FNV,
//! can be found at the [`hash-rs`][hash-rs] project.
//!
//! [fnv]: https://doc.servo.org/fnv/
//! [rustc-fx]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_data_structures/fx/index.html
//! [hash-rs]: https://github.com/Gankra/hash-rs
use ::fnv::FnvBuildHasher;
use fxhash::FxBuildHasher;
use std::collections::hash_map::{Entry, RandomState};
use std::collections::{HashMap, HashSet};
use std::hash::{BuildHasher, Hash};
@ -617,32 +647,27 @@ where
}
}
/// Interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultSetInterner`]._
///
/// If the intern pool will contains strings primarily of length  60,
/// and if denial of service is not a concern,
/// then use of the FNV hash function will outperform the default
/// If denial of service is not a concern,
/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
/// See intern benchmarks for a comparison.
/// For a comparison of various hashing algorithms,
/// see the [`hash-rs`][hash-rs] project.
///
/// [hash-rs]: https://github.com/Gankra/hash-rs
pub type FnvHashSetInterner<T> = HashSetInterner<T, FnvBuildHasher>;
/// See intern benchmarks for a comparison.
pub type FxHashSetInterner<T> = HashSetInterner<T, FxBuildHasher>;
/// Recommended configuration for set-based interners.
///
/// The choice of this default relies on certain assumptions:
/// - Denial-of-service attacks against the hash function are not a
/// concern; and
/// - Strings to be interned are mostly ≤ 60 characters.
/// The choice of this default relies on the assumption that
/// denial-of-service attacks against the hash function are not a
/// concern.
///
/// For more information on the hashing algorithm,
/// see [`FnvHashSetInterner`].
pub type DefaultSetInterner = FnvHashSetInterner<SymbolRc>;
/// see [`FxHashSetInterner`].
pub type DefaultSetInterner = FxHashSetInterner<SymbolRc>;
/// An interner backed by a [`HashMap`].
///
@ -749,32 +774,27 @@ where
}
}
/// Map interner using the [Fowler-Noll-Vo][fnv] (FNV) hashing function.
/// Interner using the [Fx Hash][fxhash] hashing function.
///
/// _This is currently the hash function used by [`DefaultMapInterner`]._
///
/// If the intern pool will contains strings primarily of length  60,
/// and if denial of service is not a concern,
/// then use of the FNV hash function will outperform the default
/// If denial of service is not a concern,
/// then this will outperform the default
/// [`DefaultHasher`](std::collections::hash_map::DefaultHasher)
/// (which uses SipHash at the time of writing).
/// See intern benchmarks for a comparison.
/// For a comparison of various hashing algorithms,
/// see the [`hash-rs`][hash-rs] project.
///
/// [hash-rs]: https://github.com/Gankra/hash-rs
pub type FnvHashMapInterner<T, M> = HashMapInterner<T, M, FnvBuildHasher>;
/// See intern benchmarks for a comparison.
pub type FxHashMapInterner<T, M> = HashMapInterner<T, M, FxBuildHasher>;
/// Recommended configuration for map-based interners.
///
/// The choice of this default relies on certain assumptions:
/// - Denial-of-service attacks against the hash function are not a
/// concern; and
/// - Strings to be interned are mostly ≤ 60 characters.
/// The choice of this default relies on the assumption that
/// denial-of-service attacks against the hash function are not a
/// concern.
///
/// For more information on the hashing algorithm,
/// see [`FnvHashMapInterner`].
pub type DefaultMapInterner<M> = FnvHashMapInterner<SymbolRc, M>;
/// see [`FxHashMapInterner`].
pub type DefaultMapInterner<M> = FxHashMapInterner<SymbolRc, M>;
#[cfg(test)]
mod test {