2019-12-06 15:03:29 -05:00
|
|
|
// String internment benchmarks and baselines
|
|
|
|
//
|
2021-07-22 15:00:15 -04:00
|
|
|
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
2020-03-06 11:05:18 -05:00
|
|
|
//
|
|
|
|
// This file is part of TAME.
|
2019-12-06 15:03:29 -05:00
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//
|
|
|
|
// Note that the baseline tests have a _suffix_ rather than a prefix so that
|
|
|
|
// they are still grouped with the associated test in the output, since it's
|
|
|
|
// sorted lexically by function name.
|
|
|
|
|
|
|
|
#![feature(test)]
|
|
|
|
|
|
|
|
extern crate tamer;
|
|
|
|
extern crate test;
|
|
|
|
|
|
|
|
use std::rc::Rc;
|
|
|
|
use tamer::sym::*;
|
|
|
|
use test::Bencher;
|
|
|
|
|
2019-12-09 23:13:17 -05:00
|
|
|
/// Generate `n` distinct strings for use as benchmark input.
///
/// Each string is its zero-based position rendered in decimal followed by
/// a fixed suffix, so every element is unique and reasonably long.
fn gen_strs(n: usize) -> Vec<String> {
    let mut strs = Vec::with_capacity(n);

    for i in 0..n {
        strs.push(format!("{}foobarbazquuxlongsymbol", i));
    }

    strs
}
|
|
|
|
|
2019-12-23 23:26:42 -05:00
|
|
|
mod interner {
|
2019-12-06 15:03:29 -05:00
|
|
|
use super::*;
|
|
|
|
use std::collections::hash_map::RandomState;
|
|
|
|
use std::collections::HashSet;
|
|
|
|
use std::hash::BuildHasher;
|
|
|
|
|
|
|
|
/// Naive string interner backed by a `HashSet` of `Rc<str>`.
///
/// This serves as the baseline that the benchmarks in this module are
/// compared against.
pub struct HashSetSut<S = RandomState>
where
    S: BuildHasher,
{
    /// Set of every string interned so far.
    pub map: HashSet<Rc<str>, S>,
}

impl<S> HashSetSut<S>
where
    S: BuildHasher + Default,
{
    /// Create an empty interner using the default value of the hasher `S`.
    #[inline]
    fn new() -> Self {
        Self {
            map: HashSet::with_hasher(Default::default()),
        }
    }

    /// Intern `value`, returning a shared handle to the stored string.
    ///
    /// If an equal string has already been interned then a clone of the
    /// existing `Rc` is returned; otherwise `value` is copied into a new
    /// `Rc<str>`, stored in the set, and returned.
    pub fn intern(&mut self, value: &str) -> Rc<str> {
        // Hit: a single lookup suffices.
        if let Some(existing) = self.map.get(value) {
            return Rc::clone(existing);
        }

        // Miss: keep our own handle so that the set does not have to be
        // queried again after insertion.
        let interned: Rc<str> = Rc::from(value);
        self.map.insert(Rc::clone(&interned));

        interned
    }
}
|
|
|
|
|
|
|
|
/// This is our baseline with a raw Rc<str>.
|
|
|
|
#[bench]
|
|
|
|
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
|
|
|
let strs = gen_strs(1000);
|
|
|
|
|
|
|
|
bench.iter(|| {
|
|
|
|
let mut sut = HashSetSut::<RandomState>::new();
|
|
|
|
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn with_all_new_1000(bench: &mut Bencher) {
|
|
|
|
let strs = gen_strs(1000);
|
|
|
|
|
|
|
|
bench.iter(|| {
|
2019-12-23 23:26:42 -05:00
|
|
|
let sut = ArenaInterner::<RandomState>::new();
|
2019-12-06 15:03:29 -05:00
|
|
|
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
/// This is our baseline with a raw Rc<str>.
|
|
|
|
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
|
|
|
bench.iter(|| {
|
|
|
|
let mut sut = HashSetSut::<RandomState>::new();
|
|
|
|
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn with_one_new_1000(bench: &mut Bencher) {
|
|
|
|
bench.iter(|| {
|
2019-12-23 23:26:42 -05:00
|
|
|
let sut = ArenaInterner::<RandomState>::new();
|
2019-12-06 15:03:29 -05:00
|
|
|
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-04-29 00:48:07 -04:00
|
|
|
#[bench]
|
|
|
|
fn index_lookup_unique_1000(bench: &mut Bencher) {
|
|
|
|
let sut = ArenaInterner::<RandomState>::new();
|
|
|
|
let strs = gen_strs(1000);
|
|
|
|
|
|
|
|
let syms = strs
|
|
|
|
.iter()
|
|
|
|
.map(|s| sut.intern(s).index())
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
|
|
bench.iter(|| {
|
|
|
|
syms.iter().map(|si| sut.index_lookup(*si)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also
moved to this hash function and noticed performance
improvements. Fortunately, as was accounted for in the design, this was a
trivial switch.
Here are some benchmarks to back up that claim:
test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
2019-12-10 15:32:25 -05:00
|
|
|
mod fx {
|
2019-12-06 15:03:29 -05:00
|
|
|
use super::*;
|
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also
moved to this hash function and noticed performance
improvements. Fortunately, as was accounted for in the design, this was a
trivial switch.
Here are some benchmarks to back up that claim:
test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
2019-12-10 15:32:25 -05:00
|
|
|
use fxhash::FxBuildHasher;
|
2019-12-06 15:03:29 -05:00
|
|
|
|
|
|
|
/// This is our baseline with a raw Rc<str>.
|
|
|
|
#[bench]
|
|
|
|
fn with_all_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
|
|
|
let strs = gen_strs(1000);
|
|
|
|
bench.iter(|| {
|
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also
moved to this hash function and noticed performance
improvements. Fortunately, as was accounted for in the design, this was a
trivial switch.
Here are some benchmarks to back up that claim:
test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
2019-12-10 15:32:25 -05:00
|
|
|
let mut sut = HashSetSut::<FxBuildHasher>::new();
|
2019-12-06 15:03:29 -05:00
|
|
|
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn with_all_new_1000(bench: &mut Bencher) {
|
|
|
|
let strs = gen_strs(1000);
|
|
|
|
|
|
|
|
bench.iter(|| {
|
2019-12-23 23:26:42 -05:00
|
|
|
let sut = ArenaInterner::<FxBuildHasher>::new();
|
2019-12-06 15:03:29 -05:00
|
|
|
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
/// This is our baseline with a raw Rc<str>.
|
|
|
|
fn with_one_new_rc_str_1000_baseline(bench: &mut Bencher) {
|
|
|
|
bench.iter(|| {
|
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also
moved to this hash function and noticed performance
improvements. Fortunately, as was accounted for in the design, this was a
trivial switch.
Here are some benchmarks to back up that claim:
test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
2019-12-10 15:32:25 -05:00
|
|
|
let mut sut: HashSetSut<FxBuildHasher> = HashSetSut {
|
2019-12-06 15:03:29 -05:00
|
|
|
map: HashSet::with_hasher(Default::default()),
|
|
|
|
};
|
|
|
|
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn with_one_new_1000(bench: &mut Bencher) {
|
|
|
|
bench.iter(|| {
|
2019-12-23 23:26:42 -05:00
|
|
|
let sut = ArenaInterner::<FxBuildHasher>::new();
|
2019-12-06 15:03:29 -05:00
|
|
|
(0..1000).map(|_| sut.intern("first")).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-01-24 11:09:24 -05:00
|
|
|
#[bench]
|
|
|
|
fn with_one_new_1000_utf8_unchecked(bench: &mut Bencher) {
|
|
|
|
bench.iter(|| {
|
|
|
|
let sut = ArenaInterner::<FxBuildHasher>::new();
|
|
|
|
(0..1000)
|
|
|
|
.map(|_| unsafe { sut.intern_utf8_unchecked(b"first") })
|
|
|
|
.for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
tamer::sym: FNV => Fx Hash
For strings of any notable length, Fx Hash outperforms FNV. Rustc also
moved to this hash function and noticed performance
improvements. Fortunately, as was accounted for in the design, this was a
trivial switch.
Here are some benchmarks to back up that claim:
test hash_set::fnv::with_all_new_1000 ... bench: 133,096 ns/iter (+/- 1,430)
test hash_set::fnv::with_all_new_1000_with_capacity ... bench: 82,591 ns/iter (+/- 592)
test hash_set::fnv::with_all_new_rc_str_1000_baseline ... bench: 162,073 ns/iter (+/- 1,277)
test hash_set::fnv::with_one_new_1000 ... bench: 37,334 ns/iter (+/- 256)
test hash_set::fnv::with_one_new_rc_str_1000_baseline ... bench: 18,263 ns/iter (+/- 261)
test hash_set::fx::with_all_new_1000 ... bench: 85,217 ns/iter (+/- 1,111)
test hash_set::fx::with_all_new_1000_with_capacity ... bench: 59,383 ns/iter (+/- 752)
test hash_set::fx::with_all_new_rc_str_1000_baseline ... bench: 98,802 ns/iter (+/- 1,117)
test hash_set::fx::with_one_new_1000 ... bench: 42,484 ns/iter (+/- 1,239)
test hash_set::fx::with_one_new_rc_str_1000_baseline ... bench: 15,000 ns/iter (+/- 233)
test hash_set::with_all_new_1000 ... bench: 137,645 ns/iter (+/- 1,186)
test hash_set::with_all_new_rc_str_1000_baseline ... bench: 163,129 ns/iter (+/- 1,725)
test hash_set::with_one_new_1000 ... bench: 59,051 ns/iter (+/- 1,202)
test hash_set::with_one_new_rc_str_1000_baseline ... bench: 37,986 ns/iter (+/- 771)
2019-12-10 15:32:25 -05:00
|
|
|
/// Since Fx is the best-performing, let's build upon it to demonstrate
|
2019-12-06 15:03:29 -05:00
|
|
|
/// the benefits of with_capacity
|
|
|
|
#[bench]
|
|
|
|
fn with_all_new_1000_with_capacity(bench: &mut Bencher) {
|
|
|
|
let n = 1000;
|
|
|
|
let strs = gen_strs(n);
|
|
|
|
|
|
|
|
bench.iter(|| {
|
2019-12-23 23:26:42 -05:00
|
|
|
let sut = ArenaInterner::<FxBuildHasher>::with_capacity(n);
|
2019-12-06 15:03:29 -05:00
|
|
|
strs.iter().map(|s| sut.intern(&s)).for_each(drop);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|