tamer: benches/sym.rs: Add additional UTF-8-related tests
The intent of this is to demonstrate how significant an impact checking byte arrays for UTF-8 validity will have, since the existing tests do not make that clear (a static string in Rust is always valid UTF-8). These benchmarks show that the cost when re-interning an already existing value is +50%. This is important, because the new reader will be interning a _lot_ of duplicate strings, whereas the existing reader operates on byte arrays without interning unless necessary. And, when it does, it does so unchecked. But we'd rather not do that, since we cannot guarantee that those XML files are valid (and not modified in some way). Upcoming commits will have what I think is a reasonable compromise to this, based on the fact that we'll be encountering _many_ duplicate strings in parsing XML files.

DEV-10920
main
parent
2715f3e845
commit
63e5a0d441
|
@ -164,6 +164,34 @@ mod interner {
|
|||
});
|
||||
}
|
||||
|
||||
// Benchmark: intern every generated string into a fresh interner, starting
// from raw bytes rather than from `&str`.
// Unlike the above, which already has a UTF-8 string, this actually
|
||||
// performs a conversion and check from `&[u8]`.
// The check is `std::str::from_utf8`, run once per element before `intern`.
|
||||
#[bench]
|
||||
fn with_all_new_1000_utf8_checked(bench: &mut Bencher) {
|
||||
// NOTE(review): presumably yields 1000 distinct strings — `gen_strs` is
// defined outside this hunk; confirm.
let strs = gen_strs(1000);
|
||||
// Borrow each string as a byte slice up front, so that this conversion
// happens outside of the closure passed to `bench.iter`.
let bs: Vec<&[u8]> = strs.iter().map(|s| s.as_bytes()).collect();
|
||||
|
||||
bench.iter(|| {
|
||||
// A new interner is constructed on every run of the closure.
let sut = ArenaInterner::<FxBuildHasher, u32>::new();
|
||||
bs.iter()
|
||||
// `from_utf8` validates the bytes; `unwrap` panics if they are invalid.
// NOTE(review): the bytes come from `as_bytes()` above, so this is
// presumably never expected to fail — confirm against `gen_strs`.
.map(|b| sut.intern(std::str::from_utf8(&b).unwrap()))
|
||||
// `map` is lazy: `for_each(drop)` drives the iterator and discards
// each value returned by `intern`.
.for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
// Benchmark: same setup as `with_all_new_1000_utf8_checked`, but each byte
// slice is handed to `intern_utf8_unchecked` instead of going through
// `std::str::from_utf8` first.
#[bench]
|
||||
fn with_all_new_1000_utf8_unchecked(bench: &mut Bencher) {
|
||||
// NOTE(review): presumably yields 1000 distinct strings — `gen_strs` is
// defined outside this hunk; confirm.
let strs = gen_strs(1000);
|
||||
// Borrow each string as a byte slice up front, so that this conversion
// happens outside of the closure passed to `bench.iter`.
let bs: Vec<&[u8]> = strs.iter().map(|s| s.as_bytes()).collect();
|
||||
|
||||
bench.iter(|| {
|
||||
// A new interner is constructed on every run of the closure.
let sut = ArenaInterner::<FxBuildHasher, u32>::new();
|
||||
bs.iter()
|
||||
// SAFETY: NOTE(review) — every slice in `bs` was obtained via
// `as_bytes()` on the generated strings, so the bytes are presumably
// valid UTF-8; assumes that is the only requirement of
// `intern_utf8_unchecked` — confirm against its documentation.
.map(|b| unsafe { sut.intern_utf8_unchecked(&b) })
|
||||
// `map` is lazy: `for_each(drop)` drives the iterator and discards
// each value returned by the interner.
.for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_all_new_uninterned_1000(bench: &mut Bencher) {
|
||||
let strs = gen_strs(1000);
|
||||
|
@ -193,6 +221,18 @@ mod interner {
|
|||
});
|
||||
}
|
||||
|
||||
// Benchmark: intern the same byte literal 1000 times into one interner.
// Unlike the above, which already has a UTF-8 string, this actually
|
||||
// performs a conversion and check from `&[u8]`.
// The check is `std::str::from_utf8`, repeated on every one of the 1000
// iterations.
|
||||
#[bench]
|
||||
fn with_one_new_1000_utf8_checked(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
// A new interner is constructed on every run of the closure.
let sut = ArenaInterner::<FxBuildHasher, u32>::new();
|
||||
(0..1000)
|
||||
// `b"first"` is an ASCII literal, so `from_utf8` succeeds and `unwrap`
// does not panic. NOTE(review): presumably only the first call adds a
// new entry and the remaining calls re-intern it — confirm against
// `ArenaInterner::intern`.
.map(|_| sut.intern(std::str::from_utf8(b"first").unwrap()))
|
||||
// `map` is lazy: `for_each(drop)` drives the iterator and discards
// each value returned by `intern`.
.for_each(drop);
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn with_one_new_1000_utf8_unchecked(bench: &mut Bencher) {
|
||||
bench.iter(|| {
|
||||
|
|
Loading…
Reference in New Issue