tamer: sym::prefill: Introduce static symbols
This is the beginning of static symbols, which is becoming increasingly necessary, as it's quite a pain to have to deal with interning static strings any place they're used. It's _more_ of a pain to do that in conjunction with newtypes (e.g. `QName`, `AttValue`, etc.) that make use of `SymbolId`; this will allow us to construct _those_ statically as well, and additional work to support that will be coming up. DEV-10701 main
parent
e0a209d417
commit
366fef714b
|
@ -45,6 +45,9 @@ pub type NonZeroPkgSymSize = num::NonZeroU16;
|
|||
/// A size capable of representing every interned string in a program.
|
||||
pub type ProgSymSize = u32;
|
||||
|
||||
/// The initial capacity for global interners.
|
||||
pub const INIT_GLOBAL_INTERNER_CAPACITY: usize = 1024;
|
||||
|
||||
/// A non-zero equivalent of [`ProgSymSize`].
|
||||
pub type NonZeroProgSymSize = num::NonZeroU32;
|
||||
|
||||
|
|
|
@ -218,6 +218,23 @@
|
|||
//! if you utilize interners for any other purpose,
|
||||
//! it is advised that you create newtypes for their [`SymbolId`]s.
|
||||
//!
|
||||
//! Static Symbols
|
||||
//! --------------
|
||||
//! Since nearly every string in the system is represented by a symbol,
|
||||
//! comparing against static string slices would require awkward interning
|
||||
//! of a static string at each relevant point in the program.
|
||||
//! Instead,
|
||||
//! common static strings are pre-interned when the global interner is
|
||||
//! first initialized.
|
||||
//!
|
||||
//! These symbols are allocated statically,
|
||||
//! so they can be used in `const` expressions and include additional
|
||||
//! metadata allowing for safe type conversions in circumstances that
|
||||
//! aren't typically permitted.
|
||||
//! This further allows constructing symbol newtypes at compile-time.
|
||||
//!
|
||||
//! These symbol constants can be found in the [`st`] module.
|
||||
//!
|
||||
//! Uninterned Symbols
|
||||
//! ------------------
|
||||
//! Interners are able to allocate a [`SymbolId`] without interning,
|
||||
|
@ -308,8 +325,7 @@
|
|||
//! - Rustc's [`newtype_index!` macro][rustc-nt] uses
|
||||
//! [`NonZeroU32`] so that [`Option`] uses no
|
||||
//! additional space (see [pull request `53315`][rustc-nt-pr]).
|
||||
//! - Differences between TAMER and Rustc's implementations are outlined
|
||||
//! above.
|
||||
//! - Rustc also [prefills interners][rustc-intern] with common symbols.
|
||||
//!
|
||||
//! [flyweight pattern]: https://en.wikipedia.org/wiki/Flyweight_pattern
|
||||
//! [rust-string-cache]: https://github.com/servo/string-cache
|
||||
|
@ -338,8 +354,11 @@
|
|||
//! [hash-rs]: https://github.com/Gankra/hash-rs
|
||||
|
||||
mod interner;
|
||||
mod prefill;
|
||||
mod symbol;
|
||||
|
||||
pub use prefill::st;
|
||||
|
||||
pub use interner::{
|
||||
ArenaInterner, DefaultInterner, DefaultPkgInterner, DefaultProgInterner,
|
||||
FxArenaInterner, Interner,
|
||||
|
|
|
@ -0,0 +1,201 @@
|
|||
// Pre-interned strings
|
||||
//
|
||||
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//! Pre-interned strings.
|
||||
//!
|
||||
//! These strings are expected to be encountered nearly every run,
|
||||
//! and substitute static strings that would otherwise appear hard-coded
|
||||
//! in the system and have to be interned to be compared against other
|
||||
//! values.
|
||||
//!
|
||||
//! See the [parent module](super) for more information.
|
||||
|
||||
use super::{Interner, SymbolId, SymbolIndexSize};
|
||||
use crate::global;
|
||||
use std::array;
|
||||
|
||||
/// Non-zero integer type wrapped by `SymbolId<global::ProgSymSize>`,
/// resolved through the `SymbolIndexSize::NonZero` associated type.
type NonZero = <global::ProgSymSize as SymbolIndexSize>::NonZero;
|
||||
|
||||
/// Emit one [`SymbolId`] constant of size [`global::ProgSymSize`] for each
/// preinterned string.
///
/// The identifiers are computed statically rather than being handed out by
/// the global internment system.
/// They reference valid values once the global interner has been
/// initialized (see [parent module](`super`)), which happens on first
/// access.
///
/// Input takes the form `@i <index>; Name: "string", ...`:
/// the first entry is assigned `<index>`,
/// and every following entry is assigned the previous index plus one.
macro_rules! static_symbol_consts {
    // Base case: no entries remain, so there is nothing left to emit.
    (@i $idx:expr;) => {};

    // Emit the constant for the head entry and then recurse on the tail.
    (@i $idx:expr; $head:ident: $head_str:expr, $($rest:ident: $rest_str:expr,)*) => {
        #[doc=concat!("Interned string `\"", $head_str, "\"`.")]
        #[allow(non_upper_case_globals)]
        pub const $head: SymbolId<global::ProgSymSize> = SymbolId(
            // SAFETY: The index must be non-zero.
            // The invocation in this file seeds it with 1 and each
            // recursion step only adds one to it.
            unsafe { NonZero::new_unchecked($idx) }
        );

        static_symbol_consts! {
            // The index expression grows as 1 + 1 + 1 + ... with each
            // step, so a sufficiently long symbol list will eventually hit
            // the macro recursion limit; a different approach will be
            // needed if that ever happens.
            @i $idx + 1;

            $($rest: $rest_str,)*
        }
    };
}
|
||||
|
||||
/// Statically allocate [`SymbolId`]s for the provided symbols,
/// and schedule their static strings to be interned upon initialization
/// of the global interner.
///
/// Input is a comma-separated list of `Name: "string"` pairs;
/// a trailing comma is permitted.
///
/// This generates the `st` module holding one constant per pair,
/// along with [`fill`],
/// which the global interners call by default.
/// Any interner may optionally invoke [`fill`] immediately after
/// initialization,
/// but it must do so _before_ any internment requests.
macro_rules! static_symbols {
    // `$(,)?` tolerates a trailing comma so that entries can be appended
    // to the list without having to touch the preceding line.
    ($($name:ident : $str:expr),* $(,)?) => {
        /// Static symbols (pre-allocated).
        ///
        /// Each of the constants in this module represents a [`SymbolId`]
        /// statically allocated at compile-time.
        /// The strings that they represent are automatically populated into
        /// the global interners when the interner is first accessed.
        ///
        /// See [`crate::sym`] for more information on static symbols.
        ///
        /// `static` is a keyword in Rust,
        /// so we shorten the module name to `st`.
        pub mod st {
            use super::*;

            static_symbol_consts! {
                // Index 0 is not valid, so begin at 1
                @i 1;

                $(
                    $name: $str,
                )*
            }
        }

        /// Fill a new interner with static symbols.
        ///
        /// The strings are interned in the order in which they were
        /// declared, and the interner is then handed back to the caller.
        /// NOTE(review): alignment with the generated constants presumably
        /// relies on the interner assigning sequential identifiers
        /// beginning at 1; confirm against the `Interner` implementation.
        ///
        /// Panics
        /// ======
        /// This function will panic if the interner has any symbols,
        /// which would cause misalignment with the generated constants.
        pub(super) fn fill<'a, I, Ix>(interner: I) -> I
        where
            I: Interner<'a, Ix>,
            Ix: SymbolIndexSize
        {
            assert!(
                interner.len() == 0,
                "cannot fill non-empty Interner with static symbols"
            );

            // This array does not exist as a constant, because that would
            // require that we count the number of items first for the
            // sake of the type definition.
            // This is more convenient.
            array::IntoIter::new([
                $(
                    $str,
                )*
            ]).for_each(|sym| { interner.intern(sym); });

            interner
        }
    }
}
|
||||
|
||||
// Static symbols that will have their strings interned upon global
|
||||
// interner initialization.
|
||||
//
|
||||
// Each of these generates a constant of the same name with a [`SymbolId`].
|
||||
// This symbol is constant,
|
||||
// generated at compile-time,
|
||||
// and is intended to be used with a global interner.
|
||||
// Since a global interner is initialized on first use,
|
||||
// which in turn populates the interner using [`fill`] above,
|
||||
// this constant will always represent a valid global symbol within the
|
||||
// context of reads.
|
||||
//
|
||||
// The constants are not all-uppercase,
|
||||
// which creates the illusion that the symbols were dynamically generated;
|
||||
// this isn't entirely false,
|
||||
// given that internment _is_ a runtime operation even for these
|
||||
// symbols.
|
||||
//
|
||||
// Certain symbols are Rust identifiers,
|
||||
// and therefore begin with a capital letter;
|
||||
// this is also done by rustc
|
||||
// (see https://doc.rust-lang.org/nightly/nightly-rustc/src/rustc_span/symbol.rs.html).
|
||||
//
|
||||
// See parent documentation for more information.
|
||||
//
|
||||
// These end up in the `st` module,
|
||||
// which is re-exported by the parent module.
|
||||
static_symbols! {
|
||||
// Index begins at 1, since 0 is reserved during interner initialization
|
||||
True: "true",
|
||||
False: "false",
|
||||
|
||||
// [Symbols will be added here as they are needed.]
|
||||
|
||||
// Marker indicating the end of the static symbols
|
||||
END_STATIC: "{{end static}}"
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::st;
    use crate::global;
    use crate::sym::{GlobalSymbolIntern, SymbolId};

    type Ix = global::ProgSymSize;

    // Global interners are expected to be created with the prefill applied.
    #[test]
    fn global_sanity_check() {
        // Intern something new first: were the prefill _not_ applied,
        // interning a static string right away would land on the first
        // offset and the checks below would appear to pass.
        let fresh: SymbolId<Ix> = "force offset".intern();

        let fresh_index = fresh.as_usize();
        let end_static_index = st::END_STATIC.as_usize();

        assert!(
            fresh_index > end_static_index,
            "a new global symbol allocation was not > END_STATIC, \
             indicating that prefill is not working!"
        );

        // Additionally verify that the indexes line up as expected.
        // A misaligned system would be noticed regardless,
        // but this gives a more direct hint as to what went wrong.
        let interned_true: SymbolId<Ix> = "true".intern();
        assert_eq!(st::True, interned_true);

        let interned_false: SymbolId<Ix> = "false".intern();
        assert_eq!(st::False, interned_false);
    }
}
|
|
@ -55,7 +55,7 @@ use std::thread::LocalKey;
|
|||
/// see either [`GlobalSymbolResolve::lookup_str`] or
|
||||
/// [`Interner::index_lookup`].
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct SymbolId<Ix: SymbolIndexSize>(Ix::NonZero);
|
||||
pub struct SymbolId<Ix: SymbolIndexSize>(pub(super) Ix::NonZero);
|
||||
assert_eq_size!(Option<SymbolId<u16>>, SymbolId<u16>);
|
||||
|
||||
/// Identifier of a symbol within a single package.
|
||||
|
@ -153,7 +153,9 @@ pub trait SymbolIndexSize:
|
|||
macro_rules! supported_symbol_index {
|
||||
($prim:ty, $nonzero:ty, $interner:ty, $global:ident) => {
|
||||
thread_local! {
|
||||
pub(super) static $global: $interner = <$interner>::new();
|
||||
pub(super) static $global: $interner = super::prefill::fill(
|
||||
<$interner>::with_capacity(global::INIT_GLOBAL_INTERNER_CAPACITY)
|
||||
);
|
||||
}
|
||||
|
||||
impl SymbolIndexSize for $prim {
|
||||
|
|
Loading…
Reference in New Issue