tame/tamer/src/sym/prefill.rs

270 lines
8.9 KiB
Rust

// Pre-interned strings
//
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Pre-interned strings.
//!
//! These strings are expected to be encountered nearly every run,
//! and substitute static strings that would otherwise appear hard-coded
//! in the system and have to be interned to be compared against other
//! values.
//!
//! See the [parent module](super) for more information.
use super::{Interner, SymbolId, SymbolIndexSize};
use crate::global;
use std::array;
/// Symbol whose identifier is fixed at compile-time.
///
/// Values of this type are produced by the static symbol macros in this
///   module rather than by an interner at runtime,
///   and are expected to be available in any global interner once that
///   interner has been initialized.
///
/// The identifier is held in a `u8`,
///   keeping the symbol as small as possible for the number of values that
///   are needed.
///
/// The conversion methods are all `const`,
///   which allows this symbol to be used to construct static newtypes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StaticSymbolId(u8);

impl StaticSymbolId {
    /// Cast static symbol into a [`SymbolId`] suitable for the global
    ///   program-level interner.
    ///
    /// Global interners will always contain this symbol before it can be
    ///   read,
    ///     so the resulting [`SymbolId`] refers to a valid value.
    pub const fn as_prog_sym(self) -> SymbolId<global::ProgSymSize> {
        let index = self.0 as global::ProgSymSize;

        // SAFETY: the unchecked constructor is handed `self.0`, which is
        // never zero for the constants generated by
        // `static_symbol_consts!` (numbering starts at 1).
        let id = unsafe {
            <global::ProgSymSize as SymbolIndexSize>::NonZero::new_unchecked(
                index,
            )
        };

        SymbolId(id)
    }

    /// Cast static symbol into a [`SymbolId`] suitable for the global
    ///   package-level interner.
    ///
    /// Global interners will always contain this symbol before it can be
    ///   read,
    ///     so the resulting [`SymbolId`] refers to a valid value.
    pub const fn as_pkg_sym(self) -> SymbolId<global::PkgSymSize> {
        let index = self.0 as global::PkgSymSize;

        // SAFETY: the unchecked constructor is handed `self.0`, which is
        // never zero for the constants generated by
        // `static_symbol_consts!` (numbering starts at 1).
        let id = unsafe {
            <global::PkgSymSize as SymbolIndexSize>::NonZero::new_unchecked(
                index,
            )
        };

        SymbolId(id)
    }

    /// Raw index of this symbol,
    ///   widened to a `usize`.
    pub const fn as_usize(self) -> usize {
        let Self(index) = self;
        index as usize
    }
}
impl From<StaticSymbolId> for SymbolId<global::ProgSymSize> {
fn from(st: StaticSymbolId) -> Self {
st.as_prog_sym()
}
}
impl From<StaticSymbolId> for SymbolId<global::PkgSymSize> {
fn from(st: StaticSymbolId) -> Self {
st.as_pkg_sym()
}
}
/// Emit one [`StaticSymbolId`] constant per `name: "string",` pair.
///
/// The constants are numbered consecutively,
///   beginning with the expression that follows `@i`;
///   every pair must be terminated by a comma.
///
/// These symbols,
///   rather than being generated by the global internment system,
///   are generated statically;
///     the strings are used here only for the generated documentation.
/// Once the global interner is initialized
///   (see [parent module](`super`)),
///   which is on first access,
///   these symbols will reference valid values.
macro_rules! static_symbol_consts {
    // Base case: every pair has been consumed.
    (@i $next:expr;) => {};

    // Peel off the leading pair and recurse on whatever remains.
    (@i $next:expr; $head:ident: $head_str:expr, $($rest:ident: $rest_str:expr,)*) => {
        #[doc=concat!("Interned string `\"", $head_str, "\"`.")]
        // Symbol names deliberately mirror the strings they stand for
        // rather than following constant naming conventions.
        #[allow(non_upper_case_globals)]
        pub const $head: StaticSymbolId = StaticSymbolId($next);

        // The index accumulates as the expression 1 + 1 + 1 + ...,
        // so enough static symbols will eventually hit the recursion
        // limit,
        // at which point a different methodology may be needed.
        static_symbol_consts! {
            @i $next + 1;
            $($rest: $rest_str,)*
        }
    };
}
/// Statically allocate [`StaticSymbolId`]s for the provided symbols,
///   and schedule their static strings to be interned upon initialization
///   of the global interner.
///
/// Symbols are provided as a comma-separated list of `name: "string"`
///   pairs;
///     a trailing comma after the final pair is optional.
///
/// This generates the `st` module,
///   containing one constant per pair,
///   and [`fill`],
///     which the global interners call by default.
/// Any interner may optionally invoke [`fill`],
///   immediately after initialization,
///   /before/ any internment requests.
macro_rules! static_symbols {
    // `$(,)?` permits (but does not require) a trailing comma so that
    // entries can be appended or reordered without syntax errors.
    ($($name:ident : $str:expr),* $(,)?) => {
        /// Static symbols (pre-allocated).
        ///
        /// Each of the constants in this module is a [`StaticSymbolId`]
        ///   allocated at compile-time,
        ///     which can be cast into a [`SymbolId`].
        /// The strings that they represent are automatically populated into
        ///   the global interners when the interner is first accessed.
        ///
        /// See [`crate::sym`] for more information on static symbols.
        ///
        /// `static` is a keyword in Rust,
        ///   so we shorten the module name to `st`.
        pub mod st {
            use super::*;

            static_symbol_consts! {
                // Index 0 is not valid, so begin at 1
                @i 1;

                $(
                    $name: $str,
                )*
            }
        }

        /// Fill a new interner with static symbols.
        ///
        /// The strings are interned in the order in which they were
        ///   declared,
        ///     which is the same order used to number the constants in
        ///     [`st`].
        /// The interner is returned once it has been filled.
        ///
        /// Panics
        /// ======
        /// This function will panic if the interner has any symbols,
        ///   which would cause misalignment with the generated constants.
        pub(super) fn fill<'a, I, Ix>(interner: I) -> I
        where
            I: Interner<'a, Ix>,
            Ix: SymbolIndexSize
        {
            assert!(
                interner.len() == 0,
                "cannot fill non-empty Interner with static symbols"
            );

            // This array does not exist as a constant, because that would
            // require that we count the number of items first for the
            // sake of the type definition.
            // This is more convenient.
            array::IntoIter::new([
                $(
                    $str,
                )*
            ]).for_each(|sym| { interner.intern(sym); });

            interner
        }
    }
}
// Static symbols that will have their strings interned upon global
// interner initialization.
//
// Each of these generates a constant of the same name holding a
// [`StaticSymbolId`], which can be cast into a [`SymbolId`].
// This symbol is constant,
// generated at compile-time,
// and is intended to be used with a global interner.
// Since a global interner is initialized on first use,
// which in turn populates the interner using [`fill`] above,
// this constant will always represent a valid global symbol within the
// context of reads.
//
// The constants are not all-uppercase,
// which creates the illusion that the symbols were dynamically generated;
// this isn't entirely false,
// given that internment _is_ a runtime operation even for these
// symbols.
//
// Certain symbols are Rust identifiers,
// and therefore begin with a capital letter;
// this is also done by rustc
// (see https://doc.rust-lang.org/nightly/nightly-rustc/src/rustc_span/symbol.rs.html).
//
// See parent documentation for more information.
//
// These end up in the `st` module,
// which is re-exported by the parent module.
static_symbols! {
// Indices are assigned in declaration order.
// Index begins at 1, since 0 is reserved during interner initialization
True: "true",
False: "false",
// [Symbols will be added here as they are needed.]
// Marker indicating the end of the static symbols.
// Keep this entry last: the tests in this file assert that a freshly
// interned symbol has an index greater than `END_STATIC`.
END_STATIC: "{{end static}}"
}
#[cfg(test)]
mod test {
    use super::st;
    use crate::{
        global,
        sym::{GlobalSymbolIntern, SymbolId},
    };

    /// Generate a test checking that the global interner for the given
    ///   index size agrees with the static symbol constants.
    macro_rules! global_sanity_check {
        ($test_name:ident, $size:ty, $cast:ident) => {
            #[test]
            fn $test_name() {
                // Intern an unrelated string before anything else:
                // if prefill did _not_ happen,
                // then interning the static strings first would place
                // them at the expected offsets and make things look
                // correct.
                let fresh: SymbolId<$size> = "force offset".intern();

                assert!(
                    st::END_STATIC.as_usize() < fresh.as_usize(),
                    "a new global symbol allocation was not > END_STATIC, \
                     indicating that prefill is not working!"
                );

                // Not that a broken system would otherwise go unnoticed,
                // but checking that the indexes align gives a more
                // direct hint as to what went wrong.
                assert_eq!(st::True.$cast(), "true".intern());
                assert_eq!(st::False.$cast(), "false".intern());
            }
        };
    }

    global_sanity_check!(
        global_sanity_check_prog,
        global::ProgSymSize,
        as_prog_sym
    );

    global_sanity_check!(
        global_sanity_check_pkg,
        global::PkgSymSize,
        as_pkg_sym
    );
}