tamer: sym: 16-bit static symbol prefill

The 16-bit interner at present will be used only for span contexts.  In the
future, this interner may become specialized specifically for that, but for
now let's just re-use what we already have so that I can move on.

DEV-10733
main
Mike Gerwitz 2021-09-28 10:39:20 -04:00
parent 96b16c6de9
commit 863d990cbd
3 changed files with 114 additions and 92 deletions

View File

@ -224,7 +224,8 @@
//! symbol newtypes and objects composed of symbols are able to be
//! statically constructed as well.
//!
//! These generated symbol constants can be found in the [`st`] module.
//! These generated symbol constants can be found in the [`st`] and [`st16`]
//! modules.
//!
//! Uninterned Symbols
//! ------------------

View File

@ -30,13 +30,9 @@ use super::{Interner, SymbolId, SymbolIndexSize};
use crate::global;
use std::array;
/// A size that is as small as possible to hold the necessary number of
/// values.
type StaticSymbolSize = u8;
/// Generate a newtype containing a condensed [`SymbolId`].
macro_rules! static_symbol_newtype {
($(#[$attr:meta])* $name:ident) => {
($(#[$attr:meta])* $name:ident<$size:ty>) => {
$(#[$attr])*
#[doc=""]
/// This is a statically-allocated symbol.
@ -45,11 +41,13 @@ macro_rules! static_symbol_newtype {
/// available in the 32-bit global interner once it has been
/// initialized.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct $name(StaticSymbolSize);
pub struct $name(<$size as SymbolIndexSize>::NonZero);
impl $name {
const fn new(id: StaticSymbolSize) -> Self {
Self(id)
const fn new(id: $size) -> Self {
Self(unsafe {
<$size as SymbolIndexSize>::NonZero::new_unchecked(id)
})
}
/// Cast static symbol into a [`SymbolId`] suitable for the global
@ -57,20 +55,16 @@ macro_rules! static_symbol_newtype {
///
/// This is safe since global interner will always contain this
/// symbol before it can be read.
pub const fn as_sym(self) -> SymbolId<global::ProgSymSize> {
SymbolId(unsafe {
<global::ProgSymSize as SymbolIndexSize>::NonZero::new_unchecked(
self.0 as global::ProgSymSize,
)
})
pub const fn as_sym(self) -> SymbolId<$size> {
SymbolId(self.0)
}
pub const fn as_usize(self) -> usize {
self.0 as usize
self.0.get() as usize
}
}
impl From<$name> for SymbolId<global::ProgSymSize> {
impl From<$name> for SymbolId<$size> {
fn from(st: $name) -> Self {
st.as_sym()
}
@ -82,9 +76,9 @@ macro_rules! static_symbol_newtype {
/// can be used to take a short identifier and convert it into its full
/// type identifier.
macro_rules! static_symbol_newtypes {
($($(#[$attr:meta])* $short:ident: $ty:ident),*) => {
($($(#[$attr:meta])* $short:ident: $ty:ident<$size:ty>,)*) => {
$(
static_symbol_newtype!($(#[$attr])* $ty);
static_symbol_newtype!($(#[$attr])* $ty<$size>);
)*
macro_rules! static_symbol_ty {
@ -93,11 +87,6 @@ macro_rules! static_symbol_newtypes {
$ty
};
)*
// Just some string value; a misc case.
(str) => {
StaticSymbolId
};
}
}
}
@ -112,7 +101,7 @@ macro_rules! static_symbol_newtypes {
/// which is on first access,
/// these symbols will reference valid values.
macro_rules! static_symbol_consts {
(@i $i:expr; $name:ident: $ty:ident $str:expr, $($tail:tt)*) => {
(@i $i:expr; <$size:ty> $name:ident: $ty:ident $str:expr, $($tail:tt)*) => {
#[doc=concat!(
"Interned `",
stringify!($ty),
@ -129,13 +118,14 @@ macro_rules! static_symbol_consts {
// the recursion limit if we have too many static symbols, after
// which time we may have to switch methodology.
@i $i + 1;
<$size>
$($tail)*
}
};
// Terminating condition.
(@i $i:expr;) => {}
(@i $i:expr; <$size:ty>) => {}
}
/// Statically allocate [`SymbolId`]s for the provided symbols,
@ -148,37 +138,15 @@ macro_rules! static_symbol_consts {
/// immediately after initialization,
/// /before/ any internment requests.
macro_rules! static_symbols {
($($name:ident : $ty:ident $str:expr),*) => {
/// Static symbols (pre-allocated).
///
/// Each of the constants in this module represent a [`SymbolId`]
/// statically allocated at compile-time.
/// The strings that they represent are automatically populated into
/// the global interners when the interner is first accessed.
///
/// _You should always use the generated constant to reference these
/// symbols!_
/// Do not rely upon their integer value,
/// as it _will_ change over time.
/// The sole exception is to use marker symbols to identify ranges
/// of symbols;
/// see [`MarkStaticSymbolId`].
///
/// See [`crate::sym`] for more information on static symbols.
///
/// `static` is a keyword in Rust,
/// so we shorten the module name to `st`.
pub mod st {
use super::*;
(<$size:ty>; $($name:ident : $ty:ident $str:expr),*) => {
static_symbol_consts! {
// Index 0 is not valid, so begin at 1
@i 1;
<$size>
static_symbol_consts! {
// Index 0 is not valid, so begin at 1
@i 1;
$(
$name: $ty $str,
)*
}
$(
$name: $ty $str,
)*
}
/// Fill a new interner with static symbols.
@ -187,7 +155,7 @@ macro_rules! static_symbols {
/// ======
/// This function will panic if the interner has any symbols,
/// which would cause misalignment with the generated constants.
pub(super) fn fill<'a, I, Ix>(interner: I) -> I
pub(in super::super) fn fill<'a, I, Ix>(interner: I) -> I
where
I: Interner<'a, Ix>,
Ix: SymbolIndexSize
@ -217,51 +185,91 @@ static_symbol_newtypes! {
///
/// This is the traditional `[a-zA-Z_][a-zA-Z0-9_]*`,
/// common in many programming languages.
cid: CIdentStaticSymbolId,
cid: CIdentStaticSymbolId<global::ProgSymSize>,
/// This symbol serves only as a marker in the internment pool to
/// delimit symbol ranges;
/// its string value is incidental and should not be relied upon.
mark: MarkStaticSymbolId
mark: MarkStaticSymbolId<global::ProgSymSize>,
/// Static 16-bit [`Span`](crate::span::Span) context.
///
/// These contexts are intended for use in generated code where a better
/// context cannot be derived.
ctx: ContextStaticSymbolId<u16>,
}
// Static symbols that will have their strings interned upon global
// interner initialization.
//
// Each of these generates a constant of the same name with a [`SymbolId`].
// This symbol is constant,
// generated at compile-time,
// and is intended to be used with a global interner.
// Since a global interner is initialized on first use,
// which in turn populates the interner using [`fill`] above,
// this constant will always represent a valid global symbol within the
// context of reads.
//
// The constants follow a naming convention:
// - `L_` indicates that the identifier is all-lowercase.
//
// See parent documentation for more information.
//
// These end up in the `st` module,
// which is re-exported by the parent module.
static_symbols! {
// Index begins at 1, since 0 is reserved during interner initialization
L_TRUE: cid "true",
L_FALSE: cid "false",
/// Static symbols (pre-allocated).
///
/// Each of the constants in this module represent a [`SymbolId`] statically
/// allocated at compile-time.
/// The strings that they represent are automatically populated into the
/// global interners when the interner is first accessed.
///
/// _You should always use the generated constant to reference these
/// symbols!_
/// Do not rely upon their integer value,
/// as it _will_ change over time.
/// The sole exception is to use marker symbols to identify ranges
/// of symbols;
/// see [`MarkStaticSymbolId`].
///
/// See [`crate::sym`] for more information on static symbols.
///
/// `static` is a keyword in Rust,
/// so we shorten the module name to `st`.
///
/// The constants follow a naming convention:
/// - `L_` indicates that the identifier is all-lowercase.
pub mod st {
use super::*;
// [Symbols will be added here as they are needed.]
static_symbols! {
<global::ProgSymSize>;
// Marker indicating the end of the static symbols
END_STATIC: mark "{{end static}}"
L_TRUE: cid "true",
L_FALSE: cid "false",
// [Symbols will be added here as they are needed.]
// Marker indicating the end of the static symbols
// (this must always be last).
END_STATIC: mark "{{end}}"
}
}
/// Static 16-bit symbols (pre-allocated).
///
/// These symbols are intended for situations where a smaller symbol size is
/// necessary.
/// Presently,
/// this includes only the [`Span`](crate::span::Span) context.
///
/// See also [st](super::st) for general static symbols.
pub mod st16 {
use super::*;
static_symbols! {
<u16>;
// Special contexts.
CTX_LINKER: ctx "#!LINKER",
// [Symbols will be added here as they are needed.]
// Marker indicating the end of the static symbols
// (this must always be last).
END_STATIC: mark "{{end}}"
}
}
#[cfg(test)]
mod test {
use super::st;
use super::{st, st16};
use crate::sym::{GlobalSymbolIntern, SymbolId};
#[test]
fn global_sanity_check() {
fn global_sanity_check_st() {
// If we _don't_ prefill, make sure we're not starting at the first
// offset when interning, otherwise it'll look correct.
let new: SymbolId = "force offset".intern();
@ -280,4 +288,18 @@ mod test {
assert_eq!(st::L_TRUE.as_sym(), "true".intern());
assert_eq!(st::L_FALSE.as_sym(), "false".intern());
}
#[test]
fn global_sanity_check_st16() {
// If we _don't_ prefill, make sure we're not starting at the first
// offset when interning, otherwise it'll look correct.
let new: SymbolId<u16> = "force offset".intern();
assert!(
new.as_usize() > st16::END_STATIC.as_usize(),
"a new 16-bit global symbol allocation was not > END_STATIC, \
indicating that prefill is either not working or that \
the prefill contains duplicate strings!"
);
}
}

View File

@ -167,12 +167,11 @@ type Static16Interner = DefaultInterner<'static, u16>;
type Static32Interner = DefaultInterner<'static, u32>;
thread_local! {
pub(super) static INTERNER_16: Static16Interner =
Static16Interner::with_capacity(global::INIT_GLOBAL_INTERNER_CAPACITY);
pub(super) static INTERNER_16: Static16Interner = super::prefill::st16::fill(
Static16Interner::with_capacity(global::INIT_GLOBAL_INTERNER_CAPACITY)
);
// Only the 32-bit integer, which is the default for `SymbolId`, gets
// prefilled with static symbols.
pub(super) static INTERNER_32: Static32Interner = super::prefill::fill(
pub(super) static INTERNER_32: Static32Interner = super::prefill::st::fill(
Static32Interner::with_capacity(global::INIT_GLOBAL_INTERNER_CAPACITY)
);
}