549 lines
18 KiB
Rust
549 lines
18 KiB
Rust
// Pre-interned strings
|
|
//
|
|
// Copyright (C) 2014-2021 Ryan Specialty Group, LLC.
|
|
//
|
|
// This file is part of TAME.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
//! Pre-interned strings.
|
|
//!
|
|
//! These strings are expected to be encountered nearly every run,
|
|
//! and substitute static strings that would otherwise appear hard-coded
|
|
//! in the system and have to be interned to be compared against other
|
|
//! values.
|
|
//!
|
|
//! See the [parent module](super) for more information.
|
|
|
|
use super::{Interner, SymbolId, SymbolIndexSize};
|
|
use crate::global;
|
|
|
|
/// Static symbol identifier that is stable between runs of the same version
|
|
/// of TAMER.
|
|
///
|
|
/// This symbol id is allocated at compile-time.
|
|
///
|
|
/// _All objects implementing this trait must have the same byte
|
|
/// representation as its inner [`SymbolId`]_.
|
|
pub unsafe trait StaticSymbolId<Ix: SymbolIndexSize = global::ProgSymSize>:
|
|
private::Sealed
|
|
{
|
|
// Traits cannot contain constant functions.
|
|
// See [`st_as_sym`] below.
|
|
}
|
|
|
|
/// Convert any [`StaticSymbolId`] into its inner [`SymbolId`].
|
|
///
|
|
/// Static symbols are typed to convey useful information to newtypes that
|
|
/// wish to wrap or compose them.
|
|
/// This function peels back that type information to expose the inner
|
|
/// symbol.
|
|
///
|
|
/// Safety and Rationale
|
|
/// ====================
|
|
/// This function does its best to work around the limitation in Rust that
|
|
/// traits cannot contain constant functions
|
|
/// (at the time of writing).
|
|
///
|
|
/// To do this,
|
|
/// we require that every object of type [`StaticSymbolId`] have _the same
|
|
/// byte representation_ as [`SymbolId`].
|
|
/// Since Rust optimizes away simple newtype wrappers,
|
|
/// this means that we can simply cast the value to a symbol.
|
|
///
|
|
/// For example, if we have `StaticSymbolId<u32>`,
|
|
/// this would cast to a `SymbolId<u32>`.
|
|
/// The inner value of `SymbolId<u32>` is
|
|
/// `<u32 as SymbolIndexSize>::NonZero`,
|
|
/// which has the same byte representation as `u32`.
|
|
///
|
|
/// This would normally be done using [`std::mem::transmute`],
|
|
/// which ensures that the two types have compatible sizes.
|
|
/// Unfortunately,
|
|
/// the types here do not have fixed size and constant functions are
|
|
/// unable to verify that they are compatible at the time of writing.
|
|
/// We therefore must use [`std::mem::transmute_copy`] to circumvent this
|
|
/// size check.
|
|
///
|
|
/// Circumventing this check is safe given our trait bounds for all static
|
|
/// symbols in this module and its children.
|
|
/// However,
|
|
/// for this safety to hold,
|
|
/// we must ensure that no outside modules can implement
|
|
/// [`StaticSymbolId`] on their own objects.
|
|
/// For this reason,
|
|
/// [`StaticSymbolId`] implements [`private::Sealed`].
|
|
///
|
|
/// With that,
|
|
/// we get [`SymbolId`] polymorphism despite Rust's limitations.
|
|
///
|
|
/// A Note About Nightly
|
|
/// ====================
|
|
/// At the time of writing,
|
|
/// though,
|
|
/// this _does_ require two unstable features:
|
|
/// `const_fn_trait_bound` and `const_transmute_copy`.
|
|
/// We can get rid of the latter using raw pointer casts,
|
|
/// just as it does,
|
|
/// but since we're already relying on unstable flags,
|
|
/// we may as well use it while we require nightly for other things as
|
|
/// well.
|
|
///
|
|
/// `const_fn_trait_bound` cannot be removed in this situation without
|
|
/// another plan.
|
|
/// `const_panic` could be used with an enum,
|
|
/// but that still requires nightly.
|
|
pub const fn st_as_sym<T, Ix>(st: &T) -> SymbolId<Ix>
|
|
where
|
|
T: StaticSymbolId<Ix>,
|
|
Ix: SymbolIndexSize,
|
|
{
|
|
// SAFETY: A number of precautions are taken to make this a safe and
|
|
// sensible transformation; see function doc above.
|
|
SymbolId(unsafe { std::mem::transmute_copy(st) })
|
|
}
|
|
|
|
/// Generate a newtype containing a condensed [`SymbolId`].
|
|
macro_rules! static_symbol_newtype {
|
|
($(#[$attr:meta])* $name:ident<$size:ty>) => {
|
|
$(#[$attr])*
|
|
/// This is a statically-allocated symbol.
|
|
///
|
|
/// This symbol is generated at compile-time and expected to be
|
|
/// available in the 32-bit global interner once it has been
|
|
/// initialized.
|
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct $name(<$size as SymbolIndexSize>::NonZero);
|
|
|
|
// Mark this as a static symbol type, ensuring that it size is fully
|
|
// compatible with the underlying `SymbolId` so as not to cause
|
|
// problems with `st_as_sym`.
|
|
impl private::Sealed for $name {}
|
|
unsafe impl StaticSymbolId<$size> for $name {}
|
|
assert_eq_size!($name, SymbolId<$size>);
|
|
|
|
impl $name {
|
|
const fn new(id: $size) -> Self {
|
|
Self(unsafe {
|
|
<$size as SymbolIndexSize>::NonZero::new_unchecked(id)
|
|
})
|
|
}
|
|
|
|
/// Cast static symbol into a [`SymbolId`] suitable for the global
|
|
/// program-level interner.
|
|
///
|
|
/// This is safe since global interner will always contain this
|
|
/// symbol before it can be read.
|
|
pub const fn as_sym(self) -> SymbolId<$size> {
|
|
SymbolId(self.0)
|
|
}
|
|
|
|
pub const fn as_usize(self) -> usize {
|
|
self.0.get() as usize
|
|
}
|
|
}
|
|
|
|
impl From<$name> for SymbolId<$size> {
|
|
fn from(st: $name) -> Self {
|
|
st.as_sym()
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
/// Generate a series of newtypes and the macro `static_symbol_ty!` which
|
|
/// can be used to take a short identifier and convert it into its full
|
|
/// type identifier.
|
|
macro_rules! static_symbol_newtypes {
|
|
($($(#[$attr:meta])* $short:ident: $ty:ident<$size:ty>,)*) => {
|
|
$(
|
|
static_symbol_newtype!($(#[$attr])* $ty<$size>);
|
|
)*
|
|
|
|
macro_rules! static_symbol_ty {
|
|
$(
|
|
($short) => {
|
|
$ty
|
|
};
|
|
)*
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Generate symbols for preinterned strings.
|
|
///
|
|
/// These symbols,
|
|
/// rather than being generated by the global internment system,
|
|
/// are generated statically.
|
|
/// Once the global interner is initialized
|
|
/// (see [parent module](`super`)),
|
|
/// which is on first access,
|
|
/// these symbols will reference valid values.
|
|
macro_rules! static_symbol_consts {
|
|
(@i $i:expr; <$size:ty> $name:ident: $ty:ident $str:expr, $($tail:tt)*) => {
|
|
#[doc=concat!(
|
|
"Interned `",
|
|
stringify!($ty),
|
|
"` string `\"",
|
|
$str,
|
|
"\"`."
|
|
)]
|
|
pub const $name: static_symbol_ty!($ty) =
|
|
<static_symbol_ty!($ty)>::new($i);
|
|
|
|
// Recurse until no tail is left (terminating condition below).
|
|
static_symbol_consts!{
|
|
// This will result in 1 + 1 + 1 + 1 ... and will eventually hit
|
|
// the recursion limit if we have too many static symbols, after
|
|
// which time we may have to switch methodology.
|
|
@i $i + 1;
|
|
<$size>
|
|
|
|
$($tail)*
|
|
}
|
|
};
|
|
|
|
// Terminating condition.
|
|
(@i $i:expr; <$size:ty>) => {}
|
|
}
|
|
|
|
/// Statically allocate [`SymbolId`]s for the provided symbols,
|
|
/// and schedule their static strings to be interned upon initialization
|
|
/// of the global interner.
|
|
///
|
|
/// This generates `fill`,
|
|
/// which the global interners call by default.
|
|
/// Any interner may optionally invoke this,
|
|
/// immediately after initialization,
|
|
/// /before/ any internment requests.
|
|
macro_rules! static_symbols {
|
|
(<$size:ty>; $($name:ident : $ty:ident $str:expr),*) => {
|
|
static_symbol_consts! {
|
|
// Index 0 is not valid, so begin at 1
|
|
@i 1;
|
|
<$size>
|
|
|
|
$(
|
|
$name: $ty $str,
|
|
)*
|
|
}
|
|
|
|
/// Fill a new interner with static symbols.
|
|
///
|
|
/// Panics
|
|
/// ======
|
|
/// This function will panic if the interner has any symbols,
|
|
/// which would cause misalignment with the generated constants.
|
|
pub(in super::super) fn fill<'a, I, Ix>(interner: I) -> I
|
|
where
|
|
I: Interner<'a, Ix>,
|
|
Ix: SymbolIndexSize
|
|
{
|
|
assert!(
|
|
interner.len() == 0,
|
|
"cannot fill non-empty Interner with static symbols"
|
|
);
|
|
|
|
// This array does not exist as a constant, because that would
|
|
// require that we count the number of items first for the
|
|
// sake of the type definition.
|
|
// This is more convenient.
|
|
[
|
|
$(
|
|
$str,
|
|
)*
|
|
].into_iter().for_each(|sym| { interner.intern(sym); });
|
|
|
|
interner
|
|
}
|
|
}
|
|
}
|
|
|
|
static_symbol_newtypes! {
|
|
/// A symbol suitable as a C-style identifier.
|
|
///
|
|
/// This is the traditional `[a-zA-Z_][a-zA-Z0-9_]*`,
|
|
/// common in many programming languages.
|
|
cid: CIdentStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// Base-10 (decimal) integer value as a string.
|
|
dec: DecStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// A symbol resembling a QName of the form `prefix:local`.
|
|
///
|
|
/// A symbol of this type does _not_ mean that the symbol is intended to
|
|
/// be a QName;
|
|
/// this is merely a way to describe it.
|
|
/// For example,
|
|
/// `map:head` is intended as an identifier type,
|
|
/// not a QName.
|
|
qname: QnameIdentStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// This symbol serves only as a marker in the internment pool to
|
|
/// delimit symbol ranges;
|
|
/// its string value is incidental and should not be relied upon.
|
|
mark: MarkStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// A symbol suitable as a TAME identifier.
|
|
///
|
|
/// This is [`CIdentStaticSymbolId`] with `-` added:
|
|
/// `[a-zA-Z_-][a-zA-Z0-9_-]*`.
|
|
/// This is also suitable as an XML node or attribute name.
|
|
tid: TameIdentStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// Symbol representing a URI.
|
|
///
|
|
/// This is intended for use primarily as an XML namespace.
|
|
/// URIs are expected to _not_ contain quotes and other characters that
|
|
/// may need escaping in XML attributes.
|
|
uri: UriStaticSymbolId<global::ProgSymSize>,
|
|
|
|
/// Static 16-bit [`Span`](crate::span::Span) context.
|
|
///
|
|
/// These contexts are intended for use in generated code where a better
|
|
/// context cannot be derived.
|
|
ctx: ContextStaticSymbolId<u16>,
|
|
}
|
|
|
|
/// Static symbols (pre-allocated).
|
|
///
|
|
/// Each of the constants in this module represent a [`SymbolId`] statically
|
|
/// allocated at compile-time.
|
|
/// The strings that they represent are automatically populated into the
|
|
/// global interners when the interner is first accessed.
|
|
///
|
|
/// _You should always use the generated constant to reference these
|
|
/// symbols!_
|
|
/// Do not rely upon their integer value,
|
|
/// as it _will_ change over time.
|
|
/// The sole exception is to use marker symbols to identify ranges
|
|
/// of symbols;
|
|
/// see [`MarkStaticSymbolId`].
|
|
///
|
|
/// See [`crate::sym`] for more information on static symbols.
|
|
///
|
|
/// `static` is a keyword in Rust,
|
|
/// so we shorten the module name to `st`.
|
|
///
|
|
/// The constants follow a naming convention:
|
|
/// - `L_` indicates that the identifier is all-lowercase.
|
|
pub mod st {
|
|
use super::*;
|
|
|
|
// Convert `0 ≤ n ≤ 9` into a static symbol representing a single
|
|
// decimal digit.
|
|
//
|
|
// Panics
|
|
// ======
|
|
// This will panic if `n > 9`.
|
|
pub fn decimal1(n: u8) -> DecStaticSymbolId {
|
|
assert!(n < 10);
|
|
|
|
// The symbols are expected to be in a very specific position in the
|
|
// pool (n+1).
|
|
// This is verified by tests at the bottom of this file.
|
|
DecStaticSymbolId(unsafe {
|
|
<global::ProgSymSize as SymbolIndexSize>::NonZero::new_unchecked(
|
|
(n as global::ProgSymSize) + 1,
|
|
)
|
|
})
|
|
}
|
|
|
|
impl From<u8> for DecStaticSymbolId {
|
|
// Convert `0 ≤ n ≤ 9` into a static symbol representing a single
|
|
// decimal digit.
|
|
//
|
|
// See [`decimal1`].
|
|
fn from(n: u8) -> Self {
|
|
decimal1(n)
|
|
}
|
|
}
|
|
|
|
static_symbols! {
|
|
<global::ProgSymSize>;
|
|
|
|
// Decimal strings are expected to be at index (n+1).
|
|
// See `decimal1`.
|
|
N0: dec "0",
|
|
N1: dec "1",
|
|
N2: dec "2",
|
|
N3: dec "3",
|
|
N4: dec "4",
|
|
N5: dec "5",
|
|
N6: dec "6",
|
|
N7: dec "7",
|
|
N8: dec "8",
|
|
N9: dec "9",
|
|
|
|
|
|
L_BOOLEAN: cid "boolean",
|
|
L_CGEN: cid "cgen",
|
|
L_CLASS: cid "class",
|
|
L_CONST: cid "const",
|
|
L_DEP: cid "dep",
|
|
L_DESC: cid "desc",
|
|
L_DIM: cid "dim",
|
|
L_DTYPE: cid "dtype",
|
|
L_EMPTY: cid "empty",
|
|
L_FALSE: cid "false",
|
|
L_FLOAT: cid "float",
|
|
L_FUNC: cid "func",
|
|
L_FROM: cid "from",
|
|
L_GEN: cid "gen",
|
|
L_GENERATED: cid "generated",
|
|
L_INTEGER: cid "integer",
|
|
L_L: cid "l",
|
|
L_LPARAM: cid "lparam",
|
|
L_MAP: cid "map",
|
|
L_MAP_FROM: tid "map-from",
|
|
L_MAP_HEAD: qname "map:head",
|
|
L_MAP_TAIL: qname "map:tail",
|
|
L_META: cid "meta",
|
|
L_NAME: cid "name",
|
|
L_PACKAGE: cid "package",
|
|
L_PARAM: cid "param",
|
|
L_PARENT: cid "parent",
|
|
L_PREPROC: cid "preproc",
|
|
L_PROGRAM: cid "program",
|
|
L_RATE: cid "rate",
|
|
L_RETMAP: cid "retmap",
|
|
L_RETMAP_HEAD: qname "retmap:head",
|
|
L_RETMAP_TAIL: qname "retmap:tail",
|
|
L_SRC: cid "src",
|
|
L_SYM: cid "sym",
|
|
L_TITLE: cid "title",
|
|
L_TPL: cid "tpl",
|
|
L_TRUE: cid "true",
|
|
L_TYPE: cid "type",
|
|
L_UUROOTPATH: cid "__rootpath",
|
|
L_WORKSHEET: cid "worksheet",
|
|
L_XMLNS: cid "xmlns",
|
|
L_YIELDS: cid "yields",
|
|
|
|
URI_LV_RATER: uri "http://www.lovullo.com/rater",
|
|
URI_LV_PREPROC: uri "http://www.lovullo.com/rater/preproc",
|
|
URI_LV_LINKER: uri "http://www.lovullo.com/rater/linker",
|
|
|
|
// [Symbols will be added here as they are needed.]
|
|
|
|
// Marker indicating the end of the static symbols
|
|
// (this must always be last).
|
|
END_STATIC: mark "{{end}}"
|
|
}
|
|
}
|
|
|
|
/// Static 16-bit symbols (pre-allocated).
|
|
///
|
|
/// These symbols are intended for situations where a smaller symbol size is
|
|
/// necessary.
|
|
/// Presently,
|
|
/// this includes only the [`Span`](crate::span::Span) context.
|
|
///
|
|
/// See also [st](super::st) for general static symbols.
|
|
pub mod st16 {
|
|
use super::*;
|
|
|
|
static_symbols! {
|
|
<u16>;
|
|
|
|
// Special contexts.
|
|
CTX_LINKER: ctx "#!LINKER",
|
|
|
|
// [Symbols will be added here as they are needed.]
|
|
|
|
// Marker indicating the end of the static symbols
|
|
// (this must always be last).
|
|
END_STATIC: mark "{{end}}"
|
|
}
|
|
}
|
|
|
|
/// Non-public module that can contain public traits.
|
|
///
|
|
/// The problem this module tries to solve is preventing anything outside of
|
|
/// this crate from implementing the `StaticSymbolId` trait,
|
|
/// since doing so opens us up to undefined behavior when transmuting
|
|
/// via [`st_as_sym`](super::st_as_sym).
|
|
mod private {
|
|
/// Extend this trait to prevent other modules from implementing the
|
|
/// subtype.
|
|
///
|
|
/// Since other modules extend [`StaticSymbolId`](super::StaticSymbolId)
|
|
/// for their own traits,
|
|
/// this trait must be `pub`.
|
|
/// But, since it is contained within a private module,
|
|
/// it is not possible to import the trait to implement it on other
|
|
/// things.
|
|
pub trait Sealed {}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::{st, st16, DecStaticSymbolId};
|
|
use crate::sym::{GlobalSymbolIntern, GlobalSymbolResolve, SymbolId};
|
|
|
|
#[test]
|
|
fn global_sanity_check_st() {
|
|
// If we _don't_ prefill, make sure we're not starting at the first
|
|
// offset when interning, otherwise it'll look correct.
|
|
let new: SymbolId = "force offset".intern();
|
|
|
|
assert!(
|
|
new.as_usize() > st::END_STATIC.as_usize(),
|
|
"a new global symbol allocation was not > END_STATIC, \
|
|
indicating that prefill is either not working or that \
|
|
the prefill contains duplicate strings!"
|
|
);
|
|
|
|
// Further sanity check to make sure indexes align as expected,
|
|
// not that you wouldn't otherwise notice that the whole system is
|
|
// broken, but this ought to offer a more direct hint as to what
|
|
// went wrong.
|
|
assert_eq!(st::L_TRUE.as_sym(), "true".intern());
|
|
assert_eq!(st::L_FALSE.as_sym(), "false".intern());
|
|
}
|
|
|
|
#[test]
|
|
fn global_sanity_check_st16() {
|
|
// If we _don't_ prefill, make sure we're not starting at the first
|
|
// offset when interning, otherwise it'll look correct.
|
|
let new: SymbolId<u16> = "force offset".intern();
|
|
|
|
assert!(
|
|
new.as_usize() > st16::END_STATIC.as_usize(),
|
|
"a new 16-bit global symbol allocation was not > END_STATIC, \
|
|
indicating that prefill is either not working or that \
|
|
the prefill contains duplicate strings!"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn decimal1_0_to_9() {
|
|
for n in 0..=9 {
|
|
assert_eq!(st::decimal1(n).as_sym().lookup_str(), n.to_string());
|
|
|
|
// From<u8>
|
|
assert_eq!(
|
|
DecStaticSymbolId::from(n).as_sym().lookup_str(),
|
|
n.to_string()
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
#[should_panic]
|
|
fn decimal1_gt_9_panics() {
|
|
st::decimal1(10);
|
|
}
|
|
}
|