974 lines
31 KiB
Rust
974 lines
31 KiB
Rust
// Source spans
|
||
//
|
||
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
|
||
//
|
||
// This file is part of TAME.
|
||
//
|
||
// This program is free software: you can redistribute it and/or modify
|
||
// it under the terms of the GNU General Public License as published by
|
||
// the Free Software Foundation, either version 3 of the License, or
|
||
// (at your option) any later version.
|
||
//
|
||
// This program is distributed in the hope that it will be useful,
|
||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
// GNU General Public License for more details.
|
||
//
|
||
// You should have received a copy of the GNU General Public License
|
||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
//! Mapping to source input byte intervals.
|
||
//!
|
||
//! A [`Span`] is a mapping to a byte interval within a source file,
|
||
//! representing primarily where some IR entity originated.
|
||
//! This underpins the diagnostic system,
|
||
//! intended to:
|
||
//!
|
||
//! 1. Give the user specific information for debugging errors in their
|
||
//! programs; and
|
||
//! 2. Provide high-resolution information for source code inquiries,
|
||
//! such as "where is this identifier?" and "what exists at my cursor
|
||
//! position within this file"?
|
||
//!
|
||
//! A span contains a [`Context`] representing the source location.
|
||
//! A context's path is a [`PathSymbolId`],
|
||
//! which represents an interned string slice,
|
||
//! _not_ a [`PathBuf`](std::path::PathBuf) or
|
||
//! [`OsStr`](std::ffi::OsStr).
|
||
//!
|
||
//! ```
|
||
//! use tamer::span::{Span, Context};
|
||
//! use tamer::sym::GlobalSymbolIntern;
|
||
//!
|
||
//! // From raw parts
|
||
//! let ctx: Context = "some/path/foo".intern().into();
|
||
//! let span = Span::new(2, 6, ctx);
|
||
//!
|
||
//! assert_eq!(2, span.offset());
|
||
//! assert_eq!(6, span.len());
|
||
//! assert_eq!(ctx, span.context());
|
||
//!
|
||
//! // From a closed byte interval
|
||
//! let spani = Span::from_byte_interval((10, 25), "some/path/bar".intern());
|
||
//! assert_eq!(10, spani.offset());
|
||
//! assert_eq!(15, spani.len());
|
||
//!
|
||
//! // Freely copyable
|
||
//! let cp = span;
|
||
//! assert_eq!(cp, span);
|
||
//! ```
|
||
//!
|
||
//! Span is expected to be able to fit within a general-purpose CPU register
|
||
//! on a 64-bit system, and so does not exceed 8 bytes in length.
|
||
//!
|
||
//! Spans are one of the most common objects in TAMER,
|
||
//! competing only with [symbols](crate::sym).
|
||
//! But unlike symbols,
|
||
//! [`Span`]s are designed to be meaningfully identifiable and copyable
|
||
//! without interning.
|
||
//!
|
||
//! A span is ordered as such:
|
||
//!
|
||
//! 1. Spans group by [`Context`],
|
||
//! though the relative ordering of each [`Context`] isn't
|
||
//! necessarily meaningful;
|
||
//! 2. Spans are then ordered relative to their offset; and
|
||
//! 3. Spans are finally ordered by their length.
|
||
//!
|
||
//! Note that this means that a span beginning after but ending before
|
||
//! another span will still order higher,
|
||
//! as shown in the example below.
|
||
//!
|
||
//! ```
|
||
//! # use tamer::span::{Span, Context};
|
||
//! # use tamer::sym::GlobalSymbolIntern;
|
||
//! #
|
||
//! # let ctx: Context = "some/path/foo".intern().into();
|
||
//! #
|
||
//! // Visualization of spans:
|
||
//! // [....,....,....,....,]
|
||
//! // [A-+-] [B-+]|
|
||
//! // | [C-] |
|
||
//! // | [D-+-]
|
||
//! // | [E]
|
||
//! // [F----] |
|
||
//! // [G------]
|
||
//!
|
||
//! let A = Span::new(2, 6, ctx);
|
||
//! let B = Span::new(10, 5, ctx);
|
||
//! let C = Span::new(10, 4, ctx);
|
||
//! let D = Span::new(10, 6, ctx);
|
||
//! let E = Span::new(11, 3, ctx);
|
||
//! let F = Span::new(5, 7, ctx);
|
||
//! let G = Span::new(5, 8, ctx);
|
||
//!
|
||
//! let mut spans = vec![A, B, C, D, E, F, G];
|
||
//! spans.sort();
|
||
//!
|
||
//! assert_eq!(spans, vec![A, F, G, C, B, D, E]);
|
||
//! ```
|
||
//!
|
||
//! Design Rationale
|
||
//! ================
|
||
//! It is expected that spans will be created and copied frequently,
|
||
//! as they are propagated to every IR in the system.
|
||
//! It is further expected that the data within a span will only be
|
||
//! referenced for diagnostic purposes,
|
||
//! or for utilities operating on original source code
|
||
//! (such as code formatters).
|
||
//!
|
||
//! When a span is referenced,
|
||
//! it will either be to determine the exact location of a particular
|
||
//! entity,
|
||
//! or it will be to attempt to locate a similar entity in a higher-level
|
||
//! IR associated with the same region of code.
|
||
//! The latter requires that spans be comparable in a meaningful way,
|
||
//! exhibiting at least partial ordering.
|
||
//!
|
||
//! Spans are therefore optimized for three primary use cases:
|
||
//! - copying;
|
||
//! - comparison; and
|
||
//! - ordering.
|
||
//!
|
||
//! Span Structure
|
||
//! --------------
|
||
//! Spans are packed into 64-bit values that can be readily converted into a
|
||
//! [`u64`] value that is totally ordered relative to a given [`Context`],
|
||
//! byte offset, and byte length.
|
||
//! This means that sorting a collection of [`Span`]s will group spans by
|
||
//! their [`Context`];
|
||
//! will sort those spans relative to their starting offset within that
|
||
//! context; and
|
||
//! will sort again by the ending offset.
|
||
//!
|
||
//! This means that spans are [`Eq`] and [`Ord`],
|
||
//! and efficiently so by simply comparing the byte values of the entire
|
||
//! struct as a single [`u64`].
|
||
//! This allows spans to be sorted relative to their positions within a
|
||
//! context;
|
||
//! be placed into a binary tree for mapping back to higher-level IRs;
|
||
//! gives spans a meaningful unique identifier;
|
||
//! be freely copied without cost;
|
||
//! and more,
|
||
//! all very efficiently and without having to access individual
|
||
//! struct members.
|
||
//!
|
||
//! To accomplish this,
|
||
//! [`Span`] uses `repr(packed)` and orders the fields for little endian
|
||
//! systems like `x86_64`,
|
||
//! which is what our team uses.
|
||
//! The `packed` representation had to be used because the byte orderings
|
||
//! are [`u16`], [`u32`], [`u16`],
|
||
//! which makes the [`u32`] byte offset unaligned.
|
||
//! Note that,
|
||
//! while this _is_ unaligned,
|
||
//! this is _not_ unaligned _memory_ access,
|
||
//! since the entire [`Span`] will be retrieved from (aligned) memory at
|
||
//! once;
|
||
//! the unaligned fields within the [`u64`] do not incur a measurable
|
||
//! performance cost.
|
||
//!
|
||
//! Related Work
|
||
//! ============
|
||
//! This span is motivated by [rustc's compressed `Span`][rustc-span].
|
||
//! TAMER's span size relies on 16 bits being sufficient for holding
|
||
//! interned paths,
|
||
//! which _should_ be a very reasonable assumption unless the interner
|
||
//! ends up being shared with too many different things.
|
||
//! If ever that assumption becomes violated,
|
||
//! and it is deemed that packages containing so many symbols should be permitted,
|
||
//! TAMER's [`Span`] can accommodate in a similar way to rustc's by
|
||
//! interning the larger span data and tagging this span as such.
|
||
//!
|
||
//!
|
||
//! [rustc-span]: https://doc.rust-lang.org/stable/nightly-rustc/rustc_span/struct.Span.html
|
||
|
||
use crate::{
|
||
debug_diagnostic_panic, global,
|
||
sym::{st16, ContextStaticSymbolId, GlobalSymbolResolve, SymbolId},
|
||
};
|
||
use std::{convert::TryInto, fmt::Display, path::Path};
|
||
|
||
/// A symbol size sufficient for holding interned paths.
|
||
pub type PathSymbolId = SymbolId<u16>;
|
||
|
||
/// Size of a [`Span`]'s `offset` field.
|
||
pub type SpanOffsetSize = global::SourceFileSize;
|
||
|
||
/// Size of a [`Span`]'s `len` field.
|
||
pub type SpanLenSize = global::FrontendTokenLength;
|
||
|
||
/// Description of a source location and byte interval for some object.
|
||
///
|
||
/// Spans represent byte intervals within a given source context.
|
||
/// A span should map to useful positions for helping users debug error
|
||
/// messages.
|
||
/// If code is generated, desugared, or otherwise manipulated,
|
||
/// the span ought to reference the original location of the code that can
|
||
/// be referenced and modified to correct any problems.
|
||
///
|
||
/// See the [module-level documentation](self) for more information.
|
||
#[cfg(target_endian = "little")]
|
||
#[repr(packed)]
|
||
#[derive(Debug, Clone, Copy)]
|
||
pub struct Span {
|
||
/// Token length (ending byte offset - `offset`).
|
||
len: SpanLenSize,
|
||
|
||
/// Starting 0-indexed byte position, inclusive.
|
||
offset: SpanOffsetSize,
|
||
|
||
/// Context onto which byte offsets are mapped,
|
||
/// such as a source file.
|
||
///
|
||
/// N.B.: This is an unaligned field,
|
||
/// and accessing it frequently may have a negative impact on
|
||
/// performance.
|
||
ctx: Context,
|
||
}
|
||
|
||
assert_eq_size!(Span, Option<Span>);
|
||
|
||
impl Span {
|
||
/// Create a new span from its constituent parts.
|
||
pub fn new<C: Into<Context>>(
|
||
offset: SpanOffsetSize,
|
||
len: SpanLenSize,
|
||
ctx: C,
|
||
) -> Self {
|
||
Self {
|
||
ctx: ctx.into(),
|
||
offset,
|
||
len,
|
||
}
|
||
}
|
||
|
||
/// Create a constant span from a static context.
|
||
pub const fn st_ctx(sym: ContextStaticSymbolId) -> Self {
|
||
Self {
|
||
ctx: Context(sym.as_sym()),
|
||
offset: 0,
|
||
len: 0,
|
||
}
|
||
}
|
||
|
||
/// Create a span from a byte interval and context.
|
||
///
|
||
/// Panics
|
||
/// ======
|
||
/// This will panic in the unlikely case that the difference between the
|
||
/// start and end of the interval exceeds the maximum of
|
||
/// [`global::FrontendTokenLength`].
|
||
///
|
||
/// If this error occurs,
|
||
/// the parser should consider splitting large tokens up into multiple
|
||
/// tokens;
|
||
/// increasing [`global::FrontendTokenLength`] should be a last
|
||
/// resort,
|
||
/// since it has wide-reaching implications on the size of
|
||
/// [`Span`].
|
||
///
|
||
/// The user is not expected to know how to recover from this error
|
||
/// without debugging the compiler.
|
||
/// It is not expected that this would occur on any valid inputs.
|
||
pub fn from_byte_interval<B, C>(interval: B, ctx: C) -> Self
|
||
where
|
||
B: Into<ClosedByteInterval>,
|
||
C: Into<Context>,
|
||
{
|
||
let binterval = interval.into();
|
||
|
||
Self {
|
||
offset: binterval.0,
|
||
len: (binterval.1 - binterval.0)
|
||
.try_into()
|
||
.expect("span length exceeds global::FrontendTokenLength"),
|
||
ctx: ctx.into(),
|
||
}
|
||
}
|
||
|
||
// A span represented uniquely as a totally ordered [`u64`].
|
||
//
|
||
// For more information on this important properly,
|
||
// see the documentation for [`Span`] itself.
|
||
pub fn as_u64(self) -> u64 {
|
||
// We take a number of precautions to make this safe (in the sense
|
||
// of correctness),
|
||
// through struct packing and a `cfg` directive for endianness.
|
||
// In any case,
|
||
// a `u64` isn't going to harm anyone.
|
||
unsafe { std::mem::transmute(self) }
|
||
}
|
||
|
||
/// Byte offset of the beginning of the span relative to its context.
|
||
pub fn offset(&self) -> SpanOffsetSize {
|
||
self.offset
|
||
}
|
||
|
||
/// Length of the span in bytes.
|
||
///
|
||
/// The interval of the span is `[offset, offset+len]`.
|
||
pub fn len(&self) -> SpanLenSize {
|
||
self.len
|
||
}
|
||
|
||
/// The context to which the span applies.
|
||
///
|
||
/// The context is, for example, a file.
|
||
pub fn context(&self) -> Context {
|
||
self.ctx
|
||
}
|
||
|
||
/// Further offset a span.
|
||
///
|
||
/// This attempts to offset a span relative to its current offset by the
|
||
/// provided value.
|
||
/// If the resulting offset exceeds [`SpanOffsetSize`],
|
||
/// the result will be [`None`].
|
||
pub const fn offset_add(self, value: SpanOffsetSize) -> Option<Self> {
|
||
match self.offset.checked_add(value) {
|
||
Some(offset) => Some(Self { offset, ..self }),
|
||
None => None,
|
||
}
|
||
}
|
||
|
||
/// Create two zero-length spans representing respectively the first and
|
||
/// last offsets in the span.
|
||
///
|
||
/// The second endpoint will be [`None`] if the offset cannot be
|
||
/// represented by [`SpanOffsetSize`].
|
||
///
|
||
/// ```
|
||
/// # use tamer::span::{Span, Context};
|
||
/// # use tamer::sym::GlobalSymbolIntern;
|
||
/// #
|
||
/// # let ctx: Context = "some/path/foo".intern().into();
|
||
/// #
|
||
/// // [0123456789]
|
||
/// // [---]
|
||
/// // 2 6
|
||
/// // A
|
||
/// let A = Span::new(2, 6, ctx);
|
||
///
|
||
/// assert_eq!(
|
||
/// A.endpoints(),
|
||
/// (
|
||
/// Span::new(2, 0, ctx),
|
||
/// Some(Span::new(8, 0, ctx)),
|
||
/// ),
|
||
/// );
|
||
/// ```
|
||
pub const fn endpoints(self) -> (Self, Option<Self>) {
|
||
(
|
||
// First endpoint.
|
||
Self {
|
||
offset: self.offset,
|
||
len: 0,
|
||
..self
|
||
},
|
||
// Second endpoint.
|
||
match self.offset.checked_add(self.len as u32) {
|
||
Some(offset) => Some(Self {
|
||
offset,
|
||
len: 0,
|
||
..self
|
||
}),
|
||
None => None,
|
||
},
|
||
)
|
||
}
|
||
|
||
/// Create two zero-length spans representing respectively the first and
|
||
/// last offsets in the span,
|
||
/// saturating the ending offset if it cannot be represented by
|
||
/// [`SpanOffsetSize`].
|
||
///
|
||
/// Aside from the saturation,
|
||
/// this is identical to [`Span::endpoints`].
|
||
pub fn endpoints_saturated(self) -> (Self, Self) {
|
||
let endpoints = self.endpoints();
|
||
|
||
(
|
||
endpoints.0,
|
||
endpoints.1.unwrap_or(Self {
|
||
offset: SpanOffsetSize::MAX,
|
||
..endpoints.0
|
||
}),
|
||
)
|
||
}
|
||
|
||
/// Create a new span that is a slice of this one.
|
||
///
|
||
/// If either `rel_offset` or `len` are too large,
|
||
/// then a copy of the span will be returned unsliced.
|
||
///
|
||
/// Panics (Debug Mode)
|
||
/// -------------------
|
||
/// If the offset and length exceeds the bounds of the span,
|
||
/// then the system has an arithmetic bug that ought to be corrected,
|
||
/// and so this will panic with a diagnostic message.
|
||
/// This check does not occur on release builds since this is not a
|
||
/// safety issue and should be caught by tests.
|
||
pub fn slice(self, rel_offset: usize, len: usize) -> Self {
|
||
let (irel_offset, ilen) = match (rel_offset.try_into(), len.try_into())
|
||
{
|
||
(Ok(x), Ok(y)) => (x, y),
|
||
_ => (0, self.len()),
|
||
};
|
||
|
||
// We shouldn't ignore slices that exceed the length of the span,
|
||
// since this represents a bug that'll cause nonsense diagnostic
|
||
// data and it represents an arithmetic bug in the system
|
||
// (but there are no safety concerns).
|
||
if ((irel_offset as usize).saturating_add(ilen as usize))
|
||
> self.len() as usize
|
||
{
|
||
use crate::diagnose::Annotate;
|
||
debug_diagnostic_panic!(
|
||
self.error("attempting to slice this span").into(),
|
||
"length {len} at offset {rel_offset} \
|
||
exceeds bounds of span {self}",
|
||
);
|
||
}
|
||
|
||
Self {
|
||
ctx: self.ctx,
|
||
offset: self.offset.saturating_add(irel_offset),
|
||
len: ilen,
|
||
}
|
||
}
|
||
|
||
/// Adjust span such that its offset is relative to the provided span.
|
||
///
|
||
/// If the provide `rel_span` does not precede this span,
|
||
/// [`None`] will be returned.
|
||
///
|
||
/// If the two spans do not share the same [`Context`],
|
||
/// no comparison can be made and [`None`] will be returned.
|
||
pub fn relative_to(self, rel_span: Span) -> Option<Self> {
|
||
// Note that this is unaligned.
|
||
if self.context() != rel_span.context() {
|
||
return None;
|
||
}
|
||
|
||
if self.offset() < rel_span.offset() {
|
||
return None;
|
||
}
|
||
|
||
Some(Self {
|
||
ctx: self.ctx,
|
||
offset: self.offset.saturating_sub(rel_span.offset),
|
||
len: self.len,
|
||
})
|
||
}
|
||
|
||
/// Merge with another span `b` such that the combined span begins at
|
||
/// the offset of the earlier of the two spans and extends to the end
|
||
/// of the later of the two.
|
||
///
|
||
/// Both spans must have the same [`Context`],
|
||
/// otherwise the result will be [`None`].
|
||
/// Merged values beyond [`SpanOffsetSize`] and [`SpanLenSize`] will
|
||
/// also result in [`None`].
|
||
///
|
||
/// This properly handles overlapping spans,
|
||
/// including the case where one of the spans is entirely contained
|
||
/// within another.
|
||
/// See test cases for more information.
|
||
/// (TODO: Maybe we should move the test cases into these docs?)
|
||
pub fn merge<S: Into<Span>>(self, other: S) -> Option<Span> {
|
||
let b = other.into();
|
||
|
||
if self.context() != b.context() {
|
||
return None;
|
||
}
|
||
|
||
// Order arguments such that `self` is placed at or before `b`
|
||
// rather than having to worry about confounding accommodations
|
||
// below.
|
||
if self.offset() > b.offset() {
|
||
return b.merge(self);
|
||
}
|
||
|
||
let (_, end) = b.endpoints();
|
||
|
||
end.and_then(|Span { offset, .. }| {
|
||
SpanLenSize::try_from(offset - self.offset).ok()
|
||
})
|
||
.map(|new_len| Self {
|
||
ctx: self.ctx,
|
||
offset: self.offset,
|
||
len: self.len.max(new_len),
|
||
})
|
||
}
|
||
}
|
||
|
||
impl From<Span> for u64 {
|
||
fn from(val: Span) -> Self {
|
||
val.as_u64()
|
||
}
|
||
}
|
||
|
||
impl PartialEq for Span {
|
||
fn eq(&self, other: &Self) -> bool {
|
||
self.as_u64() == other.as_u64()
|
||
}
|
||
}
|
||
|
||
impl Eq for Span {}
|
||
|
||
impl PartialOrd for Span {
|
||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||
Some(self.cmp(other))
|
||
}
|
||
}
|
||
|
||
impl Ord for Span {
|
||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||
self.as_u64().cmp(&other.as_u64())
|
||
}
|
||
}
|
||
|
||
// This assertion verifies our above expectations.
|
||
// If this fails,
|
||
// then you have either modified [`global`] constants or you have modified
|
||
// the fields of [`Span`] itself,
|
||
// in which case you should read "Related Work" above to determine
|
||
// whether this was a good idea or if interned spans should be
|
||
// introduced.
|
||
// In any case,
|
||
// hopefully this was planned for,
|
||
// because otherwise your week has just been ruined.
|
||
assert_eq_size!(Span, u64);
|
||
|
||
impl From<Span> for (Span, Span) {
    /// Expand a [`Span`] into a two-span.
    ///
    /// A two-span `(A, B)` describes the region that begins at the start
    /// of `A` and ends at the end of `B`.
    ///
    /// Duplicating a single span this way adds no resolution,
    /// but it lets a lone span be used wherever the higher-resolution
    /// two-span form is accepted.
    fn from(span: Span) -> Self {
        let (start, end) = (span, span);

        (start, end)
    }
}
|
||
|
||
impl Display for Span {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||
// Needed to avoid unaligned references since Span is packed.
|
||
let ctx = self.ctx;
|
||
let offset = self.offset as usize;
|
||
|
||
let end = offset + self.len as usize;
|
||
|
||
// Very primitive information to begin with; we'll have something
|
||
// more useful in the future.
|
||
write!(f, "[{} offset {}-{}]", ctx, offset, end)
|
||
}
|
||
}
|
||
|
||
/// A placeholder span indicating that a span is expected but is not yet
|
||
/// known.
|
||
pub const UNKNOWN_SPAN: Span = Span::st_ctx(st16::CTX_UNKNOWN);
|
||
|
||
/// Context for byte offsets (e.g. a source file).
|
||
///
|
||
/// A context is lifetime-free and [`Copy`]-able,
|
||
/// with the assumption that an interned [`PathSymbolId`] will only need
|
||
/// to be resolved to its underlying value in a diagnostic context where
|
||
/// the internment system is readily available.
|
||
///
|
||
/// Since this is used within [`Span`],
|
||
/// it must be kept as small as possible.
|
||
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
|
||
pub struct Context(PathSymbolId);
|
||
|
||
impl Context {
|
||
/// Produce a [`Span`] within the given context.
|
||
#[inline]
|
||
pub const fn span(self, offset: SpanOffsetSize, len: SpanLenSize) -> Span {
|
||
Span {
|
||
ctx: self,
|
||
offset,
|
||
len,
|
||
}
|
||
}
|
||
|
||
/// Attempt to produce a [`Span`] of the given length at the given
|
||
/// offset,
|
||
/// otherwise fall back to a `(0,0)` (ZZ) span.
|
||
///
|
||
/// If the offset cannot be stored,
|
||
/// then the length will always be `0` even if it could otherwise be
|
||
/// represented;
|
||
/// `(0,0)` indicates no span,
|
||
/// whereas `(0,N)` would indicate a span of length `N` at
|
||
/// offset `0`,
|
||
/// which would not be true.
|
||
///
|
||
/// If the offset can be represented but not the length,
|
||
/// then a zero-length span at that offset will be produced,
|
||
/// which still provides useful information.
|
||
/// This may be the case for very large objects,
|
||
/// like compiled text fragments.
|
||
///
|
||
/// The rationale here is that spans are intended to be informative.
|
||
/// If we are unable to provide that information due to exceptional
|
||
/// circumstances
|
||
/// (very large file or very large token),
|
||
/// then it's better to provide _some_ information than to bail out
|
||
/// with an error and interrupt the entire process,
|
||
/// potentially masking errors in the process.
|
||
#[inline]
|
||
pub fn span_or_zz(self, offset: usize, len: usize) -> Span {
|
||
self.span(offset.try_into().unwrap_or(0), len.try_into().unwrap_or(0))
|
||
}
|
||
}
|
||
|
||
/// A placeholder context indicating that a context is expected but is not
|
||
/// yet known.
|
||
pub const UNKNOWN_CONTEXT: Context = Context(st16::raw::CTX_UNKNOWN);
|
||
|
||
impl<P: Into<PathSymbolId>> From<P> for Context {
|
||
fn from(sym: P) -> Self {
|
||
Self(sym.into())
|
||
}
|
||
}
|
||
|
||
impl Display for Context {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||
self.0.fmt(f)
|
||
}
|
||
}
|
||
|
||
impl AsRef<Path> for Context {
|
||
fn as_ref(&self) -> &Path {
|
||
Path::new(self.0.lookup_str())
|
||
}
|
||
}
|
||
|
||
/// A closed interval (range of values including its endpoints) representing
|
||
/// source bytes associated with a token.
|
||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||
pub struct ClosedByteInterval<T = SpanOffsetSize>(pub T, pub T)
|
||
where
|
||
T: Copy + PartialOrd;
|
||
|
||
impl<T: Copy + PartialOrd> From<(T, T)> for ClosedByteInterval<T> {
|
||
/// Convert a tuple into a closed byte interval where the first index
|
||
/// represents the start of the interval and the second index the
|
||
/// end.
|
||
///
|
||
/// Panics
|
||
/// ======
|
||
/// The second value must be ≥ the first.
|
||
fn from(src: (T, T)) -> Self {
|
||
assert!(src.1 >= src.0);
|
||
|
||
Self(src.0, src.1)
|
||
}
|
||
}
|
||
|
||
// An interval over the default offset type must be the size of a `u64`.
assert_eq_size!(ClosedByteInterval, u64);
|
||
|
||
/// Placeholder spans and contexts for use in tests.
#[cfg(test)]
pub mod dummy {
    use super::{st16, Context, Span};

    /// A span for situations where one is required but its value is
    /// irrelevant.
    ///
    /// This exists primarily for tests;
    /// real code should always supply an appropriate span so that error
    /// messages and source analysis remain sensible.
    /// When a span is genuinely unknown,
    /// use [`super::UNKNOWN_SPAN`] instead.
    ///
    /// Further dummy spans can be derived from this one.
    pub const DUMMY_SPAN: Span = Span::st_ctx(st16::CTX_DUMMY);

    /// A context for situations where one is required but its value is
    /// irrelevant.
    ///
    /// This exists primarily for tests;
    /// real code should always supply an appropriate span so that error
    /// messages and source analysis remain sensible.
    /// When a context is genuinely unknown,
    /// use [`super::UNKNOWN_CONTEXT`] instead.
    pub const DUMMY_CONTEXT: Context = Context(st16::raw::CTX_DUMMY);

    // Short private alias for `DUMMY_SPAN`.
    // It is deliberately not exported:
    //   a second public name could mislead someone into thinking that it
    //   differs from `DUMMY_SPAN` in some way.
    const S0: Span = DUMMY_SPAN;

    // `Sn` is the dummy span moved to offset `n`.
    pub const S1: Span = S0.offset_add(1).unwrap();
    pub const S2: Span = S0.offset_add(2).unwrap();
    pub const S3: Span = S0.offset_add(3).unwrap();
    pub const S4: Span = S0.offset_add(4).unwrap();
    pub const S5: Span = S0.offset_add(5).unwrap();
    pub const S6: Span = S0.offset_add(6).unwrap();
    pub const S7: Span = S0.offset_add(7).unwrap();
    pub const S8: Span = S0.offset_add(8).unwrap();
    pub const S9: Span = S0.offset_add(9).unwrap();
    pub const S10: Span = S0.offset_add(10).unwrap();
    pub const S11: Span = S0.offset_add(11).unwrap();
    pub const S12: Span = S0.offset_add(12).unwrap();
    pub const S13: Span = S0.offset_add(13).unwrap();
    pub const S14: Span = S0.offset_add(14).unwrap();
    pub const S15: Span = S0.offset_add(15).unwrap();
    pub const S16: Span = S0.offset_add(16).unwrap();
    pub const S17: Span = S0.offset_add(17).unwrap();
    pub const S18: Span = S0.offset_add(18).unwrap();
    pub const S19: Span = S0.offset_add(19).unwrap();
    pub const S20: Span = S0.offset_add(20).unwrap();
}
|
||
|
||
#[cfg(test)]
mod test {
    use super::*;

    // Little endian check.
    //
    // The packed bytes must be laid out as expected,
    //   otherwise the integer representation loses the properties that
    //   sorting and comparison depend upon.
    #[cfg(target_endian = "little")]
    #[test]
    fn span_pack_le() {
        let span =
            Span::new(0xA3A2A1A0, 0xB1B0, SymbolId::test_from_int(0xC1C0));

        let packed = span.as_u64();

        assert_eq!(
            0xC1C0_A3A2A1A0_B1B0,
            // ^    ^        ^
            // ctx  offset   len
            packed,
            "endianness check failed: {:X?}",
            packed
        );
    }

    #[cfg(target_endian = "big")]
    #[test]
    fn span_pack_be_not_supported() {
        panic!("Big-endian systems are not currently supported");
    }

    // Everything below follows from the packing test above,
    //   but these tests additionally verify that the implementations
    //   behave as intended.

    #[test]
    fn span_at_later_offset_in_same_context_compares_greater() {
        let ctx = Context::from("imaginary");
        let earlier = ctx.span(10, 5);
        let later = ctx.span(20, 5);

        // Both comparisons must agree with one another.
        assert!(later > earlier);
        assert!(later.as_u64() > earlier.as_u64());
    }

    #[test]
    fn spans_order_by_context_start_and_len() {
        let ctxa = Context::from("context a");
        let ctxb = Context::from("context b");

        // This test is only meaningful if the contexts themselves are
        // ordered this way.
        assert!(ctxa.0 < ctxb.0);

        let sa1 = ctxa.span(10, 1);
        let sa2 = ctxa.span(22, 1);
        let sa3 = ctxa.span(35, 1);

        let sb1 = ctxb.span(11, 1);
        let sb2 = ctxb.span(20, 1);
        let sb3 = ctxb.span(33, 1);

        let mut shuffled = vec![sa3, sb2, sb1, sa2, sa1, sb3];
        shuffled.sort();

        assert_eq!(shuffled, vec![sa1, sa2, sa3, sb1, sb2, sb3]);
    }

    #[test]
    fn retrieve_span_components() {
        let ctx = Context::from("foo");
        let offset = 100;
        let len = 50;

        let span = ctx.span(offset, len);
        let components = (span.offset(), span.len(), span.context());

        assert_eq!((offset, len, ctx), components);
    }

    #[test]
    fn span_offset_add() {
        let ctx = Context::from("addtest");
        let offset = 10;
        let len = 5;

        let span = ctx.span(offset, len);

        // An addition that fits.
        let moved = Span {
            offset: offset + 10,
            len,
            ctx,
        };
        assert_eq!(span.offset_add(10), Some(moved));

        // An addition that does not fit must fail rather than wrap.
        assert_eq!(span.offset_add(SpanOffsetSize::MAX), None);
    }

    #[test]
    fn span_into_twospan() {
        let ctx = Context::from("foo");
        let span = ctx.span(10, 50);

        assert_eq!((span, span), span.into());
    }

    #[test]
    fn span_endpoints() {
        let ctx = Context::from("end");
        let span = ctx.span(10, 20);

        let (first, last) = span.endpoints();

        assert_eq!(first, Span::new(10, 0, ctx));
        assert_eq!(last, Some(Span::new(30, 0, ctx)));
    }

    #[test]
    fn span_endpoints_exceeding_max_offset() {
        let ctx = Context::from("end");
        let offset = SpanOffsetSize::MAX - 5;
        let span = ctx.span(offset, 10);

        let (first, last) = span.endpoints();

        assert_eq!(first, Span::new(offset, 0, ctx));
        assert_eq!(last, None);
    }

    #[test]
    fn span_endpoints_saturated() {
        let ctx = Context::from("end");
        let offset = SpanOffsetSize::MAX - 5;
        let span = ctx.span(offset, 10);

        let (first, last) = span.endpoints_saturated();

        assert_eq!(first, Span::new(offset, 0, ctx));
        assert_eq!(last, Span::new(SpanOffsetSize::MAX, 0, ctx));
    }

    #[test]
    fn span_slice_yields_slice_within_original() {
        let ctx = Context::from("slice");
        let whole = ctx.span(10, 10);

        assert_eq!(ctx.span(15, 5), whole.slice(5, 5));
    }

    #[test]
    fn span_slice_large_values_yield_original() {
        let span = Context::from("slice").span(0, 50);

        // An offset that is too large yields the original even though
        // the length is okay.
        assert_eq!(span, span.slice(usize::MAX, 5));

        // A length that is too large yields the original even though the
        // offset is okay.
        assert_eq!(span, span.slice(0, usize::MAX));
    }

    #[test]
    fn span_merge_one_after_other() {
        let ctx = Context::from("merge");

        // "an example string"
        //     [-----] [----]
        //     3     9 11  16
        //     |  A      B  |
        //     [------------]
        //     3           16
        //           C

        let a = ctx.span(3, 7);
        let b = ctx.span(11, 6);
        let c = ctx.span(3, 14);

        assert_eq!(a.merge(b), Some(c));
        assert_eq!(b.merge(a), Some(c));
    }

    #[test]
    fn span_merge_overlap() {
        let ctx = Context::from("merge");

        // "an example string"
        //     [---+-]      |
        //     3   | 9      |
        //     |  A|        |
        //     |   [--------]
        //     |   7       16
        //     |       B    |
        //     [------------]
        //     3           16
        //           C

        let a = ctx.span(3, 7);
        let b = ctx.span(7, 10);
        let c = ctx.span(3, 14);

        // Merging in both orders covers an overlap on either side.
        assert_eq!(a.merge(b), Some(c));
        assert_eq!(b.merge(a), Some(c));
    }

    #[test]
    fn span_merge_overlap_within() {
        let ctx = Context::from("merge");

        // "an example string"
        //  |[----]  |
        //  |1    6  |
        //  |   B    |
        //  [--------]
        //  0        9
        //      C

        let b = ctx.span(1, 6);
        let c = ctx.span(0, 10);

        assert_eq!(b.merge(c), Some(c));
        assert_eq!(c.merge(b), Some(c));
    }

    // Spans located in different contexts cannot sensibly be merged.
    #[test]
    fn span_merge_different_contexts() {
        let ctx_a = Context::from("merge_a");
        let ctx_b = Context::from("merge_b");

        let in_a = ctx_a.span(0, 1);
        let in_b = ctx_b.span(1, 2);

        assert_eq!(in_a.merge(in_b), None);
    }
}
|