tamer: xir::parse::ele: Remove attr sum state
This removes quite a bit of work, and work that was difficult to reason about. While I'm disappointed that that hard work is lost (aside from digging it up in the commit history), I am happy that it was able to be removed, because the extra complexity and cognitive burden was significant. This removes more `memcpy`s than the sum state could have hoped to, since aggregation is no longer necessary. Given that, there is a slight performacne improvement. The re-introduction of required and duplicate checks later on should be more efficient than this was, and so this should be a net win overall in the end. DEV-13346main
parent
f872181f64
commit
07dff3ba4e
|
@ -28,7 +28,7 @@ mod error;
|
|||
|
||||
pub use attr::{parse_attrs, AttrParseState};
|
||||
pub use ele::{
|
||||
AttrFieldOp, AttrFieldSum, EleParseState, NodeMatcher, Nt, NtState,
|
||||
StateStack, SumNt, SumNtState, SuperCtx, SuperState, SuperStateContext,
|
||||
EleParseState, NodeMatcher, Nt, NtState, StateStack, SumNt, SumNtState,
|
||||
SuperState, SuperStateContext,
|
||||
};
|
||||
pub use error::{AttrParseError, NtError, SumNtError};
|
||||
|
|
|
@ -0,0 +1,264 @@
|
|||
// XIR attribute parser generator tests
|
||||
//
|
||||
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
|
||||
//
|
||||
// This file is part of TAME.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use super::super::{AttrParseError, AttrParseState};
|
||||
use crate::{
|
||||
diagnose::{AnnotatedSpan, Diagnostic},
|
||||
parse::{self, util::SPair, ParseError, Parsed, Parser, TokenStream},
|
||||
span::dummy::*,
|
||||
xir::{
|
||||
attr::{Attr, AttrSpan},
|
||||
flat::XirfToken,
|
||||
st::qname::*,
|
||||
OpenSpan, QName,
|
||||
},
|
||||
};
|
||||
use std::{
|
||||
convert::Infallible,
|
||||
error::Error,
|
||||
fmt::{Debug, Display},
|
||||
iter,
|
||||
};
|
||||
|
||||
use Parsed::Object;
|
||||
|
||||
const SE: OpenSpan = OpenSpan(S1.offset_add(100).unwrap(), 0);
|
||||
|
||||
// Random choice of QName for tests.
|
||||
const QN_ELE: QName = QN_YIELDS;
|
||||
|
||||
fn sut_parse<S: AttrParseState, I: TokenStream<S::Token>>(
|
||||
toks: I,
|
||||
) -> Parser<S, I>
|
||||
where
|
||||
S: AttrParseState,
|
||||
S::Context: Default,
|
||||
{
|
||||
Parser::with_state(S::with_element(QN_ELE, SE), toks)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum Foo {
|
||||
A(SPair),
|
||||
B(SPair),
|
||||
Unused(SPair),
|
||||
}
|
||||
|
||||
impl parse::Object for Foo {}
|
||||
|
||||
impl<E> From<Foo> for Result<Foo, E> {
|
||||
fn from(value: Foo) -> Self {
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
// Remember: we only describe what is _permissable_,
|
||||
// not what is required or what order it must appear in.
|
||||
// That is the responsibility of parsers lower in the pipeline.
|
||||
#[test]
|
||||
fn attrs_any_order_and_optional() {
|
||||
attr_parse_stream! {
|
||||
type Object = Foo;
|
||||
type ValueError = Infallible;
|
||||
|
||||
ValuesState {
|
||||
QN_NAME => Foo::A,
|
||||
|
||||
// The above is the same as this longer form:
|
||||
QN_YIELDS => |spair| Foo::B(spair),
|
||||
|
||||
// No value will be provided for this one,
|
||||
// which is okay since all are implicitly optional.
|
||||
QN_INDEX => Foo::Unused,
|
||||
}
|
||||
}
|
||||
|
||||
let name = "val_name".into();
|
||||
let yields = "val_value".into();
|
||||
let attr_name = Attr(QN_NAME, name, AttrSpan(S1, S2));
|
||||
let attr_yields = Attr(QN_YIELDS, yields, AttrSpan(S2, S3));
|
||||
|
||||
// @yields then @name just to emphasize that order does not matter.
|
||||
let toks = vec![XirfToken::Attr(attr_yields), XirfToken::Attr(attr_name)];
|
||||
|
||||
assert_eq!(
|
||||
// Simply parses back out the attributes;
|
||||
// see following tests further value parsing.
|
||||
// Note that we omit one of the attributes declared above.
|
||||
Ok(vec![
|
||||
Object(Foo::B(SPair(yields, S3))),
|
||||
Object(Foo::A(SPair(name, S2)))
|
||||
]),
|
||||
sut_parse::<ValuesState, _>(toks.into_iter()).collect(),
|
||||
);
|
||||
}
|
||||
|
||||
// Since all are optional,
|
||||
// the attribute list can be empty.
|
||||
#[test]
|
||||
fn attrs_empty() {
|
||||
attr_parse_stream! {
|
||||
type Object = Foo;
|
||||
type ValueError = Infallible;
|
||||
|
||||
ValuesState {
|
||||
// We will not provide a value for this.
|
||||
QN_NAME => Foo::Unused,
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
// Simply parses back out the attributes;
|
||||
// see following tests further value parsing.
|
||||
Ok(vec![]),
|
||||
sut_parse::<ValuesState, _>(iter::empty()).collect(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn attr_value_error() {
|
||||
impl TryFrom<SPair> for Foo {
|
||||
type Error = FooError;
|
||||
|
||||
fn try_from(spair: SPair) -> Result<Self, Self::Error> {
|
||||
Err(FooError(spair))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
struct FooError(SPair);
|
||||
|
||||
impl Error for FooError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for FooError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "test FooError")
|
||||
}
|
||||
}
|
||||
|
||||
impl Diagnostic for FooError {
|
||||
fn describe(&self) -> Vec<AnnotatedSpan> {
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<Result<Foo, FooError>> for FooError {
|
||||
fn into(self) -> Result<Foo, FooError> {
|
||||
Err(self)
|
||||
}
|
||||
}
|
||||
|
||||
attr_parse_stream! {
|
||||
type Object = Foo;
|
||||
type ValueError = FooError;
|
||||
|
||||
ValueTryIntoState {
|
||||
// Explicit form:
|
||||
QN_NAME => |s| Err(FooError(s)),
|
||||
|
||||
// Conversion using `Into`:
|
||||
QN_YIELDS => FooError,
|
||||
}
|
||||
}
|
||||
|
||||
let val_name = "val_name".into();
|
||||
let val_yields = "val_yields".into();
|
||||
let attr_name = Attr(QN_NAME, val_name, AttrSpan(S1, S2));
|
||||
let attr_yields = Attr(QN_YIELDS, val_yields, AttrSpan(S2, S3));
|
||||
|
||||
let toks = vec![
|
||||
XirfToken::Attr(attr_name.clone()),
|
||||
XirfToken::Attr(attr_yields.clone()),
|
||||
];
|
||||
|
||||
let mut sut = sut_parse::<ValueTryIntoState, _>(toks.into_iter());
|
||||
|
||||
assert_eq!(
|
||||
Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
|
||||
FooError(SPair(val_name, S2)),
|
||||
QN_ELE
|
||||
)))),
|
||||
sut.next(),
|
||||
);
|
||||
|
||||
// TryInto on `Option` inner type.
|
||||
assert_eq!(
|
||||
Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
|
||||
FooError(SPair(val_yields, S3)),
|
||||
QN_ELE
|
||||
)))),
|
||||
sut.next(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unexpected_attr_with_recovery() {
|
||||
attr_parse_stream! {
|
||||
type Object = Foo;
|
||||
type ValueError = Infallible;
|
||||
|
||||
UnexpectedState {
|
||||
QN_NAME => Foo::A,
|
||||
QN_SRC => Foo::B,
|
||||
}
|
||||
}
|
||||
|
||||
let name = "val_name".into();
|
||||
let src = "val_src".into();
|
||||
let attr_name = Attr(QN_NAME, name, AttrSpan(S1, S2));
|
||||
let attr_unexpected = Attr(QN_TYPE, "unexpected".into(), AttrSpan(S1, S2));
|
||||
let attr_src = Attr(QN_SRC, src, AttrSpan(S2, S3));
|
||||
|
||||
let toks = vec![
|
||||
// This is expected:
|
||||
XirfToken::Attr(attr_name),
|
||||
// NOT expected (produce an error):
|
||||
XirfToken::Attr(attr_unexpected.clone()),
|
||||
// <Recovery must take place here.>
|
||||
// This is expected after recovery:
|
||||
XirfToken::Attr(attr_src),
|
||||
];
|
||||
|
||||
let mut sut = Parser::with_state(
|
||||
UnexpectedState::with_element(QN_ELE, SE),
|
||||
toks.into_iter(),
|
||||
);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Object(Foo::A(SPair(name, S2))))));
|
||||
|
||||
// This will fail at the unknown attribute,
|
||||
// and must then remain in a state where parsing can be resumed.
|
||||
// This simply means ignoring the provided attribute,
|
||||
// which in XIRF is discarding a single token of input,
|
||||
// rather than having to continue parsing the attribute to then
|
||||
// discard.
|
||||
assert_eq!(
|
||||
sut.next(),
|
||||
Some(Err(ParseError::StateError(AttrParseError::UnexpectedAttr(
|
||||
attr_unexpected,
|
||||
QN_ELE,
|
||||
)))),
|
||||
);
|
||||
|
||||
assert_eq!(sut.next(), Some(Ok(Object(Foo::B(SPair(src, S3))))));
|
||||
}
|
|
@ -44,41 +44,6 @@ use crate::{ele_parse, parse::Parser};
|
|||
/// A parser accepting a single element.
|
||||
pub trait EleParseState: ParseState {}
|
||||
|
||||
/// [`SuperState`] [`Context`] that gets propagated to each child parser.
|
||||
///
|
||||
/// This consists of two components:
|
||||
///
|
||||
/// 1. The [`StateStack`],
|
||||
/// used to store child NT [`ParseState`]s when transferring to
|
||||
/// another NT; and
|
||||
/// 2. An [`AttrFieldSum`] object representing the active attribute field
|
||||
/// context.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SuperCtx<S: SuperState + Default>(
|
||||
Context<StateStack<S>>,
|
||||
S::AttrFields,
|
||||
);
|
||||
|
||||
impl<S: SuperState + Default> SuperCtx<S> {
|
||||
/// Retrieve a mutable reference to each component.
|
||||
///
|
||||
/// This is utilized because method calls are more convenient than
|
||||
/// destructuring with [`Context`]'s required use of `Deref`.
|
||||
pub fn parts(
|
||||
&mut self,
|
||||
) -> (&mut Context<StateStack<S>>, &mut S::AttrFields) {
|
||||
match self {
|
||||
Self(stack, fields) => (stack, fields),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stack_ref(&self) -> &Context<StateStack<S>> {
|
||||
match self {
|
||||
Self(stack, _) => stack,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum level of parser nesting.
|
||||
///
|
||||
/// Unfortunately,
|
||||
|
@ -108,7 +73,8 @@ pub const MAX_DEPTH: usize = 64;
|
|||
#[derive(Debug, Default)]
|
||||
pub struct StateStack<S: SuperState>(ArrayVec<S, MAX_DEPTH>);
|
||||
|
||||
pub type SuperStateContext<S> = Context<SuperCtx<S>>;
|
||||
/// [`SuperState`] [`Context`] that gets propagated to each child parser.
|
||||
pub type SuperStateContext<S> = Context<StateStack<S>>;
|
||||
|
||||
// Note that public visibility is needed because `ele_parse` expands outside
|
||||
// of this module.
|
||||
|
@ -515,12 +481,6 @@ macro_rules! ele_parse {
|
|||
#[derive(Debug, PartialEq, Eq, Default)]
|
||||
$vis struct $nt(crate::xir::parse::NtState<$nt>);
|
||||
|
||||
#[doc(hidden)]
|
||||
$vis type [<$nt AttrFields>] =
|
||||
crate::parse::Context<
|
||||
<[<$nt AttrState_>] as crate::xir::parse::AttrParseState>::Fields
|
||||
>;
|
||||
|
||||
impl $nt {
|
||||
/// A default state that cannot be preempted by the superstate.
|
||||
#[allow(dead_code)] // not utilized for every NT
|
||||
|
@ -643,7 +603,7 @@ macro_rules! ele_parse {
|
|||
self,
|
||||
tok: Self::Token,
|
||||
#[allow(unused_variables)] // used only if child NTs
|
||||
ctx: &mut Self::Context,
|
||||
stack: &mut Self::Context,
|
||||
) -> crate::parse::TransitionResult<Self::Super> {
|
||||
use crate::{
|
||||
parse::{Transition, Transitionable},
|
||||
|
@ -661,17 +621,12 @@ macro_rules! ele_parse {
|
|||
};
|
||||
|
||||
let Self(selfst) = self;
|
||||
#[allow(unused_variables)] // stack sometimes unused
|
||||
let (stack, attr_fields) = ctx.parts();
|
||||
|
||||
match (selfst, tok) {
|
||||
(
|
||||
Expecting | NonPreemptableExpecting | Closed(..),
|
||||
XirfToken::Open(qname, span, depth)
|
||||
) if $nt::matches(qname) => {
|
||||
use crate::xir::parse::AttrFieldSum;
|
||||
attr_fields.init_fields::<[<$nt AttrFields>]>();
|
||||
|
||||
$(
|
||||
let $qname_matched = qname;
|
||||
let $open_span = span;
|
||||
|
@ -752,12 +707,10 @@ macro_rules! ele_parse {
|
|||
// which overrides this match directly above
|
||||
// (xref <<SATTR>>).
|
||||
#[allow(unreachable_patterns)]
|
||||
(Attrs(meta @ (_, span, _), sa), tok) => {
|
||||
use crate::xir::parse::AttrFieldSum;
|
||||
|
||||
(Attrs(meta, sa), tok) => {
|
||||
sa.delegate::<Self, _>(
|
||||
tok,
|
||||
attr_fields.narrow::<[<$nt AttrFields>]>(span),
|
||||
crate::parse::EmptyContext,
|
||||
|sa| Transition(Self(Attrs(meta, sa))),
|
||||
|| Transition(Self(Jmp($ntfirst(meta)))),
|
||||
)
|
||||
|
@ -921,11 +874,6 @@ macro_rules! ele_parse {
|
|||
#[derive(Debug, PartialEq, Eq, Default)]
|
||||
$vis struct $nt(crate::xir::parse::SumNtState<$nt>);
|
||||
|
||||
// Must be a _unique_ unit type to avoid conflicting trait impls.
|
||||
#[doc(hidden)]
|
||||
#[derive(Debug, PartialEq, Eq, Default)]
|
||||
$vis struct [<$nt AttrFields>];
|
||||
|
||||
impl $nt {
|
||||
fn non_preemptable() -> Self {
|
||||
Self(crate::xir::parse::SumNtState::NonPreemptableExpecting)
|
||||
|
@ -1034,7 +982,7 @@ macro_rules! ele_parse {
|
|||
fn parse_token(
|
||||
self,
|
||||
tok: Self::Token,
|
||||
ctx: &mut Self::Context,
|
||||
stack: &mut Self::Context,
|
||||
) -> crate::parse::TransitionResult<Self::Super> {
|
||||
use crate::{
|
||||
parse::Transition,
|
||||
|
@ -1049,8 +997,6 @@ macro_rules! ele_parse {
|
|||
},
|
||||
};
|
||||
|
||||
let (stack, _) = ctx.parts();
|
||||
|
||||
match (self.0, tok) {
|
||||
$(
|
||||
(
|
||||
|
@ -1212,77 +1158,6 @@ macro_rules! ele_parse {
|
|||
)*
|
||||
}
|
||||
|
||||
/// Superstate attribute context sum type.
|
||||
///
|
||||
/// For more information on why this exists,
|
||||
/// see [`AttrFieldSum`](crate::xir::parse::AttrFieldSum).
|
||||
#[derive(Debug, Default)]
|
||||
$vis enum [<$super AttrFields>] {
|
||||
#[default]
|
||||
/// Indicates that no attribute parsing is active.
|
||||
///
|
||||
/// Since attribute parsing is initialized at each attribute
|
||||
/// state transition,
|
||||
/// this will never be read.
|
||||
/// Further,
|
||||
/// this may never be utilized beyond the initial construction
|
||||
/// of the superstate's context.
|
||||
Uninitialized,
|
||||
|
||||
$(
|
||||
$nt([<$nt AttrFields>]),
|
||||
)*
|
||||
}
|
||||
|
||||
impl crate::xir::parse::AttrFieldSum for [<$super AttrFields>] {}
|
||||
|
||||
// Each NT has its own attribute parsing
|
||||
// (except for sum types);
|
||||
// we need to expose a way to initialize parsing for each and
|
||||
// then narrow the type to the appropriate `Context` for the
|
||||
// respective NT's attribute parser.
|
||||
$(
|
||||
impl crate::xir::parse::AttrFieldOp<[<$nt AttrFields>]>
|
||||
for [<$super AttrFields>]
|
||||
{
|
||||
fn init_new() -> Self {
|
||||
Self::$nt(Default::default())
|
||||
}
|
||||
|
||||
fn narrow(
|
||||
&mut self,
|
||||
open_span: crate::xir::OpenSpan,
|
||||
) -> &mut [<$nt AttrFields>]
|
||||
{
|
||||
use crate::xir::EleSpan;
|
||||
use crate::diagnose::Annotate;
|
||||
|
||||
// Maybe Rust will support more robust dependent types
|
||||
// in the future to make this unnecessary;
|
||||
// see trait docs for this method for more information.
|
||||
match self {
|
||||
// This should _always_ be the case unless if the
|
||||
// system properly initializes attribute parsing
|
||||
// when transitioning to the `Attr` state.
|
||||
Self::$nt(fields) => fields,
|
||||
|
||||
// Using `unreachable_unchecked` did not have any
|
||||
// performance benefit at the time of writing.
|
||||
_ => crate::diagnostic_unreachable!(
|
||||
open_span
|
||||
.span()
|
||||
.internal_error(
|
||||
"failed to initialize attribute parsing \
|
||||
for this element"
|
||||
)
|
||||
.into(),
|
||||
"invalid AttrFields",
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
||||
// Default parser is the first NT,
|
||||
// and is non-preemptable to force error handling if the root node
|
||||
// is unexpected.
|
||||
|
@ -1372,7 +1247,7 @@ macro_rules! ele_parse {
|
|||
fn parse_token(
|
||||
self,
|
||||
tok: Self::Token,
|
||||
ctx: &mut Self::Context,
|
||||
stack: &mut Self::Context,
|
||||
) -> crate::parse::TransitionResult<Self> {
|
||||
use crate::{
|
||||
parse::Transition,
|
||||
|
@ -1412,8 +1287,6 @@ macro_rules! ele_parse {
|
|||
depth,
|
||||
),
|
||||
) if st.can_preempt_node() && $pre_nt::matches(qname) => {
|
||||
let (stack, _) = ctx.parts();
|
||||
|
||||
stack.transfer_with_ret(
|
||||
Transition(st),
|
||||
Transition(
|
||||
|
@ -1454,9 +1327,8 @@ macro_rules! ele_parse {
|
|||
// atop of the stack.
|
||||
(Self::$nt(st), tok) => st.delegate_child(
|
||||
tok,
|
||||
ctx,
|
||||
|deadst, tok, ctx| {
|
||||
let (stack, _) = ctx.parts();
|
||||
stack,
|
||||
|deadst, tok, stack| {
|
||||
stack.ret_or_dead(tok, deadst)
|
||||
},
|
||||
),
|
||||
|
@ -1464,7 +1336,7 @@ macro_rules! ele_parse {
|
|||
}
|
||||
}
|
||||
|
||||
fn is_accepting(&self, ctx: &Self::Context) -> bool {
|
||||
fn is_accepting(&self, stack: &Self::Context) -> bool {
|
||||
// This is short-circuiting,
|
||||
// starting at the _bottom_ of the stack and moving
|
||||
// upward.
|
||||
|
@ -1481,8 +1353,8 @@ macro_rules! ele_parse {
|
|||
//
|
||||
// After having considered the stack,
|
||||
// we can then consider the active `ParseState`.
|
||||
ctx.stack_ref().all(|st| st.is_inner_accepting(ctx))
|
||||
&& self.is_inner_accepting(ctx)
|
||||
stack.all(|st| st.is_inner_accepting(stack))
|
||||
&& self.is_inner_accepting(stack)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1538,9 +1410,7 @@ macro_rules! ele_parse {
|
|||
}
|
||||
}
|
||||
|
||||
impl crate::xir::parse::SuperState for $super {
|
||||
type AttrFields = [<$super AttrFields>];
|
||||
}
|
||||
impl crate::xir::parse::SuperState for $super {}
|
||||
}};
|
||||
|
||||
(@!ntfirst_init $super:ident, $ntfirst:ident $($nt:ident)*) => {
|
||||
|
@ -1558,228 +1428,7 @@ macro_rules! ele_parse {
|
|||
/// interdependencies.
|
||||
/// It represents the reification of such a state machine and all of its
|
||||
/// transitions.
|
||||
pub trait SuperState: ClosedParseState {
|
||||
/// Sum type holding a variant for every [`Nt`]'s attribute parsing
|
||||
/// context.
|
||||
///
|
||||
/// This holds the fields for each element as they are being
|
||||
/// aggregated,
|
||||
/// before a final attribute object is produced.
|
||||
type AttrFields: Debug + Default;
|
||||
}
|
||||
|
||||
/// Attribute context operations for individual NTs.
|
||||
///
|
||||
/// This is implemented for each NT's attribute parsing context by
|
||||
/// [`ele_parse!`] during superstate generation.
|
||||
///
|
||||
/// See [`AttrFieldSum`] for further explanation.
|
||||
pub trait AttrFieldOp<T>: AttrFieldSum + Sized {
|
||||
/// Initialize a new attribute parsing context for the given NT's
|
||||
/// attribute parsing context (represented by `T`).
|
||||
///
|
||||
/// This must be invoked before attribute parsing begins for an element,
|
||||
/// otherwise there will be a type mismatch during [`Self::narrow`]
|
||||
/// that will result in a panic.
|
||||
fn init_new() -> Self;
|
||||
|
||||
/// Narrow the [`AttrFieldSum`] into the attribute context `T`,
|
||||
/// panicing if narrowing fails.
|
||||
///
|
||||
/// The provided [`OpenSpan`] is utilized only for a diagnostic panic if
|
||||
/// lowering fails,
|
||||
/// and should never be utilized in a correctly implemented system.
|
||||
///
|
||||
/// Panics
|
||||
/// ======
|
||||
/// This will issue a diagnostic panic if the requested type `T` was not
|
||||
/// the last type initialized using [`Self::init_new`].
|
||||
/// The idea is that,
|
||||
/// if [`ele_parse`] is properly implemented,
|
||||
/// non-matching branches should be unreachable,
|
||||
/// and so this panic should never occur.
|
||||
fn narrow(&mut self, open_span: OpenSpan) -> &mut T;
|
||||
}
|
||||
|
||||
/// Sum type representing the attribute parsing contexts for each [`Nt`]'s
|
||||
/// attribute parser.
|
||||
///
|
||||
/// This may also contain unique unit types for [`SumNt`]s,
|
||||
/// which serve no purpose beyond simplifying construction of this sum
|
||||
/// type.
|
||||
///
|
||||
/// Why does this exist?
|
||||
/// ====================
|
||||
/// Prior to this implementation,
|
||||
/// each individual NT's attribute parsers ([`AttrParseState`]s)
|
||||
/// had embedded within them their parsing context.
|
||||
/// Since [`ParseState`] is immutable,
|
||||
/// it relies on Rust's ability to properly optimize away `memcpy`s so
|
||||
/// that the construction of a new [`ParseState`] amounts to in-place
|
||||
/// mutation of the existing one.
|
||||
///
|
||||
/// Unfortunately,
|
||||
/// some NTs have quite a few attributes,
|
||||
/// leading so some [`AttrParseState`]s that were nearing 2KiB in size.
|
||||
/// Since the [`AttrParseState`] is a component of NTs' [`ParseState`]s,
|
||||
/// their width had to grow to accommodate;
|
||||
/// and since [`SuperState`] aggregates all NTs,
|
||||
/// the width of the superstate had to accommodate the width of the
|
||||
/// largest NT parser.
|
||||
///
|
||||
/// This snowballing thwarted Rust's optimizations in many cases,
|
||||
/// which had a significant impact on performance and undermined the
|
||||
/// design of TAME's parsing system.
|
||||
/// Further,
|
||||
/// it resulted in a situation whereby the introduction of new attributes
|
||||
/// or NIR symbol variants would cut `tamec`'s performance in half;
|
||||
/// clearly things were only going to get worse.
|
||||
///
|
||||
/// Most data structures within TAME are used as IRs,
|
||||
/// pursuant to TAME's goal of reifying all parser state.
|
||||
/// Because of the streaming lowering pipline,
|
||||
/// IRs are typically ephemeral,
|
||||
/// and so Rust generally optimizes them away in their entirety.
|
||||
/// But the needs of [`NIR`](crate::nir`),
|
||||
/// for which the [`ele_parse!`] parser-generator was written,
|
||||
/// are slightly different—the
|
||||
/// NT states are stored on [`StateStack`],
|
||||
/// and so their representation cannot be completely optimized away.
|
||||
/// For this reason,
|
||||
/// the width of these data structures is of greater practical concern.
|
||||
///
|
||||
/// Separating and Hoisting Intermediate Attribute State
|
||||
/// ----------------------------------------------------
|
||||
/// The entire reason that [`Context`] exists in TAME's parsing framework
|
||||
/// is to be utilized when we're unable to coerce Rust into performing the
|
||||
/// necessary optimizations on immutable data structures.
|
||||
/// The solution was therefore to extract the field state of the attribute
|
||||
/// parser
|
||||
/// (representing the ongoing aggregation of attributes,
|
||||
/// akin to the Builder pattern in OOP circles)
|
||||
/// into a [`Context`],
|
||||
/// which removed it from the [`AttrParseState`],
|
||||
/// and therefore brought the [`SuperState`] down to a manageable size
|
||||
/// (512 bits at the time of writing).
|
||||
///
|
||||
/// Unfortunately,
|
||||
/// this creates a new obvious problem:
|
||||
/// how are we to feed the new context to each individual
|
||||
/// [`AttrParseState`] if we're keeping that context out of each NT's
|
||||
/// individual [`ParseState`]?
|
||||
/// By recognizing that only one attribute parser is active at any time,
|
||||
/// we would ideally have all such states aggregated into a single memory
|
||||
/// location that is only as wide as the largest attribute parsing context.
|
||||
/// This is what a sum type (via an `enum`) would give us,
|
||||
/// with a small one-byte cost for the discriminant of ~110 variants.
|
||||
///
|
||||
/// When the attribute context was part of [`AttrParseState`] and therefore
|
||||
/// part of each NT's [`ParseState`],
|
||||
/// the benefit was that the type of the context is statically known and
|
||||
/// could therefore be passed directly to the [`AttrParseState`] without
|
||||
/// any further consideration.
|
||||
/// But when we decouple that attribute context and hoist it out of all NTs
|
||||
/// into a single shared memory location,
|
||||
/// then the type becomes dynamic based on the active NT's parser.
|
||||
/// The type becomes this sum type ([`AttrFieldSum`]),
|
||||
/// which represents all possible types that could serve as such a
|
||||
/// context.
|
||||
///
|
||||
/// Context Narrowing
|
||||
/// -----------------
|
||||
/// [`AttrFieldSum`] enables polymorphism with respect to the attribute
|
||||
/// context,
|
||||
/// but the problem is that we have a _contravariant_ relationship—the
|
||||
/// context that we pass to the attribute parser must be an element of
|
||||
/// the [`AttrFieldSum`] but only one of them is valid.
|
||||
/// We must narrow from [`AttrFieldSum`] into the correct type;
|
||||
/// this is the job of [`AttrFieldOp`] via [`Self::narrow`].
|
||||
///
|
||||
/// The idea is this:
|
||||
///
|
||||
/// 1. We know that only one attribute parser is active at any time,
|
||||
/// because we cannot transition to other NTs while performing
|
||||
/// attribute parsing.
|
||||
/// This invariant is upheld by [`NtState::can_preempt_node`].
|
||||
/// 2. During the transition into the [`NtState::Attrs`] state,
|
||||
/// [`Self::init_fields`] must be used to prepare the context that
|
||||
/// will be required to parse attributes for the element represented
|
||||
/// by that respective NT.
|
||||
/// This means that this sum type will always assume the variant
|
||||
/// representing the appropriate context.
|
||||
/// 3. When delegating to the appropriate [`AttrParseState`],
|
||||
/// [`Self::narrow`] is used to invoke [`AttrFieldOp::narrow`] for
|
||||
/// the appropriate attribute context.
|
||||
/// Because of #2 above,
|
||||
/// this sum type must already have assumed that respective variant,
|
||||
/// and matching on that variant will always yield the requested
|
||||
/// attribute context type.
|
||||
///
|
||||
/// Just to be safe,
|
||||
/// in case we have some bug in this implementation,
|
||||
/// #3's call to [`Self::narrow`] ought to issue a panic;
|
||||
/// this provides a proper balance between safety
|
||||
/// (if the type is wrong,
|
||||
/// there are no memory safety issues)
|
||||
/// and ergonomics
|
||||
/// (the API is unchanged)
|
||||
/// for what should be unreachable code.
|
||||
/// Profiling showed no performance improvement at the time of writing when
|
||||
/// attempting to utilize [`std::hint::unreachable_unchecked`].
|
||||
///
|
||||
/// Before and After
|
||||
/// ----------------
|
||||
/// This implementation imposes an additional cognitive burden on groking
|
||||
/// this system,
|
||||
/// which is why it was initially passed up;
|
||||
/// it was only reconsidered when it was necessitated by performance
|
||||
/// characteristics and verified through profiling and analysis of the
|
||||
/// target disassembly.
|
||||
/// The documentation you are reading now is an attempt to offset the
|
||||
/// cognitive burden.
|
||||
///
|
||||
/// Ultimately,
|
||||
/// the amount of code required to implement this change was far less than
|
||||
/// the amount of text it takes to describe it here.
|
||||
/// And while that's a terrible metric to judge an implementation by,
|
||||
/// it is intended to convey that if someone does need to understand this
|
||||
/// subsystem,
|
||||
/// its bounds are quite limited.
|
||||
///
|
||||
/// The introduction of this system eliminated 90% of the `memcpy` calls
|
||||
/// present in `tamec` at the time of writing,
|
||||
/// completely removing most of them from the hot code path
|
||||
/// (the lowering pipline);
|
||||
/// the major exception is the necessary [`StateStack`],
|
||||
/// which exists on a _less hot_ code path,
|
||||
/// utilized only during transitions between NTs.
|
||||
/// This also clears the brush on paths leading to future optimizations.
|
||||
pub trait AttrFieldSum {
|
||||
/// Prepare attribute parsing using the attribute field context `F`.
|
||||
///
|
||||
/// This must be invoked at the beginning of each transition to
|
||||
/// [`NtState::Attrs`],
|
||||
/// otherwise later narrowing with [`Self::narrow`] will panic.
|
||||
///
|
||||
/// See [`Self`] and [`AttrFieldOp::init_new`] for more information.
|
||||
fn init_fields<F>(&mut self)
|
||||
where
|
||||
Self: AttrFieldOp<F>,
|
||||
{
|
||||
*self = AttrFieldOp::<F>::init_new();
|
||||
}
|
||||
|
||||
/// Narrow self into the attribute context `T`,
|
||||
/// panicing if narrowing fails.
|
||||
///
|
||||
/// See [`Self`] and [`AttrFieldOp::narrow`] for more information.
|
||||
fn narrow<F>(&mut self, open_span: OpenSpan) -> &mut F
|
||||
where
|
||||
Self: AttrFieldOp<F>,
|
||||
{
|
||||
AttrFieldOp::<F>::narrow(self, open_span)
|
||||
}
|
||||
}
|
||||
pub trait SuperState: ClosedParseState {}
|
||||
|
||||
/// Nonterminal.
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue