tamer: obj::xmlo::reader: preproc:symtable/preproc:sym parsing

This integrates much of the work done so far to parse into an
`XmloEvent::SymDecl`.  The attribute parsing _is_ verbose, and I do intend
to abstract it away later on, but I'm going to wait on that for now.

The new reader should be finishing up soon, which is really exciting, since
I started working on this months ago (before having to take a break on
TAMER); I'm anticipating strong performance gains in the reader, and this is
a test that will tell us how the compiler will perform moving forward with
the abstractions that I've spent so much time on.

DEV-10863
main
Mike Gerwitz 2022-03-30 09:06:10 -04:00
parent 4cb478a42d
commit 1e278cbe26
6 changed files with 337 additions and 78 deletions

View File

@ -46,9 +46,9 @@ pub enum XmloError {
/// A `preproc:sym` node was found, but is missing `@name`.
UnassociatedSym(Span),
/// The provided `preproc:sym/@type` is unknown or invalid.
InvalidType(String),
InvalidType(SymbolId, Span),
/// The provided `preproc:sym/@dtype` is unknown or invalid.
InvalidDtype(String),
InvalidDtype(SymbolId, Span),
/// The provided `preproc:sym/@dim` is invalid.
InvalidDim(SymbolId, Span),
/// A `preproc:sym-dep` element was found, but is missing `@name`.
@ -91,11 +91,11 @@ impl Display for XmloError {
"unassociated symbol table entry: \
preproc:sym/@name missing at {span}"
),
Self::InvalidType(ty) => {
write!(fmt, "invalid preproc:sym/@type `{}`", ty)
Self::InvalidType(ty, span) => {
write!(fmt, "invalid preproc:sym/@type `{ty}` at {span}")
}
Self::InvalidDtype(dtype) => {
write!(fmt, "invalid preproc:sym/@dtype `{}`", dtype)
Self::InvalidDtype(dtype, span) => {
write!(fmt, "invalid preproc:sym/@dtype `{dtype}` at {span}")
}
Self::InvalidDim(dim, span) => {
write!(fmt, "invalid preproc:sym/@dim `{dim}` at {span}")

View File

@ -19,11 +19,11 @@
use super::{SymAttrs, XmloError};
use crate::{
obj::xmlo::Dim,
obj::xmlo::{Dim, SymDtype, SymType},
parse::{self, ParseState, ParseStatus, Transition, TransitionResult},
span::Span,
sym::{st::*, SymbolId},
xir::{attr::Attr, flat::Object as Xirf},
xir::{attr::Attr, flat::Object as Xirf, QName},
};
// While the _use_ is gated, this isn't, to ensure that we still try to
@ -98,15 +98,25 @@ impl parse::Object for XmloEvent {}
pub type XmloResult<T> = Result<T, XmloError>;
qname_const! {
QN_LV_PACKAGE: L_LV:L_PACKAGE,
QN_PACKAGE: :L_PACKAGE,
QN_NAME: :L_NAME,
QN_UUROOTPATH: :L_UUROOTPATH,
QN_PROGRAM: :L_PROGRAM,
QN_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_SYMTABLE: L_PREPROC:L_SYMTABLE,
QN_SYM: L_PREPROC:L_SYM,
QN_DESC: :L_DESC,
QN_DIM: :L_DIM,
QN_DTYPE: :L_DTYPE,
QN_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_EXTERN: :L_EXTERN,
QN_GENERATED: L_PREPROC:L_GENERATED,
QN_ISOVERRIDE: :L_ISOVERRIDE,
QN_LV_PACKAGE: L_LV:L_PACKAGE,
QN_NAME: :L_NAME,
QN_PACKAGE: :L_PACKAGE,
QN_PARENT: :L_PARENT,
QN_PROGRAM: :L_PROGRAM,
QN_SRC: :L_SRC,
QN_SYM: L_PREPROC:L_SYM,
QN_SYMTABLE: L_PREPROC:L_SYMTABLE,
QN_TYPE: :L_TYPE,
QN_UUROOTPATH: :L_UUROOTPATH,
QN_VIRTUAL: :L_VIRTUAL,
QN_YIELDS: :L_YIELDS,
}
pub trait XmloSymtableState =
@ -227,27 +237,14 @@ impl ParseState for SymtableState {
Transition(Sym(span, Some(name), attrs)).incomplete()
}
(Sym(span, name, mut attrs), Xirf::Attr(Attr(key, value, _))) => {
match key {
QN_DIM => {
use crate::sym::st::raw::{N0, N1, N2};
(
Sym(_tspan, name, mut attrs),
Xirf::Attr(Attr(key, value, (_, span))),
) => {
let result = Self::parse_sym_attr(&mut attrs, key, value, span)
.map(|_: ()| ParseStatus::Incomplete);
let result = match value {
N0 => Ok(Dim::Scalar),
N1 => Ok(Dim::Vector),
N2 => Ok(Dim::Matrix),
_ => Err(XmloError::InvalidDim(value, span)),
}
.and_then(|dim| {
attrs.dim.replace(dim);
Ok(ParseStatus::Incomplete)
});
Transition(Sym(span, name, attrs)).result(result)
}
QN_NAME => unreachable!("@name already processed"),
todo => todo!("{todo}"),
}
Transition(Sym(span, name, attrs)).result(result)
}
todo => todo!("{todo:?}"),
@ -259,6 +256,139 @@ impl ParseState for SymtableState {
}
}
impl SymtableState {
    /// Apply one attribute of a `preproc:symtable/preproc:sym` element to
    ///   the symbol attributes being accumulated for that symbol.
    ///
    /// `key` selects the field of `attrs` to populate from `value`;
    ///   `span` is attached to any error produced for an invalid value.
    /// Attribute names that are not recognized are ignored.
    ///
    /// # Errors
    /// Returns [`XmloError::InvalidDim`],
    ///   [`XmloError::InvalidDtype`],
    ///   or [`XmloError::InvalidType`] when the respective attribute holds
    ///   a value that cannot be parsed.
    ///
    /// # Panics
    /// The caller must have handled `@name` already;
    ///   seeing it here panics.
    fn parse_sym_attr(
        attrs: &mut SymAttrs,
        key: QName,
        value: SymbolId,
        span: Span,
    ) -> Result<(), XmloError> {
        use raw::L_TRUE;

        // Flag attributes are set only by the literal `true`;
        //   any other value yields `false`.
        let flag = value == L_TRUE;

        match key {
            // Attributes with a restricted set of values.
            QN_DIM => {
                let dim = Self::parse_dim(value)
                    .ok_or(XmloError::InvalidDim(value, span))?;
                attrs.dim = Some(dim);
            }
            QN_DTYPE => {
                let dtype = Self::parse_dtype(value)
                    .ok_or(XmloError::InvalidDtype(value, span))?;
                attrs.dtype = Some(dtype);
            }
            QN_TYPE => {
                let ty = Self::parse_symtype(value)
                    .ok_or(XmloError::InvalidType(value, span))?;
                attrs.ty = Some(ty);
            }

            // Attributes whose value is stored as-is.
            QN_SRC => attrs.src = Some(value),
            QN_YIELDS => attrs.yields = Some(value),
            QN_PARENT => attrs.parent = Some(value),
            QN_DESC => attrs.desc = Some(value),

            // Flag attributes.
            QN_EXTERN => attrs.extern_ = flag,
            QN_VIRTUAL => attrs.virtual_ = flag,
            QN_ISOVERRIDE => attrs.override_ = flag,
            QN_GENERATED => attrs.generated = flag,

            // A second `@name` means a duplicate attribute,
            //   which is not expected from a compiler-produced xmlo file.
            // Should this ever be hit in practice,
            //   a proper error with more context may be warranted.
            QN_NAME => panic!(
                "preproc:sym/@name already processed \
                 (duplicate attribute); \
                 the xmlo file is corrupt"
            ),

            // Unknown attributes are tolerated for BC until the schema
            //   is known to be comprehensive.
            // TODO: Error here.
            _ => (),
        }

        Ok(())
    }

    /// Map a `preproc:sym/@dim` value of `0`, `1`, or `2` to its [`Dim`];
    ///   anything else yields [`None`].
    fn parse_dim(value: SymbolId) -> Option<Dim> {
        use raw::*;

        let dim = match value {
            N0 => Dim::Scalar,
            N1 => Dim::Vector,
            N2 => Dim::Matrix,
            _ => return None,
        };

        Some(dim)
    }

    /// Map a `preproc:sym/@dtype` value to its [`SymDtype`],
    ///   yielding [`None`] for an unrecognized value.
    fn parse_dtype(value: SymbolId) -> Option<SymDtype> {
        use raw::*;

        let dtype = match value {
            L_BOOLEAN => SymDtype::Boolean,
            L_INTEGER => SymDtype::Integer,
            L_FLOAT => SymDtype::Float,
            L_EMPTY => SymDtype::Empty,
            _ => return None,
        };

        Some(dtype)
    }

    /// Map a `preproc:sym/@type` value to its [`SymType`],
    ///   yielding [`None`] for an unrecognized value.
    fn parse_symtype(value: SymbolId) -> Option<SymType> {
        use raw::*;

        let ty = match value {
            L_CGEN => SymType::Cgen,
            L_CLASS => SymType::Class,
            L_CONST => SymType::Const,
            L_FUNC => SymType::Func,
            L_GEN => SymType::Gen,
            L_LPARAM => SymType::Lparam,
            L_PARAM => SymType::Param,
            L_RATE => SymType::Rate,
            L_TPL => SymType::Tpl,
            L_TYPE => SymType::Type,
            L_RETMAP_HEAD => SymType::RetMapHead,
            L_RETMAP => SymType::RetMap,
            L_RETMAP_TAIL => SymType::RetMapTail,
            L_MAP_HEAD => SymType::MapHead,
            L_MAP => SymType::Map,
            L_MAP_TAIL => SymType::MapTail,
            L_META => SymType::Meta,
            L_WORKSHEET => SymType::Worksheet,
            _ => return None,
        };

        Some(ty)
    }
}
impl From<(SymbolId, SymAttrs, Span)> for XmloEvent {
fn from(tup: (SymbolId, SymAttrs, Span)) -> Self {
match tup {

View File

@ -352,9 +352,10 @@ where
b"type" => {
sym_attrs.ty =
Some((*attr.value).try_into().map_err(|_| {
XmloError::InvalidType(unsafe {
String::from_utf8_unchecked(attr.value.to_vec())
})
XmloError::InvalidType(
unsafe { attr.value.intern_utf8_unchecked() },
UNKNOWN_SPAN,
)
})?);
}
@ -365,9 +366,10 @@ where
b"dtype" => {
sym_attrs.dtype =
Some((*attr.value).try_into().map_err(|_| {
XmloError::InvalidDtype(unsafe {
String::from_utf8_unchecked(attr.value.to_vec())
})
XmloError::InvalidDtype(
unsafe { attr.value.intern_utf8_unchecked() },
UNKNOWN_SPAN,
)
})?);
}

View File

@ -686,6 +686,7 @@ macro_rules! sym_tests {
}
}
// DONE
sym_tests! {
src: [src="foo/bar/baz"] => SymAttrs {
// see macro for src relpath
@ -781,6 +782,7 @@ sym_tests! {
}
}
// DONE
// can't be tested using the above
#[test]
fn generated_true() -> XmloResult<()> {
@ -821,6 +823,7 @@ fn generated_true() -> XmloResult<()> {
Ok(())
}
// DONE
#[test]
fn fails_on_non_ascii_dim() {
let stub_data: &[u8] = &[];
@ -834,6 +837,7 @@ fn fails_on_non_ascii_dim() {
}
}
// DONE
#[test]
fn fails_on_multi_char_dim() {
let stub_data: &[u8] = &[];
@ -847,6 +851,7 @@ fn fails_on_multi_char_dim() {
}
}
// DONE
#[test]
fn fails_on_invalid_type() {
let stub_data: &[u8] = &[];
@ -855,11 +860,12 @@ fn fails_on_invalid_type() {
sym_test_reader_event!(sut, fail_sym, type = "foo");
match sut.read_event() {
Err(XmloError::InvalidType(msg)) => assert!(msg.contains("foo")),
Err(XmloError::InvalidType(ty, _)) => assert_eq!(ty, "foo".into()),
bad => panic!("expected failure: {:?}", bad),
}
}
// DONE
#[test]
fn fails_on_invalid_dtype() {
let stub_data: &[u8] = &[];
@ -868,20 +874,7 @@ fn fails_on_invalid_dtype() {
sym_test_reader_event!(sut, fail_sym, dtype = "foo");
match sut.read_event() {
Err(XmloError::InvalidDtype(msg)) => assert!(msg.contains("foo")),
bad => panic!("expected failure: {:?}", bad),
}
}
#[test]
fn fails_when_missing_sym_name() {
let stub_data: &[u8] = &[];
let mut sut = Sut::new(stub_data);
sym_test_reader_event!(sut, fail_sym, dtype = "foo");
match sut.read_event() {
Err(XmloError::InvalidDtype(msg)) => assert!(msg.contains("foo")),
Err(XmloError::InvalidDtype(dty, _)) => assert_eq!(dty, "foo".into()),
bad => panic!("expected failure: {:?}", bad),
}
}

View File

@ -22,6 +22,7 @@ use std::assert_matches::assert_matches;
use super::*;
use crate::{
convert::ExpectInto,
obj::xmlo::{SymDtype, SymType},
parse::{ParseError, ParseState, Parsed},
span::{Span, DUMMY_SPAN},
sym::GlobalSymbolIntern,
@ -217,6 +218,8 @@ fn symtable_err_missing_sym_name() {
);
}
// Alias of `S4` used as the second span of each non-`@name` attribute
// generated by `symtable_tests!`,
//   and as the span expected in the objects and errors that those tests
//   assert.
const SA: Span = S4;
macro_rules! symtable_tests {
($($name:ident: [$($key:ident=$val:literal),*] => $expect:expr)*) => {
$(
@ -226,12 +229,12 @@ macro_rules! symtable_tests {
let toks = [
Xirf::Open(QN_SYM, S1, Depth(0)),
Xirf::Attr(Attr::new(QN_NAME, name, (S2, S3))),
Xirf::Attr(Attr(QN_NAME, name, (S2, S3))),
$(
Xirf::Attr(Attr::new(
Xirf::Attr(Attr(
stringify!($key).unwrap_into(),
$val.unwrap_into(),
(S2, S3)
(S3, SA)
)),
)*
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
@ -239,18 +242,22 @@ macro_rules! symtable_tests {
.into_iter();
assert_eq!(
Ok(vec![
Parsed::Incomplete, // Opening tag
Parsed::Incomplete, // @name
$(
// For each attribute ($key here is necessary
// for macro iteration).
#[allow(unused)]
#[doc=stringify!($key)]
Parsed::Incomplete,
)*
Parsed::Object((name, $expect, S1)),
]),
match $expect {
Ok(expected) =>
Ok(vec![
Parsed::Incomplete, // Opening tag
Parsed::Incomplete, // @name
$(
// For each attribute ($key here is necessary
// for macro iteration).
#[allow(unused)]
#[doc=stringify!($key)]
Parsed::Incomplete,
)*
Parsed::Object((name, expected, SA)),
]),
Err(expected) => Err(ParseError::StateError(expected)),
},
SymtableState::parse(toks).collect(),
);
}
@ -259,18 +266,142 @@ macro_rules! symtable_tests {
}
symtable_tests! {
dim_0: [dim="0"] => SymAttrs {
src: [src="foo/bar/baz"] => Ok(SymAttrs {
// see macro for src relpath
src: Some("foo/bar/baz".intern()),
..Default::default()
})
// note that this doesn't test every type; we're not going to
// duplicate the mapping for all of them here
tycgen: [type="cgen"] => Ok(SymAttrs {
ty: Some(SymType::Cgen),
..Default::default()
})
badtype: [type="bad"] => Err(XmloError::InvalidType("bad".into(), SA))
dim_0: [dim="0"] => Ok(SymAttrs {
dim: Some(Dim::Scalar),
..Default::default()
}
})
dim_1: [dim="1"] => SymAttrs {
dim_1: [dim="1"] => Ok(SymAttrs {
dim: Some(Dim::Vector),
..Default::default()
}
})
dim_2: [dim="2"] => SymAttrs {
dim_2: [dim="2"] => Ok(SymAttrs {
dim: Some(Dim::Matrix),
..Default::default()
}
})
dim_highnum: [dim="3"] => Err(XmloError::InvalidDim("3".into(), SA))
dim_nonum: [dim="X1"] => Err(XmloError::InvalidDim("X1".into(), SA))
dtyboolean: [dtype="boolean"] => Ok(SymAttrs {
dtype: Some(SymDtype::Boolean),
..Default::default()
})
dtyinteger: [dtype="integer"] => Ok(SymAttrs {
dtype: Some(SymDtype::Integer),
..Default::default()
})
dtyfloat: [dtype="float"] => Ok(SymAttrs {
dtype: Some(SymDtype::Float),
..Default::default()
})
dtyempty: [dtype="empty"] => Ok(SymAttrs {
dtype: Some(SymDtype::Empty),
..Default::default()
})
dtybad: [dtype="bad"] => Err(XmloError::InvalidDtype("bad".into(), SA))
extern_true: [extern="true"] => Ok(SymAttrs {
extern_: true,
..Default::default()
})
// The compiler will never produce nonsense values, so we'll just
// provide a sane default rather than adding extra checks (and
// hopefully we don't regret this)
extern_crap: [extern="nonsense"] => Ok(SymAttrs {
extern_: false,
..Default::default()
})
parent: [parent="foo"] => Ok(SymAttrs {
parent: Some("foo".intern()),
..Default::default()
})
yields: [yields="yield"] => Ok(SymAttrs {
yields: Some("yield".intern()),
..Default::default()
})
desc: [desc="Description"] => Ok(SymAttrs {
desc: Some("Description".into()),
..Default::default()
})
r#virtual: [virtual="true"] => Ok(SymAttrs {
virtual_: true,
..Default::default()
})
r#override: [isoverride="true"] => Ok(SymAttrs {
override_: true,
..Default::default()
})
// Multiple attributes at once
multi: [src="foo", type="class", dim="1", dtype="float", extern="true"]
=> Ok(SymAttrs {
// see macro for src relpath
src: Some("foo".intern()),
ty: Some(SymType::Class),
dim: Some(Dim::Vector),
dtype: Some(SymDtype::Float),
extern_: true,
..Default::default()
})
}
// `symtable_tests!` takes the attribute name as a single identifier,
//   so the prefixed `preproc:generated` has to be tested by hand.
#[test]
fn symtable_sym_generated_true() {
    let sym_name = "generated_true".into();
    let qn_generated = ("preproc", "generated").unwrap_into();

    let toks = [
        Xirf::Open(QN_SYM, S1, Depth(0)),
        Xirf::Attr(Attr(QN_NAME, sym_name, (S2, S3))),
        Xirf::Attr(Attr(qn_generated, raw::L_TRUE, (S3, S4))),
        Xirf::Close(Some(QN_SYM), S2, Depth(0)),
    ]
    .into_iter();

    // Only `generated` deviates from the default attributes.
    let want_attrs = SymAttrs {
        generated: true,
        ..Default::default()
    };

    let want = vec![
        Parsed::Incomplete, // Opening tag
        Parsed::Incomplete, // @name
        Parsed::Incomplete, // @preproc:generated
        Parsed::Object((sym_name, want_attrs, S4)),
    ];

    assert_eq!(Ok(want), SymtableState::parse(toks).collect());
}

View File

@ -447,6 +447,7 @@ pub mod st {
L_ELIG_CLASS_YIELDS: tid "elig-class-yields",
L_EMPTY: cid "empty",
L_EXEC: cid "exec",
L_EXTERN: cid "extern",
L_FALSE: cid "false",
L_FLOAT: cid "float",
L_FROM: cid "from",
@ -454,6 +455,7 @@ pub mod st {
L_GEN: cid "gen",
L_GENERATED: cid "generated",
L_INTEGER: cid "integer",
L_ISOVERRIDE: cid "isoverride",
L_L: cid "l",
L_LPARAM: cid "lparam",
L_LV: cid "lv",
@ -484,6 +486,7 @@ pub mod st {
L_TYPE: cid "type",
L_UUROOTPATH: cid "__rootpath",
L_VALUE: cid "value",
L_VIRTUAL: cid "virtual",
L_WORKSHEET: cid "worksheet",
L_XMLNS: cid "xmlns",
L_YIELDS: cid "yields",