tamer: obj::xmlo::reader: Begin symbol table parsing

This wasn't the simplest thing to start with, but I wanted to explore
something with a higher level of complexity.  There is some boilerplate to
observe here, including:

  1. The state stitching (as I guess I'm calling it now) of SymtableState
     with XmloReaderState is all boilerplate and requires no lookahead,
     presenting an abstraction opportunity that I was holding off on
     previously (attr parsing for XIRF requires lookahead).
  2. This is simply collecting attributes into a struct.  This can be
     abstracted away in the future.
  3. Creating stub parsers to verify that generics are stitched rather than
     being tightly coupled with another state is boilerplate that maybe can
     be abstracted away after a pattern is observed in future tests.

DEV-10863
main
Mike Gerwitz 2022-03-29 11:14:47 -04:00
parent f402e51d04
commit f42288f3a2
8 changed files with 329 additions and 33 deletions

View File

@ -213,7 +213,7 @@ where
self.add_dep_lookup(sym, dep_sym);
}
(IS::None, XmloEvent::SymDecl(sym, attrs)) => {
(IS::None, XmloEvent::SymDecl(sym, attrs, _span)) => {
if let Some(sym_src) = attrs.src {
found.insert(sym_src);
} else {
@ -361,6 +361,7 @@ mod test {
use super::*;
use crate::asg::{DefaultAsg, FragmentText, IdentObject};
use crate::obj::xmlo::{SymAttrs, SymType};
use crate::span::UNKNOWN_SPAN;
use crate::sym::GlobalSymbolIntern;
use std::collections::hash_map::RandomState;
@ -456,6 +457,7 @@ mod test {
src: Some(src_a),
..Default::default()
},
UNKNOWN_SPAN,
)),
Ok(XmloEvent::SymDecl(
sym,
@ -463,6 +465,7 @@ mod test {
src: Some(src_b),
..Default::default()
},
UNKNOWN_SPAN,
)),
];
@ -507,6 +510,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
// These three will be roots
Ok(XmloEvent::SymDecl(
@ -516,6 +520,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
Ok(XmloEvent::SymDecl(
sym_map,
@ -524,6 +529,7 @@ mod test {
ty: Some(SymType::Map),
..Default::default()
},
UNKNOWN_SPAN,
)),
Ok(XmloEvent::SymDecl(
sym_retmap,
@ -532,6 +538,7 @@ mod test {
ty: Some(SymType::RetMap),
..Default::default()
},
UNKNOWN_SPAN,
)),
];
@ -623,6 +630,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
))];
let _ = sut.import_xmlo(evs.into_iter(), state).unwrap();
@ -648,7 +656,7 @@ mod test {
let sym = "sym".intern();
let bad_attrs = SymAttrs::default();
let evs = vec![Ok(XmloEvent::SymDecl(sym, bad_attrs))];
let evs = vec![Ok(XmloEvent::SymDecl(sym, bad_attrs, UNKNOWN_SPAN))];
let result = sut
.import_xmlo(evs.into_iter(), SutState::new())
@ -671,6 +679,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
// Incompatible
Ok(XmloEvent::SymDecl(
@ -680,6 +689,7 @@ mod test {
ty: Some(SymType::Map),
..Default::default()
},
UNKNOWN_SPAN,
)),
];
@ -703,6 +713,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
// Redeclare
Ok(XmloEvent::SymDecl(
@ -711,6 +722,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
];
@ -735,6 +747,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
Ok(XmloEvent::Fragment(sym, frag.clone())),
];
@ -788,6 +801,7 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
)),
Ok(XmloEvent::Fragment(sym, frag.clone())),
];

View File

@ -20,6 +20,7 @@
//! Errors while processing `xmlo` object files.
use crate::parse::ParseError;
use crate::span::Span;
use crate::sym::SymbolId;
use crate::tpwrap::quick_xml::{Error as XmlError, InnerXmlError};
use crate::xir::{tree::StackError, Token};
@ -43,13 +44,13 @@ pub enum XmloError {
/// The root node was not an `lv:package`.
UnexpectedRoot,
/// A `preproc:sym` node was found, but is missing `@name`.
UnassociatedSym,
UnassociatedSym(Span),
/// The provided `preproc:sym/@type` is unknown or invalid.
InvalidType(String),
/// The provided `preproc:sym/@dtype` is unknown or invalid.
InvalidDtype(String),
/// The provided `preproc:sym/@dim` is invalid.
InvalidDim(String),
InvalidDim(SymbolId, Span),
/// A `preproc:sym-dep` element was found, but is missing `@name`.
UnassociatedSymDep,
/// The `preproc:sym[@type="map"]` contains unexpected or invalid data.
@ -85,9 +86,10 @@ impl Display for XmloError {
Self::UnexpectedRoot => {
write!(fmt, "unexpected package root (is this a package?)")
}
Self::UnassociatedSym => write!(
Self::UnassociatedSym(span) => write!(
fmt,
"unassociated symbol table entry: preproc:sym/@name missing"
"unassociated symbol table entry: \
preproc:sym/@name missing at {span}"
),
Self::InvalidType(ty) => {
write!(fmt, "invalid preproc:sym/@type `{}`", ty)
@ -95,8 +97,8 @@ impl Display for XmloError {
Self::InvalidDtype(dtype) => {
write!(fmt, "invalid preproc:sym/@dtype `{}`", dtype)
}
Self::InvalidDim(dim) => {
write!(fmt, "invalid preproc:sym/@dim `{}`", dim)
Self::InvalidDim(dim, span) => {
write!(fmt, "invalid preproc:sym/@dim `{dim}` at {span}")
}
Self::InvalidMapFrom(msg) => {
write!(fmt, "invalid preproc:sym[@type=\"map\"]: {}", msg)

View File

@ -19,7 +19,9 @@
use super::{SymAttrs, XmloError};
use crate::{
parse::{self, ParseState, Transition, TransitionResult},
obj::xmlo::Dim,
parse::{self, ParseState, ParseStatus, Transition, TransitionResult},
span::Span,
sym::{st::*, SymbolId},
xir::{attr::Attr, flat::Object as Xirf},
};
@ -60,7 +62,7 @@ pub enum XmloEvent {
/// This represents an entry in the symbol table,
/// which includes a symbol along with its variable metadata as
/// [`SymAttrs`].
SymDecl(SymbolId, SymAttrs),
SymDecl(SymbolId, SymAttrs, Span),
/// Begin adjacency list for a given symbol and interpret subsequent
/// symbols as edges (dependencies).
@ -101,24 +103,37 @@ qname_const! {
QN_NAME: :L_NAME,
QN_UUROOTPATH: :L_UUROOTPATH,
QN_PROGRAM: :L_PROGRAM,
QN_PREPROC_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_SYMTABLE: L_PREPROC:L_SYMTABLE,
QN_SYM: L_PREPROC:L_SYM,
QN_DIM: :L_DIM,
}
/// Parser for the contents of a symbol table that can be stitched into
/// [`XmloReaderState`].
///
/// Any [`ParseState`] satisfies this alias provided that it consumes
/// [`Xirf`] tokens,
/// yields `(SymbolId, SymAttrs, Span)` tuples,
/// and has an error type that is convertible into [`XmloError`].
pub trait XmloSymtableState =
ParseState<Token = Xirf, Object = (SymbolId, SymAttrs, Span)>
where <Self as ParseState>::Error: Into<XmloError>;
#[derive(Debug, Default, PartialEq, Eq)]
pub enum XmloReaderState {
pub enum XmloReaderState<SS: XmloSymtableState = SymtableState> {
/// Parser has not yet processed any input.
#[default]
Ready,
/// Processing `package` attributes.
Package,
/// Expecting a symbol declaration or end of symbol table.
///
/// The [`Span`] is initialized from the opening `preproc:symtable` tag
/// and `SS` holds the state of the parser that symbol table tokens
/// are delegated to.
Symtable(Span, SS),
/// `xmlo` file has been fully read.
Done,
}
impl ParseState for XmloReaderState {
impl<SS: XmloSymtableState> ParseState for XmloReaderState<SS> {
type Token = Xirf;
type Object = XmloEvent;
type Error = XmloError;
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
use XmloReaderState::{Done, Package, Ready};
use ParseStatus::{Dead, Incomplete, Object as Obj};
use XmloReaderState::*;
match (self, tok) {
(Ready, Xirf::Open(QN_LV_PACKAGE | QN_PACKAGE, ..)) => {
@ -132,7 +147,7 @@ impl ParseState for XmloReaderState {
QN_NAME => XmloEvent::PkgName(value),
QN_UUROOTPATH => XmloEvent::PkgRootPath(value),
QN_PROGRAM => XmloEvent::PkgProgramFlag,
QN_PREPROC_ELIG_CLASS_YIELDS => {
QN_ELIG_CLASS_YIELDS => {
XmloEvent::PkgEligClassYields(value)
}
// Ignore unknown attributes for now to maintain BC,
@ -145,6 +160,35 @@ impl ParseState for XmloReaderState {
// XIRF guarantees a matching closing tag.
(Package, Xirf::Close(..)) => Transition(Done).incomplete(),
(Package, Xirf::Open(QN_SYMTABLE, span, ..)) => {
Transition(Symtable(span, SS::default())).incomplete()
}
(Symtable(_, ss), Xirf::Close(Some(QN_SYMTABLE), ..))
if ss.is_accepting() =>
{
Transition(Done).incomplete()
}
// TODO: This is all boilerplate; abstract away state stitching.
// TODO: It'd be nice to augment errors with the symbol table
//   span as well (e.g. "while processing symbol table at <loc>").
//
// Every token that does not end the symbol table is delegated to the
//   symbol table parser `ss`,
//     and its resulting state is re-wrapped in `Symtable` together with
//     the span of the symbol table itself.
(Symtable(span, ss), tok) => match ss.parse_token(tok).into() {
    (Transition(ss), Ok(Incomplete)) => {
        Transition(Symtable(span, ss)).incomplete()
    }
    // The symbol's own span is bound as `sym_span` so that it does not
    //   shadow `span`;
    //     otherwise the symbol table span held by the state would be
    //     replaced by the span of the most recent symbol.
    (Transition(ss), Ok(Obj((name, attrs, sym_span)))) => {
        Transition(Symtable(span, ss))
            .ok(XmloEvent::SymDecl(name, attrs, sym_span))
    }
    (Transition(ss), Ok(Dead(tok))) => {
        Transition(Symtable(span, ss)).dead(tok)
    }
    (Transition(ss), Err(e)) => {
        Transition(Symtable(span, ss)).err(e)
    }
},
todo => todo!("{todo:?}"),
}
}
@ -154,6 +198,82 @@ impl ParseState for XmloReaderState {
}
}
#[cfg(feature = "wip-xmlo-xir-reader")]
/// Symbol table parser operating within a delimited context.
///
/// This parser expects a parent [`ParseState`] to indicate when symtable
/// parsing ought to start and end—
/// this parser does not itself recognize the opening or closing tags of
/// the symbol table.
#[derive(Debug, Default, PartialEq, Eq)]
pub enum SymtableState {
/// Symbol table declaration found;
/// symbol declarations expected.
#[default]
Ready,
/// Processing a symbol.
///
/// Holds the span of the opening `preproc:sym` tag,
/// the symbol name once `@name` has been encountered,
/// and the attributes collected so far.
Sym(Span, Option<SymbolId>, SymAttrs),
}
// Permit the symbol declaration tuple to be used as a parser object
// (it is the `ParseState::Object` of `SymtableState`).
impl parse::Object for (SymbolId, SymAttrs, Span) {}
impl ParseState for SymtableState {
    type Token = Xirf;
    type Object = (SymbolId, SymAttrs, Span);
    type Error = XmloError;

    /// Consume a single token within the symbol table.
    ///
    /// A `preproc:sym` element is collected attribute-by-attribute and
    /// emitted as a `(name, attrs, span)` tuple once it is closed,
    /// where `span` is that of the opening tag.
    /// Closing a symbol before `@name` has been seen yields
    /// [`XmloError::UnassociatedSym`],
    /// and an unrecognized `@dim` value yields [`XmloError::InvalidDim`].
    ///
    /// Attributes other than `@name` and `@dim`,
    /// as well as any other state/token combination,
    /// are not yet implemented and will panic via `todo!`.
    fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
        use SymtableState::*;

        match (self, tok) {
            // Beginning of a symbol declaration.
            (Ready, Xirf::Open(QN_SYM, sym_span, _)) => {
                let fresh = Sym(sym_span, None, SymAttrs::default());
                Transition(fresh).incomplete()
            }

            // Symbol @name found.
            (
                Sym(sym_span, None, collected),
                Xirf::Attr(Attr(QN_NAME, sym_name, _)),
            ) => Transition(Sym(sym_span, Some(sym_name), collected))
                .incomplete(),

            // Symbol @dim found.
            (
                Sym(sym_span, sym_name, mut collected),
                Xirf::Attr(Attr(QN_DIM, value, _)),
            ) => {
                use crate::sym::st::raw::{N0, N1, N2};

                let parsed_dim = match value {
                    N0 => Ok(Dim::Scalar),
                    N1 => Ok(Dim::Vector),
                    N2 => Ok(Dim::Matrix),
                    _ => Err(XmloError::InvalidDim(value, sym_span)),
                };

                // Only record the dimension on success;
                //   the state is retained either way.
                let status = parsed_dim.map(|dim| {
                    collected.dim = Some(dim);
                    ParseStatus::Incomplete
                });

                Transition(Sym(sym_span, sym_name, collected)).result(status)
            }

            // A nameless @name is handled above,
            //   so reaching this arm means the name was already set.
            (Sym(..), Xirf::Attr(Attr(QN_NAME, ..))) => {
                unreachable!("@name already processed")
            }

            // Remaining attributes are not yet supported.
            (Sym(..), Xirf::Attr(Attr(key, ..))) => todo!("{key}"),

            // Symbol closed without ever having been named.
            (Sym(sym_span, None, collected), Xirf::Close(..)) => {
                Transition(Sym(sym_span, None, collected))
                    .err(XmloError::UnassociatedSym(sym_span))
            }

            // Completed symbol.
            (Sym(sym_span, Some(sym_name), collected), Xirf::Close(..)) => {
                Transition(Ready).ok((sym_name, collected, sym_span))
            }

            unhandled => todo!("{unhandled:?}"),
        }
    }

    /// The parser may only finish between symbol declarations.
    fn is_accepting(&self) -> bool {
        matches!(self, Self::Ready)
    }
}
#[cfg(test)]
mod test;

View File

@ -50,6 +50,7 @@
use super::super::{SymAttrs, SymType};
use super::{XmloError, XmloEvent, XmloResult};
use crate::obj::xmlo::Dim;
use crate::span::UNKNOWN_SPAN;
use crate::sym::{GlobalSymbolInternUnchecked, GlobalSymbolResolve, SymbolId};
#[cfg(test)]
use crate::test::quick_xml::MockBytesStart as BytesStart;
@ -230,7 +231,7 @@ where
let mut event = Self::process_sym(&self.pkg_name, &ele)?;
match &mut event {
XmloEvent::SymDecl(_, attrs)
XmloEvent::SymDecl(_, attrs, _)
if attrs.ty == Some(SymType::Map) =>
{
attrs.from = Self::process_map_from(
@ -410,8 +411,10 @@ where
sym_attrs.pkg_name = *pkg_name;
name.map(|name_sym| XmloEvent::SymDecl(name_sym, sym_attrs))
.ok_or(XmloError::UnassociatedSym)
name.map(|name_sym| {
XmloEvent::SymDecl(name_sym, sym_attrs, UNKNOWN_SPAN)
})
.ok_or(XmloError::UnassociatedSym(UNKNOWN_SPAN))
}
/// Process `preproc:from` for `preproc:sym[@type="map"]` elements.
@ -601,9 +604,10 @@ where
[b'0'] => Ok(Dim::Scalar),
[b'1'] => Ok(Dim::Vector),
[b'2'] => Ok(Dim::Matrix),
_ => Err(XmloError::InvalidDim(unsafe {
String::from_utf8_unchecked(value.to_vec())
})),
_ => Err(XmloError::InvalidDim(
unsafe { value.intern_utf8_unchecked() },
UNKNOWN_SPAN,
)),
}
}
}

View File

@ -66,6 +66,7 @@ xmlo_tests! {
}
}
// DONE
fn sym_fails_without_name(sut) {
sut.reader.next_event = Some(Box::new(|_, _| {
Ok(XmlEvent::Start(MockBytesStart::new(
@ -75,7 +76,7 @@ xmlo_tests! {
}));
match sut.read_event() {
Err(XmloError::UnassociatedSym) => (),
Err(XmloError::UnassociatedSym(_)) => (),
bad => panic!("expected XmloError::UnassociatedSym: {:?}", bad),
}
}
@ -415,6 +416,7 @@ xmlo_tests! {
pkg_name: Some("pkg/name".intern()),
..Default::default()
},
UNKNOWN_SPAN,
),
result
);
@ -450,6 +452,7 @@ xmlo_tests! {
pkg_name: Some("pkg/name".intern()),
..Default::default()
},
UNKNOWN_SPAN,
),
result
);
@ -513,6 +516,7 @@ xmlo_tests! {
pkg_name: Some("pkg/name".intern()),
..Default::default()
},
UNKNOWN_SPAN,
),
result
);
@ -671,9 +675,10 @@ macro_rules! sym_tests {
assert_eq!(
XmloEvent::SymDecl(
stringify!($name).intern(),
expected_attrs
expected_attrs,
UNKNOWN_SPAN,
),
result
result,
);
Ok(())
}
@ -805,7 +810,11 @@ fn generated_true() -> XmloResult<()> {
};
assert_eq!(
XmloEvent::SymDecl("generated_true".intern(), expected_attrs),
XmloEvent::SymDecl(
"generated_true".intern(),
expected_attrs,
UNKNOWN_SPAN,
),
result
);
@ -820,7 +829,7 @@ fn fails_on_non_ascii_dim() {
sym_test_reader_event!(sut, fail_sym, dim = "X1");
match sut.read_event() {
Err(XmloError::InvalidDim(msg)) => assert!(msg.contains("X1")),
Err(XmloError::InvalidDim(dim, _)) => assert_eq!(dim, "X1".intern()),
bad => panic!("expected failure: {:?}", bad),
}
}
@ -833,7 +842,7 @@ fn fails_on_multi_char_dim() {
sym_test_reader_event!(sut, fail_sym, dim = "11");
match sut.read_event() {
Err(XmloError::InvalidDim(msg)) => assert!(msg.contains("11")),
Err(XmloError::InvalidDim(dim, _)) => assert_eq!(dim, "11".intern()),
bad => panic!("expected failure: {:?}", bad),
}
}

View File

@ -24,6 +24,7 @@ use crate::{
convert::ExpectInto,
parse::{ParseError, ParseState, Parsed},
span::{Span, DUMMY_SPAN},
sym::GlobalSymbolIntern,
xir::{
attr::Attr,
flat::{Depth, Object as Xirf},
@ -36,7 +37,7 @@ const S2: Span = S1.offset_add(1).unwrap();
const S3: Span = S2.offset_add(1).unwrap();
const S4: Span = S3.offset_add(1).unwrap();
type Sut = XmloReader;
type Sut = XmloReaderState;
#[test]
fn fails_on_invalid_root() {
@ -109,15 +110,14 @@ fn parses_package_attrs_with_ns_prefix() {
// but this ought to reject in the future.
#[test]
fn ignores_unknown_package_attr() {
let package = "package".unwrap_into();
let name = "pkgroot".into();
let toks = [
Xirf::Open(package, S1, Depth(0)),
Xirf::Open(QN_PACKAGE, S1, Depth(0)),
Xirf::Attr(Attr::new("name".unwrap_into(), name, (S2, S3))),
// This is ignored.
Xirf::Attr(Attr::new("unknown".unwrap_into(), name, (S2, S3))),
Xirf::Close(Some(package), S2, Depth(0)),
Xirf::Close(Some(QN_PACKAGE), S2, Depth(0)),
]
.into_iter();
@ -133,3 +133,144 @@ fn ignores_unknown_package_attr() {
sut.collect(),
);
}
// Verifies that `XmloReaderState` delegates symbol table tokens to its
// generic symtable parser rather than being coupled to `SymtableState`:
// a stub parser is substituted and its distinctive span (`SSTUB`) must
// appear in the emitted `SymDecl`.
#[test]
fn xmlo_symtable_parser() {
// Span that only the stub parser produces.
const SSTUB: Span = DUMMY_SPAN.offset_add(50).unwrap();
#[derive(Debug, Default, PartialEq, Eq)]
enum StubSymtableState {
#[default]
None,
}
impl ParseState for StubSymtableState {
type Token = Xirf;
type Object = (SymbolId, SymAttrs, Span);
type Error = XmloError;
// Accepts only `@name` attributes carrying the spans `(S1, S2)` and
// immediately yields a symbol with default attributes and `SSTUB`.
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
match tok {
Xirf::Attr(Attr(QN_NAME, name, (s1, s2))) => {
assert_eq!(s1, S1);
assert_eq!(s2, S2);
Transition(Self::None).ok((
name,
SymAttrs::default(),
SSTUB,
))
}
tok => panic!("test expects @name but got {tok:?}"),
}
}
// The stub has a single state, so it is always accepting.
fn is_accepting(&self) -> bool {
*self == Self::None
}
}
let symname = "symname".into();
let attrs = SymAttrs::default();
let toks = [
Xirf::Open(QN_PACKAGE, S1, Depth(0)),
Xirf::Open(QN_SYMTABLE, S2, Depth(1)),
// Our stub parser doesn't need an opening or closing tag.
// Note that S1 and S2 are expected.
Xirf::Attr(Attr(QN_NAME, symname, (S1, S2))), // @name
Xirf::Close(Some(QN_SYMTABLE), S4, Depth(1)),
]
.into_iter();
let sut = XmloReaderState::<StubSymtableState>::parse(toks);
assert_eq!(
Ok(vec![
Parsed::Incomplete, // <package
Parsed::Incomplete, // <preproc:symtable
// SSTUB is used to prove that StubSymtableState was used,
// instead of the SS default (no, not a ship).
Parsed::Object(XmloEvent::SymDecl(symname, attrs, SSTUB)),
Parsed::Incomplete, // </preproc:symtable>
]),
sut.collect(),
);
}
// A `preproc:sym` that is closed before `@name` is encountered must fail
// with `UnassociatedSym` carrying the span of the opening tag.
#[test]
fn symtable_err_missing_sym_name() {
    let toks = [
        Xirf::Open(QN_SYM, S1, Depth(0)),
        // No attributes, but importantly, no name.
        // The closing tag is balanced with the opening `preproc:sym`,
        //   consistent with the streams built by `symtable_tests!`.
        Xirf::Close(Some(QN_SYM), S2, Depth(0)),
    ]
    .into_iter();

    let mut sut = SymtableState::parse(toks);

    // Opening tag.
    assert_eq!(sut.next(), Some(Ok(Parsed::Incomplete)));

    // Closing tag without a preceding @name.
    assert_eq!(
        sut.next(),
        Some(Err(ParseError::StateError(XmloError::UnassociatedSym(S1)))),
    );
}
// Generate one `#[test]` per entry of the form
//   `name: [key="value", ...] => expected_attrs`.
//
// Each generated test feeds `SymtableState` a `preproc:sym` whose `@name`
// is the test's own identifier followed by the listed attributes,
// and expects one `Incomplete` per token before the final object
// `(name, expected_attrs, S1)`,
//   where `S1` is the span of the opening tag.
macro_rules! symtable_tests {
($($name:ident: [$($key:ident=$val:literal),*] => $expect:expr)*) => {
$(
#[test]
fn $name() {
let name = stringify!($name).intern();
let toks = [
Xirf::Open(QN_SYM, S1, Depth(0)),
Xirf::Attr(Attr::new(QN_NAME, name, (S2, S3))),
$(
Xirf::Attr(Attr::new(
stringify!($key).unwrap_into(),
$val.unwrap_into(),
(S2, S3)
)),
)*
Xirf::Close(Some(QN_SYM), S2, Depth(0)),
]
.into_iter();
assert_eq!(
Ok(vec![
Parsed::Incomplete, // Opening tag
Parsed::Incomplete, // @name
$(
// For each attribute ($key here is necessary
// for macro iteration).
#[allow(unused)]
#[doc=stringify!($key)]
Parsed::Incomplete,
)*
Parsed::Object((name, $expect, S1)),
]),
SymtableState::parse(toks).collect(),
);
}
)*
}
}
// `@dim` values "0", "1" and "2" map to scalar, vector and matrix
// dimensions respectively.
symtable_tests! {
dim_0: [dim="0"] => SymAttrs {
dim: Some(Dim::Scalar),
..Default::default()
}
dim_1: [dim="1"] => SymAttrs {
dim: Some(Dim::Vector),
..Default::default()
}
dim_2: [dim="2"] => SymAttrs {
dim: Some(Dim::Matrix),
..Default::default()
}
}

View File

@ -435,6 +435,10 @@ impl Display for Span {
}
}
/// A placeholder span indicating that a span is expected but is not yet
/// known.
///
/// It is constructed via [`Span::st_ctx`] from the static context symbol
/// `CTX_UNKNOWN`.
pub const UNKNOWN_SPAN: Span = Span::st_ctx(crate::sym::st16::CTX_UNKNOWN);
/// A dummy span that can be used in contexts where a span is expected but
/// is not important.
///

View File

@ -477,6 +477,7 @@ pub mod st {
L_SRC: cid "src",
L_STATIC: cid "static",
L_SYM: cid "sym",
L_SYMTABLE: cid "symtable",
L_TITLE: cid "title",
L_TPL: cid "tpl",
L_TRUE: cid "true",
@ -519,7 +520,8 @@ pub mod st16 {
<u16>;
// Special contexts.
CTX_DUMMY: ctx "#!DUMMY",
CTX_DUMMY: ctx "#!DUMMY",
// Context for spans that are expected but not yet known.
CTX_UNKNOWN: ctx "#!UNKNOWN",
CTX_LINKER: ctx "#!LINKER",
// [Symbols will be added here as they are needed.]