tamer: obj::xmlo::reader: preproc:fragment parsing

This concludes the bulk of the header parsing, though there will surely be
other issues, such as whitespace, when I try to read a real xmlo file.  I
expect that I'd rather handle whitespace as part of XIRF, but I may
initially ignore it here just to get things working.  We'll see.

DEV-10863
main
Mike Gerwitz 2022-03-30 21:41:59 -04:00
parent f3545cf347
commit 9eaebd576b
7 changed files with 224 additions and 27 deletions

View File

@ -248,7 +248,10 @@ where
}
// Fragments follow SymDeps.
(IS::None | IS::SymDep(_), XmloEvent::Fragment(sym, text)) => {
(
IS::None | IS::SymDep(_),
XmloEvent::Fragment(sym, text, _),
) => {
istate = IS::None;
let frag = self
@ -747,9 +750,9 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
DUMMY_SPAN,
)),
Ok(XmloEvent::Fragment(sym, frag.clone())),
Ok(XmloEvent::Fragment(sym, frag.clone(), DUMMY_SPAN)),
];
let _ = sut.import_xmlo(evs.into_iter(), SutState::new()).unwrap();
@ -776,7 +779,7 @@ mod test {
let sym = "sym".intern();
// Note: missing `SymDecl`.
let evs = vec![Ok(XmloEvent::Fragment(sym, "foo".into()))];
let evs = vec![Ok(XmloEvent::Fragment(sym, "foo".into(), DUMMY_SPAN))];
let result = sut
.import_xmlo(evs.into_iter(), SutState::new())
@ -801,9 +804,9 @@ mod test {
ty: Some(SymType::Meta),
..Default::default()
},
UNKNOWN_SPAN,
DUMMY_SPAN,
)),
Ok(XmloEvent::Fragment(sym, frag.clone())),
Ok(XmloEvent::Fragment(sym, frag.clone(), DUMMY_SPAN)),
];
let result = sut

View File

@ -61,9 +61,9 @@ pub enum XmloError {
/// (`preproc:sym-dep/preproc:sym-ref`).
MalformedSymRef(SymbolId, Span),
/// A `preproc:fragment` element was found, but is missing `@id`.
UnassociatedFragment,
UnassociatedFragment(Span),
/// A `preproc:fragment` element was found, but is missing `text()`.
MissingFragmentText(SymbolId),
MissingFragmentText(SymbolId, Span),
/// Token stream ended unexpectedly.
UnexpectedEof,
}
@ -128,14 +128,13 @@ impl Display for XmloError {
{name} at {span}"
)
}
Self::UnassociatedFragment => write!(
Self::UnassociatedFragment(span) => write!(
fmt,
"unassociated fragment: preproc:fragment/@id missing"
"unassociated fragment: preproc:fragment/@id missing at {span}"
),
Self::MissingFragmentText(symname) => write!(
Self::MissingFragmentText(sym, span) => write!(
fmt,
"fragment found, but missing text for symbol `{}`",
symname,
"fragment found, but missing text for symbol `{sym}` at {span}",
),
Self::UnexpectedEof => write!(fmt, "unexpected EOF"),
}

View File

@ -79,7 +79,7 @@ pub enum XmloEvent {
/// Given that fragments can be quite large,
/// a caller not interested in these data should choose to skip
/// fragments entirely rather than simply ignoring fragment events.
Fragment(SymbolId, SymbolId),
Fragment(SymbolId, SymbolId, Span),
/// End-of-header.
///
@ -103,8 +103,11 @@ qname_const! {
QN_DTYPE: :L_DTYPE,
QN_ELIG_CLASS_YIELDS: L_PREPROC:L_ELIG_CLASS_YIELDS,
QN_EXTERN: :L_EXTERN,
QN_FRAGMENT: L_PREPROC:L_FRAGMENT,
QN_FRAGMENTS: L_PREPROC:L_FRAGMENTS,
QN_FROM: L_PREPROC:L_FROM,
QN_GENERATED: L_PREPROC:L_GENERATED,
QN_ID: :L_ID,
QN_ISOVERRIDE: :L_ISOVERRIDE,
QN_LV_PACKAGE: L_LV:L_PACKAGE,
QN_NAME: :L_NAME,
@ -114,8 +117,8 @@ qname_const! {
QN_SRC: :L_SRC,
QN_SYM: L_PREPROC:L_SYM,
QN_SYMTABLE: L_PREPROC:L_SYMTABLE,
QN_SYM_DEPS: L_PREPROC:L_SYM_DEPS,
QN_SYM_DEP: L_PREPROC:L_SYM_DEP,
QN_SYM_DEPS: L_PREPROC:L_SYM_DEPS,
QN_SYM_REF: L_PREPROC:L_SYM_REF,
QN_TYPE: :L_TYPE,
QN_UUROOTPATH: :L_UUROOTPATH,
@ -133,6 +136,7 @@ where
pub enum XmloReaderState<
SS: XmloState = SymtableState,
SD: XmloState = SymDepsState,
SF: XmloState = FragmentsState,
> {
/// Parser has not yet processed any input.
#[default]
@ -145,13 +149,19 @@ pub enum XmloReaderState<
SymDepsExpected,
/// Expecting symbol dependency list or closing `preproc:sym-deps`.
SymDeps(Span, SD),
/// Compiled text fragments are expected next.
FragmentsExpected,
/// Expecting text fragment or closing `preproc:fragments`.
Fragments(Span, SF),
/// End of header parsing.
Eoh,
/// `xmlo` file has been fully read.
Done,
}
impl<SS: XmloState, SD: XmloState> ParseState for XmloReaderState<SS, SD> {
impl<SS: XmloState, SD: XmloState, SF: XmloState> ParseState
for XmloReaderState<SS, SD, SF>
{
type Token = Xirf;
type Object = XmloEvent;
type Error = XmloError;
@ -205,11 +215,23 @@ impl<SS: XmloState, SD: XmloState> ParseState for XmloReaderState<SS, SD> {
(SymDeps(_, sd), Xirf::Close(Some(QN_SYM_DEPS), ..))
if sd.is_accepting() =>
{
Transition(Eoh).incomplete()
Transition(FragmentsExpected).incomplete()
}
(SymDeps(span, sd), tok) => sd.delegate(span, tok, SymDeps),
(FragmentsExpected, Xirf::Open(QN_FRAGMENTS, span, _)) => {
Transition(Fragments(span, SF::default())).incomplete()
}
(Fragments(_, sf), Xirf::Close(Some(QN_FRAGMENTS), ..))
if sf.is_accepting() =>
{
Transition(Eoh).incomplete()
}
(Fragments(span, sf), tok) => sf.delegate(span, tok, Fragments),
(Eoh, Xirf::Close(Some(QN_PACKAGE), ..)) => {
Transition(Done).incomplete()
}
@ -527,5 +549,63 @@ impl ParseState for SymDepsState {
}
}
/// Text fragment (compiled code) parser for `preproc:fragments` children.
///
/// This parser expects a parent [`ParseState`] to indicate when fragment
/// parsing ought to start and end—
/// this parser does not recognize any opening or closing
/// `preproc:fragments` tags.
///
/// Each `preproc:fragment` element moves the parser through
/// [`FragmentUnnamed`](Self::FragmentUnnamed),
/// [`Fragment`](Self::Fragment),
/// and [`FragmentDone`](Self::FragmentDone) before returning to
/// [`Ready`](Self::Ready).
#[derive(Debug, Default, PartialEq, Eq)]
pub enum FragmentsState {
/// Waiting for an opening `preproc:fragment` tag.
///
/// This is the default state and the only accepting state.
#[default]
Ready,
/// An opening `preproc:fragment` tag was read,
/// but its `@id` has not yet been seen.
///
/// The [`Span`] is that of the opening tag.
FragmentUnnamed(Span),
/// A non-empty `@id` was read and the fragment text is expected next.
///
/// Holds the span of the opening tag and the fragment identifier.
Fragment(Span, SymbolId),
/// The fragment text was emitted as an [`XmloEvent::Fragment`] and a
/// closing tag is expected.
FragmentDone(Span, SymbolId),
}
impl ParseState for FragmentsState {
type Token = Xirf;
type Object = XmloEvent;
type Error = XmloError;
fn parse_token(self, tok: Self::Token) -> TransitionResult<Self> {
use FragmentsState::*;
match (self, tok) {
(Ready, Xirf::Open(QN_FRAGMENT, span, _)) => {
Transition(FragmentUnnamed(span)).incomplete()
}
(FragmentUnnamed(span), Xirf::Attr(Attr(QN_ID, id, _)))
if id != raw::WS_EMPTY =>
{
Transition(Fragment(span, id)).incomplete()
}
(FragmentUnnamed(span), _) => Transition(FragmentUnnamed(span))
.err(XmloError::UnassociatedFragment(span)),
(Fragment(span, id), Xirf::Text(text, _)) => {
Transition(FragmentDone(span, id))
.ok(XmloEvent::Fragment(id, text, span))
}
(Fragment(span, id), _) => Transition(Fragment(span, id))
.err(XmloError::MissingFragmentText(id, span)),
(FragmentDone(..), Xirf::Close(..)) => {
Transition(Ready).incomplete()
}
todo => todo!("{todo:?}"),
}
}
fn is_accepting(&self) -> bool {
*self == Self::Ready
}
}
#[cfg(test)]
mod test;

View File

@ -588,9 +588,10 @@ where
let id = filtered
.find(|attr| attr.key == b"id")
.filter(|attr| &*attr.value != b"")
.map_or(Err(XmloError::UnassociatedFragment), |attr| {
Ok(unsafe { attr.value.intern_utf8_unchecked() })
})?;
.map_or(
Err(XmloError::UnassociatedFragment(UNKNOWN_SPAN)),
|attr| Ok(unsafe { attr.value.intern_utf8_unchecked() }),
)?;
let text = match reader.read_event(buffer)? {
XmlEvent::Text(ev) => {
@ -600,10 +601,10 @@ where
// compiler.
Ok(unsafe { ev.escaped().clone_uninterned_utf8_unchecked() })
}
_ => Err(XmloError::MissingFragmentText(id)),
_ => Err(XmloError::MissingFragmentText(id, UNKNOWN_SPAN)),
}?;
Ok(XmloEvent::Fragment(id, text))
Ok(XmloEvent::Fragment(id, text, UNKNOWN_SPAN))
}
/// Convert single-character `@dim` to a [`Dim`].

View File

@ -263,6 +263,7 @@ xmlo_tests! {
}
}
// DONE (part of composite)
fn eoh_after_fragments(sut) {
sut.reader.next_event = Some(Box::new(|_, _| {
Ok(XmlEvent::End(MockBytesEnd::new(b"preproc:fragments")))
@ -273,6 +274,7 @@ xmlo_tests! {
assert_eq!(XmloEvent::Eoh, result);
}
// DONE
fn fragment_event(sut) {
let expected = "fragment text";
@ -295,7 +297,7 @@ xmlo_tests! {
assert!(matches!(
result,
XmloEvent::Fragment(sym, given)
XmloEvent::Fragment(sym, given, _)
if sym == "fragsym".intern() && given.lookup_str() == expected
));
}
@ -309,11 +311,12 @@ xmlo_tests! {
}));
match sut.read_event() {
Err(XmloError::UnassociatedFragment) => (),
Err(XmloError::UnassociatedFragment(_)) => (),
bad => panic!("expected XmloError: {:?}", bad),
}
}
// DONE
// Yes, this happened.
fn fragment_fails_with_empty_id(sut) {
sut.reader.next_event = Some(Box::new(|_, _| {
@ -326,11 +329,12 @@ xmlo_tests! {
}));
match sut.read_event() {
Err(XmloError::UnassociatedFragment) => (),
Err(XmloError::UnassociatedFragment(_)) => (),
bad => panic!("expected XmloError: {:?}", bad),
}
}
// DONE
fn fragment_fails_with_missing_text(sut) {
sut.reader.next_text = Some(Err(InnerXmlError::TextNotFound));
@ -344,7 +348,7 @@ xmlo_tests! {
}));
match sut.read_event() {
Err(XmloError::MissingFragmentText(symname)) => {
Err(XmloError::MissingFragmentText(symname, _)) => {
assert_eq!("fragsym".intern(), symname)
}
bad => panic!("expected XmloError: {:?}", bad),

View File

@ -519,7 +519,98 @@ fn sym_ref_missing_name() {
assert_eq!(
Err(ParseError::StateError(XmloError::MalformedSymRef(name, S2))),
SymDepsState::parse(toks)
.collect::<Result<Vec<Parsed<<SymDepsState as ParseState>::Object>>, _>>(),
.collect::<Result<Vec<Parsed<XmloEvent>>, _>>(),
);
}
/// Two consecutive well-formed fragments each yield a single
/// [`XmloEvent::Fragment`] carrying the span of its own opening tag.
#[test]
fn sym_fragment_event() {
    let first_id = "fragsym1".into();
    let first_text = "fragment text 1".into();
    let second_id = "fragsym2".into();
    let second_text = "fragment text 2".into();

    let toks = [
        // First fragment.
        Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
        Xirf::Attr(Attr(QN_ID, first_id, (S2, S3))),
        Xirf::Text(first_text, S4),
        Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
        // Second fragment.
        Xirf::Open(QN_FRAGMENT, S2, Depth(0)),
        Xirf::Attr(Attr(QN_ID, second_id, (S3, S4))),
        Xirf::Text(second_text, S5),
        Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
    ];

    let parsed: Result<Vec<Parsed<XmloEvent>>, _> =
        FragmentsState::parse(toks.into_iter()).collect();

    assert_eq!(
        Ok(vec![
            // <preproc:fragment
            Parsed::Incomplete,
            // @id
            Parsed::Incomplete,
            // text
            Parsed::Object(XmloEvent::Fragment(first_id, first_text, S1)),
            // </preproc:fragment>
            Parsed::Incomplete,
            // <preproc:fragment
            Parsed::Incomplete,
            // @id
            Parsed::Incomplete,
            // text
            Parsed::Object(XmloEvent::Fragment(second_id, second_text, S2)),
            // </preproc:fragment>
            Parsed::Incomplete,
        ]),
        parsed
    );
}
/// A fragment whose opening tag is followed by text rather than `@id`
/// fails with the span of the opening tag.
#[test]
fn sym_fragment_missing_id() {
    let toks = [
        Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
        // No @id precedes the text.
        Xirf::Text("text".into(), S4),
    ];

    let result: Result<Vec<Parsed<XmloEvent>>, _> =
        FragmentsState::parse(toks.into_iter()).collect();

    assert_eq!(
        Err(ParseError::StateError(XmloError::UnassociatedFragment(S1))),
        result,
    );
}
// Yes, this happened.
/// An empty `@id` is treated the same as a missing one.
#[test]
fn sym_fragment_empty_id() {
    let toks = [
        Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
        // @id is present but empty.
        Xirf::Attr(Attr(QN_ID, "".into(), (S3, S4))),
        Xirf::Text("text".into(), S4),
    ];

    let result: Result<Vec<Parsed<XmloEvent>>, _> =
        FragmentsState::parse(toks.into_iter()).collect();

    assert_eq!(
        Err(ParseError::StateError(XmloError::UnassociatedFragment(S1))),
        result,
    );
}
/// A named fragment that closes without any text fails,
/// reporting the fragment id and the span of the opening tag.
#[test]
fn sym_fragment_missing_text() {
    let frag_id = "fragsym".into();

    let toks = [
        Xirf::Open(QN_FRAGMENT, S1, Depth(0)),
        Xirf::Attr(Attr(QN_ID, frag_id, (S3, S4))),
        // The element closes before any text is seen.
        Xirf::Close(Some(QN_FRAGMENT), S5, Depth(0)),
    ];

    let result: Result<Vec<Parsed<XmloEvent>>, _> =
        FragmentsState::parse(toks.into_iter()).collect();

    assert_eq!(
        Err(ParseError::StateError(XmloError::MissingFragmentText(
            frag_id, S1
        ))),
        result,
    );
}
@ -531,6 +622,8 @@ fn sym_ref_missing_name() {
fn xmlo_composite_parsers_header() {
let sym_name = "sym".into();
let symdep_name = "symdep".into();
let symfrag_id = "symfrag".into();
let frag = "fragment text".into();
let toks_header = [
Xirf::Open(QN_PACKAGE, S1, Depth(0)),
@ -552,6 +645,16 @@ fn xmlo_composite_parsers_header() {
// </preproc:sym-dep>
Xirf::Close(Some(QN_SYM_DEPS), S3, Depth(1)),
// </preproc:sym-deps>
// <preproc:fragments>
Xirf::Open(QN_FRAGMENTS, S2, Depth(1)),
// <preproc:fragment
Xirf::Open(QN_FRAGMENT, S4, Depth(2)),
Xirf::Attr(Attr(QN_ID, symfrag_id, (S2, S3))),
Xirf::Text(frag, S5),
Xirf::Close(Some(QN_FRAGMENT), S4, Depth(2)),
// </preproc:fragment>
Xirf::Close(Some(QN_FRAGMENTS), S3, Depth(1)),
// </preproc:fragments>
// No closing root node:
// ensure that we can just end at the header without parsing further.
]
@ -567,6 +670,7 @@ fn xmlo_composite_parsers_header() {
S3
)),
Parsed::Object(XmloEvent::SymDepStart(symdep_name, S3)),
Parsed::Object(XmloEvent::Fragment(symfrag_id, frag, S4)),
]),
sut.filter(|parsed| match parsed {
Ok(Parsed::Incomplete) => false,

View File

@ -450,10 +450,13 @@ pub mod st {
L_EXTERN: cid "extern",
L_FALSE: cid "false",
L_FLOAT: cid "float",
L_FRAGMENT: cid "fragment",
L_FRAGMENTS: cid "fragments",
L_FROM: cid "from",
L_FUNC: cid "func",
L_GEN: cid "gen",
L_GENERATED: cid "generated",
L_ID: cid "id",
L_INTEGER: cid "integer",
L_ISOVERRIDE: cid "isoverride",
L_L: cid "l",
@ -503,6 +506,9 @@ pub mod st {
URI_LV_PREPROC: uri "http://www.lovullo.com/rater/preproc",
URI_LV_LINKER: uri "http://www.lovullo.com/rater/linker",
// TODO: Whitespace type
WS_EMPTY: str "",
// [Symbols will be added here as they are needed.]
// Marker indicating the end of the static symbols