tamer: xir::parse::attrstream: Streaming attribute parser

As I talked about in the previous commit, this is going to be the
replacement for the aggregate `attr_parse!`; the next commit will integrate
it into `ele_parse!` so that I can begin to remove the old one.

It is disappointing, since I did put a bit of work into this and I think the
end result was pretty neat, even if it was never fully utilized.  But, this
simplifies things significantly; no use in maintaining features that serve
no purpose but to confound people.

DEV-13346
main
Mike Gerwitz 2022-11-29 11:42:19 -05:00
parent 6d39474127
commit 9ad7742ad2
4 changed files with 521 additions and 5 deletions

View File

@ -251,11 +251,9 @@ impl<const TY: NirSymbolTy> Token for NirSymbol<TY> {
impl<const TY: NirSymbolTy> Display for NirSymbol<TY> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self(sym, _span) => write!(
f,
"{TY} {fmt_sym}",
fmt_sym = TtQuote::wrap(sym),
),
Self(sym, _span) => {
write!(f, "{TY} {fmt_sym}", fmt_sym = TtQuote::wrap(sym),)
}
}
}
}

View File

@ -23,6 +23,7 @@
//! processing of the input XML to ensure well-formedness.
mod attr;
mod attrstream;
mod ele;
mod error;

View File

@ -0,0 +1,234 @@
// XIR attribute parser generator
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! Attribute parser generator for parsing of [XIRF](super::super::flat).
//!
//! The parser generator is invoked via the macro
//! [`attr_parse_stream!`](crate::attr_parse_stream),
//! which expects a `match`-like definition describing the mapping between
//! attribute [QNames](crate::xir::QName) and the final value.
//! It produces a streaming attribute parser.
//!
//! All fields recognized by this parser are implicitly optional,
//! as this is intended only to extract a grammar from an XML document.
//! This destructuring describes the _permissible attributes_ of an element,
//! but nothing more.
//! Whether or not an attribute is required should be determined by whether
//! the produced IR is missing necessary information,
//! which is a later lowering operation.
//! Further,
//! duplicate attributes should be inhibited earlier in the process by XIR,
//! if desired.
//!
//! The parser automatically produces detailed error and diagnostic
//! messages for unexpected attributes,
//! or attributes that cannot be parsed into the final type.
/// Generate a streaming attribute parser named `$state_name`.
///
/// The invocation supplies:
///
///   - `type Object`: the object type emitted by the generated parser;
///   - `type ValueError`: the error type that attribute conversion
///       failures are converted into;
///   - an optional `vis(...)` visibility for the generated types; and
///   - a `match`-like body of `QNAME => Type` entries naming each
///       recognized attribute.
///
/// The expansion defines:
///
///   - an enum `$state_name` with the states `Parsing` and `Done`,
///       each holding the element name and its open span;
///   - a unit struct `<$state_name>Fields`,
///       present only to satisfy `AttrParseState::Fields`; and
///   - implementations of `AttrParseState`,
///       `Display`,
///       and `ParseState` for `$state_name`.
///
/// While in the `Parsing` state,
///   every attribute token whose name equals one of the listed `$qname`s
///   is converted with `try_into` and emitted as an object without
///   changing state.
/// An attribute that is not listed produces
///   `AttrParseError::UnexpectedAttr` and likewise leaves the state
///   untouched,
///     so parsing can resume with the next token.
/// Any other token moves the parser to `Done` and is handed back as a dead
///   token.
#[macro_export]
macro_rules! attr_parse_stream {
    ($(#[$sattr:meta])*
        type Object = $objty:ty;
        type ValueError = $evty:ty;

        $(vis($vis:vis);)?

        $(#[$st_attr:meta])? $state_name:ident {
            $(
                $(#[$fattr:meta])*
                $qname:ident => $ty:ty,
            )*
        }
    ) => { paste::paste! {
        $(
            // This provides a nice error on $ty itself at the call site,
            //   rather than relying on `Into::into` to cause the error
            //   later on,
            //     which places the error inside the macro definition.
            assert_impl_all!($ty: TryFrom<crate::xir::attr::Attr>);
        )*

        $(#[$st_attr])?
        ///
        // `$objty` is the only type this parser yields;
        //   referencing an unbound metavariable here would be emitted
        //   verbatim into the generated documentation.
        #[doc=concat!("Parser producing [`", stringify!($objty), "`].")]
        // TODO: This can be extracted out of the macro.
        #[derive(Debug, PartialEq, Eq)]
        $($vis)? enum $state_name {
            /// Attributes for the named element are still being accepted.
            Parsing(crate::xir::QName, crate::xir::OpenSpan),
            /// A non-attribute token has been encountered;
            ///   all further tokens are dead.
            Done(crate::xir::QName, crate::xir::OpenSpan),
        }

        /// Intermediate state of parser as fields are aggregated.
        ///
        /// The streaming parser aggregates nothing,
        ///   so this carries no data.
        ///
        /// TODO: Remove once integrated with `ele_parse!`.
        #[allow(non_camel_case_types)]
        #[derive(Debug, PartialEq, Eq, Default)]
        $($vis)? struct [<$state_name Fields>];

        impl crate::xir::parse::AttrParseState for $state_name {
            type ValueError = $evty;
            type Fields = [<$state_name Fields>];

            /// Begin parsing attributes for element `ele` opened at `span`.
            fn with_element(
                ele: crate::xir::QName,
                span: crate::xir::OpenSpan
            ) -> Self {
                Self::Parsing(ele, span)
            }

            /// Name of the element whose attributes are being parsed.
            fn element_name(&self) -> crate::xir::QName {
                match self {
                    Self::Parsing(qname, _) | Self::Done(qname, _) => *qname,
                }
            }

            /// Open span of the element whose attributes are being parsed.
            fn element_span(&self) -> crate::xir::OpenSpan {
                match self {
                    Self::Parsing(_, span) | Self::Done(_, span) => *span,
                }
            }

            /// Not supported by the streaming parser.
            ///
            /// Objects are emitted per attribute as they are encountered,
            ///   so there is no aggregate to finalize.
            ///
            /// # Panics
            /// Always panics.
            fn finalize_attr(
                self,
                _ctx: &mut <Self as crate::parse::ParseState>::Context,
            ) -> Result<
                Self::Object,
                crate::xir::parse::AttrParseError<Self>,
            > {
                unimplemented!(
                    "streaming attribute parsers have nothing to finalize"
                )
            }

            /// Not supported by the streaming parser.
            ///
            /// # Panics
            /// Always panics.
            fn required_missing(
                &self,
                #[allow(unused_variables)] // unused if no fields
                _ctx: &Self::Fields
            ) -> Vec<crate::xir::QName> {
                unimplemented!(
                    "streaming attribute parsers do not track required fields"
                )
            }
        }

        impl $state_name {
            /// Terminal state for element `ele` opened at `span`.
            fn done_with_element(
                ele: crate::xir::QName,
                span: crate::xir::OpenSpan,
            ) -> Self {
                Self::Done(ele, span)
            }
        }

        impl std::fmt::Display for $state_name {
            /// Additional error context shown in diagnostic messages for
            ///   certain variants of [`ParseError`].
            ///
            /// [`ParseError`]: crate::parse::ParseError
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                use crate::fmt::{DisplayWrapper, TtQuote};
                use crate::xir::parse::AttrParseState;

                write!(
                    f,
                    "expecting attributes for element {}",
                    TtQuote::wrap(self.element_name())
                )
            }
        }

        impl crate::parse::ParseState for $state_name {
            type Token = crate::xir::flat::XirfToken<
                crate::xir::flat::RefinedText
            >;
            type Object = $objty;
            type Error = crate::xir::parse::AttrParseError<Self>;

            /// Consume a single token.
            ///
            /// Recognized attributes are emitted as objects,
            ///   unrecognized attributes are reported as errors,
            ///   and anything else ends attribute parsing.
            fn parse_token(
                self,
                tok: Self::Token,
                // Nothing is aggregated,
                //   so the context is never consulted.
                _ctx: &mut Self::Context,
            ) -> crate::parse::TransitionResult<Self> {
                use crate::parse::Transition;
                use crate::xir::{
                    flat,
                    parse::{AttrParseError, AttrParseState}
                };
                #[allow(unused_imports)]
                use crate::xir::attr::{Attr, AttrSpan}; // unused if no attrs

                // Captured up front because `self` is moved into the
                //   scrutinee below.
                let ele_name = self.element_name();

                match (self, tok) {
                    $(
                        // Use guard so we don't bind as a variable if we
                        //   forget to import a const for `$qname`.
                        // We don't use `$qname:pat` because we reuse
                        //   `$qname` for error messages.
                        (st @ Self::Parsing(_, _), flat::XirfToken::Attr(
                            attr @ Attr(qn, _, AttrSpan(_kspan, _))
                        )) if qn == $qname => {
                            // NOTE(review): the conversion target is
                            //   inferred from `$objty` below rather than
                            //   from `$ty`;
                            //     confirm that is intended when the two
                            //     differ.
                            match attr.try_into() {
                                Ok(value) => {
                                    Transition(st).ok::<$objty>(value)
                                },
                                Err(e) => Transition(st).err(
                                    // Unreachable `Into::into` if
                                    //   Infallible.
                                    #[allow(unreachable_code)]
                                    AttrParseError::InvalidValue(
                                        Into::<$evty>::into(e),
                                        ele_name,
                                    )
                                ),
                            }
                        }
                    )*

                    // Remaining in `Parsing` permits recovery:
                    //   the offending attribute is a single token and has
                    //   now been consumed.
                    (st @ Self::Parsing(_, _), flat::XirfToken::Attr(attr)) => {
                        Transition(st).err(AttrParseError::UnexpectedAttr(
                            attr,
                            ele_name,
                        ))
                    },

                    // Attributes exhausted (dead state).
                    // Every object has already been emitted,
                    //   so there is nothing to finalize;
                    //     `finalize_attr` is unimplemented for this parser
                    //     and must not be invoked here.
                    (Self::Parsing(ele, span), tok_dead) => {
                        Transition(Self::done_with_element(ele, span))
                            .dead(tok_dead)
                    }

                    // Any tokens received after aggregation is completed
                    //   must not be processed,
                    //     otherwise we'll recurse indefinitely.
                    (st @ Self::Done(_, _), tok_dead) => {
                        Transition(st).dead(tok_dead)
                    }
                }
            }

            fn is_accepting(&self, _: &Self::Context) -> bool {
                // All attributes are optional for this parser,
                //   and each token is a complete attribute.
                true
            }
        }
    } };
}
#[cfg(test)]
mod test;

View File

@ -0,0 +1,283 @@
// XIR attribute parser generator tests
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::super::{AttrParseError, AttrParseState};
use crate::{
diagnose::{AnnotatedSpan, Diagnostic},
parse::{self, ParseError, Parsed, Parser, TokenStream},
span::{dummy::*, Span},
sym::SymbolId,
xir::{
attr::{Attr, AttrSpan},
flat::XirfToken,
st::qname::*,
OpenSpan, QName,
},
};
use std::{
convert::Infallible,
error::Error,
fmt::{Debug, Display},
iter,
};
use Parsed::Object;
// Open span given to `with_element` for the element under test.
// It is derived from `S1` offset by 100,
//   presumably so that it does not coincide with the attribute spans
//   used by the tests below (TODO confirm).
const SE: OpenSpan = OpenSpan(S1.offset_add(100).unwrap(), 0);
// Random choice of QName for tests.
// This is the element name that the tests expect to find in errors.
const QN_ELE: QName = QN_YIELDS;
/// Create a [`Parser`] over `toks` for the attribute parser `S`,
///   initialized for the test element [`QN_ELE`] with open span [`SE`].
fn sut_parse<S, I>(toks: I) -> Parser<S, I>
where
    S: AttrParseState,
    S::Context: Default,
    I: TokenStream<S::Token>,
{
    let initial = S::with_element(QN_ELE, SE);

    Parser::with_state(initial, toks)
}
// Remember: we only describe what is _permissible_,
//   not what is required or what order it must appear in.
// That is the responsibility of parsers lower in the pipeline.
#[test]
fn attrs_any_order_and_optional() {
    attr_parse_stream! {
        type Object = Attr;
        type ValueError = Infallible;

        ValuesState {
            QN_NAME => Attr,
            QN_YIELDS => Attr,

            // Declared but never supplied in the input below,
            //   which is okay since all are implicitly optional.
            QN_INDEX => Attr,
        }
    }

    let name = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
    let yields = Attr(QN_YIELDS, "val_value".into(), AttrSpan(S2, S3));

    // @yields precedes @name to emphasize that the order of the
    //   declarations above has no bearing on the order of the input.
    let toks = vec![
        XirfToken::Attr(yields.clone()),
        XirfToken::Attr(name.clone()),
    ];

    let parsed: Result<Vec<_>, _> =
        sut_parse::<ValuesState, _>(toks.into_iter()).collect();

    // The attributes are simply parsed back out in input order;
    //   see the following tests for further value parsing.
    // Note that one of the attributes declared above is omitted.
    assert_eq!(Ok(vec![Object(yields), Object(name)]), parsed);
}
// Since all are optional,
//   the attribute list can be empty.
#[test]
fn attrs_empty() {
    attr_parse_stream! {
        type Object = Attr;
        type ValueError = Infallible;

        ValuesState {
            // We will not provide a value for this.
            QN_NAME => Attr,
        }
    }

    let parsed: Result<Vec<_>, _> =
        sut_parse::<ValuesState, _>(iter::empty()).collect();

    // No tokens in,
    //   so no objects out and no error.
    assert_eq!(Ok(vec![]), parsed);
}
// Attributes are converted into the object type via its conversion from
//   `Attr`.
#[test]
fn attr_value_into() {
    // Yes, this is like SPair,
    //   but the point of this test is to be useful in isolation,
    //   so please do not couple this with SPair.
    #[derive(Debug, PartialEq, Eq)]
    struct Foo(SymbolId, Span);

    impl From<Attr> for Foo {
        fn from(attr: Attr) -> Self {
            let value = attr.value();
            let span = attr.attr_span().value_span();

            Foo(value, span)
        }
    }

    impl parse::Object for Foo {}

    attr_parse_stream! {
        type Object = Foo;
        type ValueError = Infallible;

        ValueIntoState {
            QN_NAME => Foo,
            QN_YIELDS => Foo,
        }
    }

    let val_name: SymbolId = "val_name".into();
    let val_yields: SymbolId = "val_yields".into();

    let toks = vec![
        XirfToken::Attr(Attr(QN_NAME, val_name, AttrSpan(S1, S2))),
        XirfToken::Attr(Attr(QN_YIELDS, val_yields, AttrSpan(S2, S3))),
    ];

    let parsed: Result<Vec<_>, _> =
        sut_parse::<ValueIntoState, _>(toks.into_iter()).collect();

    // Each `Foo` carries the attribute's value along with the second
    //   (value) span of the respective `AttrSpan`.
    assert_eq!(
        Ok(vec![Object(Foo(val_name, S2)), Object(Foo(val_yields, S3))]),
        parsed,
    );
}
// A failed conversion from `Attr` is reported as
//   `AttrParseError::InvalidValue` for the element being parsed,
//     and the parser continues with the next attribute.
#[test]
fn attr_value_error() {
    #[derive(Debug, PartialEq, Eq)]
    struct Foo;

    // Conversion unconditionally fails,
    //   echoing the attribute value back in the error.
    impl TryFrom<Attr> for Foo {
        type Error = FooError;

        fn try_from(attr: Attr) -> Result<Self, Self::Error> {
            Err(FooError(attr.value()))
        }
    }

    impl parse::Object for Foo {}

    #[derive(Debug, PartialEq)]
    struct FooError(SymbolId);

    // The provided `source` already yields `None`.
    impl Error for FooError {}

    impl Display for FooError {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(f, "test FooError")
        }
    }

    impl Diagnostic for FooError {
        fn describe(&self) -> Vec<AnnotatedSpan> {
            vec![]
        }
    }

    attr_parse_stream! {
        type Object = Foo;
        type ValueError = FooError;

        ValueTryIntoState {
            QN_NAME => Foo,
            QN_YIELDS => Foo,
        }
    }

    let val_name: SymbolId = "val_name".into();
    let val_yields: SymbolId = "val_yields".into();

    let toks = vec![
        XirfToken::Attr(Attr(QN_NAME, val_name, AttrSpan(S1, S2))),
        XirfToken::Attr(Attr(QN_YIELDS, val_yields, AttrSpan(S2, S3))),
    ];

    let mut sut = sut_parse::<ValueTryIntoState, _>(toks.into_iter());

    // The first attribute fails to convert.
    let first = sut.next();
    assert_eq!(
        Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
            FooError(val_name),
            QN_ELE
        )))),
        first,
    );

    // The second attribute is still parsed after the first error,
    //   and fails in the same manner.
    let second = sut.next();
    assert_eq!(
        Some(Err(ParseError::StateError(AttrParseError::InvalidValue(
            FooError(val_yields),
            QN_ELE
        )))),
        second,
    );
}
// An attribute that was not declared produces an error,
//   after which parsing resumes with the next token.
#[test]
fn unexpected_attr_with_recovery() {
    attr_parse_stream! {
        type Object = Attr;
        type ValueError = Infallible;

        UnexpectedState {
            QN_NAME => Attr,
            QN_SRC => Attr,
        }
    }

    let expected_first = Attr(QN_NAME, "val_name".into(), AttrSpan(S1, S2));
    let unexpected = Attr(QN_TYPE, "unexpected".into(), AttrSpan(S1, S2));
    let expected_last = Attr(QN_SRC, "val_src".into(), AttrSpan(S2, S3));

    let toks = vec![
        // This is expected:
        XirfToken::Attr(expected_first.clone()),
        // NOT expected (produce an error):
        XirfToken::Attr(unexpected.clone()),
        // <Recovery must take place here.>
        // This is expected after recovery:
        XirfToken::Attr(expected_last.clone()),
    ];

    let mut sut = sut_parse::<UnexpectedState, _>(toks.into_iter());

    let before_error = sut.next();
    assert_eq!(before_error, Some(Ok(Object(expected_first))));

    // This will fail at the unknown attribute,
    //   and must then remain in a state where parsing can be resumed.
    // This simply means ignoring the provided attribute,
    //   which in XIRF is discarding a single token of input,
    //     rather than having to continue parsing the attribute to then
    //     discard.
    let error = sut.next();
    assert_eq!(
        error,
        Some(Err(ParseError::StateError(AttrParseError::UnexpectedAttr(
            unexpected,
            QN_ELE,
        )))),
    );

    let after_recovery = sut.next();
    assert_eq!(after_recovery, Some(Ok(Object(expected_last))));
}