From ab0e4151a106e1fef9cfd3e7ee6474f4be159189 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Wed, 30 Nov 2022 23:52:18 -0500 Subject: [PATCH] tamer: xir::parse::ele::ele_parse!: Integrate `attr_parse_stream!` This handles the bulk of the integration of the new `attr_parse_stream!` as a replacement for `attr_parse!`, which moves from aggregate attribute objects to a stream of attribute-derived tokens. Rationale for this change is in the preceding commit messages. The first striking change here is how it affects the test cases: nearly all `Incomplete`s are removed. Note that the parser has an existing optimization whereby `Incomplete` with lookahead causes immediate recursion within `Parser`, since those situations are used only for control flow and to keep recursion out of `ParseState`s. Next: this removes types from `nir::parse`'s grammar for attributes. The types will instead be derived from NIR tokens later in the lowering pipeline. This simplifies NIR considerably, since adding types into the mix at this point was taking an already really complex lowering phase and making it ever more difficult to reason about and get everything working together the way that I needed. Because of `attr_parse_stream!`, there are no more required attribute checks. Those will be handled later in the lowering pipeline, if they're actually needed in context, with possibly one exception: namespace declarations. Those are really part of the document and they ought to be handled _earlier_ in the pipeline; I'll do that at some point. It's not required for compilation; it's just required to maintain compliance with the XML spec. We also lose checks for duplicate attributes. This is also something that ought to be handled at the document level, and so earlier in the pipeline, since XML cares, not us---if we get a duplicate attribute that results in an extra NIR token, then the next parser will error out, since it has to check for those things anyway. 
A bunch of cleanup and simplification is still needed; I want to get the initial integration committed first. It's a shame I'm getting rid of so much work, but this is the right approach, and results in a much simpler system. DEV-13346 --- tamer/src/nir.rs | 11 +- tamer/src/nir/parse.rs | 416 +++++++++++----------- tamer/src/xir/parse/attrstream.rs | 30 +- tamer/src/xir/parse/ele.rs | 117 ++---- tamer/src/xir/parse/ele/test.rs | 568 +++++++++++++++--------------- 5 files changed, 538 insertions(+), 604 deletions(-) diff --git a/tamer/src/nir.rs b/tamer/src/nir.rs index e99cbe77..45f17c09 100644 --- a/tamer/src/nir.rs +++ b/tamer/src/nir.rs @@ -55,7 +55,7 @@ mod parse; use crate::{ diagnose::{Annotate, Diagnostic}, fmt::{DisplayWrapper, TtQuote}, - parse::{Object, Token}, + parse::{util::SPair, Object, Token}, span::{Span, UNKNOWN_SPAN}, sym::SymbolId, xir::{ @@ -84,6 +84,7 @@ use NirSymbolTy::*; #[derive(Debug, PartialEq, Eq)] pub enum Nir { Todo, + TodoAttr(SPair), TplParamOpen(Plain<{ TplParamIdent }>, Plain<{ DescLiteral }>), TplParamClose(Span), @@ -108,6 +109,7 @@ impl Token for Nir { match self { Todo => UNKNOWN_SPAN, + TodoAttr(SPair(_, span)) => *span, TplParamOpen(dfn, _) => dfn.span(), TplParamClose(span) => *span, TplParamText(text) => text.span(), @@ -124,6 +126,7 @@ impl Display for Nir { match self { Todo => write!(f, "TODO"), + TodoAttr(SPair(sym, _)) => write!(f, "TODO Attr {sym}"), TplParamOpen(dfn, desc) => { write!(f, "open template param {dfn} ({desc})") } @@ -138,6 +141,12 @@ impl Display for Nir { } } +impl Into> for Nir { + fn into(self) -> Result { + Ok(self) + } +} + /// Tag representing the type of a NIR value. /// /// NIR values originate from attributes, diff --git a/tamer/src/nir/parse.rs b/tamer/src/nir/parse.rs index fb0dbbd1..39cc989e 100644 --- a/tamer/src/nir/parse.rs +++ b/tamer/src/nir/parse.rs @@ -17,7 +17,21 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . -//! 
NIR parser. +//! Decompose a [XIRF](crate::xir::flat) stream into NIR. +//! +//! TAME's grammar is embedded within the grammar of a document, +//! in this case XML. +//! The purpose of this parser is to extract the grammar of TAME from the +//! XML document and represent it as NIR. +//! This parser merely describes _the permissible structure of the +//! document_, +//! but nothing more. +//! For example, +//! whether an attribute is required depends on what parsers later in +//! the lowering pipeline require of NIR within a given context; +//! this parser merely describes how to translate an attribute into NIR +//! if it happens to be present, +//! and rejects attributes that it does not know about. //! //! For general information about NIR, //! see the [parent module](super). @@ -102,18 +116,12 @@ //! //! See [`TplKw`] for template tokens that are accepted anywhere. -use super::{NirSymbolTy::*, *}; +use super::*; use crate::{ ele_parse, - sym::st::raw::*, - xir::{ - attr::Attr, - st::{prefix::*, qname::*}, - }, + xir::st::{prefix::*, qname::*}, }; -type N = NirSymbol; - ele_parse! { /// Parser lowering [XIR](crate::xir) into [`Nir`]. /// @@ -198,13 +206,13 @@ ele_parse! { /// since TAME was designed for producing insurance rating systems. RaterStmt := QN_RATER { @ { - _xmlns: (QN_XMLNS) => Literal, - _xmlns_c: (QN_XMLNS_C) => Literal, - _xmlns_t: (QN_XMLNS_T) => Literal, + QN_XMLNS => Nir::TodoAttr, + QN_XMLNS_C => Nir::TodoAttr, + QN_XMLNS_T => Nir::TodoAttr, // TODO: Is this still needed? // TODO: PkgName type - _name: (QN_NAME) => N<{PkgPath}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, ImportStmt, @@ -219,25 +227,25 @@ ele_parse! { /// different package types. 
PackageStmt := QN_PACKAGE { @ { - _xmlns: (QN_XMLNS) => Literal, - _xmlns_c: (QN_XMLNS_C) => Literal, - _xmlns_t: (QN_XMLNS_T) => Literal, + QN_XMLNS => Nir::TodoAttr, + QN_XMLNS_C => Nir::TodoAttr, + QN_XMLNS_T => Nir::TodoAttr, // TODO: Having trouble getting rid of `@xmlns:lv` using Saxon // for `progui-pkg`, // so just allow for now. // It can't actually be used on nodes. - _xmlns_lv: (QN_XMLNS_LV?) => Option>, + QN_XMLNS_LV => Nir::TodoAttr, - _id: (QN_ID?) => Option>, - _title: (QN_TITLE?) => Option>, - _desc: (QN_DESC?) => Option>, + QN_ID => Nir::TodoAttr, + QN_TITLE => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, // TODO: When can we get rid of this? - _core: (QN_CORE?) => Option>, - _program: (QN_PROGRAM?) => Option>, + QN_CORE => Nir::TodoAttr, + QN_PROGRAM => Nir::TodoAttr, // TODO: Can this go away now? - _name: (QN_NAME?) => Option>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, ImportStmt, @@ -250,8 +258,8 @@ ele_parse! { /// for composing larger systems out of smaller components. ImportStmt := QN_IMPORT { @ { - _pkg: (QN_PACKAGE) => N<{PkgPath}>, - _export: (QN_EXPORT?) => Option>, + QN_PACKAGE => Nir::TodoAttr, + QN_EXPORT => Nir::TodoAttr, } => Nir::Todo, }; @@ -294,12 +302,12 @@ ele_parse! { /// declaration. ExternStmt := QN_EXTERN { @ { - _name: (QN_NAME) => N<{AnyIdent}>, - _ty: (QN_TYPE) => N<{IdentType}>, - _dtype: (QN_DTYPE?) => Option>, - _dim: (QN_DIM) => N<{NumLiteral}>, - _parent: (QN_PARENT?) => Option>, - _yields: (QN_YIELDS?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_TYPE => Nir::TodoAttr, + QN_DTYPE => Nir::TodoAttr, + QN_DIM => Nir::TodoAttr, + QN_PARENT => Nir::TodoAttr, + QN_YIELDS => Nir::TodoAttr, } => Nir::Todo, }; @@ -309,13 +317,13 @@ ele_parse! { /// such as [`ProgramMapStmt`]. ParamStmt := QN_PARAM { @ { - _name: (QN_NAME) => N<{ParamName}>, - _ty: (QN_TYPE) => N<{ParamType}>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_TYPE => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, // This is a misnomer. 
- _set: (QN_SET?) => Option>, - _default: (QN_DEFAULT?) => Option>, - _sym: (QN_SYM?) => Option>, + QN_SET => Nir::TodoAttr, + QN_DEFAULT => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, } => Nir::Todo, }; @@ -332,15 +340,15 @@ ele_parse! { /// and re-use that familiar syntax. ConstStmt := QN_CONST { @ { - _name: (QN_NAME) => N<{ConstIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, - _value: (QN_VALUE?) => Option>, - _values: (QN_VALUES?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, + QN_VALUES => Nir::TodoAttr, // TODO: deprecate? - _ty: (QN_TYPE?) => Option>, - _sym: (QN_SYM?) => Option>, + QN_TYPE => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, // TODO: Misnomer - _set: (QN_SET?) => Option>, + QN_SET => Nir::TodoAttr, } => Nir::Todo, ConstStmtBody, @@ -360,7 +368,7 @@ ele_parse! { /// dimensionality and will be changed in future versions. ConstMatrixRow := QN_SET { @ { - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, ConstVectorItem, @@ -369,8 +377,8 @@ ele_parse! { /// Constant vector scalar item definition. ConstVectorItem := QN_ITEM { @ { - _value: (QN_VALUE) => N<{NumLiteral}>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_VALUE => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, }; @@ -381,12 +389,12 @@ ele_parse! { /// inputs. ClassifyStmt := QN_CLASSIFY { @ { - _name: (QN_AS) => N<{ClassIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, - _any: (QN_ANY?) => Option>, - _yields: (QN_YIELDS?) => Option>, - _sym: (QN_SYM?) => Option>, - _terminate: (QN_TERMINATE?) => Option>, + QN_AS => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_ANY => Nir::TodoAttr, + QN_YIELDS => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, + QN_TERMINATE => Nir::TodoAttr, } => Nir::Todo, LogExpr, @@ -401,19 +409,19 @@ ele_parse! { /// This will eventually be renamed to a more general term. RateStmt := QN_RATE { @ { - _class: (QN_CLASS?) => Option>, - _no: (QN_NO?) 
=> Option>, - _yields: (QN_YIELDS) => N<{ValueIdent}>, - _desc: (QN_DESC?) => Option>, - _sym: (QN_SYM?) => Option>, + QN_CLASS => Nir::TodoAttr, + QN_NO => Nir::TodoAttr, + QN_YIELDS => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, // TODO: This is still recognized by the XSLT-based compiler, // so we need to support it until it's removed. - _gentle_no: (QN_GENTLE_NO?) => Option>, + QN_GENTLE_NO => Nir::TodoAttr, // TODO: We'll have private-by-default later. // This is a kludge. - _local: (QN_LOCAL?) => Option>, + QN_LOCAL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -427,13 +435,13 @@ ele_parse! { /// [`SumExpr`] serving as the item-wise map. RateEachStmt := QN_RATE_EACH { @ { - _class: (QN_CLASS) => N<{ClassIdentList}>, - _no: (QN_NO?) => Option>, - _generates: (QN_GENERATES?) => Option>, - _index: (QN_INDEX) => N<{ValueIdent}>, - _yields: (QN_YIELDS?) => Option>, - _sym: (QN_SYM?) => Option>, - _gensym: (QN_GENSYM?) => Option>, + QN_CLASS => Nir::TodoAttr, + QN_NO => Nir::TodoAttr, + QN_GENERATES => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_YIELDS => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, + QN_GENSYM => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -442,9 +450,9 @@ ele_parse! { /// Define a new type that restricts the domain of data. TypedefStmt := QN_TYPEDEF { @ { - _name: (QN_NAME) => N<{TypeIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, - _sym: (QN_SYM?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, } => Nir::Todo, InnerTypedefStmt, @@ -467,7 +475,7 @@ ele_parse! { /// each with associated constant identifiers. EnumStmt := QN_ENUM { @ { - _ty: (QN_TYPE) => N<{TypeIdent}>, + QN_TYPE => Nir::TodoAttr, } => Nir::Todo, ItemEnumStmt, @@ -477,9 +485,9 @@ ele_parse! { /// with a constant identifier. 
ItemEnumStmt := QN_ITEM { @ { - _name: (QN_NAME) => N<{ConstIdent}>, - _value: (QN_VALUE) => N<{NumLiteral}>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, }; @@ -522,7 +530,7 @@ ele_parse! { /// which must appear outside of sections. SectionStmt := QN_SECTION { @ { - _title: (QN_TITLE) => N<{Title}>, + QN_TITLE => Nir::TodoAttr, } => Nir::Todo, PkgBodyStmt, @@ -531,9 +539,9 @@ ele_parse! { /// Define a function and associate it with an identifier. FunctionStmt := QN_FUNCTION { @ { - _name: (QN_NAME) => N<{FuncIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, - _sym: (QN_SYM?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, } => Nir::Todo, FunctionParamStmt, @@ -544,11 +552,11 @@ ele_parse! { /// is scoped to the function body. FunctionParamStmt := QN_PARAM { @ { - _name: (QN_NAME) => N<{ParamIdent}>, - _ty: (QN_TYPE) => N<{TypeIdent}>, + QN_NAME => Nir::TodoAttr, + QN_TYPE => Nir::TodoAttr, // _TODO: This is a misnomer. - _set: (QN_SET?) => Option>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_SET => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, }; @@ -571,10 +579,10 @@ ele_parse! { /// determined by the dimensionality of the matches' [`@on`](QN_ON). MatchExpr := QN_MATCH { @ { - _on: (QN_ON) => N<{ValueIdent}>, - _value: (QN_VALUE?) => Option>, - _index: (QN_INDEX?) => Option>, - _anyof: (QN_ANY_OF?) => Option>, + QN_ON => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_ANY_OF => Nir::TodoAttr, } => Nir::Todo, CalcPredExpr, @@ -655,13 +663,13 @@ ele_parse! { /// Summation is generated automatically by [`RateEachStmt`]. SumExpr := QN_C_SUM { @ { - _of: (QN_OF?) => Option>, - _generates: (QN_GENERATES?) => Option>, - _index: (QN_INDEX?) => Option>, - _desc: (QN_DESC?) => Option>, - _label: (QN_LABEL?) => Option>, - _sym: (QN_SYM?) => Option>, - _dim: (QN_DIM?) 
=> Option>, + QN_OF => Nir::TodoAttr, + QN_GENERATES => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_LABEL => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, + QN_DIM => Nir::TodoAttr, } => Nir::Todo, WhenExpr, @@ -676,14 +684,14 @@ ele_parse! { /// identified by [`@generates`](QN_GENERATES). ProductExpr := QN_C_PRODUCT { @ { - _of: (QN_OF?) => Option>, - _generates: (QN_GENERATES?) => Option>, - _index: (QN_INDEX?) => Option>, - _desc: (QN_DESC?) => Option>, - _label: (QN_LABEL?) => Option>, - _dot: (QN_DOT?) => Option>, - _sym: (QN_SYM?) => Option>, - _dim: (QN_DIM?) => Option>, + QN_OF => Nir::TodoAttr, + QN_GENERATES => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, + QN_LABEL => Nir::TodoAttr, + QN_DOT => Nir::TodoAttr, + QN_SYM => Nir::TodoAttr, + QN_DIM => Nir::TodoAttr, } => Nir::Todo, WhenExpr, @@ -699,7 +707,7 @@ ele_parse! { /// TAMER will be relaxing that restriction. QuotientExpr := QN_C_QUOTIENT { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -728,9 +736,9 @@ ele_parse! { /// a scalar. ValueOfExpr := QN_C_VALUE_OF { @ { - _name: (QN_NAME) => N<{ValueIdent}>, - _index: (QN_INDEX?) => Option>, - _label: (QN_LABEL?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, IndexExpr, @@ -746,7 +754,7 @@ ele_parse! { /// such that **M**_ⱼ,ₖ_ ≡ (**M**_ⱼ_)_ₖ_. IndexExpr := QN_C_INDEX { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, }; @@ -754,7 +762,7 @@ ele_parse! { /// Expression yielding a constant scalar value. ConstExpr := QN_C_CONST { @ { - _value: (QN_VALUE) => N<{NumLiteral}>, + QN_VALUE => Nir::TodoAttr, // TODO: Description was historically required to avoid magic // values, // but we now have short-hand constants which do not require @@ -763,9 +771,9 @@ ele_parse! 
{ // but requiring `c:value-of` short-hand wouldn't be // the responsibility of NIR, // so perhaps then neither should be. - _desc: (QN_DESC?) => Option>, + QN_DESC => Nir::TodoAttr, // _TODO: deprecate? - _ty: (QN_TYPE?) => Option>, + QN_TYPE => Nir::TodoAttr, } => Nir::Todo, WhenExpr, @@ -774,7 +782,7 @@ ele_parse! { /// Ceiling (⌈_x_⌉) expression. CeilExpr := QN_C_CEIL { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, }; @@ -782,7 +790,7 @@ ele_parse! { /// Floor (⌊_x_⌋) expression. FloorExpr := QN_C_FLOOR { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, }; @@ -802,7 +810,7 @@ ele_parse! { /// otherwise the value `0` is yielded. CasesExpr := QN_C_CASES { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CaseExpr, @@ -822,7 +830,7 @@ ele_parse! { /// if any. CaseExpr := QN_C_CASE { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, WhenExpr, @@ -845,7 +853,7 @@ ele_parse! { /// explicit. OtherwiseExpr := QN_C_OTHERWISE { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -893,11 +901,11 @@ ele_parse! { /// its parent. LetValue := QN_C_VALUE { @ { - _name: (QN_NAME) => N<{ParamIdent}>, - _ty: (QN_TYPE) => N<{TypeIdent}>, + QN_NAME => Nir::TodoAttr, + QN_TYPE => Nir::TodoAttr, // Misnomer - _set: (QN_SET?) => Option>, - _desc: (QN_DESC?) => Option>, + QN_SET => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -907,7 +915,7 @@ ele_parse! { /// expressions' values as respective items. VectorExpr := QN_C_VECTOR { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -939,7 +947,7 @@ ele_parse! { /// as attributes. ApplyArg := QN_C_ARG { @ { - _name: (QN_NAME) => N<{ParamIdent}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, CalcExpr, @@ -977,7 +985,7 @@ ele_parse! { /// This terminology originates from Lisp. 
CarExpr := QN_C_CAR { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, }; @@ -989,7 +997,7 @@ ele_parse! { /// It is also called "tail". CdrExpr := QN_C_CDR { @ { - _label: (QN_LABEL?) => Option>, + QN_LABEL => Nir::TodoAttr, } => Nir::Todo, CalcExpr, }; @@ -1009,9 +1017,9 @@ ele_parse! { /// conditions for which case to evaluate. WhenExpr := QN_C_WHEN { @ { - _name: (QN_NAME) => N<{ValueIdent}>, - _index: (QN_INDEX?) => Option>, - _value: (QN_VALUE?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_INDEX => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, CalcPredExpr, @@ -1086,9 +1094,9 @@ ele_parse! { /// This will be generalized in the future. ProgramMapStmt := QN_PROGRAM_MAP { @ { - _xmlns: (QN_XMLNS) => Literal, - _xmlnslv: (QN_XMLNS_LV) => Literal, - _src: (QN_SRC) => N<{PkgPath}>, + QN_XMLNS => Nir::TodoAttr, + QN_XMLNS_LV => Nir::TodoAttr, + QN_SRC => Nir::TodoAttr, } => Nir::Todo, MapPkgImportStmt, @@ -1105,8 +1113,8 @@ ele_parse! { /// string values. ReturnMapStmt := QN_RETURN_MAP { @ { - _xmlns: (QN_XMLNS) => Literal, - _xmlnslv: (QN_XMLNS_LV) => Literal, + QN_XMLNS => Nir::TodoAttr, + QN_XMLNS_LV => Nir::TodoAttr, } => Nir::Todo, MapPkgImportStmt, @@ -1121,8 +1129,8 @@ ele_parse! { /// in favor of [`ImportStmt`]. MapPkgImportStmt := QN_LV_IMPORT { @ { - _package: (QN_PACKAGE) => N<{PkgPath}>, - _export: (QN_EXPORT?) => Option>, + QN_PACKAGE => Nir::TodoAttr, + QN_EXPORT => Nir::TodoAttr, } => Nir::Todo, }; @@ -1133,7 +1141,7 @@ ele_parse! { /// it will be removed in the future. MapImportStmt := QN_IMPORT { @ { - _path: (QN_PATH) => N<{PkgPath}>, + QN_PATH => Nir::TodoAttr, } => Nir::Todo, }; @@ -1145,11 +1153,11 @@ ele_parse! { /// See also [`MapStmt`] if the value needs to be modified in some way. MapPassStmt := QN_PASS { @ { - _name: (QN_NAME) => N<{AnyIdent}>, - _default: (QN_DEFAULT?) => Option>, - _scalar: (QN_SCALAR?) => Option>, - _override: (QN_OVERRIDE?) 
=> Option>, - _novalidate: (QN_NOVALIDATE?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DEFAULT => Nir::TodoAttr, + QN_SCALAR => Nir::TodoAttr, + QN_OVERRIDE => Nir::TodoAttr, + QN_NOVALIDATE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1158,17 +1166,17 @@ ele_parse! { /// See also [`MapPassStmt`] if the value does not need modification. MapStmt := QN_MAP { @ { - _to: (QN_TO) => N<{AnyIdent}>, - _from: (QN_FROM?) => Option>, + QN_TO => Nir::TodoAttr, + QN_FROM => Nir::TodoAttr, // We need to be permissive in what we accept since this may // match in different contexts; // downstream IR will validate the against the map // destination. - _value: (QN_VALUE?) => Option>, - _default: (QN_DEFAULT?) => Option>, - _scalar: (QN_SCALAR?) => Option>, - _override: (QN_OVERRIDE?) => Option>, - _novalidate: (QN_NOVALIDATE?) => Option>, + QN_VALUE => Nir::TodoAttr, + QN_DEFAULT => Nir::TodoAttr, + QN_SCALAR => Nir::TodoAttr, + QN_OVERRIDE => Nir::TodoAttr, + QN_NOVALIDATE => Nir::TodoAttr, } => Nir::Todo, MapStmtBody, @@ -1180,10 +1188,10 @@ ele_parse! { /// Source of data for a map operation. MapFromStmt := QN_FROM { @ { - _name: (QN_NAME) => N<{AnyIdent}>, - _default: (QN_DEFAULT?) => Option>, - _scalar: (QN_SCALAR?) => Option>, - _novalidate: (QN_NOVALIDATE?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DEFAULT => Nir::TodoAttr, + QN_SCALAR => Nir::TodoAttr, + QN_NOVALIDATE => Nir::TodoAttr, } => Nir::Todo, MapTranslateStmt, @@ -1192,8 +1200,8 @@ ele_parse! { /// List of 1:1 value translations for a map. MapTranslateStmt := QN_TRANSLATE { @ { - _key: (QN_KEY) => N<{StringLiteral}>, - _value: (QN_VALUE) => N<{NumLiteral}>, + QN_KEY => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1215,7 +1223,7 @@ ele_parse! { /// Map from a constant value. MapConstStmt := QN_CONST { @ { - _value: (QN_VALUE) => N<{StringLiteral}>, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1227,7 +1235,7 @@ ele_parse! { /// Transformations may be composed via nesting. 
MapTransformStmt := QN_TRANSFORM { @ { - _method: (QN_METHOD) => N<{MapTransformLiteral}>, + QN_METHOD => Nir::TodoAttr, } => Nir::Todo, MapStmtBody, @@ -1256,10 +1264,10 @@ ele_parse! { /// definition. WorksheetStmt := QN_WORKSHEET { @ { - _xmlns: (QN_XMLNS) => Literal, + QN_XMLNS => Nir::TodoAttr, - _name: (QN_NAME) => N<{PkgPath}>, - _pkg: (QN_PACKAGE) => N<{PkgPath}>, + QN_NAME => Nir::TodoAttr, + QN_PACKAGE => Nir::TodoAttr, } => Nir::Todo, ExpandFunctionStmt, @@ -1276,7 +1284,7 @@ ele_parse! { /// to care about. ExpandFunctionStmt := QN_EXPAND_FUNCTION { @ { - _name: (QN_NAME) => N<{FuncIdent}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, }; @@ -1284,7 +1292,7 @@ ele_parse! { /// along with its result. DisplayStmt := QN_DISPLAY { @ { - _name: (QN_NAME) => N<{ValueIdent}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, }; @@ -1376,8 +1384,8 @@ ele_parse! { /// Templates are applied using [`ApplyTemplate`] or [`TplApplyShort`]. TemplateStmt := QN_TEMPLATE { @ { - _name: (QN_NAME) => N<{TplIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, TplHeading, @@ -1401,8 +1409,8 @@ ele_parse! { /// expanded. TplParamStmt := QN_PARAM { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, - _desc: (QN_DESC) => N<{DescLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_DESC => Nir::TodoAttr, } => Nir::Todo, TplParamDefault, @@ -1435,7 +1443,7 @@ ele_parse! { /// associated template application argument. TplText := QN_TEXT { @ { - _unique: (QN_UNIQUE?) => Option>, + QN_UNIQUE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1450,15 +1458,15 @@ ele_parse! { /// cumbersome and slow TplParamValue := QN_PARAM_VALUE { @ { - _name: (QN_NAME) => N<{ParamIdent}>, - _dash: (QN_DASH?) => Option>, - _upper: (QN_UPPER?) => Option>, - _lower: (QN_LOWER?) => Option>, - _ucfirst: (QN_UCFIRST?) => Option>, - _rmdash: (QN_RMDASH?) => Option>, - _rmunderscore: (QN_RMUNDERSCORE?) => Option>, - _identifier: (QN_IDENTIFIER?) 
=> Option>, - _snake: (QN_SNAKE?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_DASH => Nir::TodoAttr, + QN_UPPER => Nir::TodoAttr, + QN_LOWER => Nir::TodoAttr, + QN_UCFIRST => Nir::TodoAttr, + QN_RMDASH => Nir::TodoAttr, + QN_RMUNDERSCORE => Nir::TodoAttr, + QN_IDENTIFIER => Nir::TodoAttr, + QN_SNAKE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1473,7 +1481,7 @@ ele_parse! { /// of the template. TplParamInherit := QN_PARAM_INHERIT { @ { - _meta: (QN_META) => N<{TplMetaIdent}>, + QN_META => Nir::TodoAttr, } => Nir::Todo, }; @@ -1483,8 +1491,8 @@ ele_parse! { /// this can be used to perform bounded recursive template expansion. TplParamAdd := QN_PARAM_ADD { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, - _value: (QN_VALUE) => N<{NumLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, }; @@ -1505,7 +1513,7 @@ ele_parse! { /// classification. TplParamClassToYields := QN_PARAM_CLASS_TO_YIELDS { @ { - _name: (QN_NAME) => N<{ClassIdent}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, }; @@ -1549,18 +1557,18 @@ ele_parse! { /// potentially yield false matches. TplParamTypedefLookup := QN_PARAM_TYPEDEF_LOOKUP { @ { - _name: (QN_NAME) => N<{TypeIdent}>, - _value: (QN_VALUE) => N<{NumLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, }; /// Look up an attribute from the symbol table for a given identifier. TplParamSymValue := QN_PARAM_SYM_VALUE { @ { - _name: (QN_NAME) => N<{AnyIdent}>, - _value: (QN_VALUE) => N<{SymbolTableKey}>, - _prefix: (QN_PREFIX?) => Option>, - _ignore_missing: (QN_IGNORE_MISSING?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, + QN_PREFIX => Nir::TodoAttr, + QN_IGNORE_MISSING => Nir::TodoAttr, } => Nir::Todo, }; @@ -1603,7 +1611,7 @@ ele_parse! { // determine what they may be. 
DynNode := QN_DYN_NODE { @ { - _name: (QN_NAME) => N<{DynNodeLiteral}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, // But we can at least restrict it for now by ensuring that it's @@ -1753,8 +1761,8 @@ ele_parse! { /// documentation and examples. InlineTemplateSymSet := QN_SYM_SET { @ { - _name_prefix: (QN_NAME_PREFIX?) => Option>, - _type: (QN_TYPE?) => Option>, + QN_NAME_PREFIX => Nir::TodoAttr, + QN_TYPE => Nir::TodoAttr, // TODO: Look at XSL sources for others } => Nir::Todo, }; @@ -1818,7 +1826,7 @@ ele_parse! { /// This allows creating templates that accept children. TplParamCopy := QN_PARAM_COPY { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, + QN_NAME => Nir::TodoAttr, } => Nir::Todo, }; @@ -1828,22 +1836,22 @@ ele_parse! { /// see [`TplParamInherit`]. TplParamMeta := QN_PARAM_META { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, - _value: (QN_VALUE) => N<{StringLiteral}>, + QN_NAME => Nir::TodoAttr, + QN_VALUE => Nir::TodoAttr, } => Nir::Todo, }; /// Conditionally expand the body if the provided predicate matches. TplIf := QN_IF { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, - _eq: (QN_EQ?) => Option>, - _gt: (QN_GT?) => Option>, - _gte: (QN_GTE?) => Option>, - _lt: (QN_LT?) => Option>, - _lte: (QN_LTE?) => Option>, - _prefix: (QN_PREFIX?) => Option>, - _suffix: (QN_SUFFIX?) => Option>, + QN_NAME => Nir::TodoAttr, + QN_EQ => Nir::TodoAttr, + QN_GT => Nir::TodoAttr, + QN_GTE => Nir::TodoAttr, + QN_LT => Nir::TodoAttr, + QN_LTE => Nir::TodoAttr, + QN_PREFIX => Nir::TodoAttr, + QN_SUFFIX => Nir::TodoAttr, } => Nir::Todo, AnyStmtOrExpr, @@ -1856,14 +1864,14 @@ ele_parse! { /// of an `else` clause. TplUnless := QN_UNLESS { @ { - _name: (QN_NAME) => N<{TplParamIdent}>, - _eq: (QN_EQ?) => Option>, - _gt: (QN_GT?) => Option>, - _gte: (QN_GTE?) => Option>, - _lt: (QN_LT?) => Option>, - _lte: (QN_LTE?) => Option>, - _prefix: (QN_PREFIX?) => Option>, - _suffix: (QN_SUFFIX?) 
=> Option>, + QN_NAME => Nir::TodoAttr, + QN_EQ => Nir::TodoAttr, + QN_GT => Nir::TodoAttr, + QN_GTE => Nir::TodoAttr, + QN_LT => Nir::TodoAttr, + QN_LTE => Nir::TodoAttr, + QN_PREFIX => Nir::TodoAttr, + QN_SUFFIX => Nir::TodoAttr, } => Nir::Todo, AnyStmtOrExpr, diff --git a/tamer/src/xir/parse/attrstream.rs b/tamer/src/xir/parse/attrstream.rs index 103aeff3..26f98c6c 100644 --- a/tamer/src/xir/parse/attrstream.rs +++ b/tamer/src/xir/parse/attrstream.rs @@ -102,7 +102,7 @@ macro_rules! attr_parse_stream { Self::Object, crate::xir::parse::AttrParseError, > { - unimplemented!() + unimplemented!("attrstream finalize_attr") } fn required_missing( @@ -110,16 +110,7 @@ macro_rules! attr_parse_stream { #[allow(unused_variables)] // unused if no fields _ctx: &Self::Fields ) -> Vec { - unimplemented!() - } - } - - impl $state_name { - fn done_with_element( - ele: crate::xir::QName, - span: crate::xir::OpenSpan, - ) -> Self { - Self::Done(ele, span) + unimplemented!("attrstream required_missing") } } @@ -151,16 +142,18 @@ macro_rules! attr_parse_stream { #[allow(unused_mut)] mut self, tok: Self::Token, - ctx: &mut Self::Context, + _ctx: &mut Self::Context, ) -> crate::parse::TransitionResult { - use crate::parse::{Transition, Transitionable, ParseStatus}; + use crate::parse::Transition; use crate::xir::{ flat, parse::{AttrParseError, AttrParseState} }; - #[allow(unused_imports)] - use crate::xir::attr::{Attr, AttrSpan}; // unused if no attrs - use crate::parse::util::SPair; + #[allow(unused_imports)] // unused if no attrs + use crate::{ + parse::{Transitionable, ParseStatus, util::SPair}, + xir::attr::{Attr, AttrSpan} + }; let ele_name = self.element_name(); @@ -200,10 +193,7 @@ macro_rules! attr_parse_stream { // Aggregation complete (dead state). 
(Self::Parsing(ele, span), tok_dead) => { - Self::Parsing(ele, span).finalize_attr(ctx) - .map(ParseStatus::Object) - .transition(Self::done_with_element(ele, span)) - .with_lookahead(tok_dead) + Transition(Self::Done(ele, span)).dead(tok_dead) } // Any tokens received after aggregation is completed diff --git a/tamer/src/xir/parse/ele.rs b/tamer/src/xir/parse/ele.rs index 6182c04d..2944413b 100644 --- a/tamer/src/xir/parse/ele.rs +++ b/tamer/src/xir/parse/ele.rs @@ -266,7 +266,7 @@ macro_rules! ele_parse { $vis:vis enum $super:ident; // Attr has to be first to avoid ambiguity with `$rest`. - $(type AttrValueError = $evty:ty;)? + type AttrValueError = $evty:ty; type Object = $objty:ty; $( @@ -281,7 +281,7 @@ macro_rules! ele_parse { $nt_first:ident := $($nt_defs:tt)* ) => { ele_parse! {@!next $vis $super - $(type AttrValueError = $evty;)? + type AttrValueError = $evty; type Object = $objty; $(#[$nt_first_attr])* $nt_first := $($nt_defs)* @@ -295,38 +295,38 @@ macro_rules! ele_parse { (@!next $vis:vis $super:ident // Attr has to be first to avoid ambiguity with `$rest`. - $(type AttrValueError = $evty:ty;)? + type AttrValueError = $evty:ty; type Object = $objty:ty; $($rest:tt)* ) => { - ele_parse!(@!nonterm_decl <$objty, $($evty)?> $vis $super $($rest)*); + ele_parse!(@!nonterm_decl <$objty, $evty> $vis $super $($rest)*); }; - (@!nonterm_decl <$objty:ty, $($evty:ty)?> + (@!nonterm_decl <$objty:ty, $evty:ty> $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident := $($rest:tt)* ) => { - ele_parse!(@!nonterm_def <$objty, $($evty)?> + ele_parse!(@!nonterm_def <$objty, $evty> $vis $super $(#[$nt_attr])* $nt $($rest)* ); }; - (@!nonterm_def <$objty:ty, $($evty:ty)?> + (@!nonterm_def <$objty:ty, $evty:ty> $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident $qname:ident $(($($ntp:tt)*))? 
{ $($matches:tt)* }; $($rest:tt)* ) => { - ele_parse!(@!ele_expand_body <$objty, $($evty)?> + ele_parse!(@!ele_expand_body <$objty, $evty> $vis $super $(#[$nt_attr])* $nt $qname ($($($ntp)*)?) $($matches)* ); ele_parse! {@!next $vis $super - $(type AttrValueError = $evty;)? + type AttrValueError = $evty; type Object = $objty; $($rest)* } }; - (@!nonterm_def <$objty:ty, $($evty:ty)?> + (@!nonterm_def <$objty:ty, $evty:ty> $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident ($ntref_first:ident $(| $ntref:ident)+); $($rest:tt)* ) => { @@ -335,17 +335,17 @@ macro_rules! ele_parse { ); ele_parse! {@!next $vis $super - $(type AttrValueError = $evty;)? + type AttrValueError = $evty; type Object = $objty; $($rest)* } }; - (@!nonterm_decl <$objty:ty, $($evty:ty)?> $vis:vis $super:ident) => {}; + (@!nonterm_decl <$objty:ty, $evty:ty> $vis:vis $super:ident) => {}; // Expand the provided data to a more verbose form that provides the // context necessary for state transitions. - (@!ele_expand_body <$objty:ty, $($evty:ty)?> + (@!ele_expand_body <$objty:ty, $evty:ty> $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident $qname:ident ($($ntp:tt)*) @@ -366,7 +366,7 @@ macro_rules! ele_parse { )* ) => { paste::paste! { ele_parse! { - @!ele_dfn_body <$objty, $($evty)?> + @!ele_dfn_body <$objty, $evty> $vis $super $(#[$nt_attr])*$nt $qname ($($ntp)*) @ { $($attrbody)* } => $attrmap, @@ -436,7 +436,7 @@ macro_rules! ele_parse { ) }; - (@!ele_dfn_body <$objty:ty, $($evty:ty)?> + (@!ele_dfn_body <$objty:ty, $evty:ty> $vis:vis $super:ident $(#[$nt_attr:meta])* $nt:ident $qname:ident ($($qname_matched:pat, $open_span:pat)?) @@ -447,7 +447,7 @@ macro_rules! ele_parse { // `$attrmap`. $( $(#[$fattr:meta])* - $field:ident: ($($fmatch:tt)+) => $fty:ty, + $fmatch:tt => $fexpr:expr, )* } => $attrmap:expr, @@ -472,16 +472,17 @@ macro_rules! ele_parse { )* } ) => { paste::paste! { - crate::attr_parse! { + crate::attr_parse_stream! 
{ /// Attribute parser for #[doc=concat!("[`", stringify!($nt), "`].")] - vis($vis); - $(type ValueError = $evty;)? + type Object = $objty; + type ValueError = $evty; - struct #[doc(hidden)] [<$nt AttrState_>] -> [<$nt Attrs>] { + #[doc(hidden)] + $vis [<$nt AttrState_>] { $( $(#[$fattr])* - $field: ($($fmatch)+) => $fty, + $fmatch => $fexpr, )* } } @@ -510,8 +511,7 @@ macro_rules! ele_parse { $(#[$nt_attr])* /// - #[doc=concat!("Parser for element [`", stringify!($qname), "`] ")] - #[doc=concat!("with attributes [`", stringify!([<$nt Attrs>]), "`].")] + #[doc=concat!("Parser for element [`", stringify!($qname), "`].")] #[derive(Debug, PartialEq, Eq, Default)] $vis struct $nt(crate::xir::parse::NtState<$nt>); @@ -666,45 +666,21 @@ macro_rules! ele_parse { match (selfst, tok) { ( - Expecting | NonPreemptableExpecting, + Expecting | NonPreemptableExpecting | Closed(..), XirfToken::Open(qname, span, depth) ) if $nt::matches(qname) => { use crate::xir::parse::AttrFieldSum; attr_fields.init_fields::<[<$nt AttrFields>]>(); - let transition = Transition(Self(Attrs( - (qname, span, depth), - parse_attrs(qname, span) - ))); - - // Streaming attribute parsing will cause the - // attribute map to be yielded immediately as the - // opening object, - // since we will not be aggregating attrs. $( - // Used only to match on `[attr]`. - let [<_ $attr_stream_binding>] = (); - return transition.ok(<$objty>::from($attrmap)); + let $qname_matched = qname; + let $open_span = span; )? - // If the `[attr]` special form was _not_ - // provided, - // we'll be aggregating attributes. 
- #[allow(unreachable_code)] - transition.incomplete() - }, - - ( - Closed(..), - XirfToken::Open(qname, span, depth) - ) if Self::matches(qname) => { - use crate::xir::parse::AttrFieldSum; - attr_fields.init_fields::<[<$nt AttrFields>]>(); - Transition(Self(Attrs( (qname, span, depth), parse_attrs(qname, span) - ))).incomplete() + ))).ok(<$objty>::from($attrmap)) }, // We only attempt recovery when encountering an @@ -776,47 +752,14 @@ macro_rules! ele_parse { // which overrides this match directly above // (xref <>). #[allow(unreachable_patterns)] - (Attrs(meta @ (qname, span, depth), sa), tok) => { + (Attrs(meta @ (_, span, _), sa), tok) => { use crate::xir::parse::AttrFieldSum; - sa.delegate_until_obj::( + sa.delegate::( tok, attr_fields.narrow::<[<$nt AttrFields>]>(span), |sa| Transition(Self(Attrs(meta, sa))), - // If we enter a dead state then we have - // failed produce an attribute object, - // in which case we'll recover by ignoring - // the entire element. - || Transition(Self(RecoverEleIgnore(qname, span, depth))), - |#[allow(unused_variables)] sa, attrs| { - let obj = match attrs { - // Attribute field bindings for `$attrmap` - [<$nt Attrs>] { - $( - $field, - )* - } => { - // Optional `OpenSpan` binding - let _ = qname; // avoid unused warning - $( - use crate::xir::parse::attr::AttrParseState; - let $qname_matched = qname; - let $open_span = sa.element_span(); - )? - - <$objty>::from($attrmap) - }, - }; - - // Lookahead is added by `delegate_until_obj`. - ele_parse!(@!ntref_delegate - stack, - Self(Jmp($ntfirst(meta))), - $ntfirst_st, - Transition(<$ntfirst_st>::default()).ok(obj), - Transition(Self(Jmp($ntfirst(meta)))).ok(obj) - ) - } + || Transition(Self(Jmp($ntfirst(meta)))), ) }, diff --git a/tamer/src/xir/parse/ele/test.rs b/tamer/src/xir/parse/ele/test.rs index dbac7e54..575542a5 100644 --- a/tamer/src/xir/parse/ele/test.rs +++ b/tamer/src/xir/parse/ele/test.rs @@ -19,8 +19,9 @@ //! Element parser generator tests. //! -//! 
It is expected to be understood for these tests that `ele_parse` -//! directly invokes `attr_parse` to perform all attribute parsing, +//! It is expected to be understood for these tests that `ele_parse!` +//! directly invokes `attr_parse_stream!` to perform all attribute +//! parsing, //! and so testing of that parsing is not duplicated here. //! A brief visual inspection of the implementation of `ele_parse` //! should suffice to verify this claim. @@ -32,13 +33,17 @@ //! the system, //! simply force the test to panic at the end. -use std::{assert_matches::assert_matches, error::Error, fmt::Display}; +use std::{ + assert_matches::assert_matches, convert::Infallible, error::Error, + fmt::Display, +}; use crate::{ convert::ExpectInto, - diagnose::Diagnostic, + diagnose::{AnnotatedSpan, Diagnostic}, parse::{ - FinalizeError, Object, ParseError, ParseState, Parsed, ParsedResult, + util::SPair, FinalizeError, Object, ParseError, ParseState, Parsed, + ParsedResult, }, span::{dummy::*, Span}, sym::SymbolId, @@ -61,6 +66,8 @@ fn empty_element_no_attrs_no_close() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE { @@ -76,9 +83,8 @@ fn empty_element_no_attrs_no_close() { assert_eq!( Ok(vec![ - Parsed::Incomplete, // [Root] Open - Parsed::Object(Foo), // [Root@] Close (>LA) - Parsed::Incomplete, // [Root] Close (LA) - Parsed::Object(Foo::Close), // [Root] Close (LA) - Object(Foo::Close(CloseSpan::empty(S2))), // [Root] Close (LA) - Parsed::Incomplete, // [Root] Close (LA) + assert_eq!(sut.next(), Some(Ok(Parsed::Object(Foo)))); // [Root] Open // But then consuming the LA will produce an error, // since we were not expecting a child. @@ -285,54 +297,28 @@ fn empty_element_ns_prefix_invalid_close_contains_matching_qname() { ); } -// Static, aggregate attribute objects. +// Merging of element stream with attributes. 
#[test] -fn empty_element_with_attr_bindings() { +fn empty_element_with_attrs() { #[derive(Debug, PartialEq, Eq)] - struct Foo(SymbolId, SymbolId, (Span, Span)); + enum Foo { + Ele, + A(SPair), + B(SPair), + } + impl Object for Foo {} - #[derive(Debug, PartialEq, Eq)] - struct AttrVal(Attr); - - impl TryFrom for AttrVal { - // Type must match AttrValueError on `ele_parse!` - type Error = AttrValueError; - - fn try_from(attr: Attr) -> Result { - Ok(AttrVal(attr)) - } - } - - #[derive(Debug, PartialEq)] - enum AttrValueError {} - - impl Error for AttrValueError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - None - } - } - - impl Display for AttrValueError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "test AttrValueError") - } - } - - impl Diagnostic for AttrValueError { - fn describe(&self) -> Vec { - vec![] + impl Into> for Foo { + fn into(self) -> Result { + Ok(self) } } ele_parse! { enum Sut; - // AttrValueError should be passed to `attr_parse!` - // (which is invoked by `ele_parse!`) - // as ValueError. - type AttrValueError = AttrValueError; - + type AttrValueError = Infallible; type Object = Foo; // In practice we wouldn't actually use Attr @@ -340,13 +326,9 @@ fn empty_element_with_attr_bindings() { // but for the sake of this test we'll keep things simple. 
Root := QN_PACKAGE { @ { - name: (QN_NAME) => AttrVal, - value: (QN_VALUE) => AttrVal, - } => Foo( - name.0.value(), - value.0.value(), - (name.0.attr_span().value_span(), value.0.attr_span().value_span()) - ), + QN_NAME => Foo::A, + QN_VALUE => Foo::B, + } => Foo::Ele, }; } @@ -364,11 +346,10 @@ fn empty_element_with_attr_bindings() { assert_eq!( Ok(vec![ - Parsed::Incomplete, // Open - Parsed::Incomplete, // Attr - Parsed::Incomplete, // Attr - Parsed::Object(Foo(name_val, value_val, (S5, S3))), // Close - Parsed::Incomplete, // Close (LA) + Parsed::Object(Foo::Ele), // Open + Parsed::Object(Foo::B(SPair(value_val, S3))), // Attr + Parsed::Object(Foo::A(SPair(name_val, S5))), // Attr + Parsed::Incomplete, // Close ]), Sut::parse(toks.into_iter()).collect(), ); @@ -376,29 +357,77 @@ fn empty_element_with_attr_bindings() { // This only tests one scenario under which attribute parsing may fail // (others are tested with `attr_parse!`). -// Failure to parse an attribute is considered a failure at the element -// level and recovery will skip the entire element. +// Failure to parse an attribute will ignore that attribute and continue. +// +// Historical Note +// =============== +// This strategy differs from the original design of this parser, +// which would ignore the entire element, +// with the intent that the type of token to yield would depend in part on +// the element's attributes; +// this has been walked back in favor of refinement lower in the +// pipeline. 
#[test] fn element_with_failed_attr_parsing() { #[derive(Debug, PartialEq, Eq)] enum Foo { Open, Close, + Attr(SPair), Child, } impl crate::parse::Object for Foo {} + impl Into> for Foo { + fn into(self) -> Result { + Ok(self) + } + } + + #[derive(Debug, PartialEq)] + struct FooError(SPair); + + impl Error for FooError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + None + } + } + + impl Display for FooError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "test FooError") + } + } + + impl Diagnostic for FooError { + fn describe(&self) -> Vec { + vec![] + } + } + + impl Into> for FooError { + fn into(self) -> Result { + Err(self) + } + } + const QN_ROOT: QName = QN_PACKAGE; const QN_CHILD: QName = QN_DIM; ele_parse! { enum Sut; + + type AttrValueError = FooError; type Object = Foo; Root := QN_ROOT { @ { - _name: (QN_NAME) => Attr, + // This one will always fail: + QN_NAME => FooError, + + // This one will always succeed: + QN_YIELDS => Foo::Attr, } => Foo::Open, // Important to check that this is not emitted. @@ -410,12 +439,16 @@ fn element_with_failed_attr_parsing() { }; } + let name_val = "name_val".into(); + let yields_val = "yields_val".into(); + let toks = vec![ XirfToken::Open(QN_ROOT, OpenSpan(S1, N), Depth(0)), - // Child elements should be ignored. - XirfToken::Open(QN_CHILD, OpenSpan(S4, N), Depth(1)), - XirfToken::Close(None, CloseSpan::empty(S5), Depth(1)), - // Recovery ends at the closing tag. + // This will fail to parse and will not yield a token + XirfToken::Attr(Attr(QN_NAME, name_val, AttrSpan(S2, S3))), + // But this will parse successfully, + // since recovery was restricted to that one token of input. + XirfToken::Attr(Attr(QN_YIELDS, yields_val, AttrSpan(S4, S5))), XirfToken::Close(Some(QN_ROOT), CloseSpan::empty(S6), Depth(0)), ]; @@ -424,33 +457,29 @@ fn element_with_failed_attr_parsing() { use Parsed::*; // Root will open normally. 
- assert_eq!(sut.next(), Some(Ok(Incomplete))); // [Root] Root Open + assert_eq!(sut.next(), Some(Ok(Object(Foo::Open)))); // [Root] Root Open - // But the child will result in an error because we have not provided a - // required attribute. + // The next token is the attribute that will result in an error. + // Recovery concludes immediately after ignoring it. let err = sut.next().unwrap().unwrap_err(); assert_matches!( err, ParseError::StateError(::Error::Root( ::Error::Attrs(..) )), - ); // [Root] Child Open (>LA) + ); // [Root@] QN_NAME - // The remaining tokens should be ignored and we should finish parsing. - // Since the opening object was not emitted, - // we must not emit the closing. assert_eq!( Ok(vec![ - Incomplete, // [Root!] Child Open (LA) - Incomplete, // [Child] Child Close (LA) - Parsed::Incomplete, // [Child] Child Open (LA) - Parsed::Incomplete, // [Child] Child Close (LA) + Parsed::Object(Foo::Root), // [Root] Root Open + Parsed::Incomplete, // [Root] Root Close ]), Sut::parse(toks.into_iter()).collect(), ); @@ -736,6 +760,8 @@ fn multiple_child_elements_sequential() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE(_, ospan) { @@ -775,15 +801,12 @@ fn multiple_child_elements_sequential() { use Parsed::*; assert_eq!( Ok(vec![ - Incomplete, // [Root] Root Open - Object(Foo::RootOpen(S1)), // [Root@] ChildA Open (>LA) - Incomplete, // [ChildA] ChildA Open (LA) - Object(Foo::ChildAClose(S3)), // [ChildA] ChildA Close (LA) - Object(Foo::ChildBClose), // [ChildB] ChildB Close () { ele_parse! 
{ enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_SUT { @@ -845,16 +870,13 @@ fn x_ignored_between_elements(tok: XirfToken) { assert_eq!( Ok(vec![ Incomplete, // [Root] tok - Incomplete, // [Root] Root Open + Object(Foo::Root), // [Root] Root Open Incomplete, // [Root@] tok - Object(Foo::Root), // [Root@] A Open (>LA) - Incomplete, // [A] A Open (LA) - Incomplete, // [A] A Close (LA) - Incomplete, // [B] B Close (LA) - sut.next() - ); + assert_eq!(Some(Ok(Object(Foo::RootOpen))), sut.next()); // [Root] Open 0 // The token of lookahead (`Open`) is unexpected for `ChildA`, // when then skips to `ChildB`, @@ -1015,9 +1029,8 @@ fn child_error_and_recovery() { // but that's not what we're doing yet. assert_eq!( Ok(vec![ - Incomplete, // [ChildB] Open 1 - Object(Foo::ChildB), // [ChildB@] Close 1 (>LA) - Incomplete, // [ChildB] Close 1 (LA) - sut.next() - ); + assert_eq!(Some(Ok(Parsed::Object(Foo::Open))), sut.next()); // [Root] Open 0 // The token of lookahead (`Open`) is unexpected for `Root`, // which is expecting `Close`. @@ -1173,6 +1178,8 @@ fn sum_nonterminal_accepts_any_valid_element() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := (A | B | C); @@ -1204,9 +1211,8 @@ fn sum_nonterminal_accepts_any_valid_element() { assert_eq!( Ok(vec![ - Incomplete, // [X] Open - Object(obj), // [X@] Close (>LA) - Incomplete, // [X] Close + Object(obj), // [X] Open + Incomplete, // [X] Close ]), Sut::parse(toks.into_iter()).collect(), ); @@ -1229,6 +1235,8 @@ fn sum_nonterminal_accepts_x(tok: XirfToken) { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; // Sum type requires two NTs but we only use A. 
@@ -1258,11 +1266,10 @@ fn sum_nonterminal_accepts_x(tok: XirfToken) { assert_eq!( Ok(vec![ - Incomplete, // [A] tok - Incomplete, // [A] Open - Object(Foo::A), // [A@] Close (>LA) - Incomplete, // [A] Close - Incomplete, // [A] tok + Incomplete, // [A] tok + Object(Foo::A), // [A] Open + Incomplete, // [A] Close + Incomplete, // [A] tok ]), Sut::parse(toks.into_iter()).collect(), ); @@ -1304,6 +1311,8 @@ fn sum_nonterminal_as_child_element() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE { @@ -1348,15 +1357,12 @@ fn sum_nonterminal_as_child_element() { assert_eq!( Ok(vec![ - Incomplete, // [Root] Root Open - Object(Foo::Open(QN_ROOT)), // [Root@] A Open (>LA) - Incomplete, // [A] A Open (LA) - Object(Foo::Close(QN_A)), // [A] A Close (LA) - Object(Foo::Close(QN_C)), // [C] B Close (LA) + Object(Foo::Close(QN_ROOT)), // [Root] Root Close ]), Sut::parse(toks.into_iter()).collect(), ); @@ -1379,6 +1385,8 @@ fn sum_nonterminal_error_recovery() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := (A | B); @@ -1497,6 +1505,8 @@ fn child_repetition() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE { @@ -1571,23 +1581,17 @@ fn child_repetition() { // then.) 
assert_eq!( Ok(vec![ - Incomplete, // [Root] Root Open - Object(Foo::RootOpen), // [Root@] ChildA Open (>LA) - Incomplete, // [ChildA] ChildA Open (LA) - Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (LA) - Object(Foo::ChildClose(QN_A)), // [ChildA] ChildA Close (LA) - Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (LA) - Object(Foo::ChildClose(QN_B)), // [ChildB] ChildB Close (LA) - Object(Foo::ChildClose(QN_C)), // [ChildC] ChildC Close (LA) - Ok(Incomplete), // [A] A Open (LA) - Ok(Object(Foo::Close(QN_A))), // [A] A Close (LA) - Ok(Object(Foo::Close(QN_A))), // [A] A Close (LA) - Ok(Object(Foo::Close(QN_B))), // [B] B Close (LA) - Ok(Object(Foo::Close(QN_B))), // [B] B Close (::Error::B( ::Error::UnexpectedEle( QN_A, @@ -1703,6 +1704,8 @@ fn child_repetition_invalid_tok_dead() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE { @@ -1735,11 +1738,9 @@ fn child_repetition_invalid_tok_dead() { let mut next = || sut.next(); - assert_eq!(next(), Some(Ok(Incomplete))); // [Root] Open - assert_eq!(next(), Some(Ok(Object(Foo::RootOpen)))); // [Root@] Open > - assert_eq!(next(), Some(Ok(Incomplete))); // [Child] Open < - assert_eq!(next(), Some(Ok(Object(Foo::ChildOpen)))); // [Child@] Close > - assert_eq!(next(), Some(Ok(Object(Foo::ChildClose)))); // [Child] Close < + assert_eq!(next(), Some(Ok(Object(Foo::RootOpen)))); // [Root] Open + assert_eq!(next(), Some(Ok(Object(Foo::ChildOpen)))); // [Child] Open + assert_eq!(next(), Some(Ok(Object(Foo::ChildClose)))); // [Child] Close // Intuitively, // we may want to enter recovery and ignore the element. @@ -1796,6 +1797,8 @@ fn sum_repetition() { ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_PACKAGE { @@ -1851,23 +1854,17 @@ fn sum_repetition() { // the suppression of `Incomplete` for dead states. 
assert_eq!( Ok(vec![ - Incomplete, // [Root] Root Open - Object(Foo::Open(QN_ROOT)), // [Root@] A Open (>LA) - Incomplete, // [A] A Open (LA) - Object(Foo::Close(QN_A)), // [A] A Close (LA) - Object(Foo::Close(QN_A)), // [A] A Close (LA) - Object(Foo::Close(QN_B)), // [B] B Close (LA) - Object(Foo::Close(QN_C)), // [C] C Close (LA) - Object(Foo::Close(QN_B)), // [B] B Close (LA) - Object(Foo::Text(text_root, S1)), // [Root] Text (LA) - Object(Foo::Text(text_a, S2)), // [Sut] Text (LA) - Object(Foo::Text(text_b, S4)), // [B] Text (LA) + Object(Foo::Open(QN_B)), // [B] B Open Object(Foo::Close(QN_B)), // [B] B Close - Incomplete, // [C] C Open - Object(Foo::Open(QN_C)), // [C@] C Text (>LA) + Object(Foo::Open(QN_C)), // [C] C Open Object(Foo::Close(QN_C)), // [C] C Close Object(Foo::Close(QN_ROOT)), // [Root] Root Close ]), @@ -2032,6 +2026,8 @@ fn no_mixed_content_super() { // No text permitted. ele_parse! { enum Sut; + + type AttrValueError = Infallible; type Object = Foo; Root := QN_SUT { @@ -2062,15 +2058,13 @@ fn no_mixed_content_super() { // The first two tokens should parse successfully // (four calls because of LA). - assert_eq!(sut.next(), Some(Ok(Incomplete))); // [Root] Root Open - assert_eq!(sut.next(), Some(Ok(Object(Foo::Root)))); // [Root@] A Open (>LA) - assert_eq!(sut.next(), Some(Ok(Incomplete))); // [A] A Open (LA) + assert_eq!(sut.next(), Some(Ok(Object(Foo::Root)))); // [Root] Root Open + assert_eq!(sut.next(), Some(Ok(Object(Foo::A)))); // [A] A Open // The next token is text, // which is not permitted because of a lack of `[super]` with // `[text`]. 
- assert_matches!(sut.next(), Some(Err(_))); // [A] Text (LA) - Incomplete, // [PreB] B Open (LA) - Object(Foo::PreAClose), // [PreA] A Close (LA) - Incomplete, // [PreA] A Open (LA) - Object(Foo::PreAClose), // [PreA] A Close (LA) - Incomplete, // [PreA] PreA Open (LA) - Incomplete, // [PreA] PreA Open (LA) - Object(Foo::PreAClose(S4)), // [PreA] PreA Close (LA) - Object(Foo::PreAClose(S5)), // [PreA] PreA Close (LA) - assert_eq!(sut.next(), Some(Ok(Incomplete))); // [A] Open A (LA) - assert_eq!(sut.next(), Some(Ok(Incomplete))); // [A] Close A (