From 82915f11af143372bc015b969bff508aa2aa472b Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Wed, 22 Feb 2023 23:03:42 -0500 Subject: [PATCH] tamer: asg::graph::object::xir: Initial rate element reconstruction This extends the POC a bit by beginning to reconstruct rate blocks (note that NIR isn't producing sub-expressions yet). Importantly, this also adds the first system tests, now that we have an end-to-end system. This not only gives me confidence that the system is producing the expected output, but serves as a compromise: writing unit or integration tests for this program derivation would be a great deal of work, and wouldn't even catch the bugs I'm worried most about; the lowering operation can be written in such a way as to give me high confidence in its correctness without those more granular tests, or in conjunction with unit or integration tests for a smaller portion. DEV-13708 --- tamer/conf.sh.in | 2 + tamer/configure.ac | 4 + tamer/src/asg/graph/object/expr.rs | 16 ++- tamer/src/asg/graph/object/xir.rs | 197 +++++++++++++++++++++++------ tamer/src/parse/util.rs | 6 + tamer/src/xir/flat.rs | 42 ++++++ tamer/tests/xmli/.gitignore | 1 + tamer/tests/xmli/README.md | 29 +++++ tamer/tests/xmli/expected.xml | 8 ++ tamer/tests/xmli/src.xml | 12 ++ tamer/tests/xmli/test-xmli | 35 +++++ 11 files changed, 313 insertions(+), 39 deletions(-) create mode 100644 tamer/tests/xmli/.gitignore create mode 100644 tamer/tests/xmli/README.md create mode 100644 tamer/tests/xmli/expected.xml create mode 100644 tamer/tests/xmli/src.xml create mode 100755 tamer/tests/xmli/test-xmli diff --git a/tamer/conf.sh.in b/tamer/conf.sh.in index f5046669..d0fa492b 100644 --- a/tamer/conf.sh.in +++ b/tamer/conf.sh.in @@ -61,3 +61,5 @@ fi declare -r TAMER_PATH_TAMEC="$TAMER_PATH_BIN/tamec" declare -r TAMER_PATH_TAMELD="$TAMER_PATH_BIN/tameld" + +declare -r P_XMLLINT="@XMLLINT@" diff --git a/tamer/configure.ac b/tamer/configure.ac index 6eb06eb3..e9678ad9 100644 --- a/tamer/configure.ac 
+++ b/tamer/configure.ac @@ -123,6 +123,10 @@ test -z "$FEATURES" || { FEATURES="--features $FEATURES" } +# Other programs used by scripts +AC_CHECK_PROGS(XMLLINT, [xmllint]) +test -n "$XMLLINT" || AC_MSG_ERROR([xmllint not found]) + AC_CONFIG_FILES([Makefile conf.sh]) AC_OUTPUT diff --git a/tamer/src/asg/graph/object/expr.rs b/tamer/src/asg/graph/object/expr.rs index 02a4e3fa..0f1ae934 100644 --- a/tamer/src/asg/graph/object/expr.rs +++ b/tamer/src/asg/graph/object/expr.rs @@ -53,6 +53,12 @@ impl Expr { Expr(_, _, span) => *span, } } + + pub fn op(&self) -> ExprOp { + match self { + Expr(op, _, _) => *op, + } + } } impl Functor for Expr { @@ -82,7 +88,7 @@ impl Display for Expr { /// TODO: Ideally this will be replaced with arbitrary binary (dyadic) /// functions defined within the language of TAME itself, /// as was the original plan with TAMER. -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum ExprOp { Sum, Product, @@ -284,4 +290,12 @@ impl ObjectIndex { let identi = asg.lookup_or_missing(ident); self.add_edge_to(asg, identi, Some(ident.span())) } + + /// The [`Ident`] bound to this expression, + /// if any. + pub fn ident(self, asg: &Asg) -> Option<&Ident> { + self.incoming_edges_filtered(asg) + .map(ObjectIndex::cresolve(asg)) + .next() + } } diff --git a/tamer/src/asg/graph/object/xir.rs b/tamer/src/asg/graph/object/xir.rs index 51333407..26cef0d7 100644 --- a/tamer/src/asg/graph/object/xir.rs +++ b/tamer/src/asg/graph/object/xir.rs @@ -29,18 +29,24 @@ //! but may be useful in the future for concrete code suggestions/fixes, //! or observing template expansions. 
-use super::ObjectRelTy; +use super::{DynObjectRel, Expr, Object, ObjectIndex, ObjectRelTy, Pkg}; use crate::{ - asg::{visit::TreeWalkRel, Asg}, - diagnose::Annotate, - diagnostic_unreachable, - parse::prelude::*, - sym::st::raw::URI_LV_RATER, + asg::{ + visit::{Depth, TreeWalkRel}, + Asg, ExprOp, + }, + diagnose::{panic::DiagnosticPanic, Annotate}, + diagnostic_panic, diagnostic_unreachable, + parse::{prelude::*, util::SPair}, + span::{Span, UNKNOWN_SPAN}, + sym::{ + st::{URI_LV_CALC, URI_LV_RATER, URI_LV_TPL}, + UriStaticSymbolId, + }, xir::{ - attr::Attr, flat::{Text, XirfToken}, - st::qname::{QN_PACKAGE, QN_XMLNS}, - OpenSpan, + st::qname::*, + OpenSpan, QName, }, }; use arrayvec::ArrayVec; @@ -63,55 +69,145 @@ impl<'a> Display for AsgTreeToXirf<'a> { } } +type Xirf = XirfToken; + impl<'a> ParseState for AsgTreeToXirf<'a> { type Token = TreeWalkRel; - type Object = XirfToken; + type Object = Xirf; type Error = Infallible; type Context = TreeContext<'a>; fn parse_token( self, tok: Self::Token, - TreeContext(tok_stack, asg): &mut TreeContext, + TreeContext(toks, asg): &mut TreeContext, ) -> TransitionResult { - use ObjectRelTy as Ty; - - if let Some(emit) = tok_stack.pop() { + if let Some(emit) = toks.pop() { return Transition(self).ok(emit).with_lookahead(tok); } let tok_span = tok.span(); let TreeWalkRel(dyn_rel, depth) = tok; + if depth == Depth(0) { + return Transition(self).incomplete(); + } + let obj = dyn_rel.target().resolve(asg); - let obj_span = obj.span(); - match dyn_rel.target_ty() { - Ty::Pkg => { - tok_stack.push(XirfToken::Attr(Attr::new( - QN_XMLNS, - URI_LV_RATER, - (obj_span, obj_span), - ))); + match obj { + Object::Pkg(pkg) => { + let span = pkg.span(); - Transition(self).ok(XirfToken::Open( - QN_PACKAGE, - OpenSpan::without_name_span(obj_span), - depth, - )) + toks.push(ns(QN_XMLNS_T, URI_LV_TPL, span)); + toks.push(ns(QN_XMLNS_C, URI_LV_CALC, span)); + toks.push(ns(QN_XMLNS, URI_LV_RATER, span)); + + Transition(self).ok(package(pkg, depth)) 
} - Ty::Ident | Ty::Expr => Transition(self).incomplete(), + // Identifiers will be considered in context; + // pass over it for now. + Object::Ident(..) => Transition(self).incomplete(), - Ty::Root => diagnostic_unreachable!( + Object::Expr(expr) => match dyn_rel.source_ty() { + ObjectRelTy::Ident => { + // We were just told an ident exists, + // so this should not fail. + let ident = dyn_rel + .must_narrow_into::() + .ident(asg) + .diagnostic_unwrap(|| { + vec![expr.internal_error( + "missing ident for this expression", + )] + }); + + toks.push(yields(ident.name(), expr.span())); + + Transition(Self::Ready(Default::default())) + .ok(stmt(expr, depth)) + } + _ => todo!("non-ident expr"), + }, + + Object::Root(_) => diagnostic_unreachable!( vec![tok_span.error("unexpected Root")], "tree walk is not expected to emit Root", ), } } - fn is_accepting(&self, _ctx: &Self::Context) -> bool { - true + fn is_accepting(&self, TreeContext(toks, _): &Self::Context) -> bool { + toks.is_empty() + } + + fn eof_tok( + &self, + TreeContext(toks, _): &Self::Context, + ) -> Option { + // If the stack is not empty on EOF, + // yield a dummy token just to invoke `parse_token` to finish + // emptying it. + (!toks.is_empty()).then_some(TreeWalkRel( + DynObjectRel::new( + ObjectRelTy::Root, + ObjectRelTy::Root, + ObjectIndex::new(0.into(), UNKNOWN_SPAN), + None, + ), + // This is the only part that really matters; + // the tree walk will never yield a depth of 0. 
+ Depth(0), + )) + } +} + +fn package(pkg: &Pkg, depth: Depth) -> Xirf { + Xirf::open(QN_PACKAGE, OpenSpan::without_name_span(pkg.span()), depth) +} + +fn ns(qname: QName, uri: UriStaticSymbolId, span: Span) -> Xirf { + Xirf::attr(qname, uri, (span, span)) +} + +fn stmt(expr: &Expr, depth: Depth) -> Xirf { + match expr.op() { + ExprOp::Sum => { + Xirf::open(QN_RATE, OpenSpan::without_name_span(expr.span()), depth) + } + + _ => todo!("stmt: {expr:?}"), + } +} + +fn yields(name: SPair, span: Span) -> Xirf { + Xirf::attr(QN_YIELDS, name, (span, name)) +} + +pub struct TreeContext<'a>(TokenStack, &'a Asg); + +// Custom `Debug` impl to omit ASG rendering, +// since it's large and already included while rendering other parts of +// the lowering pipeline. +// Of course, +// that's assuming this is part of the lowering pipeline. +impl<'a> std::fmt::Debug for TreeContext<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_tuple("TreeContext") + .field(&self.0) + .field(&AsgElided) + .finish() + } +} + +/// Used as a placeholder for [`TreeContext`]'s [`Debug`]. +#[derive(Debug)] +struct AsgElided; + +impl<'a> From<&'a Asg> for TreeContext<'a> { + fn from(asg: &'a Asg) -> Self { + Self(Default::default(), asg) } } @@ -132,14 +228,39 @@ const TOK_STACK_SIZE: usize = 8; /// This need only be big enough to accommodate [`AsgTreeToXirf`]'s /// implementation; /// the size is independent of user input. 
-type TokenStack<'a> = - ArrayVec< as ParseState>::Object, TOK_STACK_SIZE>; +#[derive(Debug, Default)] +struct TokenStack(ArrayVec); -#[derive(Debug)] -pub struct TreeContext<'a>(TokenStack<'a>, &'a Asg); +impl TokenStack { + fn push(&mut self, tok: Xirf) { + match self { + Self(stack) => { + if stack.is_full() { + diagnostic_panic!( + vec![tok.internal_error( + "while emitting a token for this object" + )], + "token stack exhausted (increase TOK_STACK_SIZE)", + ) + } -impl<'a> From<&'a Asg> for TreeContext<'a> { - fn from(asg: &'a Asg) -> Self { - Self(Default::default(), asg) + stack.push(tok) + } + } + } + + fn pop(&mut self) -> Option { + match self { + Self(stack) => stack.pop(), + } + } + + fn is_empty(&self) -> bool { + match self { + Self(stack) => stack.is_empty(), + } } } + +// System tests covering this functionality can be found in +// `tamer/tests/xmli/`. diff --git a/tamer/src/parse/util.rs b/tamer/src/parse/util.rs index ea1908cc..53e06ab6 100644 --- a/tamer/src/parse/util.rs +++ b/tamer/src/parse/util.rs @@ -132,6 +132,12 @@ impl From for (SymbolId, Span) { } } +impl From for SymbolId { + fn from(spair: SPair) -> Self { + spair.symbol() + } +} + #[derive(Debug, PartialEq, Eq)] pub struct EchoParseState(S); diff --git a/tamer/src/xir/flat.rs b/tamer/src/xir/flat.rs index 7bfa3c9f..9365eee1 100644 --- a/tamer/src/xir/flat.rs +++ b/tamer/src/xir/flat.rs @@ -146,6 +146,48 @@ pub enum XirfToken { CData(SymbolId, Span, Depth), } +impl XirfToken { + pub fn open( + qname: impl Into, + span: impl Into, + depth: Depth, + ) -> Self { + Self::Open(qname.into(), span.into(), depth) + } + + pub fn close( + qname: Option>, + span: impl Into, + depth: Depth, + ) -> Self { + Self::Close(qname.map(Into::into), span.into(), depth) + } + + pub fn attr( + qname: impl Into, + value: impl Into, + span: (impl Into, impl Into), + ) -> Self { + Self::Attr(Attr::new( + qname.into(), + value.into(), + (span.0.into(), span.1.into()), + )) + } + + pub fn comment( + comment: impl 
Into, + span: impl Into, + depth: Depth, + ) -> Self { + Self::Comment(comment.into(), span.into(), depth) + } + + pub fn text(text: impl Into, depth: Depth) -> Self { + Self::Text(text.into(), depth) + } +} + impl Token for XirfToken { fn ir_name() -> &'static str { "XIRF" diff --git a/tamer/tests/xmli/.gitignore b/tamer/tests/xmli/.gitignore new file mode 100644 index 00000000..51bd4512 --- /dev/null +++ b/tamer/tests/xmli/.gitignore @@ -0,0 +1 @@ +out.xmli diff --git a/tamer/tests/xmli/README.md b/tamer/tests/xmli/README.md new file mode 100644 index 00000000..bb44bf32 --- /dev/null +++ b/tamer/tests/xmli/README.md @@ -0,0 +1,29 @@ +# XMLI System Test +The `xmli` file is an intermediate file that serves as a handoff between +TAMER and the XSLT-based compiler: + +``` +xml -> (TAMER) -> xmli -> (TAME XSLT) -> xmlo +``` + +TAMER gets the first shot at processing, and then the compilation process +continues with the XSLT-based compiler. This allows TAMER to incrementally +augment and manipulate the source file and remove responsibilities from +TAME XSLT. + +Tests in this directory ensure that this process is working as +intended. TAMER's failure to perform a proper handoff will cause TAME XSLT +to compile sources incorrectly, since TAMER will have rewritten them to +something else. + +This handoff is more than just echoing tokens back into a file---it +_derives_ a new program from the state of the ASG. This program may have a +slightly different representation than the original sources, but it must +express an equivalent program, and the program must be at least as +performant when emitted by TAME XSLT. + +# Running Tests +Tests are prefixed with `test-*` and are executable. They must be invoked +with the environment variable `PATH_TAMEC` set to the path of `tamec` +relative to the working directory. 
+ diff --git a/tamer/tests/xmli/expected.xml b/tamer/tests/xmli/expected.xml new file mode 100644 index 00000000..da35b844 --- /dev/null +++ b/tamer/tests/xmli/expected.xml @@ -0,0 +1,8 @@ + + + + + + diff --git a/tamer/tests/xmli/src.xml b/tamer/tests/xmli/src.xml new file mode 100644 index 00000000..e9e0b150 --- /dev/null +++ b/tamer/tests/xmli/src.xml @@ -0,0 +1,12 @@ + + + + This is the source package to be read by `tamec`. + The output `out.xmli` is asserted against `expected.xml`. + + + + + diff --git a/tamer/tests/xmli/test-xmli b/tamer/tests/xmli/test-xmli new file mode 100755 index 00000000..4fa66d9d --- /dev/null +++ b/tamer/tests/xmli/test-xmli @@ -0,0 +1,35 @@ +#!/bin/bash +# Assert that a program can be derived from the ASG as expected. +# +# See `./README.md` for more information. + +set -euo pipefail + +mypath=$(dirname "$0") +. "$mypath/../../conf.sh" + +tamer-flag-or-exit-ok wip-asg-derived-xmli + +main() { + "${TAMER_PATH_TAMEC?}" -o "$mypath/out.xmli" --emit xmlo "$mypath/src.xml" + + # Performing this check within `<()` below won't cause a failure. + : "${P_XMLLINT?}" # conf.sh + + diff <("$P_XMLLINT" --format "$mypath/expected.xml" || echo 'ERR expected.xml') \ + <("$P_XMLLINT" --format "$mypath/out.xmli" || echo 'ERR out.xmli') \ + || { + cat << EOF +!!! TEST FAILED +tamec: $TAMER_PATH_TAMEC + +note: The compiler output and diff between \`expected.xml\` and \`out.xmli\` + are above. Both files are formatted with \`xmllint\` automatically. +EOF + + exit 1 + } +} + +main "$@" +