tame/tamer/src/asg/air/test.rs

805 lines
25 KiB
Rust
Raw Normal View History

// Tests for ASG IR
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//! These are tested as if they are another API directly atop the ASG,
//! since that is how they are used.
use super::super::graph::object::{ObjectKind, ObjectRelTo};
use super::super::Ident;
use super::*;
use crate::{
parse::{ParseError, Parsed},
span::dummy::*,
};
use std::assert_matches::assert_matches;
type Sut = AirAggregate;
// Declaring an identifier adds it to the graph in the same state produced
// by resolving a fresh declaration,
//   and declaring the same name a second time is reported as an
//   identifier transition error.
#[test]
fn ident_decl() {
    let name = SPair("foo".into(), S1);
    let kind = IdentKind::Tpl;
    let src = Source {
        src: Some("test/decl".into()),
        ..Default::default()
    };

    let decl = Air::IdentDecl(name, kind.clone(), src.clone());
    let mut sut = Sut::parse(vec![decl].into_iter());

    let first = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), first);

    let asg = sut.finalize().unwrap().into_context();

    let node = asg.lookup(name).expect("identifier was not added to graph");
    let actual = asg.get(node).unwrap();

    // What we expect to find on the graph is whatever `Ident` itself
    // produces for the same declaration.
    let expected = Ident::declare(name).resolve(S1, kind.clone(), src.clone());
    assert_eq!(Ok(actual), expected.as_ref());

    // Start a new parser atop the graph produced above and try to declare
    // the same symbol again (at a different span).
    let redecl = Air::IdentDecl(SPair(name.symbol(), S2), kind, src);
    let mut sut = Sut::parse_with_context(vec![redecl].into_iter(), asg);

    assert_matches!(
        sut.next(),
        Some(Err(ParseError::StateError(AsgError::IdentTransition(_)))),
    );
}
// Declaring an extern adds it to the graph in the same state produced by
// `Ident::extern_` on a fresh declaration,
//   and re-declaring that extern with a different kind is reported as an
//   identifier transition error.
#[test]
fn ident_extern_decl() {
    let name = SPair("foo".into(), S1);
    let kind = IdentKind::Tpl;
    let src = Source {
        src: Some("test/decl-extern".into()),
        ..Default::default()
    };

    let decl = Air::IdentExternDecl(name, kind.clone(), src.clone());
    let mut sut = Sut::parse(vec![decl].into_iter());

    let first = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), first);

    let asg = sut.finalize().unwrap().into_context();

    let node = asg.lookup(name).expect("identifier was not added to graph");
    let actual = asg.get(node).unwrap();

    let expected = Ident::declare(name).extern_(S1, kind, src.clone());
    assert_eq!(Ok(actual), expected.as_ref());

    // Start a new parser atop the graph produced above and declare the
    // extern again,
    //   this time with a kind that differs from the original.
    let redecl =
        Air::IdentExternDecl(SPair(name.symbol(), S2), IdentKind::Meta, src);
    let mut sut = Sut::parse_with_context(vec![redecl].into_iter(), asg);

    assert_matches!(
        sut.next(),
        Some(Err(ParseError::StateError(AsgError::IdentTransition(_)))),
    );
}
// An `IdentDep` token should leave both identifiers on the graph with a
// dependency recorded from the first to the second.
#[test]
fn ident_dep() {
    let ident = SPair("foo".into(), S1);
    let dependency = SPair("dep".into(), S2);

    let toks = vec![Air::IdentDep(ident, dependency)];
    let mut sut = Sut::parse(toks.into_iter());

    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());

    let asg = sut.finalize().unwrap().into_context();

    let from = asg.lookup(ident).expect("identifier was not added to graph");
    let to = asg.lookup(dependency).expect("dep was not added to graph");

    assert!(asg.has_dep(from, to));
}
// Attaching a fragment to a declared identifier yields the same state as
// resolving and then calling `set_fragment` on a fresh declaration,
//   and attaching a fragment a second time is reported as an identifier
//   transition error.
#[test]
fn ident_fragment() {
    let name = SPair("frag".into(), S1);
    let kind = IdentKind::Tpl;
    let src = Source {
        src: Some("test/frag".into()),
        ..Default::default()
    };
    let text = "fragment text".into();

    // The declaration comes first;
    //   the fragment is then attached to that declared identifier.
    let decl = Air::IdentDecl(name, kind.clone(), src.clone());
    let set_frag = Air::IdentFragment(name, text);
    let mut sut = Sut::parse(vec![decl, set_frag].into_iter());

    let after_decl = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), after_decl);
    let after_frag = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), after_frag);

    let asg = sut.finalize().unwrap().into_context();

    let node = asg.lookup(name).expect("identifier was not added to graph");
    let actual = asg.get(node).unwrap();

    let expected = Ident::declare(name)
        .resolve(S1, kind.clone(), src.clone())
        .and_then(|declared| declared.set_fragment(text));
    assert_eq!(Ok(actual), expected.as_ref());

    // Start a new parser atop the graph produced above and try to set the
    // fragment once more.
    let again = Air::IdentFragment(name, text);
    let mut sut = Sut::parse_with_context(vec![again].into_iter(), asg);

    assert_matches!(
        sut.next(),
        Some(Err(ParseError::StateError(AsgError::IdentTransition(_)))),
    );
}
// Rooting an identifier that has not yet been declared should place an
// `Ident::Missing` on the graph and root that placeholder.
#[test]
fn ident_root_missing() {
    let name = SPair("toroot".into(), S1);

    let toks = vec![Air::IdentRoot(name)];
    let mut sut = Sut::parse(toks.into_iter());

    assert_eq!(Some(Ok(Parsed::Incomplete)), sut.next());

    let asg = sut.finalize().unwrap().into_context();

    let node = asg
        .lookup(name)
        .expect("identifier was not added to the graph");
    let found = asg.get(node).unwrap();

    // Nothing declared this identifier beforehand,
    //   so what we find is the missing placeholder...
    assert_eq!(&Ident::Missing(name), found);

    // ...and that placeholder is itself rooted.
    assert!(asg.is_rooted(node));
}
// Rooting an identifier that was already declared should leave the
// declaration as it was and mark that same node as rooted.
#[test]
fn ident_root_existing() {
    let name = SPair("toroot".into(), S1);
    let kind = IdentKind::Tpl;
    let src = Source {
        src: Some("test/root-existing".into()),
        ..Default::default()
    };

    // Guard against a false positive:
    //   if this kind were rooted automatically then the explicit
    //   `IdentRoot` below would not be what this test observes.
    assert!(!kind.is_auto_root());

    let decl = Air::IdentDecl(name, kind.clone(), src.clone());
    let root = Air::IdentRoot(SPair(name.symbol(), S2));
    let mut sut = Sut::parse(vec![decl, root].into_iter());

    let after_decl = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), after_decl);
    let after_root = sut.next();
    assert_eq!(Some(Ok(Parsed::Incomplete)), after_root);

    let asg = sut.finalize().unwrap().into_context();

    let node = asg
        .lookup(name)
        .expect("identifier was not added to the graph");
    let found = asg.get(node).unwrap();

    // The identifier on the graph matches the original declaration...
    let expected = Ident::declare(name).resolve(S1, kind.clone(), src.clone());
    assert_eq!(Ok(found), expected.as_ref());

    // ...and has since been rooted.
    assert!(asg.is_rooted(node));
}
tamer: Initial concept for AIR/ASG Expr This begins to place expressions on the graph---something that I've been thinking about for a couple of years now, so it's interesting to finally be doing it. This is going to evolve; I want to get some things committed so that it's clear how I'm moving forward. The ASG makes things a bit awkward for a number of reasons: 1. I'm dealing with older code where I had a different model of doing things; 2. It's mutable, rather than the mostly-functional lowering pipeline; 3. We're dealing with an aggregate ever-evolving blob of data (the graph) rather than a stream of tokens; and 4. We don't have as many type guarantees. I've shown with the lowering pipeline that I'm able to take a mutable reference and convert it into something that's both functional and performant, where I remove it from its container (an `Option`), create a new version of it, and place it back. Rust is able to optimize away the memcpys and such and just directly manipulate the underlying value, which is often a register with all of the inlining. _But_ this is a different scenario now. The lowering pipeline has a narrow context. The graph has to keep hitting memory. So we'll see how this goes. But it's most important to get this working and measure how it performs; I'm not trying to prematurely optimize. My attempts right now are for the way that I wish to develop. Speaking to #4 above, it also sucks that I'm not able to type the relationships between nodes on the graph. Rather, it's not that I _can't_, but a project to created a typed graph library is beyond the scope of this work and would take far too much time. I'll leave that to a personal, non-work project. Instead, I'm going to have to narrow the type any time the graph is accessed. And while that sucks, I'm going to do my best to encapsulate those details to make it as seamless as possible API-wise. 
The performance hit of performing the narrowing I'm hoping will be very small relative to all the business logic going on (a single cache miss is bound to be far more expensive than many narrowings which are just integer comparisons and branching)...but we'll see. Introducing branching sucks, but branch prediction is pretty damn good in modern CPUs. DEV-13160
2022-12-21 16:47:04 -05:00
// An expression that is bound to an identifier while it has no children
// should be retrievable from the graph by that identifier,
//   with a span covering the opening through the closing token.
#[test]
fn expr_empty_ident() {
    let id = SPair("foo".into(), S2);

    let toks = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::IdentExpr(id),
        Air::CloseExpr(S3),
    ];

    // No token in the stream may produce an error.
    let mut sut = Sut::parse(toks.into_iter());
    assert!(sut.all(|x| x.is_ok()));

    let asg = sut.finalize().unwrap().into_context();

    // The expression should have been bound to this identifier so that
    // we're able to retrieve it from the graph by name.
    let expr = asg.expect_ident_obj::<Expr>(id);
    assert_eq!(expr.span(), S1.merge(S3).unwrap());
}
// Two identifiers bound to the same root expression,
//   one before it has any children and one after,
//   should both resolve to that one expression.
#[test]
fn expr_non_empty_ident_root() {
    let id_a = SPair("foo".into(), S2);
    let id_b = SPair("bar".into(), S2);

    let toks = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // Identifier while still empty...
        Air::IdentExpr(id_a),
        Air::OpenExpr(ExprOp::Sum, S3),
        // (note that the inner expression _does not_ have an ident binding)
        Air::CloseExpr(S4),
        // ...and an identifier non-empty.
        Air::IdentExpr(id_b),
        Air::CloseExpr(S6),
    ];

    // No token in the stream may produce an error.
    let mut sut = Sut::parse(toks.into_iter());
    assert!(sut.all(|x| x.is_ok()));

    let asg = sut.finalize().unwrap().into_context();

    let expr_a = asg.expect_ident_obj::<Expr>(id_a);
    assert_eq!(expr_a.span(), S1.merge(S6).unwrap());

    // Identifiers should reference the same expression.
    let expr_b = asg.expect_ident_obj::<Expr>(id_b);
    assert_eq!(expr_a, expr_b);
}
// Binding an identifier after a child expression means that the parser is
// creating an expression that is a child of a dangling expression,
// which only becomes reachable at the end.
#[test]
fn expr_non_empty_bind_only_after() {
    let id = SPair("foo".into(), S2);

    let toks = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // Expression root is still dangling at this point.
        Air::OpenExpr(ExprOp::Sum, S2),
        Air::CloseExpr(S3),
        // We only bind an identifier _after_ we've created the expression,
        // which should cause the still-dangling root to become
        // reachable.
        Air::IdentExpr(id),
        Air::CloseExpr(S5),
    ];

    // No token in the stream may produce an error.
    let mut sut = Sut::parse(toks.into_iter());
    assert!(sut.all(|x| x.is_ok()));

    let asg = sut.finalize().unwrap().into_context();

    // The identifier resolves to the root expression,
    //   whose span runs from its opening to its closing token.
    let expr = asg.expect_ident_obj::<Expr>(id);
    assert_eq!(expr.span(), S1.merge(S5).unwrap());
}
// Dangling expressions are unreachable and therefore not useful
// constructions.
// Prohibit them,
// since they're either mistakes or misconceptions.
#[test]
fn expr_dangling_no_subexpr() {
    let toks = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // No `IdentExpr`,
        // so this expression is dangling.
        Air::CloseExpr(S2),
    ];

    // The error span should encompass the entire expression.
    let full_span = S1.merge(S2).unwrap();

    assert_eq!(
        vec![
            // The opening token is accepted...
            Ok(Parsed::Incomplete),
            // ...and the closing token reports the dangling expression.
            Err(ParseError::StateError(AsgError::DanglingExpr(full_span)))
        ],
        Sut::parse(toks.into_iter()).collect::<Vec<_>>(),
    );
}
// A root expression that contains a child expression but is never bound to
// an identifier is reported as dangling when it is closed,
//   with the error span covering the whole root expression.
#[test]
fn expr_dangling_with_subexpr() {
    let stream = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // The root has no binding yet and so is dangling here.
        Air::OpenExpr(ExprOp::Sum, S2),
        Air::CloseExpr(S3),
        // No binding was ever provided for the root,
        //   so it is still dangling as it closes.
        Air::CloseExpr(S4),
    ];

    let root_span = S1.merge(S4).unwrap();

    let results: Vec<_> = Sut::parse(stream.into_iter()).collect();

    assert_eq!(
        vec![
            Ok(Parsed::Incomplete), // open root
            Ok(Parsed::Incomplete), // open child
            Ok(Parsed::Incomplete), // close child
            Err(ParseError::StateError(AsgError::DanglingExpr(root_span)))
        ],
        results,
    );
}
// Binding an identifier to a child expression does not make its unbound
// root reachable:
//   the root is still reported as dangling when it is closed.
#[test]
fn expr_dangling_with_subexpr_ident() {
    let child_id = SPair("foo".into(), S3);

    let stream = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // The root has no binding yet and so is dangling here.
        Air::OpenExpr(ExprOp::Sum, S2),
        // This binding applies to the _inner_ expression;
        //   it should not affect whether the root is dangling,
        //   particularly since subexpressions are always reachable
        //   anyway.
        Air::IdentExpr(child_id),
        Air::CloseExpr(S4),
        // The root itself was never bound,
        //   so it is still dangling as it closes.
        Air::CloseExpr(S5),
    ];

    let root_span = S1.merge(S5).unwrap();

    let results: Vec<_> = Sut::parse(stream.into_iter()).collect();

    assert_eq!(
        vec![
            Ok(Parsed::Incomplete), // open root
            Ok(Parsed::Incomplete), // open child
            Ok(Parsed::Incomplete), // bind child
            Ok(Parsed::Incomplete), // close child
            Err(ParseError::StateError(AsgError::DanglingExpr(root_span)))
        ],
        results,
    );
}
// A dangling expression must still be recognized as such when it follows
// an expression that was reachable.
// Ideally the parser is structured so that this cannot go wrong,
//   but this test also guards against future regressions.
#[test]
fn expr_reachable_subsequent_dangling() {
    let bound_id = SPair("foo".into(), S2);

    let stream = vec![
        // First expression: bound to an identifier and so reachable.
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::IdentExpr(bound_id),
        Air::CloseExpr(S3),
        // Second expression: never bound and so dangling.
        Air::OpenExpr(ExprOp::Sum, S4),
        Air::CloseExpr(S5),
    ];

    // The error span should cover the second expression in its entirety.
    // TODO: ...let's actually have something inside this expression.
    let dangling_span = S4.merge(S5).unwrap();

    let results: Vec<_> = Sut::parse(stream.into_iter()).collect();

    assert_eq!(
        vec![
            Ok(Parsed::Incomplete), // open first
            Ok(Parsed::Incomplete), // bind first
            Ok(Parsed::Incomplete), // close first
            Ok(Parsed::Incomplete), // open second
            Err(ParseError::StateError(AsgError::DanglingExpr(dangling_span)))
        ],
        results,
    );
}
// Recovery from dangling expression.
//
// The first expression is never bound to an identifier and so produces an
// error when it closes.
// The parser is then expected to carry on and accept the second,
// identified expression,
// which must end up on the graph.
#[test]
fn recovery_expr_reachable_after_dangling() {
    let id = SPair("foo".into(), S4);

    let toks = vec![
        // Dangling
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::CloseExpr(S2),
        // Reachable, after error from dangling.
        Air::OpenExpr(ExprOp::Sum, S3),
        Air::IdentExpr(id),
        Air::CloseExpr(S5),
    ];

    // The error span should encompass the entire expression.
    let err_span = S1.merge(S2).unwrap();

    // The parser is kept around (rather than consumed by `collect`) so
    // that it can be finalized and the resulting graph inspected.
    let mut sut = Sut::parse(toks.into_iter());

    assert_eq!(
        vec![
            Ok(Parsed::Incomplete),
            Err(ParseError::StateError(AsgError::DanglingExpr(err_span))),
            // Recovery allows us to continue at this point with the next
            // expression.
            Ok(Parsed::Incomplete),
            Ok(Parsed::Incomplete),
            Ok(Parsed::Incomplete),
        ],
        sut.by_ref().collect::<Vec<_>>(),
    );

    let asg = sut.finalize().unwrap().into_context();

    // Let's make sure that we _actually_ added it to the graph,
    // despite the previous error.
    let expr = asg.expect_ident_obj::<Expr>(id);
    assert_eq!(expr.span(), S3.merge(S5).unwrap());

    // The dangling expression may or may not be on the graph,
    // but it doesn't matter;
    // we cannot reference it
    // (unless we break abstraction and walk the underlying graph).
    // Let's leave this undefined so that we have flexibility in what we
    // decide to do in the future.
    // So we end here.
}
// A close with no corresponding open is an error,
// both before any expression has been opened and after a complete
// expression has already been closed.
// In each case the parser is expected to recover,
// and the well-formed expression in between must reach the graph.
#[test]
fn expr_close_unbalanced() {
    let id = SPair("foo".into(), S3);

    let stream = vec![
        // Nothing has been opened yet, so this close is unbalanced.
        Air::CloseExpr(S1),
        // After recovery, an ordinary identified expression.
        Air::OpenExpr(ExprOp::Sum, S2),
        Air::IdentExpr(id),
        Air::CloseExpr(S4),
        // One close too many, this time _following_ a valid expression.
        Air::CloseExpr(S5),
    ];

    let mut sut = Sut::parse(stream.into_iter());
    let results = sut.by_ref().collect::<Vec<_>>();

    assert_eq!(
        vec![
            // Leading unbalanced close.
            Err(ParseError::StateError(AsgError::UnbalancedExpr(S1))),
            // Recovery should let the valid expression through.
            Ok(Parsed::Incomplete), // OpenExpr
            Ok(Parsed::Incomplete), // IdentExpr
            Ok(Parsed::Incomplete), // CloseExpr
            // Trailing unbalanced close.
            Err(ParseError::StateError(AsgError::UnbalancedExpr(S5))),
        ],
        results,
    );

    // The expression between the two errors should be on the graph,
    // spanning its own open and close.
    let asg = sut.finalize().unwrap().into_context();
    let expr = asg.expect_ident_obj::<Expr>(id);
    assert_eq!(expr.span(), S2.merge(S4).unwrap());
}
// Binding an identifier requires an open expression to bind to.
// Attempts made with no expression open
// (both before any expression and after one has closed)
// should produce errors,
// while an expression bound properly in between must still be added to
// the graph.
#[test]
fn expr_bind_to_empty() {
    let id_before = SPair("noexpr_a".into(), S1);
    let id_bound = SPair("noexpr".into(), S3);
    let id_after = SPair("noexpr_b".into(), S5);

    let stream = vec![
        // There is no open expression for this to bind to.
        Air::IdentExpr(id_before),
        // After recovery, a properly identified expression.
        Air::OpenExpr(ExprOp::Sum, S2),
        Air::IdentExpr(id_bound),
        Air::CloseExpr(S4),
        // The expression has closed,
        // so again there is nothing to bind to.
        Air::IdentExpr(id_after),
    ];

    let mut sut = Sut::parse(stream.into_iter());
    let results = sut.by_ref().collect::<Vec<_>>();

    assert_eq!(
        vec![
            Err(ParseError::StateError(AsgError::InvalidExprBindContext(
                id_before
            ))),
            // Recovery should let the valid expression through.
            Ok(Parsed::Incomplete), // OpenExpr
            Ok(Parsed::Incomplete), // IdentExpr
            Ok(Parsed::Incomplete), // CloseExpr
            // A second failure, following the successful expression.
            Err(ParseError::StateError(AsgError::InvalidExprBindContext(
                id_after
            ))),
        ],
        results,
    );

    let asg = sut.finalize().unwrap().into_context();

    // Identifiers that were rejected for lack of an expression should not
    // be resolvable to an expression on the graph.
    for id_rejected in [id_before, id_after] {
        assert_eq!(None, asg.get_ident_obj::<Expr>(id_rejected));
    }

    // The properly bound expression should be present,
    // spanning its own open and close.
    let expr = asg.expect_ident_obj::<Expr>(id_bound);
    assert_eq!(expr.span(), S2.merge(S4).unwrap());
}
// A parent expression should have an edge to each of its subexpressions,
// and those edges ought to be ordered so that TAME is able to handle
// non-commutative expressions.
// We must also understand the relative order in which those edges are
// stored,
// which matters for non-associative expressions.
#[test]
fn sibling_subexprs_have_ordered_edges_to_parent() {
    let id_root = SPair("root".into(), S1);

    let asg = asg_from_toks(vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        // Bind the root so that it does not dangle.
        Air::IdentExpr(id_root),
        // Sibling A
        Air::OpenExpr(ExprOp::Sum, S3),
        Air::CloseExpr(S4),
        // Sibling B
        Air::OpenExpr(ExprOp::Sum, S5),
        Air::CloseExpr(S6),
        // Sibling C
        Air::OpenExpr(ExprOp::Sum, S7),
        Air::CloseExpr(S8),
        // Close the root.
        Air::CloseExpr(S9),
    ]);

    // We want the root's _index_ rather than the object itself,
    // since the index is what the edges to each sibling are read from.
    let oi_root = asg.expect_ident_oi::<Expr>(id_root);

    let sibling_edges = oi_root.edges::<Expr>(&asg);
    let siblings: Vec<_> =
        sibling_edges.map(|oi| oi.resolve(&asg)).collect();

    // Edges come back in the _reverse_ of the order in which the siblings
    // were added.
    // This is an implementation detail of how Petgraph stores its edges as
    // effectively linked lists,
    // using node indices instead of pointers.
    // It is very important that we understand this behavior.
    assert_eq!(siblings.len(), 3);
    assert_eq!(siblings[2].span(), S3.merge(S4).unwrap());
    assert_eq!(siblings[1].span(), S5.merge(S6).unwrap());
    assert_eq!(siblings[0].span(), S7.merge(S8).unwrap());
}
// With expressions nested three deep,
// each subexpression should be an edge of its _immediate_ parent rather
// than of the root.
#[test]
fn nested_subexprs_related_to_relative_parent() {
    let id_root = SPair("root".into(), S1);
    let id_suba = SPair("suba".into(), S2);

    let asg = asg_from_toks(vec![
        // Depth 0 (root)
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::IdentExpr(id_root),
        // Depth 1
        Air::OpenExpr(ExprOp::Sum, S2),
        Air::IdentExpr(id_suba),
        // Depth 2
        Air::OpenExpr(ExprOp::Sum, S3),
        Air::CloseExpr(S4),
        // Close depth 1, then the root.
        Air::CloseExpr(S5),
        Air::CloseExpr(S6),
    ]);

    let oi_root = asg.expect_ident_oi::<Expr>(id_root);

    // The root should have exactly one child: the depth-1 expression.
    let children_of_root = collect_subexprs(&asg, oi_root);
    assert_eq!(children_of_root.len(), 1);

    let (oi_depth1, expr_depth1) = children_of_root[0];
    assert_eq!(expr_depth1.span(), S2.merge(S5).unwrap());

    // The depth-1 expression should in turn have exactly one child:
    // the depth-2 expression.
    let children_of_depth1 = collect_subexprs(&asg, oi_depth1);
    assert_eq!(children_of_depth1.len(), 1);

    let (_, expr_depth2) = children_of_depth1[0];
    assert_eq!(expr_depth2.span(), S3.merge(S4).unwrap());
}
// Attempting to bind an identifier that is already bound should be an
// error that the parser recovers from,
// leaving the identifier associated with the expression that it was
// first bound to.
#[test]
fn expr_redefine_ident() {
    // Both identifiers share a name but have distinct spans
    // (as they would in the real world).
    let id_original = SPair("foo".into(), S2);
    let id_redef = SPair("foo".into(), S3);

    let stream = vec![
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::IdentExpr(id_original),
        // The nested expression tries to reuse the name.
        Air::OpenExpr(ExprOp::Sum, S3),
        Air::IdentExpr(id_redef),
        Air::CloseExpr(S4),
        Air::CloseExpr(S5),
    ];

    let mut sut = Sut::parse(stream.into_iter());
    let results = sut.by_ref().collect::<Vec<_>>();

    assert_eq!(
        vec![
            Ok(Parsed::Incomplete), // OpenExpr
            Ok(Parsed::Incomplete), // IdentExpr (first)
            Ok(Parsed::Incomplete), // OpenExpr
            // The error carries the original identifier along with the
            // span of the attempted redefinition.
            Err(ParseError::StateError(AsgError::IdentRedefine(
                id_original,
                id_redef.span(),
            ))),
            // RECOVERY: Ignore the attempt to redefine and continue.
            Ok(Parsed::Incomplete), // CloseExpr
            Ok(Parsed::Incomplete), // CloseExpr
        ],
        results,
    );

    let asg = sut.finalize().unwrap().into_context();

    // The name should still resolve to the first (outer) expression.
    let expr = asg.expect_ident_obj::<Expr>(id_original);
    assert_eq!(expr.span(), S1.merge(S5).unwrap());
}
// Like the redefinition test above,
// but using entirely separate expressions,
// such that a failed attempt to identify an expression ought to leave
// that expression unreachable.
#[test]
fn expr_still_dangling_on_redefine() {
    // The first three identifiers share a name but have distinct spans
    // (as they would in the real world).
    let id_original = SPair("foo".into(), S2);
    let id_redef_a = SPair("foo".into(), S5);
    let id_redef_b = SPair("foo".into(), S8);
    let id_unique = SPair("bar".into(), S9);

    let stream = vec![
        // First expr (OK)
        Air::OpenExpr(ExprOp::Sum, S1),
        Air::IdentExpr(id_original),
        Air::CloseExpr(S3),
        // Second expr: its only binding attempt reuses a name,
        // so it should still dangle.
        Air::OpenExpr(ExprOp::Sum, S4),
        Air::IdentExpr(id_redef_a),
        Air::CloseExpr(S6),
        // Third expr: errors on redefine but then binds successfully.
        // This probably won't happen in practice with TAME's original
        // source language,
        // but could happen at e.g. a REPL.
        Air::OpenExpr(ExprOp::Sum, S7),
        Air::IdentExpr(id_redef_b), // fail
        Air::IdentExpr(id_unique),  // succeed
        Air::CloseExpr(S10),
    ];

    let mut sut = Sut::parse(stream.into_iter());
    let results = sut.by_ref().collect::<Vec<_>>();

    assert_eq!(
        vec![
            // First expression
            Ok(Parsed::Incomplete), // OpenExpr
            Ok(Parsed::Incomplete), // IdentExpr (first)
            Ok(Parsed::Incomplete), // CloseExpr
            // Second expression
            Ok(Parsed::Incomplete), // OpenExpr
            Err(ParseError::StateError(AsgError::IdentRedefine(
                id_original,
                id_redef_a.span(),
            ))),
            // RECOVERY: Ignore the attempt to redefine and continue.
            // ...only to fail _again_ right away,
            // since the expression that just closed is dangling.
            Err(ParseError::StateError(AsgError::DanglingExpr(
                S4.merge(S6).unwrap()
            ))),
            // RECOVERY: Continue on to the third expression,
            // whose first binding attempt fails but whose second
            // succeeds.
            Ok(Parsed::Incomplete), // OpenExpr
            Err(ParseError::StateError(AsgError::IdentRedefine(
                id_original,
                id_redef_b.span(),
            ))),
            // RECOVERY: Despite the initial failure,
            // binding can be re-attempted with a unique id.
            Ok(Parsed::Incomplete), // IdentExpr (second)
            Ok(Parsed::Incomplete), // CloseExpr
        ],
        results,
    );

    let asg = sut.finalize().unwrap().into_context();

    // The shared name should still resolve to the first expression.
    let expr_original = asg.expect_ident_obj::<Expr>(id_original);
    assert_eq!(expr_original.span(), S1.merge(S3).unwrap());

    // There's nothing we can do using the ASG's public API at the time of
    // writing to try to reference the dangling expression.

    // The unique identifier should have been bound to the third
    // expression despite the failed initial attempt.
    let expr_unique = asg.expect_ident_obj::<Expr>(id_unique);
    assert_eq!(expr_unique.span(), S7.merge(S10).unwrap());
}
/// Parse `toks` to completion and return the resulting graph.
///
/// This is a convenience for tests that care only about the shape of the
/// graph and expect parsing to succeed.
///
/// # Panics
/// Panics on the first token that does not parse successfully,
///   reporting the offending result,
///   and panics if the parser cannot be finalized.
fn asg_from_toks<I: IntoIterator<Item = Air>>(toks: I) -> Asg
where
    I::IntoIter: Debug,
{
    let mut sut = Sut::parse(toks.into_iter());

    // Check results one at a time so that a failure states _which_ result
    // was unexpected rather than only that some result was.
    for result in sut.by_ref() {
        assert!(
            result.is_ok(),
            "unexpected parse failure: {:?}",
            result
        );
    }

    sut.finalize().unwrap().into_context()
}
/// Collect the objects of kind `O` that `oi` has edges to,
///   pairing each object's index with a reference to the object itself.
///
/// Results are in whatever order `edges` yields them.
fn collect_subexprs<O: ObjectKind>(
    asg: &Asg,
    oi: ObjectIndex<O>,
) -> Vec<(ObjectIndex<O>, &O)>
where
    O: ObjectRelTo<O>,
{
    // `asg` is already a reference,
    // so it is passed along as-is rather than being borrowed again.
    oi.edges::<O>(asg)
        .map(|oi_sub| (oi_sub, oi_sub.resolve(asg)))
        .collect()
}