// ASG IR
//
// Copyright (C) 2014-2022 Ryan Specialty Group, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use super::{
Asg, AsgError, ExprOp, FragmentText, IdentKind, ObjectIndex, Source,
};
use crate::{
asg::Expr,
f::Functor,
fmt::{DisplayWrapper, TtQuote},
parse::{self, util::SPair, ParseState, Token, Transition, Transitionable},
span::{Span, UNKNOWN_SPAN},
sym::SymbolId,
};
use std::fmt::{Debug, Display};
// Intermediate representation for construction of the
// [abstract semantic graph (ASG)](super) (AIR).
//
// AIR serves as an abstraction layer between higher-level parsers and the
// aggregate ASG.
// It allows parsers to operate as a raw stream of data without having to
// worry about ownership of or references to the ASG,
// and allows for multiple such parsers to be joined.
//
// AIR is _not_ intended to replace the API of the ASG---it
// is intended as a termination point for the parsing pipeline,
// and as such implements a subset of the ASG's API that is suitable
// for aggregating raw data from source and object files.
// Given that it does so little and is so close to the [`Asg`] API,
// one might say that the abstraction is as light as air,
// but that would surely result in face-palming and so we're not going
// air such cringeworthy dad jokes here.
//
// NOTE(review): the overview above describes this module as a whole.
// It is kept as a plain comment because an inner (`//!`) doc comment is
// not permitted after the `use` items,
// and an outer doc comment in this position would be rendered as the
// documentation of `IdentSym`.

/// Alias of [`SymbolId`];
/// judging by its name it is meant for symbols that name identifiers
/// (TODO confirm against callers).
pub type IdentSym = SymbolId;

/// Alias of [`SymbolId`];
/// judging by its name it is meant for symbols that name the dependencies
/// of identifiers (TODO confirm against callers).
pub type DepSym = SymbolId;
/// AIR token.
///
/// These tokens mimic a public API for the ASG,
/// and allow parsers to be completely decoupled from the ASG object that
/// they will eventually aggregate data into.
///
/// This IR is not intended to perform sophisticated manipulation of the
/// ASG---it
/// is intended to perform initial aggregation as part of a parsing
/// phase,
/// populating the ASG with the raw data that will be
/// subsequently analyzed and rewritten.
#[derive(Debug, PartialEq)]
pub enum Air {
    /// Placeholder token for objects that do not yet have a proper place on
    /// the ASG.
    ///
    /// [`AirAggregate`] accepts this token in any state and leaves that
    /// state unchanged.
    Todo,
    /// Create a new [`Expr`] on the graph and place it atop of the
    /// expression stack.
    ///
    /// If there was previously an expression ρ atop of the stack before
    /// this operation,
    /// a reference to this new expression will be automatically added
    /// to ρ,
    /// treating it as a child expression.
    /// Otherwise,
    /// the expression will be dangling unless bound to an identifier,
    /// which will produce an error.
    ///
    /// All expressions have an associated [`ExprOp`] that determines how
    /// the expression will be evaluated.
    /// An expression is associated with a source location,
    /// but is anonymous unless assigned an identifier using
    /// [`Air::IdentExpr`].
    ///
    /// Expressions are composed of references to other expressions.
    OpenExpr(ExprOp, Span),
    /// Complete the expression atop of the expression stack and pop it from
    /// the stack.
    ///
    /// The provided [`Span`] is merged with the span of the expression
    /// being closed.
    CloseExpr(Span),
    /// Assign an identifier to the expression atop of the expression stack.
    ///
    /// An expression may be bound to multiple identifiers,
    /// but an identifier can only be bound to a single expression.
    /// Binding an identifier will declare it.
    IdentExpr(SPair),
    /// Declare a resolved identifier.
    IdentDecl(SPair, IdentKind, Source),
    /// Declare an external identifier that must be resolved before linking.
    IdentExternDecl(SPair, IdentKind, Source),
    /// Declare that an identifier depends on another for its definition.
    ///
    /// The first identifier will depend on the second
    /// (`0 -> 1`).
    /// The spans associated with each [`SPair`] will be used
    /// if the respective identifier has not yet been defined.
    IdentDep(SPair, SPair),
    /// Associate a code fragment with an identifier.
    ///
    /// A fragment does not have an associated span because it is
    /// conceptually associated with all the spans from which it is
    /// derived;
    /// the format of the object file will change in the future to
    /// retain this information.
    IdentFragment(SPair, FragmentText),
    /// Root an identifier at the request of some entity at the associated
    /// span of the [`SPair`].
    ///
    /// Rooting is caused by _something_,
    /// and the span is intended to aid in tracking down why rooting
    /// occurred.
    IdentRoot(SPair),
}
impl Token for Air {
fn ir_name() -> &'static str {
"AIR"
}
fn span(&self) -> crate::span::Span {
use Air::*;
match self {
Todo => UNKNOWN_SPAN,
OpenExpr(_, span) | CloseExpr(span) => *span,
IdentExpr(spair)
| IdentDecl(spair, _, _)
| IdentExternDecl(spair, _, _)
| IdentDep(spair, _)
| IdentFragment(spair, _)
| IdentRoot(spair) => spair.span(),
}
}
}
// Marker implementation with no items of its own.
// NOTE(review): presumably this lets AIR tokens be emitted as the object
// of a lowering parser---confirm against the definition of
// `parse::Object`.
impl parse::Object for Air {}
impl Display for Air {
    /// Describe the token in a human-readable form.
    ///
    /// Identifier names are quoted using [`TtQuote`],
    /// so format strings must not add quoting characters of their own
    /// around those placeholders.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use Air::*;

        match self {
            Todo => write!(f, "TODO"),

            OpenExpr(op, _) => write!(f, "open {op} expression"),

            CloseExpr(_) => write!(f, "close expression"),

            IdentExpr(id) => {
                write!(f, "identify expression as {}", TtQuote::wrap(id))
            }

            IdentDecl(spair, _, _) => {
                write!(f, "declaration of identifier {}", TtQuote::wrap(spair))
            }

            IdentExternDecl(spair, _, _) => {
                write!(
                    f,
                    "declaration of external identifier {}",
                    TtQuote::wrap(spair)
                )
            }

            IdentDep(isym, dsym) => write!(
                f,
                // TODO: Use list wrapper
                "declaration of identifier dependency `{isym} -> {dsym}`"
            ),

            // The quoting is provided entirely by `TtQuote`;
            //   a stray backtick previously followed the placeholder and
            //   left the quotes unbalanced.
            IdentFragment(depsym, _text) => {
                write!(f, "identifier {} fragment text", TtQuote::wrap(depsym))
            }

            IdentRoot(sym) => {
                write!(f, "rooting of identifier {}", TtQuote::wrap(sym))
            }
        }
    }
}
/// Stack of held expressions,
/// with the root expression at the bottom of the stack.
///
/// Expression [`ObjectIndex`]es are pushed onto this stack when
/// parsing a subexpression,
/// and are popped when the subexpression terminates.
/// The active expression is _not_ stored on this stack to avoid unnecessary
/// indirection.
///
/// Despite the immutable interface,
/// this does modify the inner [`Vec`] in-place;
/// it does not reallocate unless its capacity has been reached.
///
/// Unlike other parts of the system,
/// this is heap-allocated,
/// but should be very cache-friendly.
/// This reason for heap allocation is that this is explicitly
/// _unbounded_—systems like code generators ought to be able to output
/// expressions in a tacit style without worrying about arbitrary limits.
/// It is worth noting that the other parts of the system using
/// stack-allocated data structures is less about performance and more
/// about the simplicity afforded by keeping allocators out of the picture.
/// We'll address performance issues if they appear during profiling.
///
/// Another benefit of using [`Vec`] here is that Rust is able to properly
/// optimize away `memcpy`s for it,
/// rather than having to utilize the parser's mutable context.
/// Further,
/// the ASG is heap-allocated,
/// so we're not avoiding the heap anyway.
///
/// The interface is modeled after [Haskell's `Stack`][haskell-stack],
/// with a slight variation for [`Self::pop`] so that we can avoid
/// reallocation after a stack is used up,
/// which is frequent.
///
/// [haskell-stack]: https://hackage.haskell.org/package/Stack/docs/Data-Stack.html
#[derive(Debug, PartialEq, Eq)]
pub struct ExprStack(Vec>);
impl ExprStack {
fn push(self, item: ObjectIndex) -> Self {
let Self(mut stack) = self;
stack.push(item);
Self(stack)
}
/// Attempt to remove an item from the stack,
/// returning a new stack and the item,
/// if any.
///
/// This returns a new [`Self`] even if it is empty so that it can be
/// reused without having to reallocate.
fn pop(self) -> (Self, Option>) {
let Self(mut stack) = self;
let oi = stack.pop();
(Self(stack), oi)
}
/// Whether the current expression being parsed is the root expression.
///
/// This simply means that the stack is empty.
fn is_at_root(&self) -> bool {
matches!(self, Self(stack) if stack.is_empty())
}
}
impl Default for ExprStack {
    /// Create an empty stack with room reserved for 16 expressions.
    fn default() -> Self {
        // TODO: The reserved capacity is a generous guess that is very
        //   unlikely to be exceeded in practice at the time of writing,
        //     even with template expansion,
        //     but an informed heuristic ought to replace it.
        // This is very unlikely to make a difference;
        //   it is simply a number that has no data to back it up.
        let stack = Vec::with_capacity(16);

        Self(stack)
    }
}
/// AIR parser state.
#[derive(Debug, PartialEq, Eq)]
pub enum AirAggregate {
/// Parser is not currently performing any work.
///
/// This state is accepting iff the inner [`ExprStack`] is empty.
Empty(ExprStack),
/// Building an expression whose root is yet not reachable from any
/// other object.
///
/// Dangling expressions are expected to transition into
/// [`Self::ReachableExpr`] after being bound to an identifier.
/// Closing a dangling expression will result in a
/// [`AsgError::DanglingExpr`].
DanglingExpr(ExprStack, ObjectIndex),
/// Building an expression that is reachable from another object.
///
/// See also [`Self::DanglingExpr`].
ReachableExpr(ExprStack, ObjectIndex),
}
impl Default for AirAggregate {
    /// Begin in the [`Self::Empty`] state with a default expression stack.
    fn default() -> Self {
        let stack = ExprStack::default();

        Self::Empty(stack)
    }
}
impl Display for AirAggregate {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use AirAggregate::*;
match self {
Empty(es) => write!(f, "awaiting AIR input for ASG with {es}"),
DanglingExpr(es, _) => {
write!(f, "building dangling expression with {es}")
}
ReachableExpr(es, _) => {
write!(f, "building reachable expression with {es}")
}
}
}
}
impl Display for ExprStack {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let Self(stack) = self;
write!(f, "expression stack length of {}", stack.len())
}
}
impl ParseState for AirAggregate {
    type Token = Air;
    type Object = ();
    type Error = AsgError;

    /// Destination [`Asg`] that this parser lowers into.
    ///
    /// This ASG will be yielded by [`parse::Parser::finalize`].
    type Context = Asg;

    /// Apply a single [`Air`] token to the destination [`Asg`].
    ///
    /// Expression tokens create and close [`Expr`] objects on the graph
    ///   while the [`ExprStack`] tracks the enclosing expressions;
    ///     identifier tokens declare identifiers or relate them to other
    ///     objects.
    /// No object is ever yielded;
    ///   the graph itself is the result.
    ///
    /// # Errors
    /// - [`AsgError::DanglingExpr`] when a root expression is closed
    ///   without having been bound to an identifier.
    /// - Any error produced by the [`Asg`] when declaring an identifier or
    ///   setting its fragment.
    ///
    /// # Panics
    /// Token/state combinations that have not yet been implemented will
    ///   panic via `todo!`:
    ///   closing or binding an expression when none is open,
    ///     and identifier declaration tokens encountered while an
    ///     expression is open.
    fn parse_token(
        self,
        tok: Self::Token,
        asg: &mut Self::Context,
    ) -> crate::parse::TransitionResult<Self> {
        use Air::*;
        use AirAggregate::*;

        match (self, tok) {
            // Placeholders are ignored in every state.
            (st, Todo) => Transition(st).incomplete(),

            // A root expression begins dangling since nothing refers to
            //   it yet.
            (Empty(es), OpenExpr(op, span)) => {
                let oi = asg.create(Expr::new(op, span));
                Transition(DanglingExpr(es, oi)).incomplete()
            }

            // Opening a subexpression holds the active expression on the
            //   stack and makes the new expression active.
            (DanglingExpr(es, poi), OpenExpr(op, span)) => {
                let oi = asg.create(Expr::new(op, span));
                Transition(DanglingExpr(es.push(poi), oi)).incomplete()
            }

            (ReachableExpr(es, poi), OpenExpr(op, span)) => {
                let oi = asg.create(Expr::new(op, span));
                Transition(ReachableExpr(es.push(poi), oi)).incomplete()
            }

            (Empty(_), CloseExpr(_)) => todo!("no matching expr to end"),

            // A root expression that was never bound to an identifier
            //   cannot be reached by anything.
            // The guard guarantees that the stack is empty,
            //   so there is no parent expression to return to.
            (DanglingExpr(es, oi), CloseExpr(end)) if es.is_at_root() => {
                let start: Span = oi.into();

                Transition(Empty(es)).err(AsgError::DanglingExpr(
                    start.merge(end).unwrap_or(start),
                ))
            }

            // Closing extends the span of the expression to include the
            //   closing token and then resumes the parent expression,
            //     if any.
            (DanglingExpr(es, oi), CloseExpr(end)) => {
                let _ = asg.mut_map_obj::<Expr>(oi, |expr| {
                    expr.map(|span| span.merge(end).unwrap_or(span))
                });

                match es.pop() {
                    (es, Some(poi)) => Transition(DanglingExpr(es, poi)),
                    (es, None) => Transition(Empty(es)),
                }
                .incomplete()
            }

            (ReachableExpr(es, oi), CloseExpr(end)) => {
                let _ = asg.mut_map_obj::<Expr>(oi, |expr| {
                    expr.map(|span| span.merge(end).unwrap_or(span))
                });

                match es.pop() {
                    (es, Some(poi)) => Transition(ReachableExpr(es, poi)),
                    (es, None) => Transition(Empty(es)),
                }
                .incomplete()
            }

            (Empty(_), IdentExpr(_)) => todo!("cannot bind ident to nothing"),

            (DanglingExpr(es, oi), IdentExpr(id)) => {
                // TODO: error on existing ident
                let identi = asg.lookup_or_missing(id);
                asg.add_dep(identi, oi);

                // Only binding the root expression makes the expression
                //   being built reachable;
                //     binding a subexpression leaves the state as it was.
                if es.is_at_root() {
                    Transition(ReachableExpr(es, oi)).incomplete()
                } else {
                    Transition(DanglingExpr(es, oi)).incomplete()
                }
            }

            (ReachableExpr(es, oi), IdentExpr(id)) => {
                // TODO: error on existing ident
                let identi = asg.lookup_or_missing(id);
                asg.add_dep(identi, oi);

                Transition(ReachableExpr(es, oi)).incomplete()
            }

            // Identifier operations are currently handled only when no
            //   expression is open.
            (st @ Empty(_), IdentDecl(name, kind, src)) => {
                asg.declare(name, kind, src).map(|_| ()).transition(st)
            }

            (st @ Empty(_), IdentExternDecl(name, kind, src)) => asg
                .declare_extern(name, kind, src)
                .map(|_| ())
                .transition(st),

            (st @ Empty(_), IdentDep(sym, dep)) => {
                asg.add_dep_lookup(sym, dep);
                Transition(st).incomplete()
            }

            (st @ Empty(_), IdentFragment(sym, text)) => {
                asg.set_fragment(sym, text).map(|_| ()).transition(st)
            }

            (st @ Empty(_), IdentRoot(sym)) => {
                let obj = asg.lookup_or_missing(sym);
                asg.add_root(obj);

                Transition(st).incomplete()
            }

            // Identifier operations within an open expression are not
            //   yet supported.
            (
                st,
                tok @ (IdentDecl(..) | IdentExternDecl(..) | IdentDep(..)
                | IdentFragment(..) | IdentRoot(..)),
            ) => todo!("{st:?}, {tok:?}"),
        }
    }

    /// Whether parsing may end in this state.
    ///
    /// This is the case only when no expression is being built and no
    ///   expressions are held on the stack.
    fn is_accepting(&self, _: &Self::Context) -> bool {
        matches!(self, Self::Empty(es) if es.is_at_root())
    }
}
#[cfg(test)]
mod test;