// ASG IR
//
// Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
// This file is part of TAME.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! Intermediate representation for construction of the
//! [abstract semantic graph (ASG)](super) (AIR).
//!
//! AIR serves as an abstraction layer between higher-level parsers and the
//! aggregate ASG.
//! It allows parsers to operate as a raw stream of data without having to
//! worry about ownership of or references to the ASG,
//! and allows for multiple such parsers to be joined.
//!
//! AIR is _not_ intended to replace the API of the ASG---it
//! is intended as a termination point for the parsing pipeline,
//! and as such implements a subset of the ASG's API that is suitable
//! for aggregating raw data from source and object files.
//! Given that it does so little and is so close to the [`Asg`] API,
//! one might say that the abstraction is as light as air,
//! but that would surely result in face-palming and so we're not going
//! to air such cringeworthy dad jokes here.
use super::{ graph::object::{ObjectIndexTo, ObjectIndexToTree, Pkg, Tpl}, Asg, AsgError, Expr, Ident, ObjectIndex, }; use crate::{ diagnose::Annotate, diagnostic_todo, parse::{prelude::*, StateStack}, span::{Span, UNKNOWN_SPAN}, sym::SymbolId, }; use std::fmt::{Debug, Display}; #[macro_use] mod ir; pub use ir::Air; mod expr; mod tpl; use expr::AirExprAggregate; use tpl::AirTplAggregate; pub type IdentSym = SymbolId; pub type DepSym = SymbolId; /// AIR parser state. #[derive(Debug, PartialEq, Default)] pub enum AirAggregate { /// Parser is not currently performing any work. #[default] Empty, /// Expecting a package-level token. Toplevel(ObjectIndex), /// Parsing an expression. /// /// This expects to inherit an [`AirExprAggregate`] from the prior state /// so that we are not continuously re-allocating its stack for each /// new expression root. PkgExpr(AirExprAggregate), /// Parser is in template parsing mode. /// /// All objects encountered until the closing [`Air::TplEnd`] will be /// parented to this template rather than the parent [`Pkg`]. /// See [`Air::TplStart`] for more information. PkgTpl(AirTplAggregate), } impl Display for AirAggregate { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { use AirAggregate::*; match self { Empty => write!(f, "awaiting AIR input for ASG"), Toplevel(_) => { write!(f, "expecting package header or an expression") } PkgExpr(expr) => { write!(f, "defining a package expression: {expr}") } PkgTpl(tpl) => { write!(f, "building a template: {tpl}",) } } } } impl From for AirAggregate { fn from(st: AirExprAggregate) -> Self { Self::PkgExpr(st) } } impl From for AirAggregate { fn from(st: AirTplAggregate) -> Self { Self::PkgTpl(st) } } impl ParseState for AirAggregate { type Token = Air; type Object = (); type Error = AsgError; type Context = AirAggregateCtx; /// Destination [`Asg`] that this parser lowers into. /// /// This ASG will be yielded by [`crate::parse::Parser::finalize`]. 
type PubContext = Asg; fn parse_token( self, tok: Self::Token, ctx: &mut Self::Context, ) -> crate::parse::TransitionResult { use ir::{ AirBind::*, AirDoc::*, AirIdent::*, AirPkg::*, AirSubsets::*, AirTodo::*, }; use AirAggregate::*; // TODO: Seems to be about time for refactoring this... match (self, tok.into()) { (st, AirTodo(Todo(_))) => Transition(st).incomplete(), (Empty, AirPkg(PkgStart(span))) => { let oi_pkg = ctx.begin_pkg(span); Transition(Toplevel(oi_pkg)).incomplete() } ( st @ (Toplevel(_) | PkgExpr(_) | PkgTpl(_)), AirPkg(PkgStart(span)), ) => { // This should always be available in this context. let first_span = ctx.pkg_oi().map(|oi| oi.span()).unwrap_or(UNKNOWN_SPAN); Transition(st).err(AsgError::NestedPkgStart(span, first_span)) } // No expression was started. (Toplevel(oi_pkg), AirPkg(PkgEnd(span))) => { oi_pkg.close(ctx.asg_mut(), span); Transition(Empty).incomplete() } // Packages are identified by their paths. (st @ Toplevel(..), AirBind(BindIdent(id))) => { Transition(st).err(AsgError::InvalidBindContext(id)) } (Toplevel(oi_pkg), tok @ AirDoc(DocIndepClause(..))) => { diagnostic_todo!( vec![ oi_pkg.note("for this package"), tok.internal_error( "this package description is not yet supported" ) ], "package-level short description is not yet supported by TAMER", ) } (Toplevel(oi_pkg), AirDoc(DocText(text))) => { oi_pkg.append_doc_text(ctx.asg_mut(), text); Transition(Toplevel(oi_pkg)).incomplete() } // Package import (Toplevel(oi_pkg), AirBind(RefIdent(pathspec))) => { oi_pkg.import(ctx.asg_mut(), pathspec); Transition(Toplevel(oi_pkg)).incomplete() } // Note: We unfortunately can't match on `AirExpr | AirBind | ...` // and delegate in the same block // (without having to duplicate type checks and then handle // unreachable paths) // because of the different inner types. 
(st @ (Toplevel(_) | PkgTpl(_)), tok @ AirExpr(..)) => { ctx.ret_or_transfer(st, tok, AirExprAggregate::new()) } (PkgExpr(expr), AirExpr(etok)) => ctx.proxy(expr, etok), (PkgExpr(expr), AirBind(etok)) => ctx.proxy(expr, etok), (PkgExpr(expr), AirDoc(etok)) => ctx.proxy(expr, etok), // Template parsing. (st @ (Toplevel(_) | PkgExpr(_)), tok @ AirTpl(..)) => { ctx.ret_or_transfer(st, tok, AirTplAggregate::new()) } (PkgTpl(tplst), AirTpl(ttok)) => ctx.proxy(tplst, ttok), (PkgTpl(tplst), AirBind(ttok)) => ctx.proxy(tplst, ttok), (PkgTpl(tplst), AirDoc(ttok)) => ctx.proxy(tplst, ttok), (Empty, AirPkg(PkgEnd(span))) => { Transition(Empty).err(AsgError::InvalidPkgEndContext(span)) } (st @ (PkgExpr(_) | PkgTpl(_)), AirPkg(PkgEnd(span))) => { match st.active_is_accepting(ctx) { true => { ctx.stack().ret_or_dead(Empty, AirPkg(PkgEnd(span))) } false => { Transition(st).err(AsgError::InvalidPkgEndContext(span)) } } } ( Empty, tok @ (AirExpr(..) | AirBind(..) | AirTpl(..) | AirDoc(..)), ) => Transition(Empty).err(AsgError::PkgExpected(tok.span())), (Toplevel(oi_pkg), AirIdent(IdentDecl(name, kind, src))) => { let asg = ctx.asg_mut(); let oi_root = asg.root(name); asg.lookup_or_missing(oi_root, name) .declare(asg, name, kind, src) .map(|_| ()) .transition(Toplevel(oi_pkg)) } (Toplevel(oi_pkg), AirIdent(IdentExternDecl(name, kind, src))) => { let asg = ctx.asg_mut(); let oi_root = asg.root(name); asg.lookup_or_missing(oi_root, name) .declare_extern(asg, name, kind, src) .map(|_| ()) .transition(Toplevel(oi_pkg)) } (Toplevel(oi_pkg), AirIdent(IdentDep(name, dep))) => { let asg = ctx.asg_mut(); let oi_root = asg.root(dep); let oi_from = asg.lookup_or_missing(oi_root, name); let oi_to = asg.lookup_or_missing(oi_root, dep); oi_from.add_opaque_dep(ctx.asg_mut(), oi_to); Transition(Toplevel(oi_pkg)).incomplete() } (Toplevel(oi_pkg), AirIdent(IdentFragment(name, text))) => { let asg = ctx.asg_mut(); let oi_root = asg.root(name); asg.lookup_or_missing(oi_root, name) .set_fragment(asg, 
text) .map(|_| ()) .transition(Toplevel(oi_pkg)) } (Toplevel(oi_pkg), AirIdent(IdentRoot(name))) => { let asg = ctx.asg_mut(); asg.root(name).root_ident(asg, name); Transition(Toplevel(oi_pkg)).incomplete() } (st, tok @ AirIdent(..)) => todo!("{st:?}, {tok:?}"), } } fn is_accepting(&self, _: &Self::Context) -> bool { matches!(self, Self::Empty) } } impl AirAggregate { /// Whether the active parser is in an accepting state. /// /// If a child parser is active, /// then its [`ParseState::is_accepting`] will be consulted. fn active_is_accepting(&self, ctx: &::Context) -> bool { use AirAggregate::*; match self { Empty => true, Toplevel(_) => self.is_accepting(ctx), PkgExpr(st) => st.is_accepting(ctx), PkgTpl(st) => st.is_accepting(ctx), } } /// The rooting context for [`Ident`]s for the active parser. /// /// A value of [`None`] indicates that the current parser does not /// support direct bindings, /// but a parent context may /// (see [`AirAggregateCtx::rooting_oi`]). fn active_rooting_oi(&self) -> Option> { match self { AirAggregate::Empty => None, AirAggregate::Toplevel(pkg_oi) => Some((*pkg_oi).into()), // Expressions never serve as roots for identifiers; // this will always fall through to the parent context. // Since the parent context is a package or a template, // the next frame should succeed. AirAggregate::PkgExpr(_) => None, // Identifiers bound while within a template definition context // must bind to the eventual _expansion_ site, // as if the body were pasted there. // Templates must therefore serve as containers for identifiers // bound therein. AirAggregate::PkgTpl(tplst) => { tplst.active_tpl_oi().map(Into::into) } } } } /// Additional parser context, /// including the ASG and parser stack frames. /// /// [`ObjectIndex`] lookups perform reverse linear searches beginning from /// the last stack frame until a non-[`None`] value is found; /// this creates an environment whereby inner contexts shadow outer. 
/// Missing values create holes, /// much like a prototype chain. /// In practice, /// this should only have to search the last two frames. #[derive(Debug, Default)] pub struct AirAggregateCtx(Asg, AirStack, Option>); /// Limit of the maximum number of held parser frames. /// /// Note that this is the number of [`ParseState`]s held, /// _not_ the depth of the graph at a given point. /// The intent of this is to limit runaway recursion in the event of some /// bug in the system; /// while the input stream is certainly finite, /// lookahead tokens cause recursion that does not provably /// terminate. /// /// This limit is arbitrarily large, /// but hopefully such that no legitimate case will ever hit it. const MAX_AIR_STACK_DEPTH: usize = 1024; /// Held parser stack frames. /// /// See [`AirAggregateCtx`] for more information. pub type AirStack = StateStack; impl AirAggregateCtx { fn asg_mut(&mut self) -> &mut Asg { self.as_mut() } fn stack(&mut self) -> &mut AirStack { let Self(_, stack, _) = self; stack } /// Return control to the parser atop of the stack if `st` is an /// accepting state, /// otherwise transfer control to a new parser `to`. /// /// This serves as a balance with the behavior of [`Self::proxy`]. /// Rather than checking for an accepting state after each proxy, /// or having the child parsers return to the top stack frame once /// they have completed, /// we leave the child parser in place to potentially handle more /// tokens of the same type. /// For example, /// adjacent expressions can re-use the same parser rather than having /// to pop and push for each sibling. /// /// Consequently, /// this means that a parser may be complete when we need to push and /// transfer control to another parser. /// Before pushing, /// we first check to see if the parser atop of the stack is in an /// accepting state. 
/// If so, /// then we are a sibling, /// and so instead of proceeding with instantiating a new parser, /// we return to the one atop of the stack and delegate to it. /// /// If `st` is _not_ in an accepting state, /// that means that we are a _child_; /// we then set aside the state `st` on the stack and transfer /// control to the child `to`. /// /// See also [`Self::proxy`]. fn ret_or_transfer, SB: Into>( &mut self, st: S, tok: impl Token + Into, to: SB, ) -> TransitionResult { let st_super = st.into(); if st_super.active_is_accepting(self) { // TODO: dead state or error self.stack().ret_or_dead(AirAggregate::Empty, tok) } else { self.stack().transfer_with_ret( Transition(st_super), Transition(to.into()).incomplete().with_lookahead(tok), ) } } /// Proxy `tok` to `st`, /// returning to the state atop of the stack if parsing reaches a dead /// state. /// /// See also [`Self::ret_or_transfer`]. fn proxy>( &mut self, st: S, tok: impl Token + Into, ) -> TransitionResult { st.delegate_child(tok.into(), self, |_deadst, tok, ctx| { ctx.stack().ret_or_dead(AirAggregate::Empty, tok) }) } /// Create a new rooted package and record it as the active package. fn begin_pkg(&mut self, span: Span) -> ObjectIndex { let Self(asg, _, pkg) = self; let oi_pkg = asg.create(Pkg::new(span)).root(asg); pkg.replace(oi_pkg); oi_pkg } /// The active package if any. fn pkg_oi(&self) -> Option> { match self { Self(_, _, oi) => *oi, } } /// The active container (rooting context) for [`Ident`]s. /// /// The integer value returned represents the stack offset at which the /// rooting index was found, /// with `0` representing the package. /// /// A value of [`None`] indicates that no bindings are permitted in the /// current context. fn rooting_oi(&self) -> Option> { let Self(_, stack, _) = self; stack.iter().rev().find_map(|st| st.active_rooting_oi()) } /// The active dangling expression context for [`Expr`]s. 
/// /// A value of [`None`] indicates that expressions are not permitted to /// dangle in the current context /// (and so must be identified). fn dangling_expr_oi(&self) -> Option> { let Self(_, stack, _) = self; stack.iter().rev().find_map(|st| match st { AirAggregate::Empty => None, // A dangling expression in a package context would be // unreachable. // There should be no parent frame and so this will fail to find // a value. AirAggregate::Toplevel(_) => None, // Expressions may always contain other expressions, // and so this method should not be consulted in such a // context. // Nonetheless, // fall through to the parent frame and give a correct answer. AirAggregate::PkgExpr(_) => None, // Templates serve as containers for dangling expressions, // since they may expand into an context where they are not // considered to be dangling. AirAggregate::PkgTpl(tplst) => { tplst.active_tpl_oi().map(Into::into) } }) } /// The active expansion target (splicing context) for [`Tpl`]s. /// /// A value of [`None`] indicates that template expansion is not /// permitted in this current context. fn expansion_oi(&self) -> Option> { let Self(_, stack, _) = self; stack.iter().rev().find_map(|st| match st { AirAggregate::Empty => None, AirAggregate::Toplevel(pkg_oi) => Some((*pkg_oi).into()), AirAggregate::PkgExpr(exprst) => { exprst.active_expr_oi().map(Into::into) } AirAggregate::PkgTpl(tplst) => { tplst.active_tpl_oi().map(Into::into) } }) } /// Root an identifier using the [`Self::rooting_oi`] atop of the stack. fn defines(&mut self, name: SPair) -> Result, AsgError> { let oi_root = self .rooting_oi() .ok_or(AsgError::InvalidBindContext(name))?; Ok(self.lookup_lexical_or_missing(name).add_edge_from( self.asg_mut(), oi_root, None, )) } /// Attempt to locate a lexically scoped identifier, /// or create a new one if missing. /// /// Until [`Asg`] can be further generalized, /// there are unfortunately two rooting strategies employed: /// /// 1. 
If the stack has only a single held frame at a scope boundary, /// then it is assumed to be the package representing the active /// compilation unit and the identifier is indexed in the global /// scope. /// 2. Otherwise, /// the identifier is defined locally and does not undergo /// indexing. /// /// TODO: This is very informal and just starts to get things working. fn lookup_lexical_or_missing(&mut self, name: SPair) -> ObjectIndex { let Self(asg, stack, _) = self; stack .iter() .rev() .filter_map(|st| st.active_rooting_oi()) .find_map(|oi| asg.lookup(oi, name)) .unwrap_or_else(|| self.create_env_indexed_ident(name)) } /// Index an identifier within its environment. /// /// TODO: More information as this is formalized. fn create_env_indexed_ident(&mut self, name: SPair) -> ObjectIndex { let oi_ident = self.asg_mut().create(Ident::declare(name)); // TODO: This currently only indexes for the top of the stack, // but we'll want no-shadow records for the rest of the env. if let Some(oi) = self.rooting_oi() { self.asg_mut().index(oi, name, oi_ident); } oi_ident } } impl AsMut for AirAggregateCtx { fn as_mut(&mut self) -> &mut AirAggregateCtx { self } } impl AsRef for AirAggregateCtx { fn as_ref(&self) -> &Asg { match self { Self(asg, _, _) => asg, } } } impl AsMut for AirAggregateCtx { fn as_mut(&mut self) -> &mut Asg { match self { Self(asg, _, _) => asg, } } } impl AsMut for AirAggregateCtx { fn as_mut(&mut self) -> &mut AirStack { match self { Self(_, stack, _) => stack, } } } impl From for Asg { fn from(ctx: AirAggregateCtx) -> Self { match ctx { AirAggregateCtx(asg, _, _) => asg, } } } impl From for AirAggregateCtx { fn from(asg: Asg) -> Self { Self(asg, Default::default(), None) } } #[cfg(test)] mod test;