tamer: asg: Initial identifier scoping

Okay, this is finally distilling into something fairly simple and
reasonable, but I'm not quite there yet.

In particular, the responsibility is split between `Asg` (as the owner of
the index) and `AirAggregateCtx` (as the owner of the stack frames from
which environments and scope are derived).  This was inevitable and I was
waiting for it, but now I have a good idea of how to clean it up and
proceed.

This also doesn't index in root yet (`active_rooting_oi` is still `None` for
`Root`), and I think I may remove `Pool` and just make it `Visible` at that
point, since it won't be going any further anyway.  I don't think the
distinction is meaningful and will just complicate implementations.

The tests also need some more cleanup---the assertions ideally would live in
independent tests, and the assertion failure is in a function call rather
than the test (function) itself, so locating the failing line number
requires a Rust backtrace (unless you look at the failure data).

So I suppose this is more of a mental synchronization point than
anything.  Nothing's broken, though.

DEV-13162
main
Mike Gerwitz 2023-05-16 14:52:01 -04:00
parent 9fb2169a06
commit 33f34bf244
4 changed files with 342 additions and 130 deletions

View File

@ -36,10 +36,11 @@
//! air such cringeworthy dad jokes here.
use super::{
graph::object::{ObjectIndexTo, ObjectIndexToTree, Pkg, Tpl},
graph::object::{Object, ObjectIndexTo, ObjectIndexToTree, Pkg, Tpl},
Asg, AsgError, Expr, Ident, ObjectIndex,
};
use crate::{
f::Functor,
parse::{prelude::*, StateStack},
span::Span,
sym::SymbolId,
@ -257,6 +258,49 @@ impl AirAggregate {
}
}
}
/// Translate an [`EnvScopeKind`] as it crosses the environment boundary
/// into `self`.
///
/// An identifier is _visible_ at the environment in which it is defined,
/// and casts a _shadow_ onto lower environments,
/// with the exception of the root.
/// The _root_ instead absorbs adjacent visible identifiers into a _pool_,
/// which is distinct from the hierarchy that is otherwise created at
/// the package level and lower.
fn env_cross_boundary_into<T>(
    &self,
    kind: EnvScopeKind<T>,
) -> EnvScopeKind<T> {
    use AirAggregate::*;
    use EnvScopeKind::*;

    match kind {
        // Pool and Hidden are fixpoints:
        // no frame changes them.
        settled @ (Pool(_) | Hidden(_)) => settled,

        Visible(x) => match self {
            // Expressions are not containers and introduce no
            // environment of their own,
            // so crossing into one is the identity.
            PkgExpr(_) => Visible(x),

            // A visible identifier casts a shadow in exactly one step.
            Pkg(_) | PkgTpl(_) => Shadow(x),

            // Since Visible becomes Shadow after a single step,
            // still being Visible at the root means that this is a
            // package-level definition,
            // which must contribute to the pool.
            Root => Pool(x),
        },

        Shadow(x) => match self {
            // Identity for expressions (see above),
            // and a shadow keeps propagating until the root.
            PkgExpr(_) | Pkg(_) | PkgTpl(_) => Shadow(x),

            // Not Visible at the root means not a package-level
            // definition,
            // and so it must not contribute to the pool.
            Root => Hidden(x),
        },
    }
}
}
/// Additional parser context,
@ -511,13 +555,22 @@ impl AirAggregateCtx {
///
/// TODO: More information as this is formalized.
fn create_env_indexed_ident(&mut self, name: SPair) -> ObjectIndex<Ident> {
let oi_ident = self.asg_mut().create(Ident::declare(name));
let Self(asg, stack, _) = self;
let oi_ident = asg.create(Ident::declare(name));
// TODO: This currently only indexes for the top of the stack,
// but we'll want no-shadow records for the rest of the env.
if let Some(oi) = self.rooting_oi() {
self.asg_mut().index(oi, name, oi_ident);
}
// TODO: This will need the active OI to support `AirIdent`s
stack
.iter()
.rev()
.filter_map(|frame| frame.active_rooting_oi().map(|oi| (oi, frame)))
.fold(None, |oeoi, (imm_oi, frame)| {
let eoi_next = oeoi
.map(|eoi| frame.env_cross_boundary_into(eoi))
.unwrap_or(EnvScopeKind::Visible(oi_ident));
asg.index(imm_oi, name, eoi_next);
Some(eoi_next)
});
oi_ident
}
@ -541,9 +594,8 @@ impl AirAggregateCtx {
/// and scopes slicing those layers along the y-axis.
///
/// TODO: Example visualization.
#[cfg(test)]
#[derive(Debug, PartialEq)]
enum EnvScopeKind {
#[derive(Debug, PartialEq, Copy, Clone)]
pub(super) enum EnvScopeKind<T = ObjectIndex<Object>> {
/// Identifiers are pooled without any defined hierarchy.
///
/// An identifier that is part of a pool must be unique.
@ -556,7 +608,7 @@ enum EnvScopeKind {
/// An identifier's scope can be further refined to provide more useful
/// diagnostic messages by descending into the package in which it is
/// defined and evaluating scope relative to the package.
_Pool,
Pool(T),
/// Identifier in this environment is a shadow of a deeper environment.
///
@ -570,11 +622,68 @@ enum EnvScopeKind {
/// but it cannot be used for lookup;
/// this environment should be filtered out of this identifier's
/// scope.
_Shadow,
Shadow(T),
/// This environment owns the identifier or is an environment descended
/// from one that does.
Visible,
Visible(T),
/// The identifier is not in scope.
Hidden(T),
}
impl<T> EnvScopeKind<T> {
pub fn into_inner(self) -> T {
use EnvScopeKind::*;
match self {
Pool(x) | Shadow(x) | Visible(x) | Hidden(x) => x,
}
}
/// Whether this represents an identifier that is in scope.
pub fn in_scope(self) -> Option<Self> {
use EnvScopeKind::*;
match self {
Pool(_) | Visible(_) => Some(self),
Shadow(_) | Hidden(_) => None,
}
}
}
impl<T> AsRef<T> for EnvScopeKind<T> {
fn as_ref(&self) -> &T {
use EnvScopeKind::*;
match self {
Pool(x) | Shadow(x) | Visible(x) | Hidden(x) => x,
}
}
}
impl<T, U> Functor<T, U> for EnvScopeKind<T> {
    type Target = EnvScopeKind<U>;

    /// Apply `f` to the wrapped value while keeping the same variant.
    fn map(self, f: impl FnOnce(T) -> U) -> Self::Target {
        use EnvScopeKind::*;

        // Split the variant (as its constructor) from the payload so that
        // `f` is applied at a single site.
        let (rewrap, inner): (fn(U) -> EnvScopeKind<U>, T) = match self {
            Pool(x) => (Pool, x),
            Shadow(x) => (Shadow, x),
            Visible(x) => (Visible, x),
            Hidden(x) => (Hidden, x),
        };

        rewrap(f(inner))
    }
}
impl<T: Into<Span>> From<EnvScopeKind<T>> for Span {
    /// Convert the wrapped value into a [`Span`],
    /// ignoring the scope classification.
    fn from(kind: EnvScopeKind<T>) -> Self {
        let inner: T = kind.into_inner();
        inner.into()
    }
}
impl AsMut<AirAggregateCtx> for AirAggregateCtx {

View File

@ -65,36 +65,18 @@ fn m(a: Span, b: Span) -> Span {
a.merge(b).unwrap()
}
#[test]
fn pkg_child_definition() {
let pkg_name = SPair("/pkg".into(), S1);
let name = SPair("foo".into(), S3);
#[rustfmt::skip]
let toks = vec![
// ENV: 0 global
PkgStart(S1, pkg_name),
// ENV: 1 pkg
ExprStart(ExprOp::Sum, S2),
// ENV: 1 pkg
BindIdent(name),
ExprEnd(S4),
PkgEnd(S5),
];
let asg = asg_from_toks_raw(toks);
#[rustfmt::skip]
assert_scope(&asg, name, [
// The identifier is not local,
// and so its scope should extend into the global environment.
// TODO: (Root, S0, Pool),
// Expr does not introduce a new environment,
// and so the innermost environment in which we should be able to
// find the identifier is the Pkg.
(Pkg, m(S1, S5), Visible)
]);
/// Apply [`assert_scope()`] without concern for the inner type or value of
/// the expected [`EnvScopeKind`].
///
/// Each expected entry is written as `(obj, span, kind)`,
/// where `kind` is the bare name of an [`EnvScopeKind`] variant.
/// The macro wraps a unit value in that variant (`kind(())`) before
/// forwarding the list to the function.
///
/// `$asg` and `$name` must be identifiers;
/// `$asg` is passed to the function by reference.
macro_rules! assert_scope {
(
$asg:ident, $name:ident, [
// NB: every entry must be followed by a comma,
// including the last one.
$( ($obj:ident, $span:expr, $kind:ident), )*
]
) => {
assert_scope(&$asg, $name, [
// The variant is applied to `()` since only the kind is compared.
$( ($obj, $span, $kind(())), )*
])
}
}
#[test]
@ -105,25 +87,37 @@ fn pkg_nested_expr_definition() {
#[rustfmt::skip]
let toks = vec![
// ENV: 0 global
PkgStart(S1, pkg_name),
// ENV: 1 pkg
ExprStart(ExprOp::Sum, S2),
// ENV: 1 pkg
BindIdent(outer),
ExprStart(ExprOp::Sum, S4),
// ENV: 1 pkg
BindIdent(inner),
ExprEnd(S6),
ExprEnd(S7),
PkgEnd(S8),
// ENV: 0 global lexical scoping boundaries (envs)
PkgStart(S1, pkg_name), //- -.
// ENV: 1 pkg // :
ExprStart(ExprOp::Sum, S2), // :
// ENV: 1 pkg // :
BindIdent(outer), // v : p
// :
ExprStart(ExprOp::Sum, S4), // 1: 0
// ENV: 1 pkg // :
BindIdent(inner), // v : p
ExprEnd(S6), // :
ExprEnd(S7), // :
PkgEnd(S8), //- -'
];
let asg = asg_from_toks_raw(toks);
#[rustfmt::skip]
assert_scope(&asg, inner, [
assert_scope!(asg, outer, [
// The identifier is not local,
// and so its scope should extend into the global environment.
// TODO: (Root, S0, Pool),
// Expr does not introduce a new environment,
// and so the innermost environment in which we should be able to
// find the identifier is the Pkg.
(Pkg, m(S1, S8), Visible),
]);
#[rustfmt::skip]
assert_scope!(asg, inner, [
// The identifier is not local,
// and so its scope should extend into the global environment.
// TODO: (Root, S0, Pool),
@ -131,7 +125,7 @@ fn pkg_nested_expr_definition() {
// Expr does not introduce a new environment,
// and so just as the outer expression,
// the inner is scoped to a package environment.
(Pkg, m(S1, S8), Visible)
(Pkg, m(S1, S8), Visible),
]);
}
@ -149,64 +143,148 @@ fn pkg_tpl_definition() {
#[rustfmt::skip]
let toks = vec![
// ENV: 0 global
PkgStart(S1, pkg_name),
// ENV: 1 pkg
TplStart(S2),
// ENV: 2 tpl
BindIdent(tpl_outer),
TplMetaStart(S4),
BindIdent(meta_outer),
TplMetaEnd(S6),
ExprStart(ExprOp::Sum, S7),
BindIdent(expr_outer),
ExprEnd(S9),
TplStart(S10),
// ENV: 3 tpl
BindIdent(tpl_inner),
TplMetaStart(S12),
BindIdent(meta_inner),
TplMetaEnd(S14),
ExprStart(ExprOp::Sum, S15),
BindIdent(expr_inner),
ExprEnd(S17),
TplEnd(S18),
TplEnd(S19),
PkgEnd(S20),
];
// ENV: 0 global lexical scoping boundaries (envs)
PkgStart(S1, pkg_name), //- - - - -.
// ENV: 1 pkg // :
TplStart(S2), //-----. :
// ENV: 2 tpl // | :
BindIdent(tpl_outer), // |v :p
// | :
TplMetaStart(S4), // | :
BindIdent(meta_outer), // vl|s :
TplMetaEnd(S6), // | :
// | :
ExprStart(ExprOp::Sum, S7), // | :
BindIdent(expr_outer), // vd|s :
ExprEnd(S9), // | :
// | :
TplStart(S10), //---. | :
// ENV: 3 tpl // | | :
BindIdent(tpl_inner), // |v |s :
// | | :
TplMetaStart(S12), // | | :
BindIdent(meta_inner), // vl|s |s :
TplMetaEnd(S14), // | | :
// 3| 2| 1: 0
ExprStart(ExprOp::Sum, S15), // | | :
BindIdent(expr_inner), // vd|s |s :
ExprEnd(S17), // | | :
TplEnd(S18), //---' | : v,s,p = EnvScopeKind
TplEnd(S19), //-----' : |
PkgEnd(S20), //- - - - -' |`- l = local
]; // ^ `- d = defer
// observe: - (l)ocal shadows until root
// - (d)efer shadows until root
// - visible >|> shadow
// - visible >:> pool
// - shadow >|> shadow
// - shadow >:> (no pool)
let asg = asg_from_toks_raw(toks);
#[rustfmt::skip]
assert_scope(&asg, tpl_outer, [
assert_scope!(asg, tpl_outer, [
// The template is defined at the package level,
// and so is incorporated into the global environment.
// TODO: (Root, S0, Pool),
(Pkg, m(S1, S20), Visible)
// Definition environment.
(Pkg, m(S1, S20), Visible),
]);
#[rustfmt::skip]
assert_scope(&asg, meta_outer, [
// TODO: (Tpl, m(S2, S19), Visible)
assert_scope!(asg, meta_outer, [
// The metavariable is local to the template,
// and so is not scoped outside of it.
// It does not contribute to the global scope,
// however we must introduce shadow records so that we're able to
// provide an error if shadowing would occur due to another
// identifier of the same name,
// such as a template within another template.
// Root never contains shadow records since it is not part of a
// hierarchy,
// so it is omitted from the metavariable's scope.
// TODO: (Pkg, m(S1, S20), Shadow),
// TODO: (Tpl, m(S2, S19), Visible),
]);
#[rustfmt::skip]
assert_scope(&asg, expr_outer, [
(Tpl, m(S2, S19), Visible)
assert_scope!(asg, expr_outer, [
// Expressions defined within templates will eventually be scoped to
// their _expansion site_.
// Since the future scope cannot possibly be known until the point
// of expansion,
// we don't know what its parent environment will be.
//
// Why, then, does it shadow?
//
// Templates in TAMER
// (unlike in the original XSLT-based TAME)
// are designed to _close_ over their definition environment.
// If a template references a value defined within the scope of its
// definition
// (e.g. an identifier imported into the package into which the
// template itself was defined),
// the intent is to be able to utilize that identifier at the
// expansion site without having to break encapsulation by
// having to know implementation details of the template;
// this awkward problem is the reason for `import/@export`,
// so that packages templates could re-export their symbols
// to avoid this trap,
// which is far too heavy-handed of an approach and is
// easily forgotten.
// In that sense,
// templates act more like how one would expect functions to
// operate.
//
// Because of that lexical capture,
// it is important that identifiers shadow to ensure that we do
// not rebind an identifier without the user realizing it.
// The intent is that the system should just do the right thing
// unless there happens to be a problem.
// If a user references an identifier from the outer scope,
// the intent is almost certainly to have it be lexically captured
// and available at the expansion site.
// If an identifier is unknown,
// perhaps the intent is to have it defined by another template,
// or to be defined at the expansion site.
// And if the situation changes from the second to the first because
// of the introduction of an import or a duplicate identifier,
// we want to help the user at the earliest possible moment.
(Pkg, m(S1, S20), Shadow),
(Tpl, m(S2, S19), Visible),
]);
#[rustfmt::skip]
assert_scope(&asg, tpl_inner, [
(Tpl, m(S2, S19), Visible)
assert_scope!(asg, tpl_inner, [
// This is similar to `expr_outer` above.
// Even though the template is entirely scoped within the parent
// `tpl_outer` such that it isn't even defined until it is expanded,
// at which point it is defined within its expansion context,
// we still want shadow records so that any _references_ to this
// template can be resolved unambiguously in ways that are
// helpful to the user
// (see `expr_outer` above for more information).
(Pkg, m(S1, S20), Shadow),
(Tpl, m(S2, S19), Visible),
]);
#[rustfmt::skip]
assert_scope(&asg, meta_inner, [
// TODO: (Tpl, m(S10, S18), Visible)
assert_scope!(asg, meta_inner, [
// Just as the previous metavariable,
// we need to cast a shadow all the way up to the package level to
// ensure that we do not permit identifier shadowing.
// See `meta_outer` above for more information.
// TODO: (Pkg, m(S1, S20), Shadow),
// TODO: (Tpl, m(S2, S19), Shadow),
// TODO: (Tpl, m(S10, S18), Visible),
]);
#[rustfmt::skip]
assert_scope(&asg, expr_inner, [
(Tpl, m(S10, S18), Visible)
assert_scope!(asg, expr_inner, [
// Just the same as the previous expression.
// Note the intended consequence of this:
// if `tpl_outer` contains an identifier,
// it cannot be shadowed by `tpl_inner`.
(Pkg, m(S1, S20), Shadow),
(Tpl, m(S2, S19), Shadow),
(Tpl, m(S10, S18), Visible),
]);
}
@ -229,7 +307,7 @@ fn pkg_tpl_definition() {
fn assert_scope(
asg: &Asg,
name: SPair,
expected: impl IntoIterator<Item = (ObjectTy, Span, EnvScopeKind)>,
expected: impl IntoIterator<Item = (ObjectTy, Span, EnvScopeKind<()>)>,
) {
// We are interested only in identifiers for scoping,
// not the objects that they point to.
@ -266,7 +344,7 @@ fn assert_scope(
(
dynrel.target_ty(),
dynrel.target().resolve(asg).span(),
asg.lookup(oi_to, name),
asg.lookup_raw(oi_to, name),
)
})
});
@ -274,18 +352,21 @@ fn assert_scope(
// `tree_reconstruction` omits root,
// so we'll have to add it ourselves.
let oi_root = asg.root(name);
let given = once((Root, S0, asg.lookup(oi_root, name)))
let given = once((Root, S0, asg.lookup_raw(oi_root, name)))
.chain(given_without_root)
.filter_map(|(ty, span, ooi)| ooi.map(|oi| (ty, span, oi.resolve(asg))))
.inspect(|(ty, span, ident)| assert_eq!(
.filter_map(|(ty, span, oeoi)| {
oeoi.map(|eoi| (ty, span, eoi.map(ObjectIndex::cresolve(asg))))
})
.inspect(|(ty, span, eid)| assert_eq!(
expected_span,
ident.span(),
eid.as_ref().span(),
"expected {wname} span {expected_span} at {ty}:{span}, but found {given}",
wname = TtQuote::wrap(name),
given = ident.span(),
given = eid.as_ref().span(),
))
// TODO
.map(|(ty, span, _)| (ty, span, EnvScopeKind::Visible));
// We discard the inner ObjectIndex since it is not relevant for the
// test assertion.
.map(|(ty, span, eid)| (ty, span, eid.map(|_| ())));
// Collection allows us to see the entire expected and given lists on
// assertion failure.

View File

@ -26,7 +26,7 @@ use self::object::{
ObjectRelTy, ObjectRelatable, Root,
};
use super::{AsgError, Object, ObjectIndex, ObjectKind};
use super::{air::EnvScopeKind, AsgError, Object, ObjectIndex, ObjectKind};
use crate::{
diagnose::{panic::DiagnosticPanic, Annotate, AnnotatedSpan},
f::Functor,
@ -108,7 +108,7 @@ pub struct Asg {
/// the public API encapsulates it within an [`ObjectIndex`].
index: FxHashMap<
(ObjectRelTy, SymbolId, ObjectIndex<Object>),
ObjectIndex<Object>,
EnvScopeKind<ObjectIndex<Object>>,
>,
/// The root node used for reachability analysis and topological
@ -198,16 +198,17 @@ impl Asg {
&mut self,
imm_env: OS,
name: S,
oi: ObjectIndex<O>,
eoi: EnvScopeKind<ObjectIndex<O>>,
) -> Result<(), ObjectIndex<O>> {
let sym = name.into();
let prev = self
.index
.insert((O::rel_ty(), sym, imm_env.widen()), oi.widen());
let prev = self.index.insert(
(O::rel_ty(), sym, imm_env.widen()),
eoi.map(ObjectIndex::widen),
);
match prev {
None => Ok(()),
Some(oi) => Err(oi.must_narrow_into::<O>()),
Some(eoi) => Err(eoi.into_inner().must_narrow_into::<O>()),
}
}
@ -234,10 +235,10 @@ impl Asg {
&mut self,
imm_env: OS,
name: S,
oi: ObjectIndex<O>,
eoi: EnvScopeKind<ObjectIndex<O>>,
) {
let sym = name.into();
let prev = self.try_index(imm_env, sym, oi);
let prev = self.try_index(imm_env, sym, eoi);
// We should never overwrite indexes
#[allow(unused_variables)] // used only for debug
@ -248,17 +249,17 @@ impl Asg {
vec![
imm_env.widen().note("at this scope boundary"),
prev_oi.note("previously indexed identifier was here"),
oi.internal_error(
eoi.internal_error(
"this identifier has already been indexed at the above scope boundary"
),
oi.help(
eoi.help(
"this is a bug in the system responsible for analyzing \
identifier scope;"
),
oi.help(
eoi.help(
" you can try to work around it by duplicating the definition of "
),
oi.help(
eoi.help(
format!(
" {} as a _new_ identifier with a different name.",
TtQuote::wrap(sym),
@ -294,7 +295,12 @@ impl Asg {
{
self.lookup(imm_env, name).unwrap_or_else(|| {
let oi = self.create(O::missing(name));
self.index(imm_env, name.symbol(), oi);
// TODO: This responsibility is split between `Asg` and
// `AirAggregateCtx`!
let eoi = EnvScopeKind::Visible(oi);
self.index(imm_env, name.symbol(), eoi);
oi
})
}
@ -515,16 +521,27 @@ impl Asg {
/// this method cannot be used to retrieve all possible objects on the
/// graph---for
/// that, see [`Asg::get`].
///
/// The global environment is defined as the environment of the current
/// compilation unit,
/// which is a package.
#[inline]
pub fn lookup<O: ObjectRelatable>(
    &self,
    imm_env: impl ObjectIndexRelTo<O>,
    id: SPair,
) -> Option<ObjectIndex<O>> {
    // Nothing indexed under this name at `imm_env` means no result.
    let eoi = self.lookup_raw(imm_env, id)?;

    // An index entry may exist without the identifier being in scope;
    // only an in-scope entry is unwrapped into a lookup result.
    eoi.in_scope().map(EnvScopeKind::into_inner)
}
/// Attempt to retrieve an identifier and its scope information from the
/// graph by name relative to the immediate environment `imm_env`.
///
/// See [`Self::lookup`] for more information.
#[inline]
pub(super) fn lookup_raw<O: ObjectRelatable>(
&self,
imm_env: impl ObjectIndexRelTo<O>,
id: SPair,
) -> Option<EnvScopeKind<ObjectIndex<O>>> {
// The type `O` is encoded into the index on [`Self::index`] and so
// should always be able to be narrowed into the expected type.
// If this invariant somehow does not hold,
@ -533,7 +550,9 @@ impl Asg {
// static assurances.
self.index
.get(&(O::rel_ty(), id.symbol(), imm_env.widen()))
.map(|&ni| ni.overwrite(id.span()).must_narrow_into::<O>())
.map(|&eoi| {
eoi.map(|oi| oi.overwrite(id.span()).must_narrow_into::<O>())
})
}
}

View File

@ -21,7 +21,7 @@
use super::{prelude::*, Ident, Pkg};
use crate::{
asg::{IdentKind, Source},
asg::{air::EnvScopeKind, IdentKind, Source},
parse::util::SPair,
span::Span,
};
@ -111,7 +111,10 @@ impl ObjectIndex<Root> {
) -> Result<ObjectIndex<Pkg>, AsgError> {
let oi_pkg = asg.create(Pkg::new_canonical(start, name)?);
asg.try_index(self, name, oi_pkg).map_err(|oi_prev| {
// TODO: We shouldn't be responsible for this
let eoi_pkg = EnvScopeKind::Pool(oi_pkg);
asg.try_index(self, name, eoi_pkg).map_err(|oi_prev| {
let prev = oi_prev.resolve(asg);
// unwrap note: a canonical name must exist for this error to