tame/tamer/src/nir.rs

// IR that is "near" the source code.
//
//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
//
//  This file is part of TAME.
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.

//! An IR that is "near" the source code.
//!
//! This IR is "near" the source code written by the user,
//!   performing only basic normalization tasks like desugaring.
//! It takes a verbose input language and translates it into a much more
//!   concise internal representation.
//! The hope is that most desugaring will be done by templates in the future.
//!
//! NIR cannot completely normalize the source input because it does not
//!   have enough information to do so---the
//!     template system requires a compile-time interpreter that is beyond
//!     the capabilities of NIR,
//!       and so a final normalization pass must be done later on in the
//!       lowering pipeline.
//!
//! This is a streaming IR,
//!   meaning that the equivalent AST is not explicitly represented as a
//!   tree structure in memory.
//!
//! NIR is lossy and does not retain enough information for code
//!   formatting---that
//!     type of operation will require a mapping between
//!     XIRF and NIR,
//!       where the latter is used to gather enough context for formatting
//!       and the former is used as a concrete representation of what the user
//!       actually typed.
//!
//! For more information on the parser,
//!   see [`parse`].
//! The entry point for NIR in the lowering pipeline is exported as
//!   [`XirfToNir`].

mod air;
mod interp;
mod parse;
mod tplshort;

use crate::{
    diagnose::{Annotate, Diagnostic},
    f::Functor,
    fmt::{DisplayWrapper, TtQuote},
    parse::{util::SPair, Object, Token},
    span::{Span, UNKNOWN_SPAN},
    sym::SymbolId,
    xir::{
        attr::{Attr, AttrSpan},
        fmt::TtXmlAttr,
        QName,
    },
};
use std::{
    convert::Infallible,
    error::Error,
    fmt::{Debug, Display},
};

pub use air::{NirToAir, NirToAirError};
pub use interp::{InterpError, InterpState as InterpolateNir};
pub use parse::{
    NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
};
pub use tplshort::TplShortDesugar;

/// IR that is "near" the source code.
///
/// This represents the language of TAME after it has been extracted from
///   whatever document encloses it
///     (e.g. XML).
#[derive(Debug, PartialEq, Eq)]
pub enum Nir {
    Todo,
    TodoAttr(SPair),

    /// Begin the definition of some [`NirEntity`] and place it atop of the
    ///   stack.
    Open(NirEntity, Span),

    /// Finish definition of a [`NirEntity`] atop of the stack and pop it.
    Close(NirEntity, Span),

    /// Bind the given name as an identifier for the entity atop of the
    ///   stack.
    BindIdent(SPair),

    /// Reference the value of the given identifier.
    ///
    /// Permissible identifiers and values depend on the context in which
    ///   this appears.
    Ref(SPair),

    /// Describe the [`NirEntity`] atop of the stack.
    Desc(SPair),

    /// A string literal.
    ///
    /// The meaning of this string depends on context.
    /// For example,
    ///   it may represent literate documentation or a literal in a
    ///   metavariable definition.
    Text(SPair),
}

impl Nir {
    /// Retrieve inner [`SymbolId`] that this token represents,
    ///   if any.
    ///
    /// Not all NIR tokens contain associated symbols;
    ///   a token's [`SymbolId`] is retained only if it provides additional
    ///   information over the token itself.
    ///
    /// See also [`Nir::map`] if you wish to change the symbol.
    pub fn symbol(&self) -> Option<SymbolId> {
        use Nir::*;

        match self {
            Todo => None,
            TodoAttr(spair) => Some(spair.symbol()),

            Open(_, _) | Close(_, _) => None,

            BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
                Some(spair.symbol())
            }
        }
    }
}

impl Functor<SymbolId> for Nir {
    /// Map over a token's [`SymbolId`].
    ///
    /// This allows modifying a token's [`SymbolId`] while retaining the
    ///   associated [`Span`].
    /// This is the desired behavior when modifying the source code the user
    ///   entered,
    ///     since diagnostic messages will reference the original source
    ///     location that the modification was derived from.
    ///
    /// If a token does not contain a symbol,
    ///   this returns the token unchanged.
    ///
    /// See also [`Nir::symbol`] if you only wish to retrieve the symbol
    ///   rather than map over it.
    fn map(self, f: impl FnOnce(SymbolId) -> SymbolId) -> Self {
        use Nir::*;

        match self {
            Todo => self,
            TodoAttr(spair) => TodoAttr(spair.map(f)),

            Open(_, _) | Close(_, _) => self,

            BindIdent(spair) => BindIdent(spair.map(f)),
            Ref(spair) => Ref(spair.map(f)),
            Desc(spair) => Desc(spair.map(f)),
            Text(spair) => Text(spair.map(f)),
        }
    }
}

/// An object upon which other [`Nir`] tokens act.
#[derive(Debug, PartialEq, Eq)]
pub enum NirEntity {
    /// Package of identifiers.
    Package,

    /// Rate (verb) block,
    ///   representing a calculation with a scalar result.
    Rate,
    /// Summation (Σ) expression.
    Sum,
    /// Product (Π) expression.
    Product,
    /// Ceiling (⌈) expression.
    Ceil,
    /// Floor (⌊) expression.
    Floor,

    // Classification.
    Classify,
    /// Conjunctive (∧) expression.
    All,
    /// Disjunctive (∨) expression.
    Any,

    /// Template.
    Tpl,

    /// Template application (long form).
    TplApply,
    /// Template parameter (long form).
    TplParam,

    /// Template application (shorthand).
    TplApplyShort(QName),
    /// Template parameter (shorthand).
    ///
    /// The non-`@`-padded name is the first of the pair and the value as
    ///   the second.
    ///
    /// A shorthand param is implicitly closed and should not have a
    ///   matching [`Nir::Close`] token.
    TplParamShort(SPair, SPair),
}

impl NirEntity {
    pub fn open<S: Into<Span>>(self, span: S) -> Nir {
        Nir::Open(self, span.into())
    }

    pub fn close<S: Into<Span>>(self, span: S) -> Nir {
        Nir::Close(self, span.into())
    }
}

impl Display for NirEntity {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use NirEntity::*;

        match self {
            Package => write!(f, "package"),

            Rate => write!(f, "rate block"),
            Sum => write!(f, "sum (∑) expression"),
            Product => write!(f, "product (Π) expression"),
            Ceil => write!(f, "ceiling (⌈) expression"),
            Floor => write!(f, "floor (⌊) expression"),

            Classify => write!(f, "classification"),
            All => write!(f, "conjunctive (∧) expression"),
            Any => write!(f, "disjunctive (∨) expression"),

            Tpl => write!(f, "template"),
            TplParam => write!(f, "template param (metavariable)"),
            TplParamShort(name, val) => write!(
                f,
                "shorthand template param key {} with value {}",
                TtQuote::wrap(name),
                TtQuote::wrap(val),
            ),
            TplApply => {
                write!(f, "full template application and expansion")
            }
            TplApplyShort(qname) => write!(
                f,
                "full shorthand template application and expansion of {}",
                TtQuote::wrap(qname.local_name())
            ),
        }
    }
}

impl Token for Nir {
    fn ir_name() -> &'static str {
        "NIR"
    }

    /// Identifying span of a token.
    ///
    /// An _identifying span_ is a selection of one of the (potentially
    ///   many) spans associated with a token that is most likely to be
    ///   associated with the identity of that token.
    fn span(&self) -> Span {
        use Nir::*;

        match self {
            Todo => UNKNOWN_SPAN,
            TodoAttr(spair) => spair.span(),

            Open(_, span) => *span,
            Close(_, span) => *span,

            BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
                spair.span()
            }
        }
    }
}

impl Object for Nir {}

impl Display for Nir {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use Nir::*;

        match self {
            Todo => write!(f, "TODO"),
            TodoAttr(spair) => write!(f, "TODO Attr {spair}"),

            Open(entity, _) => write!(f, "open {entity} entity"),
            Close(entity, _) => write!(f, "close {entity} entity"),
            BindIdent(spair) => {
                write!(f, "bind identifier {}", TtQuote::wrap(spair))
            }
            Ref(spair) => write!(f, "ref {}", TtQuote::wrap(spair)),

            // TODO: TtQuote doesn't yet escape quotes at the time of writing!
            Desc(spair) => write!(f, "description {}", TtQuote::wrap(spair)),

            // TODO: Not yet safe to output arbitrary text;
            //   need to determine how to handle newlines and other types of
            //   output.
            Text(_) => write!(f, "text"),
        }
    }
}

impl<E> From<Nir> for Result<Nir, E> {
    fn from(val: Nir) -> Self {
        Ok(val)
    }
}

/// Tag representing the type of a NIR value.
///
/// NIR values originate from attributes,
///   which are refined into types as enough information becomes available.
/// Value parsing must be deferred if a value requires desugaring or
///   metavalue expansion.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum NirSymbolTy {
    AnyIdent,
    BooleanLiteral,
    ClassIdent,
    ClassIdentList,
    ConstIdent,
    DescLiteral,
    Dim,
    DynNodeLiteral,
    FuncIdent,
    IdentDtype,
    IdentType,
    MapTransformLiteral,
    NumLiteral,
    ParamDefault,
    ParamIdent,
    ParamName,
    ParamType,
    PkgPath,
    ShortDimNumLiteral,
    StringLiteral,
    SymbolTableKey,
    TexMathLiteral,
    Title,
    TplMetaIdent,
    TplIdent,
    TplParamIdent,
    TypeIdent,
    ValueIdent,
}

impl Display for NirSymbolTy {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use NirSymbolTy::*;

        match self {
            AnyIdent => write!(f, "any identifier"),
            BooleanLiteral => write!(
                f,
                "boolean literal {fmt_true} or {fmt_false}",
                fmt_true = TtQuote::wrap("true"),
                fmt_false = TtQuote::wrap("false"),
            ),
            ClassIdent => write!(f, "classification identifier"),
            ClassIdentList => {
                write!(f, "space-delimited list of classification identifiers")
            }
            ConstIdent => write!(f, "constant identifier"),
            DescLiteral => write!(f, "description literal"),
            Dim => write!(f, "dimension declaration"),
            DynNodeLiteral => write!(f, "dynamic node literal"),
            FuncIdent => write!(f, "function identifier"),
            IdentDtype => write!(f, "identifier primitive datatype"),
            IdentType => write!(f, "identifier type"),
            MapTransformLiteral => write!(f, "map transformation literal"),
            NumLiteral => write!(f, "numeric literal"),
            ParamDefault => write!(f, "param default"),
            ParamIdent => write!(f, "param identifier"),
            ParamName => write!(f, "param name"),
            ParamType => write!(f, "param type"),
            PkgPath => write!(f, "package path"),
            ShortDimNumLiteral => {
                write!(f, "short-hand dimensionalized numeric literal")
            }
            StringLiteral => write!(f, "string literal"),
            SymbolTableKey => write!(f, "symbol table key name"),
            TexMathLiteral => write!(f, "TeX math literal"),
            Title => write!(f, "title"),
            TplMetaIdent => write!(f, "template metadata identifier"),
            TplIdent => write!(f, "template name"),
            TplParamIdent => write!(f, "template param identifier"),
            TypeIdent => write!(f, "type identifier"),
            ValueIdent => write!(f, "value identifier"),
        }
    }
}

/// A ([`SymbolId`], [`Span`]) pair representing an
///   attribute value that may need to be interpreted within the context of
///   a template application.
///
/// _This object must be kept small_,
///   since it is used in objects that aggregate portions of the token
///   stream,
///     which must persist in memory for a short period of time,
///     and therefore cannot be optimized away as other portions of the IR.
#[derive(Debug, PartialEq, Eq)]
pub struct NirSymbol<const TY: NirSymbolTy>(SymbolId, Span);

impl<const TY: NirSymbolTy> Token for NirSymbol<TY> {
    fn ir_name() -> &'static str {
        // TODO: Include type?
        "NIR Symbol"
    }

    fn span(&self) -> Span {
        match self {
            Self(_, span) => *span,
        }
    }
}

impl<const TY: NirSymbolTy> Display for NirSymbol<TY> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self(sym, _span) => {
                write!(f, "{TY} {fmt_sym}", fmt_sym = TtQuote::wrap(sym),)
            }
        }
    }
}

impl<const TY: NirSymbolTy> From<(SymbolId, Span)> for NirSymbol<TY> {
    fn from((val, span): (SymbolId, Span)) -> Self {
        Self(val, span)
    }
}

impl<const TY: NirSymbolTy> From<Attr> for NirSymbol<TY> {
    fn from(attr: Attr) -> Self {
        match attr {
            Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).into(),
        }
    }
}

// Force developer to be conscious of any changes in size;
//   see `NirSymbol` docs for more information.
assert_eq_size!(NirSymbol<{ NirSymbolTy::AnyIdent }>, (SymbolId, Span));

#[derive(Debug, PartialEq, Eq)]
pub enum PkgType {
    /// Package is intended to produce an executable program.
    ///
    /// This is specified by the `rater` root node.
    Prog,
    /// Package is intended to be imported as a component of a larger
    ///   program.
    Mod,
}

#[derive(Debug, PartialEq, Eq)]
pub struct Literal<const S: SymbolId>;

impl<const S: SymbolId> TryFrom<Attr> for Literal<S> {
    type Error = NirAttrParseError;

    fn try_from(attr: Attr) -> Result<Self, Self::Error> {
        match attr {
            Attr(_, val, _) if val == S => Ok(Literal),
            Attr(name, _, aspan) => Err(NirAttrParseError::LiteralMismatch(
                name,
                aspan.value_span(),
                S,
            )),
        }
    }
}

impl From<Infallible> for NirAttrParseError {
    fn from(x: Infallible) -> Self {
        match x {}
    }
}

type ExpectedSymbolId = SymbolId;

#[derive(Debug, PartialEq, Eq)]
pub enum NirAttrParseError {
    LiteralMismatch(QName, Span, ExpectedSymbolId),
}

impl Error for NirAttrParseError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        None
    }
}

impl Display for NirAttrParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::LiteralMismatch(name, _, _) => {
                write!(f, "unexpected value for {}", TtXmlAttr::wrap(name),)
            }
        }
    }
}

impl Diagnostic for NirAttrParseError {
    fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
        match self {
            Self::LiteralMismatch(_, span, expected) => span
                .error(format!("expecting {}", TtQuote::wrap(expected)))
                .into(),
        }
    }
}
-												tamer: nir: Sugared and plain flavors

This introduces the concept of sugared NIR and provides the boilerplate for
a desugaring pass.  The earlier commits dealing with cleaning up the
lowering pipeline were to support this work, in particular to ensure that
reporting and recovery properly applied to this lowering operation without
adding a ton more boilerplate.

DEV-13158

											
										
										
											2022-10-19 10:00:08 -04:00
+								// IR that is "near" the source code.
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								//
-												Copyright year and name update

Ryan Specialty Group (RSG) rebranded to Ryan Specialty after its IPO.

											
										
										
											2023-01-17 23:09:25 -05:00
+								//  Copyright (C) 2014-2023 Ryan Specialty, LLC.
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								//
 								//  This file is part of TAME.
 								//
 								//  This program is free software: you can redistribute it and/or modify
 								//  it under the terms of the GNU General Public License as published by
 								//  the Free Software Foundation, either version 3 of the License, or
 								//  (at your option) any later version.
 								//
 								//  This program is distributed in the hope that it will be useful,
 								//  but WITHOUT ANY WARRANTY; without even the implied warranty of
 								//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								//  GNU General Public License for more details.
 								//
 								//  You should have received a copy of the GNU General Public License
 								//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-												tamer: nir: Re-define "NIR"

This was originally the "noramlized" IR, but that's not possible to do
without template expansion, which is going to happen at a later point.  So,
this is just "NIR", pronounced "near", which is an IR that is "near" to the
source code.  You can define it was "Near IR" if you want, but it's just a
homonym with a not-quite-defined acronym to me.

DEV-7145

											
										
										
											2022-09-16 09:59:38 -04:00
+								//! An IR that is "near" the source code.
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								//!
 								//! This IR is "near" the source code written by the user,
-												tamer: nir: Re-define "NIR"

This was originally the "noramlized" IR, but that's not possible to do
without template expansion, which is going to happen at a later point.  So,
this is just "NIR", pronounced "near", which is an IR that is "near" to the
source code.  You can define it was "Near IR" if you want, but it's just a
homonym with a not-quite-defined acronym to me.

DEV-7145

											
										
										
											2022-09-16 09:59:38 -04:00
+								//!   performing only basic normalization tasks like desugaring.
 								//! It takes a verbose input language and translates it into a much more
 								//!   concise internal representation.
 								//! The hope is that most desugaring will be done by templates in the future.
 								//!
 								//! NIR cannot completely normalize the source input because it does not
 								//!   have enough information to do so---the
 								//!     template system requires a compile-time interpreter that is beyond
 								//!     the capabilities of NIR,
 								//!       and so a final normalization pass must be done later on in the
 								//!       lowering pipeline.
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								//!
 								//! This is a streaming IR,
 								//!   meaning that the equivalent AST is not explicitly represented as a
 								//!   tree structure in memory.
 								//!
-												tamer: nir: Re-define "NIR"

This was originally the "noramlized" IR, but that's not possible to do
without template expansion, which is going to happen at a later point.  So,
this is just "NIR", pronounced "near", which is an IR that is "near" to the
source code.  You can define it was "Near IR" if you want, but it's just a
homonym with a not-quite-defined acronym to me.

DEV-7145

											
										
										
											2022-09-16 09:59:38 -04:00
+								//! NIR is lossy and does not retain enough information for code
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								//!   formatting---that
 								//!     type of operation will require a mapping between
 								//!     XIRF and NIR,
 								//!       where the latter is used to gather enough context for formatting
 								//!       and the former is used as a concrete representation of what the user
 								//!       actually typed.
-												tamer: nir::parse: Grammar summary docs

This is intended to provide just enough information to help elucidate how
the system works and why.

DEV-7145

											
										
										
											2022-09-19 09:22:07 -04:00
+								//!
 								//! For more information on the parser,
 								//!   see [`parse`].
 								//! The entry point for NIR in the lowering pipeline is exported as
 								//!   [`XirfToNir`].
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
-												tamer: tamec: Introduce NIR->AIR->ASG lowering

This does not yet yield the produces ASG, but does set up the lowering
pipeline to prepare to produce it.  It's also currently a no-op, with
`NirToAsg` just yielding `Incomplete`.

The goal is to begin to move toward vertical slices for TAMER as I start to
return to the previous approach of a handoff with the old compiler.  Now
that I've gained clarity from my previous failed approach (which I
documented in previous commits), I feel that this is the best way forward
that will allow me to incrementally introduce more fine-grained performance
improvements, at the cost of some throwaway work as this progresses.  But
the cost of delay with these build times is far greater.

DEV-13429

											
										
										
											2022-12-13 13:34:52 -05:00
+								mod air;
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								mod interp;
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								mod parse;
-												tamer: Introduce desugaring operation for shorthand template application

This moves translation from NirToAir into TplShortDesugar, and changes the
output from AIR to NIR.

This is going to be much easier to reason about as a desugaring
operation (and indeed that's always how TAME has implemented it, in XSLT);
this keeps the complexity isolated.

Ideally, NirToAir wouldn't even accept tokens that it can't handle, but
that's going to take quite a bit more work and I don't have the time right
now.  Instead, we'll fail at runtime with some hopefully-useful
information.  It shouldn't actually happen in practice.

DEV-13708

											
										
										
											2023-03-21 14:38:07 -04:00
+								mod tplshort;
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
 								use crate::{
 								    diagnose::{Annotate, Diagnostic},
-												tamer: f::Functor: New trait

This commit is purposefully coupled with changes that utilize it to
demonstrate that the need for this abstraction has been _derived_, not
forced; TAMER doesn't aim to be functional for the sake of it, since
idiomatic Rust achieves many of its benefits without the formalisms.

But, the formalisms do occasionally help, and this is one such
example.  There is other existing code that can be refactored to take
advantage of this style as well.

I do _not_ wish to pull an existing functional dependency into TAMER; I want
to keep these abstractions light, and eliminate them as necessary, as Rust
continues to integrate new features into its core.  I also want to be able
to modify the abstractions to suit our particular needs.  (This is _not_ a
general recommendation; it's particular to TAMER and to my experience.)

This implementation of `Functor` is one such example.  While it is modeled
after Haskell in that it provides `fmap`, the primitive here is instead
`map`, with `fmap` derived from it, since `map` allows for better use of
Rust idioms.  Furthermore, it's polymorphic over _trait_ type parameters,
not method, allowing for separate trait impls for different container types,
which can in turn be inferred by Rust and allow for some very concise
mapping; this is particularly important for TAMER because of the disciplined
use of newtypes.

For example, `foo.overwrite(span)` and `foo.overwrite(name)` are both
self-documenting, and better alternatives than, say, `foo.map_span(|_|
span)` and `foo.map_symbol(|_| name)`; the latter are perfectly clear in
what they do, but lack a layer of abstraction, and are verbose.  But the
clarity of the _new_ form does rely on either good naming conventions of
arguments, or explicit type annotations using turbofish notation if
necessary.

This will be implemented on core Rust types as appropriate and as
possible.  At the time of writing, we do not yet have trait specialization,
and there's too many soundness issues for me to be comfortable enabling it,
so that limits that we can do with something like, say, a generic `Result`,
while also allowing for specialized implementations based on newtypes.

DEV-13160

											
										
										
											2023-01-04 12:30:18 -05:00
+								    f::Functor,
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    fmt::{DisplayWrapper, TtQuote},
-												tamer: xir::parse::ele::ele_parse!: Integrate `attr_parse_stream!`

This handles the bulk of the integration of the new `attr_parse_stream!` as
a replacement for `attr_parse!`, which moves from aggregate attribute
objects to a stream of attribute-derived tokens.  Rationale for this change
is in the preceding commit messages.

The first striking change here is how it affects the test cases: nearly all
`Incomplete`s are removed.  Note that the parser has an existing
optimization whereby `Incomplete` with lookahead causes immediate recursion
within `Parser`, since those situations are used only for control flow and
to keep recursion out of `ParseState`s.

Next: this removes types from `nir::parse`'s grammar for attributes.  The
types will instead be derived from NIR tokens later in the lowering
pipeline.  This simplifies NIR considerably, since adding types into the mix
at this point was taking an already really complex lowering phase and making
it ever more difficult to reason about and get everything working together
the way that I needed.

Because of `attr_parse_stream!`, there are no more required attribute
checks.  Those will be handled later in the lowering pipeline, if they're
actually needed in context, with possibly one exception: namespace
declarations.  Those are really part of the document and they ought to be
handled _earlier_ in the pipeline; I'll do that at some point.  It's not
required for compilation; it's just required to maintain compliance with the
XML spec.

We also lose checks for duplicate attributes.  This is also something that
ought to be handled at the document level, and so earlier in the pipeline,
since XML cares, not us---if we get a duplicate attribute that results in an
extra NIR token, then the next parser will error out, since it has to check
for those things anyway.

A bunch of cleanup and simplification is still needed; I want to get the
initial integration committed first.  It's a shame I'm getting rid of so
much work, but this is the right approach, and results in a much simpler
system.

DEV-13346

											
										
										
											2022-11-30 23:52:18 -05:00
+								    parse::{util::SPair, Object, Token},
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
+								    span::{Span, UNKNOWN_SPAN},
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								    sym::SymbolId,
-												tamer: nir: Detect interpolated values

This simply detects whether a value will need to be further parsed for
interpolation; it does not yet perform the parsing itself, which will happen
during desugaring.

This introduces a performance regression, for an interesting reason.  I
found that introducing a single new variant to `SugaredNir` (with a
`(SymbolId, Span)` pair), was causing the width of the `NirParseState` type
to increase just enough to cause Rust to be unable to optimize away a
significant number of memcpys related to `Parser` moves, and consequently
reducing performance by nearly 50% for `tamec`.  Yikes.

I suspected this would be a problem, and indeed have tried in all other
cases to avoid aggregation until the ASG---the problem is that I had wanted
to aggregate attributes for NIR so that the IR could actually make some
progress toward simplifying the stream (and therefore working with the
data), and be able to validate against a grammar defined in a single
place.  The problem is that the `NirParseState` type contains a sum type for
every attribute parser, and is therefore as wide as the largest one.  That
is what Rust is having trouble optimizing memcpy away for.

Indeed, reducing the number of attributes improves the situation
drastically.  However, it doesn't make it go away entirely.

If you look at a callgrind profile for `tameld` (or a dissassembly), you'll
notice that I put quite a bit of effort into ensuring that the hot code path
for the lowering pipeline contains _no_ memcpys for the parsers.  But that
is not the case with `tamec`---I had to move on.  But I do still have the
same escape hatch that I introduced for `tameld`, which is the mutable
`Context`.

It seems that may be the solution there too, but I want to get a bit further
along first to see how these data end up propagating before I go through
that somewhat significant effort.

DEV-13156

											
										
										
											2022-11-01 14:30:34 -04:00
+								    xir::{
 								        attr::{Attr, AttrSpan},
 								        fmt::TtXmlAttr,
 								        QName,
 								    },
 								};
 								use std::{
 								    convert::Infallible,
 								    error::Error,
 								    fmt::{Debug, Display},
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								};
-												tamer: tamec: Introduce NIR->AIR->ASG lowering

This does not yet yield the produces ASG, but does set up the lowering
pipeline to prepare to produce it.  It's also currently a no-op, with
`NirToAsg` just yielding `Incomplete`.

The goal is to begin to move toward vertical slices for TAMER as I start to
return to the previous approach of a handoff with the old compiler.  Now
that I've gained clarity from my previous failed approach (which I
documented in previous commits), I feel that this is the best way forward
that will allow me to incrementally introduce more fine-grained performance
improvements, at the cost of some throwaway work as this progresses.  But
the cost of delay with these build times is far greater.

DEV-13429

											
										
										
											2022-12-13 13:34:52 -05:00
+								pub use air::{NirToAir, NirToAirError};
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
+								pub use interp::{InterpError, InterpState as InterpolateNir};
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								pub use parse::{
 								    NirParseState as XirfToNir, NirParseStateError_ as XirfToNirError,
 								};
-												tamer: Introduce desugaring operation for shorthand template application

This moves translation from NirToAir into TplShortDesugar, and changes the
output from AIR to NIR.

This is going to be much easier to reason about as a desugaring
operation (and indeed that's always how TAME has implemented it, in XSLT);
this keeps the complexity isolated.

Ideally, NirToAir wouldn't even accept tokens that it can't handle, but
that's going to take quite a bit more work and I don't have the time right
now.  Instead, we'll fail at runtime with some hopefully-useful
information.  It shouldn't actually happen in practice.

DEV-13708

											
										
										
											2023-03-21 14:38:07 -04:00
+								pub use tplshort::TplShortDesugar;
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								/// IR that is "near" the source code.
-												tamer: nir: Sugared and plain flavors

This introduces the concept of sugared NIR and provides the boilerplate for
a desugaring pass.  The earlier commits dealing with cleaning up the
lowering pipeline were to support this work, in particular to ensure that
reporting and recovery properly applied to this lowering operation without
adding a ton more boilerplate.

DEV-13158

											
										
										
											2022-10-19 10:00:08 -04:00
+								///
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								/// This represents the language of TAME after it has been extracted from
 								///   whatever document encloses it
 								///     (e.g. XML).
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								#[derive(Debug, PartialEq, Eq)]
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								pub enum Nir {
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    Todo,
-												tamer: xir::parse::ele::ele_parse!: Integrate `attr_parse_stream!`

This handles the bulk of the integration of the new `attr_parse_stream!` as
a replacement for `attr_parse!`, which moves from aggregate attribute
objects to a stream of attribute-derived tokens.  Rationale for this change
is in the preceding commit messages.

The first striking change here is how it affects the test cases: nearly all
`Incomplete`s are removed.  Note that the parser has an existing
optimization whereby `Incomplete` with lookahead causes immediate recursion
within `Parser`, since those situations are used only for control flow and
to keep recursion out of `ParseState`s.

Next: this removes types from `nir::parse`'s grammar for attributes.  The
types will instead be derived from NIR tokens later in the lowering
pipeline.  This simplifies NIR considerably, since adding types into the mix
at this point was taking an already really complex lowering phase and making
it ever more difficult to reason about and get everything working together
the way that I needed.

Because of `attr_parse_stream!`, there are no more required attribute
checks.  Those will be handled later in the lowering pipeline, if they're
actually needed in context, with possibly one exception: namespace
declarations.  Those are really part of the document and they ought to be
handled _earlier_ in the pipeline; I'll do that at some point.  It's not
required for compilation; it's just required to maintain compliance with the
XML spec.

We also lose checks for duplicate attributes.  This is also something that
ought to be handled at the document level, and so earlier in the pipeline,
since XML cares, not us---if we get a duplicate attribute that results in an
extra NIR token, then the next parser will error out, since it has to check
for those things anyway.

A bunch of cleanup and simplification is still needed; I want to get the
initial integration committed first.  It's a shame I'm getting rid of so
much work, but this is the right approach, and results in a much simpler
system.

DEV-13346

											
										
										
											2022-11-30 23:52:18 -05:00
+								    TodoAttr(SPair),
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								    /// Begin the definition of some [`NirEntity`] and place it atop of the
 								    ///   stack.
 								    Open(NirEntity, Span),
 								    /// Finish definition of a [`NirEntity`] atop of the stack and pop it.
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								    Close(NirEntity, Span),
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
 								    /// Bind the given name as an identifier for the entity atop of the
 								    ///   stack.
 								    BindIdent(SPair),
 								    /// Reference the value of the given identifier.
 								    ///
 								    /// Permissible identifiers and values depend on the context in which
 								    ///   this appears.
 								    Ref(SPair),
 								    /// Describe the [`NirEntity`] atop of the stack.
 								    Desc(SPair),
 								    /// A string literal.
 								    ///
 								    /// The meaning of this string depends on context.
 								    /// For example,
 								    ///   it may represent literate documentation or a literal in a
 								    ///   metavariable definition.
 								    Text(SPair),
 								}
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
+								impl Nir {
 								    /// Retrieve inner [`SymbolId`] that this token represents,
 								    ///   if any.
 								    ///
 								    /// Not all NIR tokens contain associated symbols;
 								    ///   a token's [`SymbolId`] is retained only if it provides additional
 								    ///   information over the token itself.
 								    ///
 								    /// See also [`Nir::map`] if you wish to change the symbol.
 								    pub fn symbol(&self) -> Option<SymbolId> {
 								        use Nir::*;
 								        match self {
 								            Todo => None,
 								            TodoAttr(spair) => Some(spair.symbol()),
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								            Open(_, _) | Close(_, _) => None,
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
 								            BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
 								                Some(spair.symbol())
 								            }
 								        }
 								    }
-												tamer: f::Functor: New trait

This commit is purposefully coupled with changes that utilize it to
demonstrate that the need for this abstraction has been _derived_, not
forced; TAMER doesn't aim to be functional for the sake of it, since
idiomatic Rust achieves many of its benefits without the formalisms.

But, the formalisms do occasionally help, and this is one such
example.  There is other existing code that can be refactored to take
advantage of this style as well.

I do _not_ wish to pull an existing functional dependency into TAMER; I want
to keep these abstractions light, and eliminate them as necessary, as Rust
continues to integrate new features into its core.  I also want to be able
to modify the abstractions to suit our particular needs.  (This is _not_ a
general recommendation; it's particular to TAMER and to my experience.)

This implementation of `Functor` is one such example.  While it is modeled
after Haskell in that it provides `fmap`, the primitive here is instead
`map`, with `fmap` derived from it, since `map` allows for better use of
Rust idioms.  Furthermore, it's polymorphic over _trait_ type parameters,
not method, allowing for separate trait impls for different container types,
which can in turn be inferred by Rust and allow for some very concise
mapping; this is particularly important for TAMER because of the disciplined
use of newtypes.

For example, `foo.overwrite(span)` and `foo.overwrite(name)` are both
self-documenting, and better alternatives than, say, `foo.map_span(|_|
span)` and `foo.map_symbol(|_| name)`; the latter are perfectly clear in
what they do, but lack a layer of abstraction, and are verbose.  But the
clarity of the _new_ form does rely on either good naming conventions of
arguments, or explicit type annotations using turbofish notation if
necessary.

This will be implemented on core Rust types as appropriate and as
possible.  At the time of writing, we do not yet have trait specialization,
and there's too many soundness issues for me to be comfortable enabling it,
so that limits that we can do with something like, say, a generic `Result`,
while also allowing for specialized implementations based on newtypes.

DEV-13160

											
										
										
											2023-01-04 12:30:18 -05:00
+								}
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
-												tamer: f::Functor: New trait

This commit is purposefully coupled with changes that utilize it to
demonstrate that the need for this abstraction has been _derived_, not
forced; TAMER doesn't aim to be functional for the sake of it, since
idiomatic Rust achieves many of its benefits without the formalisms.

But, the formalisms do occasionally help, and this is one such
example.  There is other existing code that can be refactored to take
advantage of this style as well.

I do _not_ wish to pull an existing functional dependency into TAMER; I want
to keep these abstractions light, and eliminate them as necessary, as Rust
continues to integrate new features into its core.  I also want to be able
to modify the abstractions to suit our particular needs.  (This is _not_ a
general recommendation; it's particular to TAMER and to my experience.)

This implementation of `Functor` is one such example.  While it is modeled
after Haskell in that it provides `fmap`, the primitive here is instead
`map`, with `fmap` derived from it, since `map` allows for better use of
Rust idioms.  Furthermore, it's polymorphic over _trait_ type parameters,
not method, allowing for separate trait impls for different container types,
which can in turn be inferred by Rust and allow for some very concise
mapping; this is particularly important for TAMER because of the disciplined
use of newtypes.

For example, `foo.overwrite(span)` and `foo.overwrite(name)` are both
self-documenting, and better alternatives than, say, `foo.map_span(|_|
span)` and `foo.map_symbol(|_| name)`; the latter are perfectly clear in
what they do, but lack a layer of abstraction, and are verbose.  But the
clarity of the _new_ form does rely on either good naming conventions of
arguments, or explicit type annotations using turbofish notation if
necessary.

This will be implemented on core Rust types as appropriate and as
possible.  At the time of writing, we do not yet have trait specialization,
and there's too many soundness issues for me to be comfortable enabling it,
so that limits that we can do with something like, say, a generic `Result`,
while also allowing for specialized implementations based on newtypes.

DEV-13160

											
										
										
											2023-01-04 12:30:18 -05:00
+								impl Functor<SymbolId> for Nir {
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
+								    /// Map over a token's [`SymbolId`].
 								    ///
 								    /// This allows modifying a token's [`SymbolId`] while retaining the
 								    ///   associated [`Span`].
 								    /// This is the desired behavior when modifying the source code the user
 								    ///   entered,
 								    ///     since diagnostic messages will reference the original source
 								    ///     location that the modification was derived from.
 								    ///
 								    /// If a token does not contain a symbol,
 								    ///   this returns the token unchanged.
 								    ///
 								    /// See also [`Nir::symbol`] if you only wish to retrieve the symbol
 								    ///   rather than map over it.
-												tamer: f::Functor: New trait

This commit is purposefully coupled with changes that utilize it to
demonstrate that the need for this abstraction has been _derived_, not
forced; TAMER doesn't aim to be functional for the sake of it, since
idiomatic Rust achieves many of its benefits without the formalisms.

But, the formalisms do occasionally help, and this is one such
example.  There is other existing code that can be refactored to take
advantage of this style as well.

I do _not_ wish to pull an existing functional dependency into TAMER; I want
to keep these abstractions light, and eliminate them as necessary, as Rust
continues to integrate new features into its core.  I also want to be able
to modify the abstractions to suit our particular needs.  (This is _not_ a
general recommendation; it's particular to TAMER and to my experience.)

This implementation of `Functor` is one such example.  While it is modeled
after Haskell in that it provides `fmap`, the primitive here is instead
`map`, with `fmap` derived from it, since `map` allows for better use of
Rust idioms.  Furthermore, it's polymorphic over _trait_ type parameters,
not method, allowing for separate trait impls for different container types,
which can in turn be inferred by Rust and allow for some very concise
mapping; this is particularly important for TAMER because of the disciplined
use of newtypes.

For example, `foo.overwrite(span)` and `foo.overwrite(name)` are both
self-documenting, and better alternatives than, say, `foo.map_span(|_|
span)` and `foo.map_symbol(|_| name)`; the latter are perfectly clear in
what they do, but lack a layer of abstraction, and are verbose.  But the
clarity of the _new_ form does rely on either good naming conventions of
arguments, or explicit type annotations using turbofish notation if
necessary.

This will be implemented on core Rust types as appropriate and as
possible.  At the time of writing, we do not yet have trait specialization,
and there's too many soundness issues for me to be comfortable enabling it,
so that limits that we can do with something like, say, a generic `Result`,
while also allowing for specialized implementations based on newtypes.

DEV-13160

											
										
										
											2023-01-04 12:30:18 -05:00
+								    fn map(self, f: impl FnOnce(SymbolId) -> SymbolId) -> Self {
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
+								        use Nir::*;
 								        match self {
 								            Todo => self,
 								            TodoAttr(spair) => TodoAttr(spair.map(f)),
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								            Open(_, _) | Close(_, _) => self,
-												tamer: nir::interp: Integrate NIR interpolation into lowering pipeline

This is the culmination of all the recent work---the third attempt at trying
to integrate this.  It ended up much cleaner than what was originally going
to be done, but only after gutting portions of the system and changing my
approach to how NIR is parsed (WRT attributes).  See prior commits for more
information.

The final step is to fill the error branches with actual errors rather than
`todo!`s.

What a relief.

DEV-13156

											
										
										
											2022-12-05 16:32:00 -05:00
 								            BindIdent(spair) => BindIdent(spair.map(f)),
 								            Ref(spair) => Ref(spair.map(f)),
 								            Desc(spair) => Desc(spair.map(f)),
 								            Text(spair) => Text(spair.map(f)),
 								        }
 								    }
 								}
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								/// An object upon which other [`Nir`] tokens act.
 								#[derive(Debug, PartialEq, Eq)]
 								pub enum NirEntity {
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								    /// Package of identifiers.
 								    Package,
-												tamer: nir: Basic rate block translation

This commit is what I've been sitting on for testing some of the recent
changes; it is a very basic demonstration of lowering all the way down
from source XML files into the ASG.  This can be run on real files to
observe, beyond unit tests, how the system reacts.

Once this outputs data from the graph, we'll finally have tamec end-to-end
and can just keep filling the gaps.

I'm hoping to roll the desugaring process into NirToAir rather than having a
separate process as originally planned a couple of months back.

This also introduces the `wip-nir-to-air` feature flag.  Currently,
interpolation will cause a `Nir::BindIdent` to be emitted in blocks that
aren't yet emitting NIR, and so results in an invalid parse.

DEV-13159

											
										
										
											2023-01-23 16:30:25 -05:00
+								    /// Rate (verb) block,
 								    ///   representing a calculation with a scalar result.
 								    Rate,
-												tamer: Lower sum expressions

This was a fairly simple addition, since rate blocks already lower into sum
expressions; these are just non-identified.

This does emphasize that the nir::parse `ele_parse!` abstraction I spent so
much time on ended up not being a perfect fit, as it now has some
boilerplate after it was stripped of much of its capabilities some time ago.

Don't worry, `nir::air` and `asg::graph::xmli` will get cleaned up.

DEV-13708

											
										
										
											2023-02-22 23:45:23 -05:00
+								    /// Summation (Σ) expression.
 								    Sum,
-												tamer: NIR->xmli: Initial product expression

The element only, no attributes yet.

I'll keep forming boilerplate until abstraction points become obvious with
more variety; this is still pretty close to what was already supported.

DEV-13708

											
										
										
											2023-02-24 15:55:25 -05:00
+								    /// Product (Π) expression.
 								    Product,
-												tamer: NIR->xmli: Ceil, Floor expressions

Small break from templates for something easier.  I have COVID-19, so I'll
use that as my excuse for wanting to be more lazy.

The real reason is to see some more concrete progress and ensure that
patterns hold for simple expressions before further refactoring.

But, before I proceed with such refactoring, I really ought to approach
something that requires a NIR desugaring step, like case statements.

DEV-13708

											
										
										
											2023-03-09 22:28:26 -05:00
+								    /// Ceiling (⌈) expression.
 								    Ceil,
 								    /// Floor (⌊) expression.
 								    Floor,
-												tamer: Lower sum expressions

This was a fairly simple addition, since rate blocks already lower into sum
expressions; these are just non-identified.

This does emphasize that the nir::parse `ele_parse!` abstraction I spent so
much time on ended up not being a perfect fit, as it now has some
boilerplate after it was stripped of much of its capabilities some time ago.

Don't worry, `nir::air` and `asg::graph::xmli` will get cleaned up.

DEV-13708

											
										
										
											2023-02-22 23:45:23 -05:00
-												tamer: NIR->xmli: Initial classify, any, all support

Just as `rate` is a `sum`, `classify` is an `all` by default.  The `@any`
attribute will change that interpretation, though I only intend to recognize
that in parsing later on, not emit that in XMLI.

DEV-13708

											
										
										
											2023-02-24 23:01:02 -05:00
+								    // Classification.
 								    Classify,
 								    /// Conjunctive (∧) expression.
 								    All,
 								    /// Disjunctive (∨) expression.
 								    Any,
-												tamer: NIR->xmli template definition setup

This sets the stage for template parsing, and finally decides how we're
going to represent templates on the ASG.  This is going to start simple,
since my original plans for improving how templates are
handled (conceptually) is going to have to wait.

This is the last difficult object type to figure out, with respect to graph
representation and derivation, so I wanted to get it out of the way.

DEV-13708

											
										
										
											2023-02-28 15:31:49 -05:00
+								    /// Template.
 								    Tpl,
-												tamer: nir::NirEntity::TplParam: Optional name/value pair

This will be used for shorthand desugaring.

DEV-13708

											
										
										
											2023-03-21 15:26:17 -04:00
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								    /// Template application (long form).
 								    TplApply,
 								    /// Template parameter (long form).
 								    TplParam,
 								    /// Template application (shorthand).
 								    TplApplyShort(QName),
 								    /// Template parameter (shorthand).
-												tamer: nir::NirEntity::TplParam: Optional name/value pair

This will be used for shorthand desugaring.

DEV-13708

											
										
										
											2023-03-21 15:26:17 -04:00
+								    ///
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								    /// The non-`@`-padded name is the first of the pair and the value as
 								    ///   the second.
-												tamer: nir: Desugar shorthand template params and yield AIR

I had intended for this to be a full vertical slice initially, but AIR's
parser is going to need enough work that it'll muddy this patch a bit too
much.

This keeps the desugaring simple, which is what I was hoping for.

The next step is to load it into the graph and emit regenerated longhand
sources.

I also don't like how the namespace prefix is just being ignored for
shorthand param desugaring.  This is also the case in the XSLT-based
compiler, but this violates TAMER's principle that it should parse every bit
of information; nothing should be ignored.  If something does not contribute
useful information, then it is not a useful construct and ought to be
rejected.

DEV-13708

											
										
										
											2023-03-22 10:07:16 -04:00
+								    ///
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								    /// A shorthand param is implicitly closed and should not have a
-												tamer: nir: Desugar shorthand template params and yield AIR

I had intended for this to be a full vertical slice initially, but AIR's
parser is going to need enough work that it'll muddy this patch a bit too
much.

This keeps the desugaring simple, which is what I was hoping for.

The next step is to load it into the graph and emit regenerated longhand
sources.

I also don't like how the namespace prefix is just being ignored for
shorthand param desugaring.  This is also the case in the XSLT-based
compiler, but this violates TAMER's principle that it should parse every bit
of information; nothing should be ignored.  If something does not contribute
useful information, then it is not a useful construct and ought to be
rejected.

DEV-13708

											
										
										
											2023-03-22 10:07:16 -04:00
+								    ///   matching [`Nir::Close`] token.
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								    TplParamShort(SPair, SPair),
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								}
-												tamer: nir: Basic rate block translation

This commit is what I've been sitting on for testing some of the recent
changes; it is a very basic demonstration of lowering all the way down
from source XML files into the ASG.  This can be run on real files to
observe, beyond unit tests, how the system reacts.

Once this outputs data from the graph, we'll finally have tamec end-to-end
and can just keep filling the gaps.

I'm hoping to roll the desugaring process into NirToAir rather than having a
separate process as originally planned a couple of months back.

This also introduces the `wip-nir-to-air` feature flag.  Currently,
interpolation will cause a `Nir::BindIdent` to be emitted in blocks that
aren't yet emitting NIR, and so results in an invalid parse.

DEV-13159

											
										
										
											2023-01-23 16:30:25 -05:00
+								impl NirEntity {
 								    pub fn open<S: Into<Span>>(self, span: S) -> Nir {
 								        Nir::Open(self, span.into())
 								    }
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
 								    pub fn close<S: Into<Span>>(self, span: S) -> Nir {
 								        Nir::Close(self, span.into())
 								    }
-												tamer: nir: Basic rate block translation

This commit is what I've been sitting on for testing some of the recent
changes; it is a very basic demonstration of lowering all the way down
from source XML files into the ASG.  This can be run on real files to
observe, beyond unit tests, how the system reacts.

Once this outputs data from the graph, we'll finally have tamec end-to-end
and can just keep filling the gaps.

I'm hoping to roll the desugaring process into NirToAir rather than having a
separate process as originally planned a couple of months back.

This also introduces the `wip-nir-to-air` feature flag.  Currently,
interpolation will cause a `Nir::BindIdent` to be emitted in blocks that
aren't yet emitting NIR, and so results in an invalid parse.

DEV-13159

											
										
										
											2023-01-23 16:30:25 -05:00
+								}
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								impl Display for NirEntity {
 								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 								        use NirEntity::*;
 								        match self {
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								            Package => write!(f, "package"),
-												tamer: NIR->xmli: Initial classify, any, all support

Just as `rate` is a `sum`, `classify` is an `all` by default.  The `@any`
attribute will change that interpretation, though I only intend to recognize
that in parsing later on, not emit that in XMLI.

DEV-13708

											
										
										
											2023-02-24 23:01:02 -05:00
-												tamer: nir: Basic rate block translation

This commit is what I've been sitting on for testing some of the recent
changes; it is a very basic demonstration of lowering all the way down
from source XML files into the ASG.  This can be run on real files to
observe, beyond unit tests, how the system reacts.

Once this outputs data from the graph, we'll finally have tamec end-to-end
and can just keep filling the gaps.

I'm hoping to roll the desugaring process into NirToAir rather than having a
separate process as originally planned a couple of months back.

This also introduces the `wip-nir-to-air` feature flag.  Currently,
interpolation will cause a `Nir::BindIdent` to be emitted in blocks that
aren't yet emitting NIR, and so results in an invalid parse.

DEV-13159

											
										
										
											2023-01-23 16:30:25 -05:00
+								            Rate => write!(f, "rate block"),
-												tamer: NIR->xmli: Initial product expression

The element only, no attributes yet.

I'll keep forming boilerplate until abstraction points become obvious with
more variety; this is still pretty close to what was already supported.

DEV-13708

											
										
										
											2023-02-24 15:55:25 -05:00
+								            Sum => write!(f, "sum (∑) expression"),
 								            Product => write!(f, "product (Π) expression"),
-												tamer: NIR->xmli: Ceil, Floor expressions

Small break from templates for something easier.  I have COVID-19, so I'll
use that as my excuse for wanting to be more lazy.

The real reason is to see some more concrete progress and ensure that
patterns hold for simple expressions before further refactoring.

But, before I proceed with such refactoring, I really ought to approach
something that requires a NIR desugaring step, like case statements.

DEV-13708

											
										
										
											2023-03-09 22:28:26 -05:00
+								            Ceil => write!(f, "ceiling (⌈) expression"),
 								            Floor => write!(f, "floor (⌊) expression"),
-												tamer: NIR->xmli: Initial classify, any, all support

Just as `rate` is a `sum`, `classify` is an `all` by default.  The `@any`
attribute will change that interpretation, though I only intend to recognize
that in parsing later on, not emit that in XMLI.

DEV-13708

											
										
										
											2023-02-24 23:01:02 -05:00
 								            Classify => write!(f, "classification"),
 								            All => write!(f, "conjunctive (∧) expression"),
 								            Any => write!(f, "disjunctive (∨) expression"),
-												tamer: NIR->xmli template definition setup

This sets the stage for template parsing, and finally decides how we're
going to represent templates on the ASG.  This is going to start simple,
since my original plans for improving how templates are
handled (conceptually) is going to have to wait.

This is the last difficult object type to figure out, with respect to graph
representation and derivation, so I wanted to get it out of the way.

DEV-13708

											
										
										
											2023-02-28 15:31:49 -05:00
+								            Tpl => write!(f, "template"),
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								            TplParam => write!(f, "template param (metavariable)"),
 								            TplParamShort(name, val) => write!(
-												tamer: nir::NirEntity::TplParam: Optional name/value pair

This will be used for shorthand desugaring.

DEV-13708

											
										
										
											2023-03-21 15:26:17 -04:00
+								                f,
 								                "shorthand template param key {} with value {}",
 								                TtQuote::wrap(name),
 								                TtQuote::wrap(val),
 								            ),
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								            TplApply => {
-												tamer: Very basic support for template application NIR -> xmli

This this a big change that's difficult to break up, and I don't have the
energy after it.

This introduces nullary template application, short- and long-form.  Note
that a body of the short form is a `@values@` argument, so that's not
supported yet.

This continues to formalize the idea of what "template application" and
"template expansion" mean in TAMER.  It makes a separate `TplApply`
unnecessary, because now application is simply a reference to a
template.  Expansion and application are one and the same: when a template
expands, it'll re-bind metavariables to the parent context.  So in a
template context, this amounts to application.

But applying a closed template will have nothing to bind, and so is
equivalent to expansion.  And since `Meta` objects are not valid outside of
a `Tpl` context, applying a non-closed template outside of another template
will be invalid.

So we get all of this with a single primitive (getting the "value" of a
template).

The expansion is conceptually like `,@` in Lisp, where we're splicing trees.

It's a mess in some spots, but I want to get this committed before I do a
little bit of cleanup.

											
										
										
											2023-03-17 10:25:56 -04:00
+								                write!(f, "full template application and expansion")
 								            }
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								            TplApplyShort(qname) => write!(
-												tamer: Very basic support for template application NIR -> xmli

This this a big change that's difficult to break up, and I don't have the
energy after it.

This introduces nullary template application, short- and long-form.  Note
that a body of the short form is a `@values@` argument, so that's not
supported yet.

This continues to formalize the idea of what "template application" and
"template expansion" mean in TAMER.  It makes a separate `TplApply`
unnecessary, because now application is simply a reference to a
template.  Expansion and application are one and the same: when a template
expands, it'll re-bind metavariables to the parent context.  So in a
template context, this amounts to application.

But applying a closed template will have nothing to bind, and so is
equivalent to expansion.  And since `Meta` objects are not valid outside of
a `Tpl` context, applying a non-closed template outside of another template
will be invalid.

So we get all of this with a single primitive (getting the "value" of a
template).

The expansion is conceptually like `,@` in Lisp, where we're splicing trees.

It's a mess in some spots, but I want to get this committed before I do a
little bit of cleanup.

											
										
										
											2023-03-17 10:25:56 -04:00
+								                f,
-												tamer: nir: Apply*Short variants

This adds explicit variants for shorthand template application.  This is
less cryptic, and we'll be able to check for the close directly during
desugaring.

DEV-13708

											
										
										
											2023-03-23 22:08:21 -04:00
+								                "full shorthand template application and expansion of {}",
-												tamer: Very basic support for template application NIR -> xmli

This this a big change that's difficult to break up, and I don't have the
energy after it.

This introduces nullary template application, short- and long-form.  Note
that a body of the short form is a `@values@` argument, so that's not
supported yet.

This continues to formalize the idea of what "template application" and
"template expansion" mean in TAMER.  It makes a separate `TplApply`
unnecessary, because now application is simply a reference to a
template.  Expansion and application are one and the same: when a template
expands, it'll re-bind metavariables to the parent context.  So in a
template context, this amounts to application.

But applying a closed template will have nothing to bind, and so is
equivalent to expansion.  And since `Meta` objects are not valid outside of
a `Tpl` context, applying a non-closed template outside of another template
will be invalid.

So we get all of this with a single primitive (getting the "value" of a
template).

The expansion is conceptually like `,@` in Lisp, where we're splicing trees.

It's a mess in some spots, but I want to get this committed before I do a
little bit of cleanup.

											
										
										
											2023-03-17 10:25:56 -04:00
+								                TtQuote::wrap(qname.local_name())
 								            ),
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								        }
 								    }
 								}
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl Token for Nir {
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    fn ir_name() -> &'static str {
-												tamer: Introduce desugaring operation for shorthand template application

This moves translation from NirToAir into TplShortDesugar, and changes the
output from AIR to NIR.

This is going to be much easier to reason about as a desugaring
operation (and indeed that's always how TAME has implemented it, in XSLT);
this keeps the complexity isolated.

Ideally, NirToAir wouldn't even accept tokens that it can't handle, but
that's going to take quite a bit more work and I don't have the time right
now.  Instead, we'll fail at runtime with some hopefully-useful
information.  It shouldn't actually happen in practice.

DEV-13708

											
										
										
											2023-03-21 14:38:07 -04:00
+								        "NIR"
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    }
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								    /// Identifying span of a token.
 								    ///
 								    /// An _identifying span_ is a selection of one of the (potentially
 								    ///   many) spans associated with a token that is most likely to be
 								    ///   associated with the identity of that token.
-												tamer: nir: Sugared and plain flavors

This introduces the concept of sugared NIR and provides the boilerplate for
a desugaring pass.  The earlier commits dealing with cleaning up the
lowering pipeline were to support this work, in particular to ensure that
reporting and recovery properly applied to this lowering operation without
adding a ton more boilerplate.

DEV-13158

											
										
										
											2022-10-19 10:00:08 -04:00
+								    fn span(&self) -> Span {
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								        use Nir::*;
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
 								        match self {
 								            Todo => UNKNOWN_SPAN,
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								            TodoAttr(spair) => spair.span(),
 								            Open(_, span) => *span,
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								            Close(_, span) => *span,
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
 								            BindIdent(spair) | Ref(spair) | Desc(spair) | Text(spair) => {
 								                spair.span()
 								            }
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
+								        }
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    }
 								}
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl Object for Nir {}
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl Display for Nir {
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
+								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								        use Nir::*;
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
 								        match self {
 								            Todo => write!(f, "TODO"),
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								            TodoAttr(spair) => write!(f, "TODO Attr {spair}"),
 								            Open(entity, _) => write!(f, "open {entity} entity"),
-												tamer: nir, asg: Introduce package to ASG

This does not yet create edges from identifiers to the package; just getting
this introduced was quite a bit of work, so I want to get this committed.

Note that this also includes a change to NIR so that `Close` contains the
entity so that we can pattern-match for AIR transformations rather than
retaining yet another stack with checks that are already going to be done by
AIR.  This makes NIR stand less on its own from a self-validation point, but
that's okay, given that it's the language that the user entered and,
conceptually, they could enter invalid NIR the same as they enter invalid
XML (e.g. from a REPL).

In _practice_, of course, NIR is lowered from XML and the schema is enforced
during that lowering and so the validation does exist as part of that
parsing.

These concessions speak more to the verbosity of the language (Rust) than
anything.

DEV-13159

											
										
										
											2023-01-30 16:51:24 -05:00
+								            Close(entity, _) => write!(f, "close {entity} entity"),
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								            BindIdent(spair) => {
 								                write!(f, "bind identifier {}", TtQuote::wrap(spair))
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								            }
-												tamer: nir::interp: Expand into new NIR tokens

This begins to introduce the new, simplified NIR by creating tokens that
serve as the expansion for interpolation.  Admittedly, `Text` may change, as
it doesn't really represent `<text>foo</text>`, and I'd rather that node
change as well, though I'll probably want to maintain some sort of BC.

DEV-13156

											
										
										
											2022-12-02 00:15:31 -05:00
+								            Ref(spair) => write!(f, "ref {}", TtQuote::wrap(spair)),
 								            // TODO: TtQuote doesn't yet escape quotes at the time of writing!
 								            Desc(spair) => write!(f, "description {}", TtQuote::wrap(spair)),
 								            // TODO: Not yet safe to output arbitrary text;
 								            //   need to determine how to handle newlines and other types of
 								            //   output.
 								            Text(_) => write!(f, "text"),
-												tamer: nir: Remove token `todo!`s

Just preparing to actually define NIR itself.  The _grammar_ has been
represented (derived from our internal systems, using them as a test case),
but the IR itself has not yet received a definition.

DEV-7145

											
										
										
											2022-09-19 16:21:41 -04:00
+								        }
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
+								    }
 								}
-												tamer: Integrate clippy

This invokes clippy as part of `make check` now, which I had previously
avoided doing (I'll elaborate on that below).

This commit represents the changes needed to resolve all the warnings
presented by clippy.  Many changes have been made where I find the lints to
be useful and agreeable, but there are a number of lints, rationalized in
`src/lib.rs`, where I found the lints to be disagreeable.  I have provided
rationale, primarily for those wondering why I desire to deviate from the
default lints, though it does feel backward to rationalize why certain lints
ought to be applied (the reverse should be true).

With that said, this did catch some legitimage issues, and it was also
helpful in getting some older code up-to-date with new language additions
that perhaps I used in new code but hadn't gone back and updated old code
for.  My goal was to get clippy working without errors so that, in the
future, when others get into TAMER and are still getting used to Rust,
clippy is able to help guide them in the right direction.

One of the reasons I went without clippy for so long (though I admittedly
forgot I wasn't using it for a period of time) was because there were a
number of suggestions that I found disagreeable, and I didn't take the time
to go through them and determine what I wanted to follow.  Furthermore, it
was hard to make that judgment when I was new to the language and lacked
the necessary experience to do so.

One thing I would like to comment further on is the use of `format!` with
`expect`, which is also what the diagnostic system convenience methods
do (which clippy does not cover).  Because of all the work I've done trying
to understand Rust and looking at disassemblies and seeing what it
optimizes, I falsely assumed that Rust would convert such things into
conditionals in my otherwise-pure code...but apparently that's not the case,
when `format!` is involved.

I noticed that, after making the suggested fix with `get_ident`, Rust
proceeded to then inline it into each call site and then apply further
optimizations.  It was also previously invoking the thread lock (for the
interner) unconditionally and invoking the `Display` implementation.  That
is not at all what I intended for, despite knowing the eager semantics of
function calls in Rust.

Anyway, possibly more to come on that, I'm just tired of typing and need to
move on.  I'll be returning to investigate further diagnostic messages soon.

											
										
										
											2023-01-12 10:46:48 -05:00
+								impl<E> From<Nir> for Result<Nir, E> {
 								    fn from(val: Nir) -> Self {
 								        Ok(val)
-												tamer: xir::parse::ele::ele_parse!: Integrate `attr_parse_stream!`

This handles the bulk of the integration of the new `attr_parse_stream!` as
a replacement for `attr_parse!`, which moves from aggregate attribute
objects to a stream of attribute-derived tokens.  Rationale for this change
is in the preceding commit messages.

The first striking change here is how it affects the test cases: nearly all
`Incomplete`s are removed.  Note that the parser has an existing
optimization whereby `Incomplete` with lookahead causes immediate recursion
within `Parser`, since those situations are used only for control flow and
to keep recursion out of `ParseState`s.

Next: this removes types from `nir::parse`'s grammar for attributes.  The
types will instead be derived from NIR tokens later in the lowering
pipeline.  This simplifies NIR considerably, since adding types into the mix
at this point was taking an already really complex lowering phase and making
it ever more difficult to reason about and get everything working together
the way that I needed.

Because of `attr_parse_stream!`, there are no more required attribute
checks.  Those will be handled later in the lowering pipeline, if they're
actually needed in context, with possibly one exception: namespace
declarations.  Those are really part of the document and they ought to be
handled _earlier_ in the pipeline; I'll do that at some point.  It's not
required for compilation; it's just required to maintain compliance with the
XML spec.

We also lose checks for duplicate attributes.  This is also something that
ought to be handled at the document level, and so earlier in the pipeline,
since XML cares, not us---if we get a duplicate attribute that results in an
extra NIR token, then the next parser will error out, since it has to check
for those things anyway.

A bunch of cleanup and simplification is still needed; I want to get the
initial integration committed first.  It's a shame I'm getting rid of so
much work, but this is the right approach, and results in a much simpler
system.

DEV-13346

											
										
										
											2022-11-30 23:52:18 -05:00
+								    }
 								}
-												tamer: nir: Detect interpolated values

This simply detects whether a value will need to be further parsed for
interpolation; it does not yet perform the parsing itself, which will happen
during desugaring.

This introduces a performance regression, for an interesting reason.  I
found that introducing a single new variant to `SugaredNir` (with a
`(SymbolId, Span)` pair), was causing the width of the `NirParseState` type
to increase just enough to cause Rust to be unable to optimize away a
significant number of memcpys related to `Parser` moves, and consequently
reducing performance by nearly 50% for `tamec`.  Yikes.

I suspected this would be a problem, and indeed have tried in all other
cases to avoid aggregation until the ASG---the problem is that I had wanted
to aggregate attributes for NIR so that the IR could actually make some
progress toward simplifying the stream (and therefore working with the
data), and be able to validate against a grammar defined in a single
place.  The problem is that the `NirParseState` type contains a sum type for
every attribute parser, and is therefore as wide as the largest one.  That
is what Rust is having trouble optimizing memcpy away for.

Indeed, reducing the number of attributes improves the situation
drastically.  However, it doesn't make it go away entirely.

If you look at a callgrind profile for `tameld` (or a dissassembly), you'll
notice that I put quite a bit of effort into ensuring that the hot code path
for the lowering pipeline contains _no_ memcpys for the parsers.  But that
is not the case with `tamec`---I had to move on.  But I do still have the
same escape hatch that I introduced for `tameld`, which is the mutable
`Context`.

It seems that may be the solution there too, but I want to get a bit further
along first to see how these data end up propagating before I go through
that somewhat significant effort.

DEV-13156

											
										
										
											2022-11-01 14:30:34 -04:00
+								/// Tag representing the type of a NIR value.
 								///
 								/// NIR values originate from attributes,
 								///   which are refined into types as enough information becomes available.
 								/// Value parsing must be deferred if a value requires desugaring or
 								///   metavalue expansion.
 								#[derive(Debug, PartialEq, Eq)]
 								#[repr(u8)]
 								pub enum NirSymbolTy {
 								    AnyIdent,
 								    BooleanLiteral,
 								    ClassIdent,
 								    ClassIdentList,
 								    ConstIdent,
 								    DescLiteral,
 								    Dim,
 								    DynNodeLiteral,
 								    FuncIdent,
 								    IdentDtype,
 								    IdentType,
 								    MapTransformLiteral,
 								    NumLiteral,
 								    ParamDefault,
 								    ParamIdent,
 								    ParamName,
 								    ParamType,
 								    PkgPath,
 								    ShortDimNumLiteral,
 								    StringLiteral,
 								    SymbolTableKey,
 								    TexMathLiteral,
 								    Title,
 								    TplMetaIdent,
-												tamer: nir::NirSymbolTy (Display): Add impl

Add initial descriptions and consolodate some of the types.  There'll be
more to come; this is just to get `Display` derives working for types
that'll be using it.  I'd like to see where this description manifests
itself before I decide how user-friendly I'd like it to be.

DEV-13156

											
										
										
											2022-11-01 16:23:51 -04:00
+								    TplIdent,
-												tamer: nir: Detect interpolated values

This simply detects whether a value will need to be further parsed for
interpolation; it does not yet perform the parsing itself, which will happen
during desugaring.

This introduces a performance regression, for an interesting reason.  I
found that introducing a single new variant to `SugaredNir` (with a
`(SymbolId, Span)` pair), was causing the width of the `NirParseState` type
to increase just enough to cause Rust to be unable to optimize away a
significant number of memcpys related to `Parser` moves, and consequently
reducing performance by nearly 50% for `tamec`.  Yikes.

I suspected this would be a problem, and indeed have tried in all other
cases to avoid aggregation until the ASG---the problem is that I had wanted
to aggregate attributes for NIR so that the IR could actually make some
progress toward simplifying the stream (and therefore working with the
data), and be able to validate against a grammar defined in a single
place.  The problem is that the `NirParseState` type contains a sum type for
every attribute parser, and is therefore as wide as the largest one.  That
is what Rust is having trouble optimizing memcpy away for.

Indeed, reducing the number of attributes improves the situation
drastically.  However, it doesn't make it go away entirely.

If you look at a callgrind profile for `tameld` (or a dissassembly), you'll
notice that I put quite a bit of effort into ensuring that the hot code path
for the lowering pipeline contains _no_ memcpys for the parsers.  But that
is not the case with `tamec`---I had to move on.  But I do still have the
same escape hatch that I introduced for `tameld`, which is the mutable
`Context`.

It seems that may be the solution there too, but I want to get a bit further
along first to see how these data end up propagating before I go through
that somewhat significant effort.

DEV-13156

											
										
										
											2022-11-01 14:30:34 -04:00
+								    TplParamIdent,
 								    TypeIdent,
 								    ValueIdent,
 								}
-												tamer: nir::NirSymbolTy (Display): Add impl

Add initial descriptions and consolodate some of the types.  There'll be
more to come; this is just to get `Display` derives working for types
that'll be using it.  I'd like to see where this description manifests
itself before I decide how user-friendly I'd like it to be.

DEV-13156

											
										
										
											2022-11-01 16:23:51 -04:00
+								impl Display for NirSymbolTy {
 								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 								        use NirSymbolTy::*;
 								        match self {
 								            AnyIdent => write!(f, "any identifier"),
 								            BooleanLiteral => write!(
 								                f,
 								                "boolean literal {fmt_true} or {fmt_false}",
 								                fmt_true = TtQuote::wrap("true"),
 								                fmt_false = TtQuote::wrap("false"),
 								            ),
 								            ClassIdent => write!(f, "classification identifier"),
 								            ClassIdentList => {
 								                write!(f, "space-delimited list of classification identifiers")
 								            }
 								            ConstIdent => write!(f, "constant identifier"),
 								            DescLiteral => write!(f, "description literal"),
 								            Dim => write!(f, "dimension declaration"),
 								            DynNodeLiteral => write!(f, "dynamic node literal"),
 								            FuncIdent => write!(f, "function identifier"),
 								            IdentDtype => write!(f, "identifier primitive datatype"),
 								            IdentType => write!(f, "identifier type"),
 								            MapTransformLiteral => write!(f, "map transformation literal"),
 								            NumLiteral => write!(f, "numeric literal"),
 								            ParamDefault => write!(f, "param default"),
 								            ParamIdent => write!(f, "param identifier"),
 								            ParamName => write!(f, "param name"),
 								            ParamType => write!(f, "param type"),
 								            PkgPath => write!(f, "package path"),
 								            ShortDimNumLiteral => {
 								                write!(f, "short-hand dimensionalized numeric literal")
 								            }
 								            StringLiteral => write!(f, "string literal"),
 								            SymbolTableKey => write!(f, "symbol table key name"),
 								            TexMathLiteral => write!(f, "TeX math literal"),
 								            Title => write!(f, "title"),
 								            TplMetaIdent => write!(f, "template metadata identifier"),
 								            TplIdent => write!(f, "template name"),
 								            TplParamIdent => write!(f, "template param identifier"),
 								            TypeIdent => write!(f, "type identifier"),
 								            ValueIdent => write!(f, "value identifier"),
 								        }
 								    }
 								}
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								/// A ([`SymbolId`], [`Span`]) pair representing an
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								///   attribute value that may need to be interpreted within the context of
 								///   a template application.
-												tamer: nir: Detect interpolated values

This simply detects whether a value will need to be further parsed for
interpolation; it does not yet perform the parsing itself, which will happen
during desugaring.

This introduces a performance regression, for an interesting reason.  I
found that introducing a single new variant to `SugaredNir` (with a
`(SymbolId, Span)` pair), was causing the width of the `NirParseState` type
to increase just enough to cause Rust to be unable to optimize away a
significant number of memcpys related to `Parser` moves, and consequently
reducing performance by nearly 50% for `tamec`.  Yikes.

I suspected this would be a problem, and indeed have tried in all other
cases to avoid aggregation until the ASG---the problem is that I had wanted
to aggregate attributes for NIR so that the IR could actually make some
progress toward simplifying the stream (and therefore working with the
data), and be able to validate against a grammar defined in a single
place.  The problem is that the `NirParseState` type contains a sum type for
every attribute parser, and is therefore as wide as the largest one.  That
is what Rust is having trouble optimizing memcpy away for.

Indeed, reducing the number of attributes improves the situation
drastically.  However, it doesn't make it go away entirely.

If you look at a callgrind profile for `tameld` (or a dissassembly), you'll
notice that I put quite a bit of effort into ensuring that the hot code path
for the lowering pipeline contains _no_ memcpys for the parsers.  But that
is not the case with `tamec`---I had to move on.  But I do still have the
same escape hatch that I introduced for `tameld`, which is the mutable
`Context`.

It seems that may be the solution there too, but I want to get a bit further
along first to see how these data end up propagating before I go through
that somewhat significant effort.

DEV-13156

											
										
										
											2022-11-01 14:30:34 -04:00
+								///
 								/// _This object must be kept small_,
 								///   since it is used in objects that aggregate portions of the token
 								///   stream,
 								///     which must persist in memory for a short period of time,
 								///     and therefore cannot be optimized away as other portions of the IR.
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								#[derive(Debug, PartialEq, Eq)]
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								pub struct NirSymbol<const TY: NirSymbolTy>(SymbolId, Span);
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl<const TY: NirSymbolTy> Token for NirSymbol<TY> {
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								    fn ir_name() -> &'static str {
 								        // TODO: Include type?
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								        "NIR Symbol"
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								    }
 								    fn span(&self) -> Span {
 								        match self {
 								            Self(_, span) => *span,
 								        }
 								    }
 								}
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl<const TY: NirSymbolTy> Display for NirSymbol<TY> {
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 								        match self {
-												tamer: xir::parse::attrstream: Streaming attribute parser

As I talked about in the previous commit, this is going to be the
replacement for the aggreagte `attr_parse!`; the next commit will integrate
it into `ele_parse!` so that I can begin to remove the old one.

It is disappointing, since I did put a bit of work into this and I think the
end result was pretty neat, even if was never fully utilized.  But, this
simplifies things significantly; no use in maintaining features that serve
no purpose but to confound people.

DEV-13346

											
										
										
											2022-11-29 11:42:19 -05:00
+								            Self(sym, _span) => {
 								                write!(f, "{TY} {fmt_sym}", fmt_sym = TtQuote::wrap(sym),)
 								            }
-												tamer: nir::desugar: Initial interpolation desugaring

This demonstrates how desugaring of interpolated strings will work, testing
one of the happy paths.  The remaining work to be done is largely
refactoring; handling some other cases; and errors.  Each of those items are
marked with `todo!`s.

I'm pleased with how this is turning out, and I'm excited to see diagnostic
reporting within the specification string using the derived spans once I get
a bit further along; this robust system is going to be much more helpful to
developers than the existing system in XSLT.

This also eliminates the ~50% performance degredation mentioned in a recent
commit by eliminating the SugaredNirSymbol enum and replacing it with a
newtype; this is a much better approach, though it doesn't change that I do
need to eventually address the excessive `memcpy`s on hot code paths.

DEV-13156

											
										
										
											2022-11-05 00:08:50 -04:00
+								        }
 								    }
 								}
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								impl<const TY: NirSymbolTy> From<(SymbolId, Span)> for NirSymbol<TY> {
 								    fn from((val, span): (SymbolId, Span)) -> Self {
 								        Self(val, span)
 								    }
 								}
 								impl<const TY: NirSymbolTy> From<Attr> for NirSymbol<TY> {
 								    fn from(attr: Attr) -> Self {
 								        match attr {
 								            Attr(_, val, AttrSpan(_, vspan)) => (val, vspan).into(),
 								        }
 								    }
 								}
-												tamer: nir: Detect interpolated values

This simply detects whether a value will need to be further parsed for
interpolation; it does not yet perform the parsing itself, which will happen
during desugaring.

This introduces a performance regression, for an interesting reason.  I
found that introducing a single new variant to `SugaredNir` (with a
`(SymbolId, Span)` pair), was causing the width of the `NirParseState` type
to increase just enough to cause Rust to be unable to optimize away a
significant number of memcpys related to `Parser` moves, and consequently
reducing performance by nearly 50% for `tamec`.  Yikes.

I suspected this would be a problem, and indeed have tried in all other
cases to avoid aggregation until the ASG---the problem is that I had wanted
to aggregate attributes for NIR so that the IR could actually make some
progress toward simplifying the stream (and therefore working with the
data), and be able to validate against a grammar defined in a single
place.  The problem is that the `NirParseState` type contains a sum type for
every attribute parser, and is therefore as wide as the largest one.  That
is what Rust is having trouble optimizing memcpy away for.

Indeed, reducing the number of attributes improves the situation
drastically.  However, it doesn't make it go away entirely.

If you look at a callgrind profile for `tameld` (or a dissassembly), you'll
notice that I put quite a bit of effort into ensuring that the hot code path
for the lowering pipeline contains _no_ memcpys for the parsers.  But that
is not the case with `tamec`---I had to move on.  But I do still have the
same escape hatch that I introduced for `tameld`, which is the mutable
`Context`.

It seems that may be the solution there too, but I want to get a bit further
along first to see how these data end up propagating before I go through
that somewhat significant effort.

DEV-13156

											
										
										
											2022-11-01 14:30:34 -04:00
+								// Force developer to be conscious of any changes in size;
-												tamer: NIR re-simplification

Alright, this has been a rather tortured experience.  The previous commit
began to state what is going on.

This is reversing a lot of prior work, with the benefit of
hindsight.  Little bit of history, for the people who will probably never
read this, but who knows:

As noted at the top of NIR, I've long wanted a very simple set of general
primitives where all desugaring is done by the template system---TAME is a
metalanguage after all.  Therefore, I never intended on having any explicit
desugaring operations.

But I didn't have time to augment the template system to support parsing on
attribute strings (nor am I sure if I want to do such a thing), so it became
clear that interpolation would be a pass in the compiler.  Which led me to
the idea of a desugaring pass.

That in turn spiraled into representing the status of whether NIR was
desugared, and separating primitives, etc, which lead to a lot of additional
complexity.  The idea was to have a Sugared and Plan NIR, and further within
them have symbols that have latent types---if they require interpolation,
then those types would be deferred until after template expansion.

The obvious problem there is that now:

  1. NIR has the complexity of various types; and
  2. Types were tightly coupled with NIR and how it was defined in terms of
     XML destructuring.

The first attempt at this didn't go well: it was clear that the symbol types
would make mapping from Sugared to Plain NIR very complicated.  Further,
since NIR had any number of symbols per Sugared NIR token, interpolation was
a pain in the ass.

So that lead to the idea of interpolating at the _attribute_ level.  That
seemed to be going well at first, until I realized that the token stream of
the attribute parser does not match that of the element parser, and so that
general solution fell apart.  It wouldn't have been great anyway, since then
interpolation was _also_ coupled to the destructuring of the document.

Another goal of mine has been to decouple TAME from XML.  Not because I want
to move away from XML (if I did, I'd want S-expressions, not YAML, but I
don't think the team would go for that).  This decoupling would allow the
use of a subset of the syntax of TAME in other places, like CSVMs and YAML
test cases, for example, if appropriate.

This approach makes sense: the grammar of TAME isn't XML, it's _embedded
within_ XML.  The XML layer has to be stripped to expose it.

And so that's what NIR is now evolving into---the stripped, bare
repsentation of TAME's language.  That also has other benefits too down the
line, like a REPL where you can use any number of syntaxes.  I intend for
NIR to be stack-based, which I'd find to be intuitive for manipulating and
querying packages, but it could have any number of grammars, including
Prolog-like for expressing Horn clauses and querying with a
Prolog/Datalog-like syntax.  But that's for the future...

The next issue is that of attribute types.  If we have a better language for
NIR, then the types can be associated with the NIR tokens, rather than
having to associate each symbol with raw type data, which doesn't make a
whole lot of sense.  That also allows for AIR to better infer types and
determine what they ought to be, and further makes checking types after
template application natural, since it's not part of NIR at all.  It also
means the template system can naturally apply to any sources.

Now, if we take that final step further, and make attributes streaming
instead of aggregating, we're back to a streaming pipeline where all
aggregation takes place on the ASG (which also resolves the memcpy concerns
worked around previously, also further simplifying `ele_parse` again, though
it sucks that I wasted that time).  And, without the symbol types getting
in the way, since now NIR has types more fundamentally associated with
tokens, we're able to interpolate on a token stream using simple SPairs,
like I always hoped (and reverted back to in the previous commit).

Oh, and what about that desugaring pass?  There's the issue of how to
represent such a thing in the type system---ideally we'd know statically
that desugaring always lowers into a more primitive NIR that reduces the
mapping that needs to be done to AIR.  But that adds complexity, as
mentioned above.  The alternative is to just use the templat system, as I
originally wanted to, and resolve shortcomings by augmenting the template
system to be able to handle it.  That not only keeps NIR and the compiler
much simpler, but exposes more powerful tools to developers via TAME's
metalanguage, if such a thing is appropriate.

Anyway, this creates a system that's far more intuitive, and far
simpler.  It does kick the can to AIR, but that's okay, since it's also
better positioned to deal with it.

Everything I wrote above is a thought dump and has not been proof-read, so
good luck!  And lets hope this finally works out...it's actually feeling
good this time.  The journey was necessary to discover and justify what came
out of it---everything I'm stripping away was like a cocoon, and within it
is a more beautiful and more elegant TAME.

DEV-13346

											
										
										
											2022-11-28 13:35:10 -05:00
+								//   see `NirSymbol` docs for more information.
 								assert_eq_size!(NirSymbol<{ NirSymbolTy::AnyIdent }>, (SymbolId, Span));
-												tamer: Introduce NIR (accepting only)

This introduces NIR, but only as an accepting grammar; it doesn't yet emit
the NIR IR, beyond TODOs.

This modifies `tamec` to, while copying XIR, also attempt to lower NIR to
produce parser errors, if any.  It does not yet fail compilation, as I just
want to be cautious and observe that everything's working properly for a
little while as people use it, before I potentially break builds.

This is the culmination of months of supporting effort.  The NIR grammar is
derived from our existing TAME sources internally, which I use for now as a
test case until I introduce test cases directly into TAMER later on (I'd do
it now, if I hadn't spent so much time on this; I'll start introducing tests
as I begin emitting NIR tokens).  This is capable of fully parsing our
largest system with >900 packages, as well as `core`.

`tamec`'s lowering is a mess; that'll be cleaned up in future commits.  The
same can be said about `tameld`.

NIR's grammar has some initial documentation, but this will improve over
time as well.

The generated docs still need some improvement, too, especially with
generated identifiers; I just want to get this out here for testing.

DEV-7145

											
										
										
											2022-08-29 15:28:03 -04:00
 								#[derive(Debug, PartialEq, Eq)]
 								pub enum PkgType {
 								    /// Package is intended to produce an executable program.
 								    ///
 								    /// This is specified by the `rater` root node.
 								    Prog,
 								    /// Package is intended to be imported as a component of a larger
 								    ///   program.
 								    Mod,
 								}
 								#[derive(Debug, PartialEq, Eq)]
 								pub struct Literal<const S: SymbolId>;
 								impl<const S: SymbolId> TryFrom<Attr> for Literal<S> {
 								    type Error = NirAttrParseError;
 								    fn try_from(attr: Attr) -> Result<Self, Self::Error> {
 								        match attr {
 								            Attr(_, val, _) if val == S => Ok(Literal),
 								            Attr(name, _, aspan) => Err(NirAttrParseError::LiteralMismatch(
 								                name,
 								                aspan.value_span(),
 								                S,
 								            )),
 								        }
 								    }
 								}
 								impl From<Infallible> for NirAttrParseError {
 								    fn from(x: Infallible) -> Self {
 								        match x {}
 								    }
 								}
 								type ExpectedSymbolId = SymbolId;
 								#[derive(Debug, PartialEq, Eq)]
 								pub enum NirAttrParseError {
 								    LiteralMismatch(QName, Span, ExpectedSymbolId),
 								}
 								impl Error for NirAttrParseError {
 								    fn source(&self) -> Option<&(dyn Error + 'static)> {
 								        None
 								    }
 								}
 								impl Display for NirAttrParseError {
 								    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 								        match self {
 								            Self::LiteralMismatch(name, _, _) => {
 								                write!(f, "unexpected value for {}", TtXmlAttr::wrap(name),)
 								            }
 								        }
 								    }
 								}
 								impl Diagnostic for NirAttrParseError {
 								    fn describe(&self) -> Vec<crate::diagnose::AnnotatedSpan> {
 								        match self {
 								            Self::LiteralMismatch(_, span, expected) => span
 								                .error(format!("expecting {}", TtQuote::wrap(expected)))
 								                .into(),
 								        }
 								    }
 								}