% The TAME Programming Language Classification System
%
%  Copyright (C) 2021 Ryan Specialty Group, LLC.
%
%  Licensed under the Creative Commons Attribution-ShareAlike 4.0
%  International License.
%%

\section{Classification System}\seclabel{class}
\index{classification|textbf}
A \dfn{classification} is a user-defined abstraction that describes
  (``classifies'') arbitrary data.
Classifications can be used as predicates, generating functions, and can be
  composed into more complex classifications.
Nearly all conditions in \tame{} are specified using classifications.

\index{first-order logic!sentence}
\index{classification!coupling}
All classifications represent \dfn{first-order sentences}---%
  that is,
    they contain no \dfn{free variables}.
Intuitively,
  this means that all variables within a~classification are
  \dfn{tightly coupled} to the classification itself.
This limitation is mitigated through use of the template system.

\begin{axiom}[Classification Introduction]\axmlabel{class-intro}
\indexsym\Classify{classification}
\indexsym\gamma{classification, yield}
\index{classification!index set}
\index{index set!classification}
\index{classification!classify@\xmlnode{classify}}
\index{classification!as@\xmlattr{as}}
\index{classification!yields@\xmlattr{yields}}
\todo{Symbol in place of $=$ here ($\equiv$ not appropriate).}
\begin{alignat}{3}
    &\xml{<classify as="$c$" }&&\xml{yields="$\gamma$" desc}&&\xml{="$\_$"
          $\alpha$>}\label{eq:xml-classify} \\
    &\quad \MFam{M^0}jJkK   &&\VFam{v^0}jJ   &&\quad s^0    \nonumber\\[-4mm]
    &\quad \quad\vdots      &&\quad\vdots    &&\quad \vdots \nonumber\\
    &\quad \MFam{M^l}jJkK   &&\VFam{v^m}jJ   &&\quad s^n    \nonumber\\[-3mm]
    &\xml{</classify>}
      % NB: This -50mu needs adjustment if you change the alignment above!
      &&\mspace{-50mu}= \Classify^c_\gamma\left(\odot,M,v,s\right), \nonumber
\end{alignat}

\noindent
where

\indexsym\emptystr{empty string}
\index{empty string (\ensuremath\emptystr)}
\begin{align}
    J &\subset\Int \neq\emptyset, \\
    \forall{j\in J}\Big(K_j &\subset\Int \neq\emptyset\Big), \\
    \forall{k}\Big(M^k &: J \rightarrow K_{j\in J} \rightarrow \Bool\Big),
                                            \label{eq:class-matrix} \\
    \forall{k}\Big(v^k &: J \rightarrow \Bool\Big), \\
    \forall{k}\Big(s^k &\in\Bool\Big), \\
    \alpha &\in\Set{\emptystr,\, \texttt{any="true"}}, \label{eq:xml-any-domain}
\end{align}

\noindent
and the monoid~$\odot$ is defined as

\indexsym\odot{classification, monoid}
\index{classification!any@\xmlattr{any}}
\index{classification!monoid|(}
\begin{equation}\label{eq:classify-rel}
  \odot = \begin{cases}
        \Monoid\Bool\land\true &\alpha = \emptystr,\\
        \Monoid\Bool\lor\false &\alpha = \texttt{any="true"}.
      \end{cases}
\end{equation}
\end{axiom}


% This TODO was the initial motivation for this paper!
\todo{Emphasize index sets, both relationships and nonempty.}
We use a $4$-tuple $\Classify\left(\odot,M,v,s\right)$ to represent a
  $\odot_1$-classification
    (a classification with the binary operation $\land$ or~$\lor$)
  consisting of a combination of matrix~($M$), vector~($v$), and
    scalar~($s$) matches,
      rendered above in columns.\footnote{%
        The symbol~$\odot$ was chosen since the binary operation for a monoid
          is~$\bullet$
            (see \secref{monoids})
          and~$\odot$ looks vaguely like~$(\bullet)$,
            representing a portion of the monoid triple.}
A $\land$-classification is pronounced ``conjunctive classification'',
  and $\lor$ ``disjunctive''.\footnote{%
    \index{classification!terminology history}
    Conjunctive and disjunctive classifications used to be referred to,
      respectively,
      as \dfn{universal} and \dfn{existential},
        referring to fact that
          $\forall\Set{a_0,\ldots,a_n}(a) \equiv a_0\land\ldots\land a_n$,
            and similarly for $\exists$.
    This terminology has changed since all classifications are in fact
      existential over their matches' index sets,
        and so the terminology would otherwise lead to confusion.}

The variables~$c$ and~$\gamma$ are required in~\tame{} but are both optional
  in our notation~$\Classify^c_\gamma$,
    and can be used to identify the two different data representations of
    the classification.\footnote{%
      \xpath{classify/@yields} is optional in the grammar of \tame{},
        but the compiler will generate one for us if one is not provided.
      As such,
        we will for simplicity consider it to be required here.}

$\alpha$~serves as a placeholder for an optional \xml{any="true"},
  with $\emptystr$~representing the empty string in~\eqref{eq:xml-any-domain}.
Note the wildcard variable matching \xmlattr{desc}---%
  its purpose is only to provide documentation.

\begin{corollary}[$\odot$ Commutative Monoid]\corlabel{odot-monoid}
\index{classification!commutativity|(}
  $\odot$ is a commutative monoid in \axmref{class-intro}.
\end{corollary}
\begin{proof}
  By \axmref{class-intro},
    $\odot$ must be a monoid.
  Assume $\alpha=\emptystr$.
  Then,
    $\odot = \Monoid\Bool\land\true$,
      which is proved by \lemref{monoid-land}.
  Next, assume $\alpha=\texttt{any="true"}$.
  Then,
    $\odot = \Monoid\Bool\lor\false$,
      which is proved by \lemref{monoid-land}.
\end{proof}

While \axmref{class-intro} seems to imply an ordering to matches,
  users of the language are free to specify matches in any order
    and the compiler will rearrange matches as it sees fit.
\index{compiler!classification commutativity}
This is due to the commutativity of~$\odot$ as proved by
  \corref{odot-monoid},
    and not only affords great ease of use to users of~\tame{},
      but also great flexibility to compiler writers.
\index{classification!commutativity|)}

For notational convenience,
  we will let

\index{classification!monoid|)}
\begin{align}
  \odot^\land &= \Monoid\Bool\land\true, \\
  \odot^\lor &= \Monoid\Bool\lor\false.
\end{align}


\def\cpredmatseq{{M^0_j}_k \bullet\cdots\bullet {M^l_j}_k}
\def\cpredvecseq{v^0_j\bullet\cdots\bullet v^m_j}
\def\cpredscalarseq{s^0\bullet\cdots\bullet s^n}


\begin{axiom}[Classification-Predicate Equivalence]\axmlabel{class-pred}
\index{classification!as predicate}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,M,v,s\right)$ be a
    classification by~\axmref{class-intro}.
  We then have the first-order sentence
  \begin{equation*}
    c \equiv
      {} \Exists{j\in J}{\Exists{k\in K_j}\cpredmatseq\bullet\cpredvecseq}
        \bullet\cpredscalarseq.
  \end{equation*}
\end{axiom}


\begin{axiom}[Classification Yield]\axmlabel{class-yield}
\indexsym\Gamma{classification, yield}
\index{classification!yield (\ensuremath\gamma, \ensuremath\Gamma)}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,M,v,s\right)$ be a
    classification by~\axmref{class-intro}.
  Then,

  \begin{align}
    r &= \begin{cases}
           2 &M\neq\emptyset, \\
           1 &M=\emptyset \land v\neq\emptyset, \\
           0 &M\union v = \emptyset,
         \end{cases} \\
    \exists{j\in J}\Big(\exists{k\in K_j}\Big(
      \Gamma^2_{j_k} &= \cpredmatseq\bullet\cpredvecseq\bullet\cpredscalarseq
    \Big)\Big), \\
    %
    \exists{j\in J}\Big(
      \Gamma^1_j &= \cpredvecseq\bullet\cpredscalarseq
    \Big), \\
    %
    \Gamma^0 &= \cpredscalarseq. \\
    %
    \gamma &= \Gamma^r.
  \end{align}
\end{axiom}

\begin{theorem}[Classification Composition]\thmlabel{class-compose}
\index{classification!composition|(}
  Classifications may be composed to create more complex classifications
    using the classification yield~$\gamma$ as in~\axmref{class-yield}.
  This interpretation is equivalent to \axmref{class-pred} by
  \begin{equation}
    c \equiv \Exists{j\in J}{
               \Exists{k\in K_j}{\Gamma^2_{j_k}}
               \bullet \Gamma^1_j
             }
             \bullet \Gamma^0.
  \end{equation}
\end{theorem}

\def\eejJ{\equiv \exists{j\in J}\Big(}

\begin{proof}
  Expanding each~$\Gamma$ in \axmref{class-yield},
    we have

  \begin{alignat*}{3}
    c &\eejJ\Exists{k\in K_j}{\Gamma^2_{j_k}}
              \bullet \Gamma^1_j
            \Big)
            \bullet \Gamma^0
        &&\text{by \axmref{class-yield}} \\
      %
      &\eejJ\exists{k\in K_j}\Big(
              \cpredmatseq \bullet \cpredvecseq \bullet \cpredscalarseq
            \Big) \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
            \bullet \cpredvecseq \bullet \cpredscalarseq \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq,
        &&\text{by \dfnref{quant-conn}} \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{prop-taut}} \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{quant-conn}} \\
      &\hphantom{\eejJ}\;\cpredvecseq\Big) \bullet \cpredscalarseq
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{prop-taut}} \\
      &\hphantom{\eejJ}\;\cpredvecseq\Big)
                \bullet \cpredscalarseq.
        \tag*{\qedhere} \\
  \end{alignat*}
\end{proof}
\index{classification!composition|)}


\begin{lemma}[Classification Predicate Vacuity]\lemlabel{class-pred-vacu}
\index{classification!vacuity|(}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,\emptyset,\emptyset,\emptyset\right)$
    be a classification by~\axmref{class-intro}.
  $\odot$ is a monoid by \corref{odot-monoid}.
  Then $c \equiv \gamma \equiv e$.
\end{lemma}
\begin{proof}
  First consider $c$.
  \begin{alignat}{3}
    c &\equiv \Exists{j\in J}{\Exists{k}{e}\bullet e} \bullet e
        \qquad&&\text{by \dfnref{monoid-seq}} \label{p:cri-c} \\
      &\equiv \Exists{j\in J}{e \bullet e} \bullet e
        &&\text{by \dfnref{quant-elim}} \\
      &\equiv \Exists{j\in J}{e} \bullet e
        &&\text{by \ref{eq:monoid-identity}} \\
      &\equiv e \bullet e
        &&\text{by \dfnref{quant-elim}} \\
      &\equiv e.
        &&\text{by \ref{eq:monoid-identity}}
  \end{alignat}

  For $\gamma$,
    we have $r=0$ by \axmref{class-yield},
    and so by similar steps as~$c$,
      $\gamma=\Gamma^r=e$.
  Therefore $c\equiv e$.
\end{proof}


\begin{figure}[h]\label{fig:always-never}
  \begin{alignat*}{3}
    \begin{aligned}
      \xml{<classify }&\xml{as="always" yields="alwaysTrue"} \xmlnl
                      &\xml{desc="Always true" />}
    \end{aligned}
      \quad&=\quad
      \Classify^\texttt{always}_\texttt{alwaysTrue}
        &&\left(\odot^\land,\emptyset,\emptyset,\emptyset\right). \\
    %
    \begin{aligned}
      \xml{<classify }&\xml{as="never" yields="neverTrue"} \xmlnl
                      &\xml{any="true"} \xmlnl
                      &\xml{desc="Never true" />}
    \end{aligned}
      \quad&=\quad
      \Classify^\texttt{never}_\texttt{neverTrue}
        &&\left(\odot^\lor,\emptyset,\emptyset,\emptyset\right).
  \end{alignat*}
  \caption{\tameclass{always} and \tameclass{never} from package
             \tamepkg{core/base}.}
\end{figure}

Figure~\ref{fig:always-never} demonstrates \lemref{class-pred-vacu} in the
  definitions of the classifications \tameclass{always} and
  \tameclass{never}.
These classifications are typically referenced directly for clarity rather
  than creating other vacuous classifications,
    encapsulating \lemref{class-pred-vacu}.
\index{classification!vacuity|)}


\begin{theorem}[Classification Rank Independence]\thmlabel{class-rank-indep}
\index{classification!rank|(}
  Let $\odot=\Monoid\Bool\bullet e$.
  Then,
  \begin{equation}
    \Classify_\gamma\left(\odot,M,v,s\right)
      \equiv \Classify\left(
        \odot,
        \Classify_{\gamma'''}\left(\odot,M,\emptyset,\emptyset\right),
        \Classify_{\gamma''}\left(\odot,\emptyset,v,\emptyset\right),
        \Classify_{\gamma'}\left(\odot,\emptyset,\emptyset,s\right)
      \right).
  \end{equation}
\end{theorem}

\begin{proof}
  First,
    by \axmref{class-yield},
    observe these special cases following from \lemref{class-pred-vacu}:

  \begin{alignat}{3}
    \Gamma'''^2 &= \cpredmatseq, \qquad&&\text{assuming $v\union s=\emptyset$} \\
    \Gamma''^1 &= \cpredvecseq,        &&\text{assuming $M\union s=\emptyset$}\\
    \Gamma'^0 &= \cpredscalarseq.      &&\text{assuming $M\union v=\emptyset$}
  \end{alignat}

  By \thmref{class-compose},
    we must prove

  \begin{align}
    \Exists{j\in J}{
        \Exists{k\in K_j}{\cpredmatseq}
        \bullet \cpredvecseq
      }
      \bullet \cpredscalarseq \nonumber\\
    \equiv c \equiv
    \Exists{j\in J}{
        \Exists{k\in K_j}{\gamma'''_{j_k}}
        \bullet \gamma''_j
      }
      \bullet \gamma'. \label{eq:rank-indep-goal}
  \end{align}

  By \axmref{class-yield},
    we have $r'''=2$, $r''=1$, and $r'=0$,
    and so $\gamma'''=\Gamma'''^2$,
      $\gamma''=\Gamma''^1$,
      and $\gamma'=\Gamma'^0$.
  By substituting these values in~\ref{eq:rank-indep-goal},
    the theorem is proved.
\end{proof}
\index{classification!rank|)}

\begin{corollary}[Classification As Proposition]
\index{classification!as proposition|(}
  Classifications with $M\union v=\emptyset$ or with constant index sets can
    be represented by propositional logic
      (that is---without first-order logic).
\end{corollary}
\begin{proof}
  Assume $M\union v=\emptyset$.
  By \thmref{class-rank-indep},

  \begin{align*}
    c &\equiv \cpredscalarseq,
  \end{align*}

  \noindent
  which is a propositional formula.

  Similarly,
    if we define our index set~$J$ to be constant
      (such that it is known at compile-time)\footnote{%
        Alternatively,
          we could set an upper bound for~$J$,
          always expand into that upper bound,
          and then let undefined values of $v^m_j$ be~$e$.},
        we are then able to eliminate existential quantification over~$J$
        as follows:
  Then,

  \begin{align}\label{eq:prop-vec}
    c &\equiv \Exists{j\in J}{\cpredvecseq}, \nonumber\\
      &\equiv \left(v^0_0\bullet\cdots\bullet v^m_0\right)
              \lor\cdots\lor
              \left(v^0_{|J|-1}\bullet\cdots\bullet v^m_{|J|-1}\right),
  \end{align}
  \noindent
  which is a propositional formula.

  Similarly,
    for matrices,

  \begin{align}
    c &\equiv \Exists{j\in J}{\Exists{k\in K_j}{\cpredmatseq}}, \nonumber\\
      &\equiv \Exists{j\in J}{
                \left({M^0_j}_0\bullet\cdots\bullet{M^0_j}_{|K_j|-1}\right)
                \lor\cdots\lor
                \left({M^l_j}_0\bullet\cdots\bullet{M^l_j}_{|K_j|-1}\right)
              },
  \end{align}
  \noindent
  and then proceed as in~\ref{eq:prop-vec}.
\end{proof}
\index{classification!as proposition|)}

\INCOMPLETE{The classification definitions are incomplete!}

These definitions may also be used as a form of pattern matching to look up
  a corresponding variable.
For example,
  if we have $\Classify^\texttt{foo}$ and want to know its \xmlattr{yields},
    we can write~$\Classify^\texttt{foo}_\gamma$ to bind the
    \xmlattr{yields} to~$\gamma$.\footnote{%
      This is conceptually like a symbol table lookup in the compiler.}

\mremark{Note that these illustrate \emph{scalar} values only.}
Consider the following classification $\Classify^\texttt{cost-exceeded}$.
Let~\tameparam{cost} be a scalar parameter.

\begin{lstlisting}
  <classify as="cost-exceeded" desc="Cost of item is too expensive">
    <t:match-gt on="cost" value="100.00" />
  </classify>
\end{lstlisting}

\noindent
is then equivalent to the proposition

\begin{equation*}
  \tameclass{cost-exceeded} \equiv \tameparam{cost} > 100.00.
\end{equation*}

\index{classification!domain}
A classification is either \true or~\false.
Let $\tameparam{cost}=150.00$.
Then,

\begin{align*}
  \tameclass{cost-exceeded} & \equiv \tameparam{cost} > 100.00 \\
                            & \equiv 150.00 > 100.00 \\
                            & \equiv \true.
\end{align*}

Each \xmlnode{match} of a classification is a~\dfn{predicate}.
Multiple predicates are by default joined by conjunction:

\begin{lstlisting}
  <classify as="pool-hazard" desc="Hazardous pool">
    <match on="diving_board" />
    <t:match-lt on="pool_depth_ft" value="8" />
  </classify>
\end{lstlisting}

\noindent
is equivalent to the proposition

\begin{equation*}
  \tameclass{pool-hazard} \equiv \tameparam{diving\_board}
    \land \tameparam{pool\_depth\_ft} < 8.
\end{equation*}


\subsection{Matches}
\todo{Non-scalar values.}
\begin{definition}[Match Equality]
  \begin{equation*}
    \xml{<match on="$x$" value="$y$" />} \equiv x = y.
  \end{equation*}
\end{definition}

\begin{definition}[Match Equality Short Form]
  \begin{equation*}
    \xml{<match on="$x$" />}
      \equiv \xml{<match on="$x$" value="TRUE" />}.
  \end{equation*}
\end{definition}

\begin{definition}[Match Equality Long Form]
  \begin{alignat*}{2}
    \xml{<match on="$x$" value="$y$" />}
      &\equiv {}&&\xml{<match on="$x$">} \\
      &         &&\quad \xml{<c:eq>} \\
      &         &&\quad\quad \xml{<c:value-of name="$y$">} \\
      &         &&\quad \xml{</c:eq>} \\
      &         &&\xml{</match>} \\
      &\equiv {}&&\xml{<t:match-eq on="$x$" value="$y$" />}.
  \end{alignat*}
\end{definition}

\begin{definition}[Match Membership Equivalence]
  When $T$ is a type defined with \xmlnode{typedef},

  \begin{equation*}
    \xml{<match on="$x$" anyOf="$T$" />} \equiv x \in T.
  \end{equation*}
\end{definition}