\section{Classification System}\seclabel{class}
\index{classification}
A \dfn{classification} is a user-defined abstraction that describes
  (``classifies'') arbitrary data.
Classifications can be used as predicates, generating functions, and can be
  composed into more complex classifications.
Nearly all conditions in \tame{} are specified using classifications.

\index{first-order logic!sentence}
\index{classification!coupling}
All classifications represent \dfn{first-order sentences}---%
  that is,
    they contain no \dfn{free variables}.
Intuitively,
  this means that all variables within a~classification are
  \dfn{tightly coupled} to the classification itself.
This limitation is mitigated through use of the template system.

\begin{axiom}[Classification Introduction]\axmlabel{class-intro}
\todo{Symbol in place of $=$ here ($\equiv$ not appropriate).}
\begin{alignat}{3}
    &\xml{<classify as="$c$" }&&\xml{yields="$\gamma$" desc}&&\xml{="$\_$"
          $\alpha$>}\label{eq:xml-classify} \\
    &\quad \MFam{M^0}jJkK   &&\VFam{v^0}jJ   &&\quad s^0    \nonumber\\[-4mm]
    &\quad \quad\vdots      &&\quad\vdots    &&\quad \vdots \nonumber\\
    &\quad \MFam{M^l}jJkK   &&\VFam{v^m}jJ   &&\quad s^n    \nonumber\\[-3mm]
    &\xml{</classify>}
      % NB: This -50mu needs adjustment if you change the alignment above!
      &&\mspace{-50mu}= \Classify^c_\gamma\left(\odot,M,v,s\right), \nonumber
\end{alignat}

\noindent
where

\begin{align}
    J &\subset\Int \neq\emptyset, \\
    \forall{j\in J}\Big(K_j &\subset\Int \neq\emptyset\Big), \\
    \forall{k}\Big(M^k &: J \rightarrow K_{j\in J} \rightarrow \Bool\Big),
                                            \label{eq:class-matrix} \\
    \forall{k}\Big(v^k &: J \rightarrow \Bool\Big), \\
    \forall{k}\Big(s^k &\in\Bool\Big), \\
    \alpha &\in\Set{\epsilon,\, \texttt{any="true"}}, \label{eq:xml-any-domain}
\end{align}

\noindent
and the monoid~$\odot$ is defined as

\begin{equation}\label{eq:classify-rel}
  \odot = \begin{cases}
        \Monoid\Bool\land\true &\alpha = \epsilon,\\
        \Monoid\Bool\lor\false &\alpha = \texttt{any="true"}.
      \end{cases}
\end{equation}
\end{axiom}


% This TODO was the initial motivation for this paper!
\todo{Emphasize index sets, both relationships and nonempty.}
We use a $4$-tuple $\Classify\left(\odot,M,v,s\right)$ to represent a
  $\odot_1$-classification
    (a classification with the binary operation $\land$ or~$\lor$)
  consisting of a combination of matrix~($M$), vector~($v$), and
    scalar~($s$) matches,
      rendered above in columns.\footnote{%
        The symbol~$\odot$ was chosen since the binary operation for a monoid
          is~$\bullet$
            (see \secref{monoids})
          and~$\odot$ looks vaguely like~$(\bullet)$,
            representing a portion of the monoid triple.}
A $\land$-classification is pronounced ``conjunctive classification'',
  and $\lor$ ``disjunctive''.\footnote{%
    Conjunctive and disjunctive classifications used to be referred to,
      respectively,
      as \dfn{universal} and \dfn{existential},
        referring to fact that
          $\forall\Set{a_0,\ldots,a_n}(a) \equiv a_0\land\ldots\land a_n$,
            and similarly for $\exists$.
    This terminology has changed since all classifications are in fact
      existential over their matches' index sets,
        and so the terminology would otherwise lead to confusion.}

The variables~$c$ and~$\gamma$ are required in~\tame{} but are both optional
  in our notation~$\Classify^c_\gamma$,
    and can be used to identify the two different data representations of
    the classification.\footnote{%
      \xpath{classify/@yields} is optional in the grammar of \tame{},
        but the compiler will generate one for us if one is not provided.
      As such,
        we will for simplicity consider it to be required here.}

$\alpha$~serves as a placeholder for an optional \xml{any="true"},
  with $\emptystr$~representing the empty string in~\eqref{eq:xml-any-domain}.
Note the wildcard variable matching \xmlattr{desc}---%
  its purpose is only to provide documentation.

\begin{corollary}[$\odot$ Commutative Monoid]\corlabel{odot-monoid}
  $\odot$ is a commutative monoid in \axmref{class-intro}.
\end{corollary}
\begin{proof}
  By \axmref{class-intro},
    $\odot$ must be a monoid.
  Assume $\alpha=\epsilon$.
  Then,
    $\odot = \Monoid\Bool\land\true$,
      which is proved by \lemref{monoid-land}.
  Next, assume $\alpha=\texttt{any="true"}$.
  Then,
    $\odot = \Monoid\Bool\lor\false$,
      which is proved by \lemref{monoid-land}.
\end{proof}

\index{classification!commutativity}
\index{compiler!classification commutativity}
While \axmref{class-intro} seems to imply an ordering to matches,
  users of the language are free to specify matches in any order
    and the compiler will rearrange matches as it sees fit.
This is due to the commutativity of~$\odot$ as proved by
  \corref{odot-monoid},
    and not only affords great ease of use to users of~\tame{},
      but also great flexibility to compiler writers.

For notational convenience,
  we will let

\begin{align}
  \odot^\land &= \Monoid\Bool\land\true, \\
  \odot^\lor &= \Monoid\Bool\lor\false.
\end{align}


\def\cpredmatseq{{M^0_j}_k \bullet\cdots\bullet {M^l_j}_k}
\def\cpredvecseq{v^0_j\bullet\cdots\bullet v^m_j}
\def\cpredscalarseq{s^0\bullet\cdots\bullet s^n}


\begin{axiom}[Classification-Predicate Equivalence]\axmlabel{class-pred}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,M,v,s\right)$ be a
    classification by~\axmref{class-intro}.
  We then have the first-order sentence
  \begin{equation*}
    c \equiv
      {} \Exists{j\in J}{\Exists{k\in K_j}\cpredmatseq\bullet\cpredvecseq}
        \bullet\cpredscalarseq.
  \end{equation*}
\end{axiom}


\begin{axiom}[Classification Yield]\axmlabel{class-yield}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,M,v,s\right)$ be a
    classification by~\axmref{class-intro}.
  Then,

  \begin{align}
    r &= \begin{cases}
           2 &M\neq\emptyset, \\
           1 &M=\emptyset \land v\neq\emptyset, \\
           0 &M\union v = \emptyset,
         \end{cases} \\
    \exists{j\in J}\Big(\exists{k\in K_j}\Big(
      \Gamma^2_{j_k} &= \cpredmatseq\bullet\cpredvecseq\bullet\cpredscalarseq
    \Big)\Big), \\
    %
    \exists{j\in J}\Big(
      \Gamma^1_j &= \cpredvecseq\bullet\cpredscalarseq
    \Big), \\
    %
    \Gamma^0 &= \cpredscalarseq. \\
    %
    \gamma &= \Gamma^r.
  \end{align}
\end{axiom}

\begin{theorem}[Classification Composition]\thmlabel{class-compose}
  Classifications may be composed to create more complex classifications
    using the classification yield~$\gamma$ as in~\axmref{class-yield}.
  This interpretation is equivalent to \axmref{class-pred} by
  \begin{equation}
    c \equiv \Exists{j\in J}{
               \Exists{k\in K_j}{\Gamma^2_{j_k}}
               \bullet \Gamma^1_j
             }
             \bullet \Gamma^0.
  \end{equation}
\end{theorem}

\def\eejJ{\equiv \exists{j\in J}\Big(}

\begin{proof}
  Expanding each~$\Gamma$ in \axmref{class-yield},
    we have

  \begin{alignat*}{3}
    c &\eejJ\Exists{k\in K_j}{\Gamma^2_{j_k}}
              \bullet \Gamma^1_j
            \Big)
            \bullet \Gamma^0
        &&\text{by \axmref{class-yield}} \\
      %
      &\eejJ\exists{k\in K_j}\Big(
              \cpredmatseq \bullet \cpredvecseq \bullet \cpredscalarseq
            \Big) \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
            \bullet \cpredvecseq \bullet \cpredscalarseq \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq,
        &&\text{by \dfnref{quant-conn}} \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{prop-taut}} \\
      &\hphantom{\eejJ}\;\cpredvecseq \bullet \cpredscalarseq \Big)
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{quant-conn}} \\
      &\hphantom{\eejJ}\;\cpredvecseq\Big) \bullet \cpredscalarseq
                \bullet \cpredscalarseq, \\
      %
      &\eejJ\exists{k\in K_j}\Big(\cpredmatseq\Big)
        &&\text{by \dfnref{prop-taut}} \\
      &\hphantom{\eejJ}\;\cpredvecseq\Big)
                \bullet \cpredscalarseq.
        \tag*{\qedhere} \\
  \end{alignat*}
\end{proof}


\begin{lemma}[Classification Predicate Vacuity]\lemlabel{class-pred-vacu}
  Let $\Classify^c_\gamma\left(\Monoid\Bool\bullet e,\emptyset,\emptyset,\emptyset\right)$
    be a classification by~\axmref{class-intro}.
  $\odot$ is a monoid by \corref{odot-monoid}.
  Then $c \equiv \gamma \equiv e$.
\end{lemma}
\begin{proof}
  First consider $c$.
  \begin{alignat}{3}
    c &\equiv \Exists{j\in J}{\Exists{k}{e}\bullet e} \bullet e
        \qquad&&\text{by \dfnref{monoid-seq}} \label{p:cri-c} \\
      &\equiv \Exists{j\in J}{e \bullet e} \bullet e
        &&\text{by \dfnref{quant-elim}} \\
      &\equiv \Exists{j\in J}{e} \bullet e
        &&\text{by \ref{eq:monoid-identity}} \\
      &\equiv e \bullet e
        &&\text{by \dfnref{quant-elim}} \\
      &\equiv e.
        &&\text{by \ref{eq:monoid-identity}}
  \end{alignat}

  For $\gamma$,
    we have $r=0$ by \axmref{class-yield},
    and so by similar steps as~$c$,
      $\gamma=\Gamma^r=e$.
  Therefore $c\equiv e$.
\end{proof}


\begin{figure}[h]\label{fig:always-never}
  \begin{alignat*}{3}
    \begin{aligned}
      \xml{<classify }&\xml{as="always" yields="alwaysTrue"} \xmlnl
                      &\xml{desc="Always true" />}
    \end{aligned}
      \quad&=\quad
      \Classify^\texttt{always}_\texttt{alwaysTrue}
        &&\left(\odot^\land,\emptyset,\emptyset,\emptyset\right). \\
    %
    \begin{aligned}
      \xml{<classify }&\xml{as="never" yields="neverTrue"} \xmlnl
                      &\xml{any="true"} \xmlnl
                      &\xml{desc="Never true" />}
    \end{aligned}
      \quad&=\quad
      \Classify^\texttt{never}_\texttt{neverTrue}
        &&\left(\odot^\lor,\emptyset,\emptyset,\emptyset\right).
  \end{alignat*}
  \caption{\tameclass{always} and \tameclass{never} from package
             \tamepkg{core/base}.}
\end{figure}

Figure~\ref{fig:always-never} demonstrates \lemref{class-pred-vacu} in the
  definitions of the classifications \tameclass{always} and
  \tameclass{never}.
These classifications are typically referenced directly for clarity rather
  than creating other vacuous classifications,
    encapsulating \lemref{class-pred-vacu}.


\begin{theorem}[Classification Rank Independence]\thmlabel{class-rank-indep}
  Let $\odot=\Monoid\Bool\bullet e$.
  Then,
  \begin{equation}
    \Classify_\gamma\left(\odot,M,v,s\right)
      \equiv \Classify\left(
        \odot,
        \Classify_{\gamma'''}\left(\odot,M,\emptyset,\emptyset\right),
        \Classify_{\gamma''}\left(\odot,\emptyset,v,\emptyset\right),
        \Classify_{\gamma'}\left(\odot,\emptyset,\emptyset,s\right)
      \right).
  \end{equation}
\end{theorem}

\begin{proof}
  First,
    by \axmref{class-yield},
    observe these special cases following from \lemref{class-pred-vacu}:

  \begin{alignat}{3}
    \Gamma'''^2 &= \cpredmatseq, \qquad&&\text{assuming $v\union s=\emptyset$} \\
    \Gamma''^1 &= \cpredvecseq,        &&\text{assuming $M\union s=\emptyset$}\\
    \Gamma'^0 &= \cpredscalarseq.      &&\text{assuming $M\union v=\emptyset$}
  \end{alignat}

  By \thmref{class-compose},
    we must prove

  \begin{align}
    \Exists{j\in J}{
        \Exists{k\in K_j}{\cpredmatseq}
        \bullet \cpredvecseq
      }
      \bullet \cpredscalarseq \nonumber\\
    \equiv c \equiv
    \Exists{j\in J}{
        \Exists{k\in K_j}{\gamma'''_{j_k}}
        \bullet \gamma''_j
      }
      \bullet \gamma'. \label{eq:rank-indep-goal}
  \end{align}

  By \axmref{class-yield},
    we have $r'''=2$, $r''=1$, and $r'=0$,
    and so $\gamma'''=\Gamma'''^2$,
      $\gamma''=\Gamma''^1$,
      and $\gamma'=\Gamma'^0$.
  By substituting these values in~\ref{eq:rank-indep-goal},
    the theorem is proved.
\end{proof}

\begin{corollary}[Classification As Proposition]
  Classifications with $M\union v=\emptyset$ or with constant index sets can
    be represented by propositional logic
      (that is---without first-order logic).
\end{corollary}
\begin{proof}
  Assume $M\union v=\emptyset$.
  By \thmref{class-rank-indep},

  \begin{align*}
    c &\equiv \cpredscalarseq,
  \end{align*}

  \noindent
  which is a propositional formula.

  Similarly,
    if we define our index set~$J$ to be constant
      (such that it is known at compile-time)\footnote{%
        Alternatively,
          we could set an upper bound for~$J$,
          always expand into that upper bound,
          and then let undefined values of $v^m_j$ be~$e$.},
        we are then able to eliminate existential quantification over~$J$
        as follows:
  Then,

  \begin{align}\label{eq:prop-vec}
    c &\equiv \Exists{j\in J}{\cpredvecseq}, \nonumber\\
      &\equiv \left(v^0_0\bullet\cdots\bullet v^m_0\right)
              \lor\cdots\lor
              \left(v^0_{|J|-1}\bullet\cdots\bullet v^m_{|J|-1}\right),
  \end{align}
  \noindent
  which is a propositional formula.

  Similarly,
    for matrices,

  \begin{align}
    c &\equiv \Exists{j\in J}{\Exists{k\in K_j}{\cpredmatseq}}, \nonumber\\
      &\equiv \Exists{j\in J}{
                \left({M^0_j}_0\bullet\cdots\bullet{M^0_j}_{|K_j|-1}\right)
                \lor\cdots\lor
                \left({M^l_j}_0\bullet\cdots\bullet{M^l_j}_{|K_j|-1}\right)
              },
  \end{align}
  \noindent
  and then proceed as in~\ref{eq:prop-vec}.
\end{proof}

\INCOMPLETE{The classification definitions are incomplete!}

These definitions may also be used as a form of pattern matching to look up
  a corresponding variable.
For example,
  if we have $\Classify^\texttt{foo}$ and want to know its \xmlattr{yields},
    we can write~$\Classify^\texttt{foo}_\gamma$ to bind the
    \xmlattr{yields} to~$\gamma$.\footnote{%
      This is conceptually like a symbol table lookup in the compiler.}

\mremark{Note that these illustrate \emph{scalar} values only.}
Consider the following classification $\Classify^\texttt{cost-exceeded}$.
Let~\tameparam{cost} be a scalar parameter.

\index{classification!classify@\xmlnode{classify}}
\begin{lstlisting}
  <classify as="cost-exceeded" desc="Cost of item is too expensive">
    <t:match-gt on="cost" value="100.00" />
  </classify>
\end{lstlisting}

\noindent
is then equivalent to the proposition

\begin{equation*}
  \tameclass{cost-exceeded} \equiv \tameparam{cost} > 100.00.
\end{equation*}

\index{classification!domain}
A classification is either \true or~\false.
Let $\tameparam{cost}=150.00$.
Then,

\begin{align*}
  \tameclass{cost-exceeded} & \equiv \tameparam{cost} > 100.00 \\
                            & \equiv 150.00 > 100.00 \\
                            & \equiv \true.
\end{align*}

Each \xmlnode{match} of a classification is a~\dfn{predicate}.
Multiple predicates are by default joined by conjunction:

\begin{lstlisting}
  <classify as="pool-hazard" desc="Hazardous pool">
    <match on="diving_board" />
    <t:match-lt on="pool_depth_ft" value="8" />
  </classify>
\end{lstlisting}

\noindent
is equivalent to the proposition

\begin{equation*}
  \tameclass{pool-hazard} \equiv \tameparam{diving\_board}
    \land \tameparam{pool\_depth\_ft} < 8.
\end{equation*}


\subsection{Matches}
\todo{Non-scalar values.}
\begin{definition}[Match Equality]
  \begin{equation*}
    \xml{<match on="$x$" value="$y$" />} \equiv x = y.
  \end{equation*}
\end{definition}

\begin{definition}[Match Equality Short Form]
  \begin{equation*}
    \xml{<match on="$x$" />}
      \equiv \xml{<match on="$x$" value="TRUE" />}.
  \end{equation*}
\end{definition}

\begin{definition}[Match Equality Long Form]
  \begin{alignat*}{2}
    \xml{<match on="$x$" value="$y$" />}
      &\equiv {}&&\xml{<match on="$x$">} \\
      &         &&\quad \xml{<c:eq>} \\
      &         &&\quad\quad \xml{<c:value-of name="$y$">} \\
      &         &&\quad \xml{</c:eq>} \\
      &         &&\xml{</match>} \\
      &\equiv {}&&\xml{<t:match-eq on="$x$" value="$y$" />}.
  \end{alignat*}
\end{definition}

\begin{definition}[Match Membership Equivalence]
  When $T$ is a type defined with \xmlnode{typedef},

  \begin{equation*}
    \xml{<match on="$x$" anyOf="$T$" />} \equiv x \in T.
  \end{equation*}
\end{definition}