vector/table: Extract bisect functions into vector/filter

* vector/filter.xml (bisect, foremost, _mask-unless_): Add to package. * vector/table.xml (bisect, foremost, _mask-unless_): Remove from package.
2018-01-26 11:50:05 -05:00 · 2018-01-26 11:50:05 -05:00 · 98f9b6fadb
parent cac9d22cb2
commit 98f9b6fadb
2 changed files with 278 additions and 263 deletions
--- a/core/vector/filter.xml
+++ b/core/vector/filter.xml
@ -23,6 +23,9 @@
         core="true"
         desc="Filtering Vectors and Matrices">

+  <import package="../base" />
+  <import package="list" />
+

  <section title="Matrix Filtering">
    \ref{mfilter} handles complex filtering of matrices.
@ -242,5 +245,280 @@
        </c:otherwise>
      </c:cases>
    </function>
+
+
+    <section title="Bisecting">
+      Perform an~$O(lg n)$ bisect on a data set.
+
+      This is intended to limit recursion on very large data sets (and
+        consequently will increase performance).
+      This will bisect up until a certain point (the gap),
+        unless it finds the value in question.
+      After finding the value,
+        it will perform an~$O(n)$ linear backward search to find the first
+          occurrence of the value.
+      If the value is not found,
+        it will halt at the gap and return the first index of the gap,
+          which we will consider its "best guess",
+            at which point a linear search can be performed by the caller to
+              determine if the value does in fact exist at all.
+
+      (The reason this operates so oddly is because of its caller;
+        we could rid the gap and make this a general-purpose function if need be.
+      Technically,
+        the gap is useless and saves $lg g$ steps,
+          which may be very small.)
+
+
+      <const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001"
+             desc="Used to mask when conditions" />
+      <const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10"
+             desc="Quit bisect if size is less than or equal to this value" />
+
+
+      <function name="bisect"
+                desc="Bisect a matrix toward the requested column value">
+        <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
+        <param name="col" type="integer" desc="Column index to filter on" />
+        <param name="val" type="float" desc="Column value to filter on" />
+        <param name="start" type="integer" desc="Start index" />
+        <param name="end" type="integer" desc="Start end" />
+
+        <c:let>
+          <c:values>
+            <!-- the gap represents the number of indexes between the current start
+                 and end indexes -->
+            <c:value name="gap" type="integer" desc="Gap between current start and end">
+              <c:sum>
+                <c:value-of name="end" />
+
+                <t:negate>
+                  <c:value-of name="start" />
+                </t:negate>
+              </c:sum>
+            </c:value>
+          </c:values>
+
+          <!--
+            At this point, we need to determine if we should continue the bisect or
+            halt. The purpose of the gap is based on the understanding that (with
+            our use cases) we will arrive, almost always, at one of two scenarios:
+            we found the value, but it's part of a larger set of the same values, or
+            the value we are looking for may not even exist at all.
+
+            The gap just limits recursion (but just barely) at smaller levels, since
+            bisect will take lg(n) steps). Increase the gap limit to decrease the
+            number of steps, or decrease it to 1 if you want a complete bisection.
+          -->
+          <c:cases>
+            <!-- give up if we've reached our gap limit -->
+            <c:case>
+              <c:when name="gap">
+                <c:lte>
+                  <c:value-of name="MFILTER_BISECT_GAP_MAX" />
+                </c:lte>
+              </c:when>
+
+              <!-- we tried our best; return our current position -->
+              <c:value-of name="start" />
+            </c:case>
+
+            <!-- we have not yet reached our gap limit; keep going -->
+            <c:otherwise>
+              <c:let>
+                <c:values>
+                  <c:value name="mid_index" type="integer" desc="Middle index">
+                    <!-- to determine the new mid index, add half of the gap to the
+                         current index -->
+                    <c:sum>
+                      <c:value-of name="start" />
+                      <c:ceil>
+                        <c:quotient>
+                          <c:value-of name="gap" />
+                          <c:const value="2" desc="Bisect" />
+                        </c:quotient>
+                      </c:ceil>
+                    </c:sum>
+                  </c:value>
+                </c:values>
+
+                <c:let>
+                  <c:values>
+                    <c:value name="mid" type="float" desc="Middle value">
+                      <c:value-of name="matrix">
+                        <!-- row -->
+                        <c:index>
+                          <c:value-of name="mid_index" />
+                        </c:index>
+
+                        <!-- column -->
+                        <c:index>
+                          <c:value-of name="col" />
+                        </c:index>
+                      </c:value-of>
+                    </c:value>
+                  </c:values>
+
+                  <c:cases>
+                    <!-- if the middle value is lower than our value, then take the upper half -->
+                    <c:case>
+                      <c:when name="mid">
+                        <c:lt>
+                          <c:value-of name="val" />
+                        </c:lt>
+                      </c:when>
+
+                      <c:recurse start="mid_index" />
+                    </c:case>
+
+                    <!-- similarily, take the lower half if we over-shot -->
+                    <c:case>
+                      <c:when name="mid">
+                        <c:gt>
+                          <c:value-of name="val" />
+                        </c:gt>
+                      </c:when>
+
+                      <c:recurse end="mid_index" />
+                    </c:case>
+
+                    <!-- if we have an exact match, that doesn't necessarily mean that we
+                         have every value; we may have intersected a set of them -->
+                    <c:otherwise>
+                      <!-- this will return an exact index: the first index
+                           containing the element we've been looking for (it is a
+                           linear backwards search) -->
+                      <c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
+                    </c:otherwise>
+                  </c:cases>
+                </c:let>
+              </c:let>
+            </c:otherwise>
+          </c:cases>
+        </c:let>
+      </function>
+
+
+      <function name="foremost"
+                desc="Search backwards for the first occurrance in a sorted list">
+        <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
+        <param name="col" type="integer"               desc="Column index to search on" />
+        <param name="i" type="integer"                 desc="Current index" />
+
+        <c:let>
+          <c:values>
+            <!-- we calculate this rather than accept it via an argument so that
+                 this function may be called directly in a more convenient manner
+                 -->
+            <c:value name="val" type="float" desc="Current value">
+              <c:value-of name="matrix">
+                <!-- row -->
+                <c:index>
+                  <c:value-of name="i" />
+                </c:index>
+
+                <!-- column -->
+                <c:index>
+                  <c:value-of name="col" />
+                </c:index>
+              </c:value-of>
+            </c:value>
+
+            <c:value name="prev" type="float" desc="Previous value">
+              <c:value-of name="matrix">
+                <!-- row -->
+                <c:index>
+                  <t:dec>
+                    <c:value-of name="i" />
+                  </t:dec>
+                </c:index>
+
+                <!-- column -->
+                <c:index>
+                  <c:value-of name="col" />
+                </c:index>
+              </c:value-of>
+            </c:value>
+          </c:values>
+
+          <c:cases>
+            <!-- if we have no more indexes to check, then we're done -->
+            <c:case>
+              <c:when name="i">
+                <c:eq>
+                  <c:const value="0"
+                           desc="Did we check the final (first) index?" />
+                </c:eq>
+              </c:when>
+
+              <!-- well, then, we're done -->
+              <c:value-of name="i" />
+            </c:case>
+
+            <!-- if the previous column value is the same value, then continue checking -->
+            <c:case>
+              <c:when name="prev">
+                <c:eq>
+                  <c:value-of name="val" />
+                </c:eq>
+              </c:when>
+
+              <c:recurse>
+                <c:arg name="i">
+                  <t:dec>
+                    <c:value-of name="i" />
+                  </t:dec>
+                </c:arg>
+              </c:recurse>
+            </c:case>
+
+            <!-- otherwise, we've found the foremost index -->
+            <c:otherwise>
+              <c:value-of name="i" />
+            </c:otherwise>
+          </c:cases>
+        </c:let>
+      </function>
+
+
+      <template name="_mask-unless_"
+                desc="Mask a value unless the condition is truthful">
+        <param name="@values@" desc="Body" />
+        <param name="@name@"   desc="Scalar to check" />
+        <param name="@index@"  desc="Optional index" />
+        <param name="@desc@"   desc="Optional description" />
+
+        <c:cases>
+          <!-- if masked -->
+          <c:case>
+            <!-- no index provided -->
+            <unless name="@index@">
+              <c:when name="@name@">
+                <c:eq>
+                  <c:value-of name="FALSE" />
+                </c:eq>
+              </c:when>
+            </unless>
+
+            <!-- index provided -->
+            <if name="@index@">
+              <c:when name="@name@" index="@index@">
+                <c:eq>
+                  <c:value-of name="FALSE" />
+                </c:eq>
+              </c:when>
+            </if>
+
+            <!-- TODO: configurable mask via meta and/or param -->
+            <c:value-of name="TABLE_WHEN_MASK_VALUE" />
+          </c:case>
+
+          <!-- if not masked -->
+          <c:otherwise>
+            <param-copy name="@values@" />
+          </c:otherwise>
+        </c:cases>
+      </template>
+    </section>
  </section>
 </package>
--- a/core/vector/table.xml
+++ b/core/vector/table.xml
@ -24,7 +24,6 @@
  desc="Functions for performing table lookups">

  <import package="../base" />
-  <import package="list" />

  <!-- since templates are inlined, we need to make these symbols available to
       avoid terrible confusion -->
@ -33,11 +32,6 @@
  <import package="filter" export="true" />
  <import package="matrix" export="true" />

-
-  <const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001" desc="Used to mask when conditions" />
-  <const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10" desc="Quit bisect if size is less than or equal to this value" />
-
-
  <!--
    Create a constant table

@ -496,262 +490,5 @@
      </c:otherwise>
    </c:cases>
  </function>
-
-
-  <!--
-    Perform a lg(n) bisect on a data set.
-
-    This is intended to limit recursion on very large data sets (and
-    consequently will increase performance). This will bisect up until a certain
-    point (the gap), unless it finds the value in question. After finding the
-    value, it will perform a linear backward search to find the first occurrence
-    of the value. If the value is not found, it will halt at the gap and return
-    the first index of the gap, which we will consider its "best guess", at
-    which point a linear search can be performed by the caller to determine if
-    the value does in fact exist at all.
-
-    (The reason this operates so oddly is because of its caller. We could rid
-    the gap and make this a general-purpose function if need be. Technically,
-    the gap is useless and saves lg(g) steps, which may be very small.)
-  -->
-  <function name="bisect" desc="Bisect a matrix toward the requested column value">
-    <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
-    <param name="col" type="integer" desc="Column index to filter on" />
-    <param name="val" type="float" desc="Column value to filter on" />
-    <param name="start" type="integer" desc="Start index" />
-    <param name="end" type="integer" desc="Start end" />
-
-    <c:let>
-      <c:values>
-        <!-- the gap represents the number of indexes between the current start
-             and end indexes -->
-        <c:value name="gap" type="integer" desc="Gap between current start and end">
-          <c:sum>
-            <c:value-of name="end" />
-
-            <t:negate>
-              <c:value-of name="start" />
-            </t:negate>
-          </c:sum>
-        </c:value>
-      </c:values>
-
-      <!--
-        At this point, we need to determine if we should continue the bisect or
-        halt. The purpose of the gap is based on the understanding that (with
-        our use cases) we will arrive, almost always, at one of two scenarios:
-        we found the value, but it's part of a larger set of the same values, or
-        the value we are looking for may not even exist at all.
-
-        The gap just limits recursion (but just barely) at smaller levels, since
-        bisect will take lg(n) steps). Increase the gap limit to decrease the
-        number of steps, or decrease it to 1 if you want a complete bisection.
-      -->
-      <c:cases>
-        <!-- give up if we've reached our gap limit -->
-        <c:case>
-          <c:when name="gap">
-            <c:lte>
-              <c:value-of name="MFILTER_BISECT_GAP_MAX" />
-            </c:lte>
-          </c:when>
-
-          <!-- we tried our best; return our current position -->
-          <c:value-of name="start" />
-        </c:case>
-
-        <!-- we have not yet reached our gap limit; keep going -->
-        <c:otherwise>
-          <c:let>
-            <c:values>
-              <c:value name="mid_index" type="integer" desc="Middle index">
-                <!-- to determine the new mid index, add half of the gap to the
-                     current index -->
-                <c:sum>
-                  <c:value-of name="start" />
-                  <c:ceil>
-                    <c:quotient>
-                      <c:value-of name="gap" />
-                      <c:const value="2" type="integer" desc="Bisect" />
-                    </c:quotient>
-                  </c:ceil>
-                </c:sum>
-              </c:value>
-            </c:values>
-
-            <c:let>
-              <c:values>
-                <c:value name="mid" type="float" desc="Middle value">
-                  <c:value-of name="matrix">
-                    <!-- row -->
-                    <c:index>
-                      <c:value-of name="mid_index" />
-                    </c:index>
-
-                    <!-- column -->
-                    <c:index>
-                      <c:value-of name="col" />
-                    </c:index>
-                  </c:value-of>
-                </c:value>
-              </c:values>
-
-              <c:cases>
-                <!-- if the middle value is lower than our value, then take the upper half -->
-                <c:case>
-                  <c:when name="mid">
-                    <c:lt>
-                      <c:value-of name="val" />
-                    </c:lt>
-                  </c:when>
-
-                  <c:recurse start="mid_index" />
-                </c:case>
-
-                <!-- similarily, take the lower half if we over-shot -->
-                <c:case>
-                  <c:when name="mid">
-                    <c:gt>
-                      <c:value-of name="val" />
-                    </c:gt>
-                  </c:when>
-
-                  <c:recurse end="mid_index" />
-                </c:case>
-
-                <!-- if we have an exact match, that doesn't necessarily mean that we
-                     have every value; we may have intersected a set of them -->
-                <c:otherwise>
-                  <!-- this will return an exact index: the first index
-                       containing the element we've been looking for (it is a
-                       linear backwards search) -->
-                  <c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
-                </c:otherwise>
-              </c:cases>
-            </c:let>
-          </c:let>
-        </c:otherwise>
-      </c:cases>
-    </c:let>
-  </function>
-
-
-  <function name="foremost" desc="Search backwards for the first occurrance in a sorted list">
-    <param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
-    <param name="col" type="integer" desc="Column index to search on" />
-    <param name="i" type="integer" desc="Current index" />
-
-    <c:let>
-      <c:values>
-        <!-- we calculate this rather than accept it via an argument so that
-             this function may be called directly in a more convenient manner
-             -->
-        <c:value name="val" type="float" desc="Current value">
-          <c:value-of name="matrix">
-            <!-- row -->
-            <c:index>
-              <c:value-of name="i" />
-            </c:index>
-
-            <!-- column -->
-            <c:index>
-              <c:value-of name="col" />
-            </c:index>
-          </c:value-of>
-        </c:value>
-
-        <c:value name="prev" type="float" desc="Previous value">
-          <c:value-of name="matrix">
-            <!-- row -->
-            <c:index>
-              <t:dec>
-                <c:value-of name="i" />
-              </t:dec>
-            </c:index>
-
-            <!-- column -->
-            <c:index>
-              <c:value-of name="col" />
-            </c:index>
-          </c:value-of>
-        </c:value>
-      </c:values>
-
-      <c:cases>
-        <!-- if we have no more indexes to check, then we're done -->
-        <c:case>
-          <c:when name="i">
-            <c:eq>
-              <c:const value="0" type="integer" desc="Did we check the final (first) index?" />
-            </c:eq>
-          </c:when>
-
-          <!-- well, then, we're done -->
-          <c:value-of name="i" />
-        </c:case>
-
-        <!-- if the previous column value is the same value, then continue checking -->
-        <c:case>
-          <c:when name="prev">
-            <c:eq>
-              <c:value-of name="val" />
-            </c:eq>
-          </c:when>
-
-          <c:recurse>
-            <c:arg name="i">
-              <t:dec>
-                <c:value-of name="i" />
-              </t:dec>
-            </c:arg>
-          </c:recurse>
-        </c:case>
-
-        <!-- otherwise, we've found the foremost index -->
-        <c:otherwise>
-          <c:value-of name="i" />
-        </c:otherwise>
-      </c:cases>
-    </c:let>
-  </function>
-
-
-  <template name="_mask-unless_" desc="Mask a value unless the condition is truthful">
-    <param name="@values@" desc="Body" />
-    <param name="@name@" desc="Scalar to check" />
-    <param name="@index@" desc="Optional index" />
-    <param name="@desc@" desc="Optional description" />
-
-    <c:cases>
-      <!-- if masked -->
-      <c:case>
-        <!-- no index provided -->
-        <unless name="@index@">
-          <c:when name="@name@">
-            <c:eq>
-              <c:value-of name="FALSE" />
-            </c:eq>
-          </c:when>
-        </unless>
-
-        <!-- index provided -->
-        <if name="@index@">
-          <c:when name="@name@" index="@index@">
-            <c:eq>
-              <c:value-of name="FALSE" />
-            </c:eq>
-          </c:when>
-        </if>
-
-        <!-- TODO: configurable mask via meta and/or param -->
-        <c:value-of name="TABLE_WHEN_MASK_VALUE" />
-      </c:case>
-
-      <!-- if not masked -->
-      <c:otherwise>
-        <param-copy name="@values@" />
-      </c:otherwise>
-    </c:cases>
-  </template>
 </package>