vector/table: Extract bisect functions into vector/filter

* vector/filter.xml (bisect, foremost, _mask-unless_): Add to package.
* vector/table.xml (bisect, foremost, _mask-unless_): Remove from package.
master
Mike Gerwitz 2018-01-26 11:50:05 -05:00
parent cac9d22cb2
commit 98f9b6fadb
2 changed files with 278 additions and 263 deletions

View File

@ -23,6 +23,9 @@
core="true"
desc="Filtering Vectors and Matrices">
<import package="../base" />
<import package="list" />
<section title="Matrix Filtering">
\ref{mfilter} handles complex filtering of matrices.
@ -242,5 +245,280 @@
</c:otherwise>
</c:cases>
</function>
<section title="Bisecting">
Perform an~$O(lg n)$ bisect on a data set.
This is intended to limit recursion on very large data sets (and
consequently will increase performance).
This will bisect up until a certain point (the gap),
unless it finds the value in question.
After finding the value,
it will perform an~$O(n)$ linear backward search to find the first
occurrence of the value.
If the value is not found,
it will halt at the gap and return the first index of the gap,
which we will consider its "best guess",
at which point a linear search can be performed by the caller to
determine if the value does in fact exist at all.
(The reason this operates so oddly is because of its caller;
we could rid the gap and make this a general-purpose function if need be.
Technically,
the gap is useless and saves $lg g$ steps,
which may be very small.)
<const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001"
desc="Used to mask when conditions" />
<const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10"
desc="Quit bisect if size is less than or equal to this value" />
<function name="bisect"
desc="Bisect a matrix toward the requested column value">
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
<param name="col" type="integer" desc="Column index to filter on" />
<param name="val" type="float" desc="Column value to filter on" />
<param name="start" type="integer" desc="Start index" />
<param name="end" type="integer" desc="Start end" />
<c:let>
<c:values>
<!-- the gap represents the number of indexes between the current start
and end indexes -->
<c:value name="gap" type="integer" desc="Gap between current start and end">
<c:sum>
<c:value-of name="end" />
<t:negate>
<c:value-of name="start" />
</t:negate>
</c:sum>
</c:value>
</c:values>
<!--
At this point, we need to determine if we should continue the bisect or
halt. The purpose of the gap is based on the understanding that (with
our use cases) we will arrive, almost always, at one of two scenarios:
we found the value, but it's part of a larger set of the same values, or
the value we are looking for may not even exist at all.
The gap just limits recursion (but just barely) at smaller levels, since
bisect will take lg(n) steps). Increase the gap limit to decrease the
number of steps, or decrease it to 1 if you want a complete bisection.
-->
<c:cases>
<!-- give up if we've reached our gap limit -->
<c:case>
<c:when name="gap">
<c:lte>
<c:value-of name="MFILTER_BISECT_GAP_MAX" />
</c:lte>
</c:when>
<!-- we tried our best; return our current position -->
<c:value-of name="start" />
</c:case>
<!-- we have not yet reached our gap limit; keep going -->
<c:otherwise>
<c:let>
<c:values>
<c:value name="mid_index" type="integer" desc="Middle index">
<!-- to determine the new mid index, add half of the gap to the
current index -->
<c:sum>
<c:value-of name="start" />
<c:ceil>
<c:quotient>
<c:value-of name="gap" />
<c:const value="2" desc="Bisect" />
</c:quotient>
</c:ceil>
</c:sum>
</c:value>
</c:values>
<c:let>
<c:values>
<c:value name="mid" type="float" desc="Middle value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<c:value-of name="mid_index" />
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
</c:values>
<c:cases>
<!-- if the middle value is lower than our value, then take the upper half -->
<c:case>
<c:when name="mid">
<c:lt>
<c:value-of name="val" />
</c:lt>
</c:when>
<c:recurse start="mid_index" />
</c:case>
<!-- similarily, take the lower half if we over-shot -->
<c:case>
<c:when name="mid">
<c:gt>
<c:value-of name="val" />
</c:gt>
</c:when>
<c:recurse end="mid_index" />
</c:case>
<!-- if we have an exact match, that doesn't necessarily mean that we
have every value; we may have intersected a set of them -->
<c:otherwise>
<!-- this will return an exact index: the first index
containing the element we've been looking for (it is a
linear backwards search) -->
<c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
</c:otherwise>
</c:cases>
</c:let>
</c:let>
</c:otherwise>
</c:cases>
</c:let>
</function>
<function name="foremost"
desc="Search backwards for the first occurrance in a sorted list">
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
<param name="col" type="integer" desc="Column index to search on" />
<param name="i" type="integer" desc="Current index" />
<c:let>
<c:values>
<!-- we calculate this rather than accept it via an argument so that
this function may be called directly in a more convenient manner
-->
<c:value name="val" type="float" desc="Current value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<c:value-of name="i" />
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
<c:value name="prev" type="float" desc="Previous value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<t:dec>
<c:value-of name="i" />
</t:dec>
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
</c:values>
<c:cases>
<!-- if we have no more indexes to check, then we're done -->
<c:case>
<c:when name="i">
<c:eq>
<c:const value="0"
desc="Did we check the final (first) index?" />
</c:eq>
</c:when>
<!-- well, then, we're done -->
<c:value-of name="i" />
</c:case>
<!-- if the previous column value is the same value, then continue checking -->
<c:case>
<c:when name="prev">
<c:eq>
<c:value-of name="val" />
</c:eq>
</c:when>
<c:recurse>
<c:arg name="i">
<t:dec>
<c:value-of name="i" />
</t:dec>
</c:arg>
</c:recurse>
</c:case>
<!-- otherwise, we've found the foremost index -->
<c:otherwise>
<c:value-of name="i" />
</c:otherwise>
</c:cases>
</c:let>
</function>
<template name="_mask-unless_"
desc="Mask a value unless the condition is truthful">
<param name="@values@" desc="Body" />
<param name="@name@" desc="Scalar to check" />
<param name="@index@" desc="Optional index" />
<param name="@desc@" desc="Optional description" />
<c:cases>
<!-- if masked -->
<c:case>
<!-- no index provided -->
<unless name="@index@">
<c:when name="@name@">
<c:eq>
<c:value-of name="FALSE" />
</c:eq>
</c:when>
</unless>
<!-- index provided -->
<if name="@index@">
<c:when name="@name@" index="@index@">
<c:eq>
<c:value-of name="FALSE" />
</c:eq>
</c:when>
</if>
<!-- TODO: configurable mask via meta and/or param -->
<c:value-of name="TABLE_WHEN_MASK_VALUE" />
</c:case>
<!-- if not masked -->
<c:otherwise>
<param-copy name="@values@" />
</c:otherwise>
</c:cases>
</template>
</section>
</section>
</package>

View File

@ -24,7 +24,6 @@
desc="Functions for performing table lookups">
<import package="../base" />
<import package="list" />
<!-- since templates are inlined, we need to make these symbols available to
avoid terrible confusion -->
@ -33,11 +32,6 @@
<import package="filter" export="true" />
<import package="matrix" export="true" />
<const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001" desc="Used to mask when conditions" />
<const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10" desc="Quit bisect if size is less than or equal to this value" />
<!--
Create a constant table
@ -496,262 +490,5 @@
</c:otherwise>
</c:cases>
</function>
<!--
Perform a lg(n) bisect on a data set.
This is intended to limit recursion on very large data sets (and
consequently will increase performance). This will bisect up until a certain
point (the gap), unless it finds the value in question. After finding the
value, it will perform a linear backward search to find the first occurrence
of the value. If the value is not found, it will halt at the gap and return
the first index of the gap, which we will consider its "best guess", at
which point a linear search can be performed by the caller to determine if
the value does in fact exist at all.
(The reason this operates so oddly is because of its caller. We could rid
the gap and make this a general-purpose function if need be. Technically,
the gap is useless and saves lg(g) steps, which may be very small.)
-->
<function name="bisect" desc="Bisect a matrix toward the requested column value">
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
<param name="col" type="integer" desc="Column index to filter on" />
<param name="val" type="float" desc="Column value to filter on" />
<param name="start" type="integer" desc="Start index" />
<param name="end" type="integer" desc="Start end" />
<c:let>
<c:values>
<!-- the gap represents the number of indexes between the current start
and end indexes -->
<c:value name="gap" type="integer" desc="Gap between current start and end">
<c:sum>
<c:value-of name="end" />
<t:negate>
<c:value-of name="start" />
</t:negate>
</c:sum>
</c:value>
</c:values>
<!--
At this point, we need to determine if we should continue the bisect or
halt. The purpose of the gap is based on the understanding that (with
our use cases) we will arrive, almost always, at one of two scenarios:
we found the value, but it's part of a larger set of the same values, or
the value we are looking for may not even exist at all.
The gap just limits recursion (but just barely) at smaller levels, since
bisect will take lg(n) steps). Increase the gap limit to decrease the
number of steps, or decrease it to 1 if you want a complete bisection.
-->
<c:cases>
<!-- give up if we've reached our gap limit -->
<c:case>
<c:when name="gap">
<c:lte>
<c:value-of name="MFILTER_BISECT_GAP_MAX" />
</c:lte>
</c:when>
<!-- we tried our best; return our current position -->
<c:value-of name="start" />
</c:case>
<!-- we have not yet reached our gap limit; keep going -->
<c:otherwise>
<c:let>
<c:values>
<c:value name="mid_index" type="integer" desc="Middle index">
<!-- to determine the new mid index, add half of the gap to the
current index -->
<c:sum>
<c:value-of name="start" />
<c:ceil>
<c:quotient>
<c:value-of name="gap" />
<c:const value="2" type="integer" desc="Bisect" />
</c:quotient>
</c:ceil>
</c:sum>
</c:value>
</c:values>
<c:let>
<c:values>
<c:value name="mid" type="float" desc="Middle value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<c:value-of name="mid_index" />
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
</c:values>
<c:cases>
<!-- if the middle value is lower than our value, then take the upper half -->
<c:case>
<c:when name="mid">
<c:lt>
<c:value-of name="val" />
</c:lt>
</c:when>
<c:recurse start="mid_index" />
</c:case>
<!-- similarily, take the lower half if we over-shot -->
<c:case>
<c:when name="mid">
<c:gt>
<c:value-of name="val" />
</c:gt>
</c:when>
<c:recurse end="mid_index" />
</c:case>
<!-- if we have an exact match, that doesn't necessarily mean that we
have every value; we may have intersected a set of them -->
<c:otherwise>
<!-- this will return an exact index: the first index
containing the element we've been looking for (it is a
linear backwards search) -->
<c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
</c:otherwise>
</c:cases>
</c:let>
</c:let>
</c:otherwise>
</c:cases>
</c:let>
</function>
<function name="foremost" desc="Search backwards for the first occurrance in a sorted list">
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
<param name="col" type="integer" desc="Column index to search on" />
<param name="i" type="integer" desc="Current index" />
<c:let>
<c:values>
<!-- we calculate this rather than accept it via an argument so that
this function may be called directly in a more convenient manner
-->
<c:value name="val" type="float" desc="Current value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<c:value-of name="i" />
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
<c:value name="prev" type="float" desc="Previous value">
<c:value-of name="matrix">
<!-- row -->
<c:index>
<t:dec>
<c:value-of name="i" />
</t:dec>
</c:index>
<!-- column -->
<c:index>
<c:value-of name="col" />
</c:index>
</c:value-of>
</c:value>
</c:values>
<c:cases>
<!-- if we have no more indexes to check, then we're done -->
<c:case>
<c:when name="i">
<c:eq>
<c:const value="0" type="integer" desc="Did we check the final (first) index?" />
</c:eq>
</c:when>
<!-- well, then, we're done -->
<c:value-of name="i" />
</c:case>
<!-- if the previous column value is the same value, then continue checking -->
<c:case>
<c:when name="prev">
<c:eq>
<c:value-of name="val" />
</c:eq>
</c:when>
<c:recurse>
<c:arg name="i">
<t:dec>
<c:value-of name="i" />
</t:dec>
</c:arg>
</c:recurse>
</c:case>
<!-- otherwise, we've found the foremost index -->
<c:otherwise>
<c:value-of name="i" />
</c:otherwise>
</c:cases>
</c:let>
</function>
<template name="_mask-unless_" desc="Mask a value unless the condition is truthful">
<param name="@values@" desc="Body" />
<param name="@name@" desc="Scalar to check" />
<param name="@index@" desc="Optional index" />
<param name="@desc@" desc="Optional description" />
<c:cases>
<!-- if masked -->
<c:case>
<!-- no index provided -->
<unless name="@index@">
<c:when name="@name@">
<c:eq>
<c:value-of name="FALSE" />
</c:eq>
</c:when>
</unless>
<!-- index provided -->
<if name="@index@">
<c:when name="@name@" index="@index@">
<c:eq>
<c:value-of name="FALSE" />
</c:eq>
</c:when>
</if>
<!-- TODO: configurable mask via meta and/or param -->
<c:value-of name="TABLE_WHEN_MASK_VALUE" />
</c:case>
<!-- if not masked -->
<c:otherwise>
<param-copy name="@values@" />
</c:otherwise>
</c:cases>
</template>
</package>