vector/table: Extract bisect functions into vector/filter
* vector/filter.xml (bisect, foremost, _mask-unless_): Add to package. * vector/table.xml (bisect, foremost, _mask-unless_): Remove from package.master
parent
cac9d22cb2
commit
98f9b6fadb
|
@ -23,6 +23,9 @@
|
|||
core="true"
|
||||
desc="Filtering Vectors and Matrices">
|
||||
|
||||
<import package="../base" />
|
||||
<import package="list" />
|
||||
|
||||
|
||||
<section title="Matrix Filtering">
|
||||
\ref{mfilter} handles complex filtering of matrices.
|
||||
|
@ -242,5 +245,280 @@
|
|||
</c:otherwise>
|
||||
</c:cases>
|
||||
</function>
|
||||
|
||||
|
||||
<section title="Bisecting">
|
||||
Perform an~$O(lg n)$ bisect on a data set.
|
||||
|
||||
This is intended to limit recursion on very large data sets (and
|
||||
consequently will increase performance).
|
||||
This will bisect up until a certain point (the gap),
|
||||
unless it finds the value in question.
|
||||
After finding the value,
|
||||
it will perform an~$O(n)$ linear backward search to find the first
|
||||
occurrence of the value.
|
||||
If the value is not found,
|
||||
it will halt at the gap and return the first index of the gap,
|
||||
which we will consider its "best guess",
|
||||
at which point a linear search can be performed by the caller to
|
||||
determine if the value does in fact exist at all.
|
||||
|
||||
(The reason this operates so oddly is because of its caller;
|
||||
we could rid the gap and make this a general-purpose function if need be.
|
||||
Technically,
|
||||
the gap is useless and saves $lg g$ steps,
|
||||
which may be very small.)
|
||||
|
||||
|
||||
<const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001"
|
||||
desc="Used to mask when conditions" />
|
||||
<const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10"
|
||||
desc="Quit bisect if size is less than or equal to this value" />
|
||||
|
||||
|
||||
<function name="bisect"
|
||||
desc="Bisect a matrix toward the requested column value">
|
||||
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
|
||||
<param name="col" type="integer" desc="Column index to filter on" />
|
||||
<param name="val" type="float" desc="Column value to filter on" />
|
||||
<param name="start" type="integer" desc="Start index" />
|
||||
<param name="end" type="integer" desc="Start end" />
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<!-- the gap represents the number of indexes between the current start
|
||||
and end indexes -->
|
||||
<c:value name="gap" type="integer" desc="Gap between current start and end">
|
||||
<c:sum>
|
||||
<c:value-of name="end" />
|
||||
|
||||
<t:negate>
|
||||
<c:value-of name="start" />
|
||||
</t:negate>
|
||||
</c:sum>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<!--
|
||||
At this point, we need to determine if we should continue the bisect or
|
||||
halt. The purpose of the gap is based on the understanding that (with
|
||||
our use cases) we will arrive, almost always, at one of two scenarios:
|
||||
we found the value, but it's part of a larger set of the same values, or
|
||||
the value we are looking for may not even exist at all.
|
||||
|
||||
The gap just limits recursion (but just barely) at smaller levels, since
|
||||
bisect will take lg(n) steps). Increase the gap limit to decrease the
|
||||
number of steps, or decrease it to 1 if you want a complete bisection.
|
||||
-->
|
||||
<c:cases>
|
||||
<!-- give up if we've reached our gap limit -->
|
||||
<c:case>
|
||||
<c:when name="gap">
|
||||
<c:lte>
|
||||
<c:value-of name="MFILTER_BISECT_GAP_MAX" />
|
||||
</c:lte>
|
||||
</c:when>
|
||||
|
||||
<!-- we tried our best; return our current position -->
|
||||
<c:value-of name="start" />
|
||||
</c:case>
|
||||
|
||||
<!-- we have not yet reached our gap limit; keep going -->
|
||||
<c:otherwise>
|
||||
<c:let>
|
||||
<c:values>
|
||||
<c:value name="mid_index" type="integer" desc="Middle index">
|
||||
<!-- to determine the new mid index, add half of the gap to the
|
||||
current index -->
|
||||
<c:sum>
|
||||
<c:value-of name="start" />
|
||||
<c:ceil>
|
||||
<c:quotient>
|
||||
<c:value-of name="gap" />
|
||||
<c:const value="2" desc="Bisect" />
|
||||
</c:quotient>
|
||||
</c:ceil>
|
||||
</c:sum>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<c:value name="mid" type="float" desc="Middle value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<c:value-of name="mid_index" />
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:cases>
|
||||
<!-- if the middle value is lower than our value, then take the upper half -->
|
||||
<c:case>
|
||||
<c:when name="mid">
|
||||
<c:lt>
|
||||
<c:value-of name="val" />
|
||||
</c:lt>
|
||||
</c:when>
|
||||
|
||||
<c:recurse start="mid_index" />
|
||||
</c:case>
|
||||
|
||||
<!-- similarily, take the lower half if we over-shot -->
|
||||
<c:case>
|
||||
<c:when name="mid">
|
||||
<c:gt>
|
||||
<c:value-of name="val" />
|
||||
</c:gt>
|
||||
</c:when>
|
||||
|
||||
<c:recurse end="mid_index" />
|
||||
</c:case>
|
||||
|
||||
<!-- if we have an exact match, that doesn't necessarily mean that we
|
||||
have every value; we may have intersected a set of them -->
|
||||
<c:otherwise>
|
||||
<!-- this will return an exact index: the first index
|
||||
containing the element we've been looking for (it is a
|
||||
linear backwards search) -->
|
||||
<c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</c:let>
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</function>
|
||||
|
||||
|
||||
<function name="foremost"
|
||||
desc="Search backwards for the first occurrance in a sorted list">
|
||||
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
|
||||
<param name="col" type="integer" desc="Column index to search on" />
|
||||
<param name="i" type="integer" desc="Current index" />
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<!-- we calculate this rather than accept it via an argument so that
|
||||
this function may be called directly in a more convenient manner
|
||||
-->
|
||||
<c:value name="val" type="float" desc="Current value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<c:value-of name="i" />
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
|
||||
<c:value name="prev" type="float" desc="Previous value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<t:dec>
|
||||
<c:value-of name="i" />
|
||||
</t:dec>
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:cases>
|
||||
<!-- if we have no more indexes to check, then we're done -->
|
||||
<c:case>
|
||||
<c:when name="i">
|
||||
<c:eq>
|
||||
<c:const value="0"
|
||||
desc="Did we check the final (first) index?" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
|
||||
<!-- well, then, we're done -->
|
||||
<c:value-of name="i" />
|
||||
</c:case>
|
||||
|
||||
<!-- if the previous column value is the same value, then continue checking -->
|
||||
<c:case>
|
||||
<c:when name="prev">
|
||||
<c:eq>
|
||||
<c:value-of name="val" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
|
||||
<c:recurse>
|
||||
<c:arg name="i">
|
||||
<t:dec>
|
||||
<c:value-of name="i" />
|
||||
</t:dec>
|
||||
</c:arg>
|
||||
</c:recurse>
|
||||
</c:case>
|
||||
|
||||
<!-- otherwise, we've found the foremost index -->
|
||||
<c:otherwise>
|
||||
<c:value-of name="i" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</function>
|
||||
|
||||
|
||||
<template name="_mask-unless_"
|
||||
desc="Mask a value unless the condition is truthful">
|
||||
<param name="@values@" desc="Body" />
|
||||
<param name="@name@" desc="Scalar to check" />
|
||||
<param name="@index@" desc="Optional index" />
|
||||
<param name="@desc@" desc="Optional description" />
|
||||
|
||||
<c:cases>
|
||||
<!-- if masked -->
|
||||
<c:case>
|
||||
<!-- no index provided -->
|
||||
<unless name="@index@">
|
||||
<c:when name="@name@">
|
||||
<c:eq>
|
||||
<c:value-of name="FALSE" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
</unless>
|
||||
|
||||
<!-- index provided -->
|
||||
<if name="@index@">
|
||||
<c:when name="@name@" index="@index@">
|
||||
<c:eq>
|
||||
<c:value-of name="FALSE" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
</if>
|
||||
|
||||
<!-- TODO: configurable mask via meta and/or param -->
|
||||
<c:value-of name="TABLE_WHEN_MASK_VALUE" />
|
||||
</c:case>
|
||||
|
||||
<!-- if not masked -->
|
||||
<c:otherwise>
|
||||
<param-copy name="@values@" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</template>
|
||||
</section>
|
||||
</section>
|
||||
</package>
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
desc="Functions for performing table lookups">
|
||||
|
||||
<import package="../base" />
|
||||
<import package="list" />
|
||||
|
||||
<!-- since templates are inlined, we need to make these symbols available to
|
||||
avoid terrible confusion -->
|
||||
|
@ -33,11 +32,6 @@
|
|||
<import package="filter" export="true" />
|
||||
<import package="matrix" export="true" />
|
||||
|
||||
|
||||
<const name="TABLE_WHEN_MASK_VALUE" type="float" value="-0.0001" desc="Used to mask when conditions" />
|
||||
<const name="MFILTER_BISECT_GAP_MAX" type="integer" value="10" desc="Quit bisect if size is less than or equal to this value" />
|
||||
|
||||
|
||||
<!--
|
||||
Create a constant table
|
||||
|
||||
|
@ -496,262 +490,5 @@
|
|||
</c:otherwise>
|
||||
</c:cases>
|
||||
</function>
|
||||
|
||||
|
||||
<!--
|
||||
Perform a lg(n) bisect on a data set.
|
||||
|
||||
This is intended to limit recursion on very large data sets (and
|
||||
consequently will increase performance). This will bisect up until a certain
|
||||
point (the gap), unless it finds the value in question. After finding the
|
||||
value, it will perform a linear backward search to find the first occurrence
|
||||
of the value. If the value is not found, it will halt at the gap and return
|
||||
the first index of the gap, which we will consider its "best guess", at
|
||||
which point a linear search can be performed by the caller to determine if
|
||||
the value does in fact exist at all.
|
||||
|
||||
(The reason this operates so oddly is because of its caller. We could rid
|
||||
the gap and make this a general-purpose function if need be. Technically,
|
||||
the gap is useless and saves lg(g) steps, which may be very small.)
|
||||
-->
|
||||
<function name="bisect" desc="Bisect a matrix toward the requested column value">
|
||||
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
|
||||
<param name="col" type="integer" desc="Column index to filter on" />
|
||||
<param name="val" type="float" desc="Column value to filter on" />
|
||||
<param name="start" type="integer" desc="Start index" />
|
||||
<param name="end" type="integer" desc="Start end" />
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<!-- the gap represents the number of indexes between the current start
|
||||
and end indexes -->
|
||||
<c:value name="gap" type="integer" desc="Gap between current start and end">
|
||||
<c:sum>
|
||||
<c:value-of name="end" />
|
||||
|
||||
<t:negate>
|
||||
<c:value-of name="start" />
|
||||
</t:negate>
|
||||
</c:sum>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<!--
|
||||
At this point, we need to determine if we should continue the bisect or
|
||||
halt. The purpose of the gap is based on the understanding that (with
|
||||
our use cases) we will arrive, almost always, at one of two scenarios:
|
||||
we found the value, but it's part of a larger set of the same values, or
|
||||
the value we are looking for may not even exist at all.
|
||||
|
||||
The gap just limits recursion (but just barely) at smaller levels, since
|
||||
bisect will take lg(n) steps). Increase the gap limit to decrease the
|
||||
number of steps, or decrease it to 1 if you want a complete bisection.
|
||||
-->
|
||||
<c:cases>
|
||||
<!-- give up if we've reached our gap limit -->
|
||||
<c:case>
|
||||
<c:when name="gap">
|
||||
<c:lte>
|
||||
<c:value-of name="MFILTER_BISECT_GAP_MAX" />
|
||||
</c:lte>
|
||||
</c:when>
|
||||
|
||||
<!-- we tried our best; return our current position -->
|
||||
<c:value-of name="start" />
|
||||
</c:case>
|
||||
|
||||
<!-- we have not yet reached our gap limit; keep going -->
|
||||
<c:otherwise>
|
||||
<c:let>
|
||||
<c:values>
|
||||
<c:value name="mid_index" type="integer" desc="Middle index">
|
||||
<!-- to determine the new mid index, add half of the gap to the
|
||||
current index -->
|
||||
<c:sum>
|
||||
<c:value-of name="start" />
|
||||
<c:ceil>
|
||||
<c:quotient>
|
||||
<c:value-of name="gap" />
|
||||
<c:const value="2" type="integer" desc="Bisect" />
|
||||
</c:quotient>
|
||||
</c:ceil>
|
||||
</c:sum>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<c:value name="mid" type="float" desc="Middle value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<c:value-of name="mid_index" />
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:cases>
|
||||
<!-- if the middle value is lower than our value, then take the upper half -->
|
||||
<c:case>
|
||||
<c:when name="mid">
|
||||
<c:lt>
|
||||
<c:value-of name="val" />
|
||||
</c:lt>
|
||||
</c:when>
|
||||
|
||||
<c:recurse start="mid_index" />
|
||||
</c:case>
|
||||
|
||||
<!-- similarily, take the lower half if we over-shot -->
|
||||
<c:case>
|
||||
<c:when name="mid">
|
||||
<c:gt>
|
||||
<c:value-of name="val" />
|
||||
</c:gt>
|
||||
</c:when>
|
||||
|
||||
<c:recurse end="mid_index" />
|
||||
</c:case>
|
||||
|
||||
<!-- if we have an exact match, that doesn't necessarily mean that we
|
||||
have every value; we may have intersected a set of them -->
|
||||
<c:otherwise>
|
||||
<!-- this will return an exact index: the first index
|
||||
containing the element we've been looking for (it is a
|
||||
linear backwards search) -->
|
||||
<c:apply name="foremost" matrix="matrix" col="col" i="mid_index" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</c:let>
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</function>
|
||||
|
||||
|
||||
<function name="foremost" desc="Search backwards for the first occurrance in a sorted list">
|
||||
<param name="matrix" type="float" set="matrix" desc="Matrix to bisect" />
|
||||
<param name="col" type="integer" desc="Column index to search on" />
|
||||
<param name="i" type="integer" desc="Current index" />
|
||||
|
||||
<c:let>
|
||||
<c:values>
|
||||
<!-- we calculate this rather than accept it via an argument so that
|
||||
this function may be called directly in a more convenient manner
|
||||
-->
|
||||
<c:value name="val" type="float" desc="Current value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<c:value-of name="i" />
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
|
||||
<c:value name="prev" type="float" desc="Previous value">
|
||||
<c:value-of name="matrix">
|
||||
<!-- row -->
|
||||
<c:index>
|
||||
<t:dec>
|
||||
<c:value-of name="i" />
|
||||
</t:dec>
|
||||
</c:index>
|
||||
|
||||
<!-- column -->
|
||||
<c:index>
|
||||
<c:value-of name="col" />
|
||||
</c:index>
|
||||
</c:value-of>
|
||||
</c:value>
|
||||
</c:values>
|
||||
|
||||
<c:cases>
|
||||
<!-- if we have no more indexes to check, then we're done -->
|
||||
<c:case>
|
||||
<c:when name="i">
|
||||
<c:eq>
|
||||
<c:const value="0" type="integer" desc="Did we check the final (first) index?" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
|
||||
<!-- well, then, we're done -->
|
||||
<c:value-of name="i" />
|
||||
</c:case>
|
||||
|
||||
<!-- if the previous column value is the same value, then continue checking -->
|
||||
<c:case>
|
||||
<c:when name="prev">
|
||||
<c:eq>
|
||||
<c:value-of name="val" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
|
||||
<c:recurse>
|
||||
<c:arg name="i">
|
||||
<t:dec>
|
||||
<c:value-of name="i" />
|
||||
</t:dec>
|
||||
</c:arg>
|
||||
</c:recurse>
|
||||
</c:case>
|
||||
|
||||
<!-- otherwise, we've found the foremost index -->
|
||||
<c:otherwise>
|
||||
<c:value-of name="i" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</c:let>
|
||||
</function>
|
||||
|
||||
|
||||
<template name="_mask-unless_" desc="Mask a value unless the condition is truthful">
|
||||
<param name="@values@" desc="Body" />
|
||||
<param name="@name@" desc="Scalar to check" />
|
||||
<param name="@index@" desc="Optional index" />
|
||||
<param name="@desc@" desc="Optional description" />
|
||||
|
||||
<c:cases>
|
||||
<!-- if masked -->
|
||||
<c:case>
|
||||
<!-- no index provided -->
|
||||
<unless name="@index@">
|
||||
<c:when name="@name@">
|
||||
<c:eq>
|
||||
<c:value-of name="FALSE" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
</unless>
|
||||
|
||||
<!-- index provided -->
|
||||
<if name="@index@">
|
||||
<c:when name="@name@" index="@index@">
|
||||
<c:eq>
|
||||
<c:value-of name="FALSE" />
|
||||
</c:eq>
|
||||
</c:when>
|
||||
</if>
|
||||
|
||||
<!-- TODO: configurable mask via meta and/or param -->
|
||||
<c:value-of name="TABLE_WHEN_MASK_VALUE" />
|
||||
</c:case>
|
||||
|
||||
<!-- if not masked -->
|
||||
<c:otherwise>
|
||||
<param-copy name="@values@" />
|
||||
</c:otherwise>
|
||||
</c:cases>
|
||||
</template>
|
||||
</package>
|
||||
|
||||
|
|
Loading…
Reference in New Issue