tame/tools/csvi

139 lines
3.0 KiB
Plaintext
Raw Normal View History

#!/usr/bin/awk -f
#
# Performs interpolation for columns in a CSV and outputs the result
#
# Copyright (C) 2016 LoVullo Associates, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Configurable values (use -vname=value from command line):
# step - use a predetermined step instead of calculating it from the first two rows
##
# Remembers the first hlen fields of the current record in the global prev
# array so that a later record can be compared against (and interpolated
# from) it.
#
# Reads:  hlen, $1..$hlen
# Writes: prev[1..hlen]
#
# `i' is declared as an extra parameter purely to make it local (the usual awk
# idiom); callers pass no arguments.  Without it the loop index would be a
# global and would clobber any `i' in use by the caller.
function storeline(    i)
{
  for ( i = 1; i <= hlen; i++ ) {
    prev[i] = $i
  }
}
# Resets the remembered row: every one of the first hlen entries of the global
# prev array is set to 0.
#
# Reads:  hlen
# Writes: prev[1..hlen]
#
# `i' is declared as an extra parameter purely to make it local (the usual awk
# idiom); callers pass no arguments.
function clearline(    i)
{
  for ( i = 1; i <= hlen; i++ ) {
    prev[i] = 0
  }
}
# Overwrites the first hlen fields of the current record with the values
# remembered in the global prev array.  Because fields are assigned, awk
# rebuilds $0 using OFS; any fields beyond hlen are left as they were.
#
# Reads:  hlen, prev[1..hlen]
# Writes: $1..$hlen (and therefore $0)
#
# `i' is declared as an extra parameter purely to make it local (the usual awk
# idiom); callers pass no arguments.
function getprev(    i)
{
  for ( i = 1; i <= hlen; i++ ) {
    $i = prev[i]
  }
}
# Processes the current record against the previously stored row (prev):
#
#   - if the first column advanced by exactly `step', the record is printed
#     unchanged and becomes the new stored row;
#   - if it advanced by less than one step, the record is skipped and the
#     stored row is left untouched;
#   - otherwise n = int( diff / step ) rows are printed, starting from the
#     stored row and advancing the first column by `step' and every other
#     column by an equal share of the difference, so that the n-th row carries
#     the current record's values.  Any remainder that does not fit a whole
#     step is ignored.  The last printed row becomes the new stored row.
#
# Reads:  step, hlen, prev, current record
# Writes: current record, prev (via storeline), standard output
#
# The names after the gap in the parameter list are locals (awk idiom);
# callers pass no arguments.
#
# This function returns rather than using `next': `next' inside a function
# body is rejected by several awk implementations.  It must therefore be the
# last action taken for a record.
function interpolate(    diff, n, k, i, ival)
{
  diff = $1 - prev[1]

  # does this value fall in line with the requested step?
  if ( diff == step ) {
    storeline()

    # we're good; continue
    print
    return
  }

  # a zero (or non-numeric) step cannot be divided by; fail with a clear
  # message instead of awk's fatal division-by-zero error
  if ( ( step + 0 ) == 0 ) {
    print "csvi: step must be a non-zero number" > "/dev/stderr"
    exit 1
  }

  # if we do not yet have a value large enough to reach our step, then continue
  # until we do (do not store this line)
  n = int( diff / step )
  if ( n <= 0 ) {
    return
  }

  # determine the per-step increment of every column but the first
  for ( i = 2; i <= hlen; i++ ) {
    ival[i] = ( $i - prev[i] ) / n
  }

  # start again from the stored row
  getprev()

  # emit exactly n rows; counting the rows (rather than repeatedly subtracting
  # the step from the gap) keeps the row count in agreement with the increments
  # computed above, so a gap that is not a whole multiple of the step no longer
  # produces an extra row extrapolated past the current record
  for ( k = 1; k <= n; k++ ) {
    # increase the last value by our step
    $1 += step

    # interpolate each column value (the first column was handled directly
    # above)
    for ( i = 2; i <= hlen; i++ ) {
      $i += ival[i]
    }

    # print the new line
    print
  }

  # anything remaining does not fit into our step and is ignored; the last
  # printed row is the baseline for the next record
  storeline()
}
# Start-up: echoes the header, switches to comma-separated fields, and
# establishes the baseline row (and, when not given with -vstep=..., the step)
# before the per-record rule takes over.
BEGIN {
  # the first row of the CSV is the header representing the column identifiers;
  # with no input at all there is nothing to do
  if ( ( getline ) <= 0 ) {
    exit
  }

  hlen = split( $0, header, /,/ )

  # output the header
  print $0

  # delimit fields by commas (the field separator for CSVs); note that this
  # won't work properly if strings contain commas
  FS = OFS = ","

  clearline()

  # the first data row never needs interpolating: output it and make it the
  # baseline for the rows that follow.  This is done whether or not a step was
  # supplied; previously a supplied step caused this row to be read and then
  # silently discarded.
  if ( ( getline ) <= 0 ) {
    exit
  }

  print
  storeline()

  # if no step was provided, then calculate one based on the first two rows
  if ( step == 0 ) {
    vala = $1

    # a single data row gives nothing to derive a step from (and nothing left
    # to interpolate)
    if ( ( getline ) <= 0 ) {
      exit
    }

    valb = $1
    step = valb - vala

    # since the second line is used to determine the step, it matches the step
    # by definition and is good to output
    print

    # the second row is now the baseline
    storeline()
  }
}
# every record that was not already consumed by the BEGIN block is handed to
# interpolate()
{
  interpolate()
}