csvm: Auto-sort expanded output
commit
b716e8c2cd
|
@ -0,0 +1,195 @@
|
|||
#!/usr/bin/awk -f
|
||||
#
|
||||
# Expands a "magic" CSV file into a normal CSV
|
||||
#
|
||||
# Copyright (C) 2016, 2018 R-T Specialty, LLC.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# "Magic" CSVs simply exist to make life easier: they permit comments, blank
|
||||
# lines, variables, sub-delimiter expansion, and any number of ranges per line.
|
||||
# Ranges will be expanded in every combination, making rate tables highly
|
||||
# maintainable.
|
||||
#
|
||||
# Variables are also supported when defined using :var=val. Variables may
|
||||
# expand into ranges, 'cause they're awesome. Multiple variables may be
|
||||
# delimited by semi-colons, as may multiple values.
|
||||
#
|
||||
# For example:
|
||||
# :foo=1--3
|
||||
# $foo;7;9--10:$foo, 5--10,1/1/2017
|
||||
#
|
||||
# Would generate:
|
||||
# 1, 5, 1483246800
|
||||
# 1, 6, 1483246800
|
||||
# ...
|
||||
# 5, 10, 1483246800
|
||||
# 2, 5, 1483246800
|
||||
# ...
|
||||
# 9, 5, 1483246800
|
||||
# ...
|
||||
# 1, 5, 1483246800
|
||||
# 1, 6, 1483246800
|
||||
# ...
|
||||
##
|
||||
|
||||
|
||||
# Expand variable with its value, if any
|
||||
# Expand a variable reference into its value, if S is one
#
# S is treated as a variable reference only when the entire string has the
# form `$name', where the name consists of letters, underscores and dashes.
# In that case the value stored under that name in the global `vars' array
# is returned; any other string is returned unchanged.
#
# A reference to a variable with no value (undefined, or defined as the
# empty string) is fatal: an error is written to stderr and awk exits with
# status 1.
#
# VALUE and M are locals (extra awk parameters); callers pass only S.
# Keeping M local prevents this function from clobbering a match array
# named `m' in the global scope.
function expand_vars( s,    value, m )
{
  # not a variable reference; nothing to expand
  if ( !match( s, /^\$([a-zA-Z_-]+)$/, m ) )
  {
    return s
  }

  # the value may itself be a range or delimited list, which the caller
  # goes on to process
  value = vars[ m[1] ]

  if ( value == "" )
  {
    print "error: unknown variable reference: `$" m[1] "'" > "/dev/stderr"
    exit 1
  }

  return value
}
|
||||
|
||||
|
||||
# Expand line
|
||||
# Expand field I of the current record, recursing into the fields after it
#
# Each field is processed in this order:
#   1. a whole-field variable reference is replaced by its value;
#   2. a date of the form N/N/N is converted to a Unix timestamp by date(1);
#   3. a `;'-delimited list is split, and the line is expanded once for the
#      first value and once for the remainder;
#   4. a range `A--B' is expanded, emitting the rest of the line once for
#      every integer from A through B inclusive.
#
# Once I exceeds NF, the record (as currently rewritten) is printed.  The
# field is restored to its original text before returning so that callers
# iterating over earlier fields see an unmodified record.
#
# Errors (invalid date, non-numeric range) are written to stderr and awk
# exits with status 1.
#
# M, J, ME, ORIG and CMD are locals (extra awk parameters); callers pass
# only I.
function parseline( i,    m, j, me, orig, cmd )
{
  # every field has been expanded; emit the resulting record
  if ( i > NF )
  {
    print
    return
  }

  orig = $i

  # expand variables before any processing so that expansions
  # can include any type of formatting
  $i = expand_vars( $i )

  # the pattern admits only digits and slashes, so the field is safe to
  # interpolate into the shell command unquoted
  if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
  {
    cmd = "date --date=" $i " +%s"

    # getline leaves $i untouched when date(1) produces no output; treat
    # that as an error rather than silently emitting the raw date string
    if ( ( cmd |& getline $i ) <= 0 )
    {
      close( cmd )

      print "error: invalid date: `" $i "'" > "/dev/stderr"
      exit 1
    }

    close( cmd )
  }

  # check first for delimiters
  if ( match( $i, /^([^;]+);(.*)$/, m ) )
  {
    # give it a shot with the first value
    $i = m[1]
    parseline( i )

    # strip off the first value and process with following value(s)
    $i = m[2]
    parseline( i )

    # we've delegated; we're done
    $i = orig
    return
  }

  # parse range
  if ( match( $i, /^([^-]+)--([^-]+)$/, m ) )
  {
    # either bound may itself be a variable reference
    j  = expand_vars( m[1] )
    me = expand_vars( m[2] )

    if ( !match( j, /^[0-9]+$/ ) || !match( me, /^[0-9]+$/ ) )
    {
      print "error: invalid range: `" $i "'" > "/dev/stderr"
      exit 1
    }

    # `me + 0' forces a numeric comparison even if the bound is held as a
    # plain string
    do
    {
      $i = j
      parseline( i + 1 )
    } while ( j++ < me + 0 )
  }
  else
  {
    parseline( i + 1 )
  }

  # restore to original value
  $i = orig
}
|
||||
|
||||
|
||||
BEGIN {
  # we're parsing CSVs: input fields are separated by a comma with any
  # number of surrounding spaces, and output fields by a bare comma
  FS  = " *, *"
  OFS = ","

  # directive state; the placeholder line is emitted when the input
  # provides no directive line of its own
  has_directives = 0
  output_started = 0
  directives     = "!(NODIRECTIVES)"
}


# skip all lines that begin with `#', which denotes a comment, or are empty
/^#|^$/ { next; }

# directives are echoed back and are intended for processing by
# the parent csvm2csv script
/^!/ && output_started {
  print "error: directive must appear before header: `" $0 "'" > "/dev/stderr"
  exit 1
}
/^!/ && has_directives {
  print "error: all directives must be on one line: `" $0 "'" > "/dev/stderr"
  exit 1
}
/^!/ {
  has_directives = 1
  directives     = $0

  next
}

# lines that begin with a colon are variable definitions
/^:/ {
  # the value is everything after the first `=' (EREs have no non-greedy
  # quantifier, so a plain `.*' is used)
  if ( !match( $0, /^:([a-zA-Z_-]+)=(.*)$/, m ) )
  {
    print "error: invalid variable definition: `" $0 "'" > "/dev/stderr"
    exit 1
  }

  vars[ m[1] ] = m[2]
  next
}

# Always begin output with a line for directives, even if there are
# none.  This makes subsequent processing much easier, since we won't have
# to conditionally ignore the top line.
!output_started {
  print directives

  output_started = 1
}

# lines that need any sort of processing (ranges, dates, etc)
/--|;|\$[a-zA-Z_-]|\// { parseline( 1 ); next; }

# all other lines are normal; simply output them verbatim
{
  # this assignment will ensure that awk processes the output, ensuring that
  # extra spaces between commas are stripped
  $1=$1
  print
}
|
|
@ -1,8 +1,7 @@
|
|||
#!/usr/bin/awk -f
|
||||
#
|
||||
#!/bin/bash
|
||||
# Compiles a "magic" CSV file into a normal CSV
|
||||
#
|
||||
# Copyright (C) 2016, 2018 R-T Specialty, LLC.
|
||||
# Copyright (C) 2018 R-T Specialty, LLC.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
@ -17,150 +16,95 @@
|
|||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# "Magic" CSVs simply exist to make life easier: they permit comments, blank
|
||||
# lines, variables, sub-delimiter expansion, and any number of ranges per line.
|
||||
# Ranges will be expanded in every combination, making rate tables highly
|
||||
# maintainable.
|
||||
# For format of CSVMs, see `csvm-expand'.
|
||||
#
|
||||
# Variables are also supported when defined using :var=val. Variables may
|
||||
# expand into ranges, 'cause they're awesome. Multiple variables may be
|
||||
# delimited by semi-colons, as may multiple values.
|
||||
#
|
||||
# For example:
|
||||
# :foo=1--3
|
||||
# $foo;7;9--10:$foo, 5--10,1/1/2017
|
||||
#
|
||||
# Would generate:
|
||||
# 1, 5, 1483246800
|
||||
# 1, 6, 1483246800
|
||||
# ...
|
||||
# 5, 10, 1483246800
|
||||
# 2, 5, 1483246800
|
||||
# ...
|
||||
# 9, 5, 1483246800
|
||||
# ...
|
||||
# 1, 5, 1483246800
|
||||
# 1, 6, 1483246800
|
||||
# ...
|
||||
# To disable sorting of CSVM output, use the `!NOSORT' directive before the
|
||||
# header line.
|
||||
##
|
||||
|
||||
# Directory containing the real script.  Symlinks in $0 are resolved first,
# since historically this script lives in a different directory and has
# been symlinked for compatibility.
readonly mypath="$( dirname "$( readlink -f "$0" )" )"
|
||||
|
||||
# Expand variable with its value, if any
|
||||
function expand_vars( s, value )
|
||||
|
||||
# Generate -k arguments for GNU sort given a CSV header
|
||||
#
|
||||
# The generated arguments will be of the form -k1,1n ... -kl,ln, where `l'
|
||||
# is the total number of header entries.
|
||||
#
|
||||
# For example, given this header:
|
||||
# foo, bar, baz
|
||||
# the output would be:
|
||||
# -k1,1n -k2,2n -k3,3n
|
||||
sort-key-args()
{
  # $1 is the CSV header line; it is an error for it to be unset
  local -r header="${1?Missing CSV header}"

  local -i i=0

  # generate -ki,in for each column (notice that a trailing
  # comma is added to the header because of the read delimiter);
  # -r keeps backslashes literal so that a `\,' in the header cannot
  # escape the delimiter and cause a column to be missed
  while read -r -d,; do
    echo -n "-k$((++i)),${i}n "
  done <<< "$header,"
}
|
||||
|
||||
|
||||
# Sort every column of CSV
|
||||
#
|
||||
# The columns will all be sorted left-to-right. The header is left in place
|
||||
# as the first row.
|
||||
csv-sort()
{
  # consume the CSV header (the first line on stdin) so that only the data
  # rows remain to be sorted
  local header
  read -r header

  # one numeric sort key per header column, each as its own word
  local -a keys
  read -ra keys <<< "$( sort-key-args "$header" )"

  # the header is echoed back unsorted, followed by the sorted remainder
  # of stdin
  echo "$header"
  sort -t, "${keys[@]}" -
}
|
||||
|
||||
|
||||
# Output usage information
|
||||
#
|
||||
# Kudos to you if you understand the little Easter egg.
|
||||
usage()
{
  # EX_USAGE
  local -ri ex_usage=64

  # the here-document is unquoted so that $0 expands to the invoked name
  cat <<EOF
Usage: $0 [FILE]
Expand CSVM represented by FILE or stdin into a CSV

The columns of the expanded CSV will be automatically sorted
left-to-right. To inhibit this behavior, use the \`!NOSORT'
directive anywhere before the header line in the source CSVM.

Options:
--help Output usage information.

This program has magic CSV powers.
EOF

  exit "$ex_usage"
}
|
||||
|
||||
|
||||
# Sort CSV rows left-to-right unless the `!NOSORT' directive is provided
|
||||
main()
{
  # `usage' exits, so nothing below runs when help is requested
  test ! "$1" == --help || usage

  # The first line output by csvm-expand is always the directive line; it
  # is consumed here and never reaches the output.
  "$mypath/csvm-expand" "$@" \
    | {
        local directives; read -r directives

        # ignore sorting if given NOSORT directive
        if [[ "$directives" =~ NOSORT ]]; then
          cat
        else
          # csv-sort takes no arguments; it reads the CSV from stdin
          csv-sort
        fi
    }

  # PIPESTATUS is overwritten by the next command, so capture it at once
  local -ra status=( "${PIPESTATUS[@]}" )

  # A failure of csvm-expand would otherwise be hidden by the exit status
  # of the last command in the pipeline; report it in preference to the
  # status of the sort/cat stage.
  test "${status[0]}" -eq 0 || return "${status[0]}"

  return "${status[1]}"
}
|
||||
|
||||
|
||||
# Expand line
|
||||
function parseline( i, m, j, me, orig )
|
||||
{
|
||||
if ( i > NF )
|
||||
{
|
||||
print
|
||||
return
|
||||
}
|
||||
|
||||
orig = $i
|
||||
|
||||
# expand variables before any processing so that expansions
|
||||
# can include any type of formatting
|
||||
$i = expand_vars( $i )
|
||||
|
||||
if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
|
||||
{
|
||||
cmd = "date --date=" $i " +%s"
|
||||
cmd |& getline $i
|
||||
close(cmd)
|
||||
}
|
||||
|
||||
# check first for delimiters
|
||||
if ( match( $i, /^([^;]+);(.*)$/, m ) )
|
||||
{
|
||||
# give it a shot with the first value
|
||||
$i = m[1]
|
||||
parseline( i )
|
||||
|
||||
# strip off the first value and process with following value(s)
|
||||
$i = m[2]
|
||||
parseline( i )
|
||||
|
||||
# we've delegated; we're done
|
||||
$i = orig
|
||||
return
|
||||
}
|
||||
|
||||
# parse range
|
||||
if ( match( $i, /^([^-]+)--([^-]+)$/, m ) )
|
||||
{
|
||||
j = expand_vars( m[1] )
|
||||
me = expand_vars( m[2] )
|
||||
|
||||
if ( !match( j, /^[0-9]+$/ ) || !match( me, /^[0-9]+$/ ) )
|
||||
{
|
||||
print "error: invalid range: `" $i "'" > "/dev/stderr"
|
||||
exit 1
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
$i = j
|
||||
parseline( i + 1 )
|
||||
} while ( j++ < me )
|
||||
}
|
||||
else
|
||||
{
|
||||
parseline( i + 1 );
|
||||
}
|
||||
|
||||
# restore to original value
|
||||
$i = orig
|
||||
}
|
||||
|
||||
|
||||
BEGIN {
|
||||
# we're parsing CSVs
|
||||
FS = " *, *"
|
||||
OFS = ","
|
||||
}
|
||||
|
||||
|
||||
# skip all lines that begin with `#', which denotes a comment, or are empty
|
||||
/^#|^$/ { next; }
|
||||
|
||||
# lines that begin with a colon are variable definitions
|
||||
/^:/ {
|
||||
if ( !match( $0, /^:([a-zA-Z_-]+)=(.*?)$/, m ) )
|
||||
{
|
||||
print "error: invalid variable definition: `" $0 "'" > "/dev/stderr"
|
||||
exit 1
|
||||
}
|
||||
|
||||
vars[ m[1] ] = m[2]
|
||||
next
|
||||
}
|
||||
|
||||
# lines that need any sort of processing (ranges, dates, etc)
|
||||
/--|;|\$[a-zA-Z_-]|\// { parseline( 1 ); next; }
|
||||
|
||||
# all other lines are normal; simply output them verbatim
|
||||
{
|
||||
# this assignment will ensure that awk processes the output, ensuring that
|
||||
# extra spaces between commas are stripped
|
||||
$1=$1
|
||||
print
|
||||
}
|
||||
main "$@"
|
||||
|
|
|
@ -38,7 +38,10 @@ run-test()
|
|||
test $? -eq 0 || return 1
|
||||
|
||||
# expected output
|
||||
diff <( cat <<< "$expected" ) <( cat <<< "$given" )
|
||||
diff <( cat <<< "$expected" ) <( cat <<< "$given" ) || {
|
||||
echo "test $testsum failure" >&2
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -92,11 +95,11 @@ test-delim()
|
|||
|
||||
declare -r expected='header,line
|
||||
1,2
|
||||
3,6
|
||||
3,9
|
||||
4,2
|
||||
4,6
|
||||
4,9
|
||||
3,6
|
||||
3,9'
|
||||
4,9'
|
||||
|
||||
run-test "$input" "$expected"
|
||||
}
|
||||
|
@ -179,11 +182,12 @@ test-var-with-var()
|
|||
:baz=$range;$foo
|
||||
$baz, 5'
|
||||
|
||||
# note that the output is sorted
|
||||
declare -r expected='header,line
|
||||
2,5
|
||||
2,5
|
||||
3,5
|
||||
4,5
|
||||
2,5'
|
||||
4,5'
|
||||
|
||||
run-test "$input" "$expected"
|
||||
}
|
||||
|
@ -203,6 +207,51 @@ $foo'
|
|||
}
|
||||
|
||||
|
||||
test-directive-stripped()
{
  # a directive line precedes the header in the source CSVM
  local -r input='!DIRECTIVE
header, line'

  # only the header is expected in the output
  local -r expected='header,line'

  run-test "$input" "$expected"
}
|
||||
|
||||
|
||||
test-no-sort()
{
  # rows are deliberately given in descending order
  local -r input='!NOSORT
header, line
1,1
0,0'

  # with !NOSORT the rows are expected in their source order
  local -r expected='header,line
1,1
0,0'

  run-test "$input" "$expected"
}
|
||||
|
||||
|
||||
# all directives should be put on a single line
|
||||
test-fail-multi-directive()
{
  # two directive lines
  local -r input='!DIRECTIVE1
!DIRECTIVE2
header, line'

  ((testsum++))

  # stdout and stderr are captured together; the marker text is appended
  # only if the command exits successfully
  local -r result=$(
    ../csvm2csv 2>&1 <<< "$input" \
      && echo '(test failure: expected failure)'
  )

  # the captured output must mention the second directive
  if ! grep -q '!DIRECTIVE2' <<< "$result"; then
    return 1
  fi
}
|
||||
|
||||
|
||||
test-fail-unknown-var-ref()
|
||||
{
|
||||
((testsum++))
|
||||
|
@ -254,6 +303,9 @@ test-comment \
|
|||
&& test-var-with-range-delim \
|
||||
&& test-var-with-var \
|
||||
&& test-var-zero-ref \
|
||||
&& test-directive-stripped \
|
||||
&& test-no-sort \
|
||||
&& test-fail-multi-directive \
|
||||
&& test-fail-unknown-var-ref \
|
||||
&& test-fail-non-numeric-range \
|
||||
&& test-fail-invalid-var-dfn \
|
||||
|
@ -263,7 +315,7 @@ test-comment \
|
|||
}
|
||||
|
||||
# safety check
|
||||
test "$testsum" -eq 12 || {
|
||||
test "$testsum" -eq 15 || {
|
||||
echo 'error: did not run all csvm2csv tests!' >&2
|
||||
exit 1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue