build-aux/csvm-expand: Spawn only one date and memoize

A table with a couple hundred thousand rows was taking minutes to
generate.  This gets it down to a few seconds.

* build-aux/csvm-expand (parse_date): New function.
  (parseline): Use it.
master
Mike Gerwitz 2019-04-02 10:58:12 -04:00
parent 3d07597f7c
commit 9a1f916486
1 changed files with 31 additions and 3 deletions

View File

@ -44,6 +44,36 @@
# ...
##
# Command line of the shared `date' process used by parse_date; it is
# given one date string per line on standard input (-f-) and prints each
# as seconds since the epoch (+%s), with stdout unbuffered (stdbuf -o0).
BEGIN { date_cmd = "stdbuf -o0 date -f- +%s" }

# Close the `date' process once all input has been handled.
END { close( date_cmd ) }
# Parse the date string in field `i' into a Unix timestamp (memoized)
#
# The field is replaced in place with the timestamp.  A single `date'
# coprocess (`date_cmd') is shared by every call and is fed one date per
# line on its standard input.  Even then, date parsing is very slow for
# many thousands of rows, so results are also cached in `date_cache',
# keyed by the original field text.
#
# `orig' and `ts' are locals (awk declares locals as extra parameters);
# callers pass only `i'.
function parse_date( i,    orig, ts )
{
    orig = $i

    # Test membership rather than truthiness so that a cached timestamp of
    # 0 still counts as a hit and a miss does not create an empty entry.
    if ( orig in date_cache )
    {
        $i = date_cache[ orig ]
        return
    }

    print orig |& date_cmd

    # On EOF or error, leave the field untouched and do not cache it.
    # NOTE(review): if `date' rejects a line it presumably writes nothing
    # to stdout, in which case this read would block -- confirm.
    if ( ( date_cmd |& getline ts ) <= 0 )
        return

    date_cache[ orig ] = ts
    $i = ts
}
# Expand variable with its value, if any
function expand_vars( s, value )
@ -83,9 +113,7 @@ function parseline( i, m, j, me, orig )
if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
{
cmd = "date --date=" $i " +%s"
cmd |& getline $i
close(cmd)
parse_date( i );
}
# check first for delimiters