build-aux/csvm-expand: Spawn only one date and memoize
A table with a couple hundred thousand rows was taking minutes to generate. This gets it down to a few seconds.

* build-aux/csvm-expand (parse_date): New function.
(parseline): Use it.

master
parent
3d07597f7c
commit
9a1f916486
|
@ -44,6 +44,36 @@
|
||||||
# ...
|
# ...
|
||||||
##
|
##
|
||||||
|
|
||||||
|
# Command line of the long-lived `date' coprocess: it reads one date per
# line from standard input (-f-) and prints each as a Unix timestamp (+%s),
# with its standard output unbuffered (stdbuf -o0).
BEGIN { date_cmd = "stdbuf -o0 date -f- +%s" }
|
||||||
|
|
||||||
|
# Shut down the `date' coprocess once all input has been processed.
END { close( date_cmd ) }
|
||||||
|
|
||||||
|
|
||||||
|
# Parse the date string in field I into a Unix timestamp (memoized)
#
# Field I is overwritten in place with the timestamp; nothing is returned.
#
# The date is written to the coprocess named by `date_cmd' and the result is
# read back from it, so only a single process is spawned for date.  Even
# then, though, date parsing is very slow for many thousands of rows, so the
# output is also cached in `date_cache', keyed by the original field text.
#
# ORIG is a local variable, not an argument.
#
# NOTE(review): if the coprocess prints nothing for an input it rejects, the
# read below would presumably block; confirm that callers only pass strings
# that date accepts.
function parse_date( i, orig )
{
    orig = $i

    # Test membership rather than truthiness: a cached timestamp of 0 is
    # false, and merely subscripting the array would create an empty entry.
    if ( orig in date_cache )
    {
        $i = date_cache[ orig ]
        return
    }

    print orig |& date_cmd

    # On EOF or error getline leaves the field untouched; do not cache the
    # unparsed text as though it were a timestamp.
    if ( ( date_cmd |& getline $i ) <= 0 )
        return

    date_cache[ orig ] = $i
}
|
||||||
|
|
||||||
|
|
||||||
# Expand variable with its value, if any
|
# Expand variable with its value, if any
|
||||||
function expand_vars( s, value )
|
function expand_vars( s, value )
|
||||||
|
@ -83,9 +113,7 @@ function parseline( i, m, j, me, orig )
|
||||||
|
|
||||||
if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
|
if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
|
||||||
{
|
{
|
||||||
cmd = "date --date=" $i " +%s"
|
parse_date( i );
|
||||||
cmd |& getline $i
|
|
||||||
close(cmd)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# check first for delimiters
|
# check first for delimiters
|
||||||
|
|
Loading…
Reference in New Issue