build-aux/csvm-expand: Spawn only one date and memoize
A table with a couple hundred thousand rows was taking minutes to generate. This gets it down to a few seconds.

* build-aux/csvm-expand (parse_date): New function.
  (parseline): Use it.
master
parent
3d07597f7c
commit
9a1f916486
|
@ -44,6 +44,36 @@
|
|||
# ...
|
||||
##
|
||||
|
||||
# Command line of the long-running date coprocess.  `date -f-' reads one date
# string per line from standard input and `+%s' prints each as a Unix
# timestamp; `stdbuf -o0' disables output buffering so that each answer can
# be read back as soon as its question has been written.
BEGIN { date_cmd = "stdbuf -o0 date -f- +%s" }
|
||||
|
||||
# Shut down the date coprocess once all input has been processed.
END { close( date_cmd ) }
|
||||
|
||||
|
||||
# Parse a date string into a Unix timestamp (memoized)
#
# Field I of the current record is replaced in place with the timestamp that
# date(1) reports for it.  Nothing is returned.
#
# A single `date' coprocess (`date_cmd') is shared by every call and is fed
# one date string per line on its standard input.  Even then, date parsing
# is very slow for many thousands of rows, so each result is also cached in
# `date_cache', keyed by the original text of the field.
#
# If no timestamp can be read back from the coprocess, the field is left
# untouched and nothing is cached.
#
# SRC and RESULT are locals; callers pass only I.
function parse_date( i,    src, result )
{
    src = $i

    # Membership test rather than truthiness: a cached timestamp of 0 still
    # counts as a hit, and a miss does not create an empty cache element.
    if ( src in date_cache )
    {
        $i = date_cache[ src ]
        return
    }

    print src |& date_cmd

    # getline yields 0 on end-of-file and -1 on error; in either case
    # `result' holds no timestamp, so do not overwrite the field or cache it.
    # NOTE(review): if date rejects the string it may write nothing to its
    # standard output, in which case this read would block -- confirm that
    # callers only pass strings that date accepts.
    if ( ( date_cmd |& getline result ) <= 0 )
        return

    date_cache[ src ] = result
    $i = result
}
|
||||
|
||||
|
||||
# Expand variable with its value, if any
|
||||
function expand_vars( s, value )
|
||||
|
@ -83,9 +113,7 @@ function parseline( i, m, j, me, orig )
|
|||
|
||||
if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
|
||||
{
|
||||
cmd = "date --date=" $i " +%s"
|
||||
cmd |& getline $i
|
||||
close(cmd)
|
||||
parse_date( i );
|
||||
}
|
||||
|
||||
# check first for delimiters
|
||||
|
|
Loading…
Reference in New Issue