From 9a1f91648644a665b8aae4035ce2a30adc37f834 Mon Sep 17 00:00:00 2001
From: Mike Gerwitz
Date: Tue, 2 Apr 2019 10:58:12 -0400
Subject: [PATCH] build-aux/csvm-expand: Spawn only one date and memoize

A table with a couple hundred thousand rows was taking minutes to
generate.  This gets it down to a few seconds.

Since a single long-lived date process is now shared by every row, its
standard error is merged into standard output so that each request yields
exactly one reply line; a date that cannot be parsed is left as-is with a
warning rather than stalling the read from the coprocess.

* build-aux/csvm-expand (parse_date): New function.
  (parseline): Use it.
---
 build-aux/csvm-expand | 47 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/build-aux/csvm-expand b/build-aux/csvm-expand
index fe970885..2ec3ca90 100755
--- a/build-aux/csvm-expand
+++ b/build-aux/csvm-expand
@@ -44,6 +44,49 @@
 # ...
 ##
 
+BEGIN {
+    # stderr is merged into stdout so that every input line yields exactly
+    # one output line; otherwise a date that fails to parse would produce no
+    # output and the getline in parse_date would block forever
+    date_cmd = "stdbuf -o0 date -f- +%s 2>&1"
+}
+
+END {
+    close( date_cmd )
+}
+
+
+# Parse a date string into a Unix timestamp (memoized)
+#
+# Field I is replaced in-place with its timestamp.  This spawns a single
+# process for date and reads from standard in.  Even then, though, date
+# parsing is very slow for many thousands of rows, so the output is also
+# cached in `date_cache'.
+#
+# If date cannot parse the field (or the coprocess is unavailable), a
+# warning is printed to standard error and the field is left unchanged.
+function parse_date( i,    src, ts )
+{
+    src = $i
+
+    # `in' rather than truthiness, so that a timestamp of 0 is a cache hit
+    if ( src in date_cache )
+    {
+        $i = date_cache[ src ]
+        return
+    }
+
+    print src |& date_cmd
+
+    if ( ( ( date_cmd |& getline ts ) <= 0 ) || ( ts !~ /^-?[0-9]+$/ ) )
+    {
+        print "warning: unable to parse date `" src "'" > "/dev/stderr"
+        ts = src
+    }
+
+    $i = date_cache[ src ] = ts
+}
+
 
 # Expand variable with its value, if any
 function expand_vars( s, value )
@@ -83,9 +126,7 @@ function parseline( i, m, j, me, orig )
 
     if ( match( $i, /^([0-9]+\/){2}[0-9]+$/, m ) )
     {
-        cmd = "date --date=" $i " +%s"
-        cmd |& getline $i
-        close(cmd)
+        parse_date( i );
     }
 
     # check first for delimiters