From 3cec84786639a0def1e22e8dc15be00f37be1230 Mon Sep 17 00:00:00 2001 From: Mike Gerwitz Date: Sun, 10 Mar 2013 21:51:08 -0400 Subject: [PATCH] Added cref-errlog to post-process cref errors rather than priming the hashcache This is more performant, contains additional logging and will properly output invalid crefs. --- git/commit2html | 2 +- msgfmt | 15 +++++++++++++-- processor | 4 ++-- repo2html | 35 +++++++++++++++++++++++++++-------- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/git/commit2html b/git/commit2html index 69a64df..a8b9acc 100755 --- a/git/commit2html +++ b/git/commit2html @@ -28,6 +28,6 @@ msgfmt="${msgfmt:-./msgfmt}" echo -n '

' git show --pretty=format:%b "$commit" \ - | "$msgfmt" + | "$msgfmt" "$commit" echo '

' diff --git a/msgfmt b/msgfmt index 9f1c07a..4a97cee 100755 --- a/msgfmt +++ b/msgfmt @@ -20,6 +20,9 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # # +# optional id (for cref errors) +id="$1" + # HTML replacements (default) lquo='\“' rquo='\”' @@ -40,9 +43,8 @@ while getopts n opt; do esac done - # format the commit message, stopping at the diff (if any) -awk -vurl_root="$url_root" ' +awk -vid="$id" -vurl_root="$url_root" -vcref_errlog="$cref_errlog" ' # replace commit refs with generated URL (allows linking to prior commits # without hard-coding the configurable links that could change or be # relative to where the content is hosted); this will then be processed as a @@ -52,6 +54,15 @@ awk -vurl_root="$url_root" ' # (which will be reflected once we print the line) c = "./hashcache " g[1] c | getline result + + # if a cref error logfile path was provided, log unknown refs so that they + # can be re-processed (if commits are processed in reverse order and the + # hashcache is cleared before the run, then this is likely to occur for + # every cref) + if ( result == "" && cref_errlog && id ) { + printf "^" id >cref_errlog + } + gsub(/\[cref:.*?\]/, url_root result) } diff --git a/processor b/processor index da94bdb..f94ce5b 100755 --- a/processor +++ b/processor @@ -44,8 +44,8 @@ EOH prevdate= lastts= -firstyear= -lastyear= +firstyear=0 +lastyear=0 lasthash= # generate index diff --git a/repo2html b/repo2html index bfda50c..67e9466 100755 --- a/repo2html +++ b/repo2html @@ -78,16 +78,35 @@ msgfmt="${msgfmt:-./msgfmt}" # make configuration available to all scripts export title desc copyright license msgfmt url_root -# clear and prime the cachefile (TODO: we could maintain cache files from -# previous runs if we offer a flag that opts out of reprocessing previously -# processed commits) -echo "Priming hashcache..." 
>&2 +# clear the cachefile (TODO: we could maintain cache files from previous runs if +# we offer a flag that opts out of reprocessing previously processed commits) +echo "Clearing hashcache..." >&2 ./hashcache clear -"$repotype"/list | while read hash _ ts id _; do - ./hashcache "$hash" "$( ./outfgen "$ts" "$id" )" -done -# pass commit list to the HTML and RSS processors +# set the cref error log, which will allow us to re-process *only* those commits +# that actually need to be reprocessed; this avoids the need to prime the +# hashcache, saving cycles +cref_errlog_first=.cref-errlog +export cref_errlog="$cref_errlog_first" +>"$cref_errlog" + +# pass commit list to the HTML and RSS processors (cache list to disk for +# reference and further processing) +listcache=.list "$repotype"/list | tee \ >( ./rss "$repotype" "$url_root" "$rss_count" > "$path_out/rss.xml" ) \ + "$listcache" \ | ./processor "$repotype" "$path_out" + +# re-process cref errors (but only once; any errors at this point will be +# considered to be problem refs) +export cref_errlog=.cref-bad +>"$cref_errlog" +grep -f"$cref_errlog_first" "$listcache" \ + | ./processor "$repotype" "$path_out" \ + 2> >( sed 's/^/[Reprocessing] /g' >&2 ) + +# if any invalid crefs remain, then they're bad +[ -s "$cref_errlog" ] && { + echo "warning: bad cref(s); see $cref_errlog" >&2 +}