1
0
Fork 0
repo2html/msgfmt

130 lines
3.9 KiB
Bash
Executable File

#!/bin/bash
#
# Formats a Git commit message
#
# Copyright (C) 2012 Mike Gerwitz
#
# This file is part of repo2html.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
# identifier of the message being formatted, taken from the first positional
# argument; it is optional and only used when reporting cref errors
id=$1

# default replacement text, producing HTML; the entities carry a leading
# backslash because they are spliced into the replacement side of sed
# substitutions, where a bare & would stand for the matched text
lquo="\\&ldquo;"
rquo="\\&rdquo;"
mdash="\\&mdash;"

# default paragraph delimiters
opar="<p>"
epar="</p>"
# switches the quote and dash replacements over to plain text, so that no
# HTML entities are produced for them
nohtml() {
  lquo='"'
  rquo='"'
  mdash='---'
}
# suppresses paragraph tags by blanking both paragraph delimiters
nopar() {
  opar=''
  epar=''
}
#######################################
# Parses the command-line options and locates the optional id operand.
#   -n  yield plain text instead of HTML entities (calls nohtml)
#   -P  do not output paragraph tags (calls nopar)
# Globals:
#   id      (written) set to the first operand following the options, but
#           only when at least one option argument was consumed
#   OPTIND  (written) intentionally left global, as with top-level getopts
# Arguments:
#   the arguments given to the script
#######################################
parse_options() {
  local opt

  while getopts nP opt; do
    case "$opt" in
      n) nohtml ;;
      P) nopar ;;
    esac
  done

  # When options precede the id, $1 is an option string (e.g. "-n") rather
  # than the id, so take the id from the first operand after the options
  # instead (empty if there is none).  Invocations without options are left
  # exactly as they were.
  if (( OPTIND > 1 )); then
    id="${!OPTIND}"
  fi
}

parse_options "$@"
# calculate this after options have been parsed: the opening tag used for
# reference definitions is the paragraph tag with id and class attributes
# added, or nothing at all when paragraph tags are disabled; \2 is left for
# sed to fill in with the reference number
#
# the attribute quotes are escaped so that they reach the output; unescaped
# double quotes inside a double-quoted ${...:+...} are removed by the shell,
# which yields unquoted attribute values
refopar=
if [[ -n "$opar" ]]; then
  refopar="${opar%>} id=\"ref-\\2\" class=\"ref\">"
fi
#######################################
# Formats the commit message read from stdin, stopping at the diff (if any),
# and writes the result to stdout.
# Globals (read):
#   id, url_root, cref_errlog  - handed to awk for resolving commit refs
#   opar, epar, refopar        - paragraph markup spliced into the sed script
#   lquo, rquo, mdash          - punctuation replacements for the sed script
# Returns:
#   the exit status of sed
#######################################
format_message() {
  awk -v id="$id" -v url_root="${url_root%/}" -v cref_errlog="$cref_errlog" '
    # quotes a string so that the shell sees it as one literal word (\047 is
    # a single quote, which cannot be written literally in this program)
    function shquote(s,    n, parts, i, quoted) {
      n = split(s, parts, "\047")
      quoted = "\047" parts[1]
      for (i = 2; i <= n; i++) {
        quoted = quoted "\047\\\047\047" parts[i]
      }
      return quoted "\047"
    }

    # replace commit refs with generated URL (allows linking to prior commits
    # without hard-coding the configurable links that could change or be
    # relative to where the content is hosted); this will then be processed
    # as a normal URL by the remainder of the script
    index($0, "[cref:") {
      out = ""
      rest = $0
      unresolved = 0

      # each ref is handled on its own and ends at the first closing bracket,
      # so other bracketed text on the same line is left alone
      while ((start = index(rest, "[cref:")) > 0) {
        tail = substr(rest, start + 6)
        close_at = index(tail, "]")
        if (close_at == 0) {
          break
        }

        # retrieve the URL from the hashcache; the ref is quoted because it
        # comes from the commit message and is passed through a shell, the
        # result is cleared first so that a failed lookup cannot reuse the
        # URL of an earlier ref, and the command is closed so that the same
        # lookup can be run again later
        cmd = "./hashcache " shquote(substr(tail, 1, close_at - 1))
        result = ""
        cmd | getline result
        close(cmd)

        if (result == "") {
          unresolved = 1
        }

        out = out substr(rest, 1, start - 1) url_root "/" result
        rest = substr(tail, close_at + 1)
      }

      # if a cref error logfile path was provided, log unknown refs so that
      # they can be re-processed (if commits are processed in reverse order
      # and the hashcache is cleared before the run, then this is likely to
      # occur for every cref)
      if (unresolved && cref_errlog && id) {
        printf("%s\n", id) >> cref_errlog
      }

      # the replacement is reflected once we print the line
      $0 = out rest
    }

    # stop printing at diff
    /^diff --git/ { exit }

    # otherwise, print everything
    { print }
  ' \
    | sed '
    # slurp the whole message into the pattern space; N is only issued while
    # more input remains, since GNU sed would otherwise print the pattern
    # space and quit on the last line, leaving a one-line message unformatted
    :a
    $!{
      N
      ba
    }

    # handle <>-delimited links (strip delimiters)
    s#<\([fh]ttps\?://[^ ]\+\)>#\1#g;

    # escaping
    s/\&/\&amp;/g;
    s/</\&lt;/g;
    s/>/\&gt;/g;

    # quoting (initiated by an indented paragraph and terminated by a new
    # paragraph, unless that paragraph is also indented)
    s#\n\n \+\(\([^\n]\+\n\(\n \+\)\?\)\+\)#<blockquote>\1</blockquote>#g

    # pre-formatted block. markdown-style
    # NOTE(review): this expression is identical to the quoting expression
    # above and emits <blockquote> rather than a pre-formatted block; confirm
    # the intended pattern and tag
    s#\n\n \+\(\([^\n]\+\n\(\n \+\)\?\)\+\)#<blockquote>\1</blockquote>#g

    # unfortunately, non-greedy matches make it difficult to exclude
    # punctuation at the end of a link, so we will handle it in a separate
    # expression
    s#[fh]ttps\?://[^]\n )]\+#<a href="&">&</a>#g;
    s#<a href="\([^"]\+\)\([.;,!]\)">\([^<]\+\).</a>#<a href="\1">\3</a>\2#g;

    # reference definitions (footnotes)
    s#\(\n\[\([0-9]\+\)\]\):\?#'"$epar$refopar"'\1#g;

    # references in text (note that references that enclose text as a
    # hyperlink must not start with a number, otherwise they will be
    # considered to be a reference number)
    s|\[\([^0-9][^]]\+\)\]\[\([0-9]\+\)\]|<a href="#ref-\2">\1</a>\[\2\]|g
    s|\[\([0-9]\+\)\]|<sup><a href="#ref-\1">&</a></sup>|g

    # paragraphs
    s#\n\n#'"$epar"'&'"$opar"'#g;
    /^/i'"$opar"'
    /$/a'"$epar"'

    # basic formatting
    s/---/'"$mdash"'/g;
    s#``#'"$lquo"'#g;
    s#'\'\''#'"$rquo"'#g;
    s#\(\W\|^\)\*\*\([^\*]\+\)\*\*\(\W\)#\1<strong>\2</strong>\3#g;
    s#\(\W\)\*\([^\*]\+\)\*\(\W\)#\1<em>\2</em>\3#g;
  '
}

format_message