thoughts/src/post2html

193 lines
4.8 KiB
Plaintext
Raw Normal View History

#!/bin/bash
# Generate HTML from post Markdown source
#
# Copyright (C) 2019 Mike Gerwitz
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script accepts the file name rather than data on stdin because the
# filename encodes the post date.
#
# Note that the `pagetitle' is set to "ignoreme"---it is not used, but is
# needed to suppress the warning pandoc produces without suppressing all
# warnings.
#
# Pandoc is used to generate the HTML and includes a (mostly) static header
# and footer. Note that this duplicates the date logic in `post2meta',
# because that must be run on this output, but the post must also contain
# the date, and we want to do all HTML processing now.
##
# Base URL to which a source file's path is appended to form the link that
# hgroup-wrap attaches to the post date.
readonly srcref='https://forge.mikegerwitz.com/mikegerwitz/thoughts/commits/branch/master'

# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Pandoc input format (first element) followed by the extensions enabled on
# it.  The list is joined by pexts and handed to `pandoc -f'.
readonly -a ext=(
  markdown
  smart footnotes gfm_auto_identifiers
  fancy_lists startnum
  tex_math_dollars
)
# Print the Pandoc format specification: every element of `ext' joined into
# a single `+'-delimited line.
pexts()
{
  # "${ext[*]}" joins on the first character of IFS; keeping the override
  # local to this function leaves word splitting elsewhere untouched.
  local IFS='+'

  printf '%s\n' "${ext[*]}"
}
# Wrap h1 in an hgroup along with the post date.
#
# Reads HTML on stdin and writes it to stdout.  Every line beginning with
# `<h1' gets `<hgroup>' inserted on the line before it and, on the line
# after it, an `<h2 class="date">' that links the date to "$srcref/FILE" and
# closes the hgroup.
#
# Sometimes this script is used on things that aren't posts (e.g. normal
# pages), in which case a date will be unavailable and the input is copied
# to the output unchanged.
#
#   $1  candidate date; only an exact YYYY-MM-DD string counts as a date
#   $2  path of the source file, appended to `srcref' to build the link
hgroup-wrap()
{
  local -r date=${1?Missing date}
  local -r file=${2?Missing file}

  # Pass the input through untouched unless this is a date.  The pattern is
  # anchored at both ends so that a string merely _containing_ something
  # date-shaped is not mistaken for one and emitted as the link text.
  if [[ ! $date =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    cat
    return
  fi

  local anchor
  printf -v anchor '<a class="muted" href="%s">%s</a>' \
    "$srcref/$file" \
    "$date"

  # The one-line forms of `i' and `a' (text on the same line as the
  # command) are GNU sed extensions.
  sed '/^<h1/{
    i<hgroup>
    a<h2 class="date">'"$anchor"'</h2></hgroup>
  }'
}
# Pre-format Markdown files before they get to Pandoc
#
# These may be able to be implemented as Pandoc filters, but I haven't had
# the time to research that yet.
#
# I had originally switched to Markdown hoping that it would suit my needs
# better than an ad-hoc formatting language of my own design.  And yet here
# I am again, adding to the language.
#
# The file is processed line by line and the result written to stdout:
#
#   - non-blank content nested in `<details>' must be indented at least two
#     spaces deeper than the opening tag; that much indentation is removed.
#     A shallower line is reported on stderr and left as it is, and the
#     exit status becomes 1.
#   - outside of code fences, `~' following any character other than a
#     backslash becomes a tie, and `\~' becomes a literal `~'.
#   - outside of code fences, a line ending in `%' is joined with the next
#     line, whose leading spaces are dropped.
#   - a line consisting of `@LASTFENCE@' is replaced with the body of the
#     most recently closed code fence.
#
# The awk program avoids the gawk-only gensub() and `\>' so that it does not
# depend on `awk' being gawk.
#
#   $1  path of the Markdown file to read
prefmt()
{
  # Accepting a filename instead of stdin allows us to output errors
  # including the name of the file.
  local -r file=${1?Missing file}

  awk '
    # Replace each "~" that follows a character other than a backslash
    # with a tie, keeping that character.  The tie is written as an octal
    # escape (the UTF-8 encoding of U+00A0, a non-breaking space) so that
    # it stays visible here and cannot be mistaken for a plain space.
    function tie(s,    out)
    {
      out = ""
      while (match(s, /[^\\]~/)) {
        out = out substr(s, 1, RSTART) "\302\240"
        s = substr(s, RSTART + 2)
      }
      return out s
    }

    BEGIN {
      infence = 0   # inside a code fence
      gather  = 0   # collecting fence content into gblock
      nltrim  = 0   # previous line ended in "%"
      triml   = 0   # number of leading spaces to strip (0 = none)
      indent  = 0   # indentation of the current line as it was read
      ex      = 0   # exit status
    }

    # indentation is the position of the first non-space character;
    # it is -1 for a line that is empty or consists only of spaces
    { indent = match($0, /[^ ]/) - 1 }

    # HTML nesting: a closing tag sits at the indentation expected of the
    # enclosing level, so that becomes the amount to strip again
    /^ *<\/details>/ {
      triml = indent
    }

    triml && indent >= 0 {
      if (indent < triml) {
        printf("error: post2html: %s:%d: expected %d-char indent, found %d\n",
               FILENAME, NR, triml, indent) > "/dev/stderr"
        ex = 1
      } else {
        # must have `triml` leading spaces
        $0 = substr($0, triml + 1)
      }
    }

    # must appear _after_ triml above so we just discard what is left,
    # or we mess up the indentation calculation.  The flag only ever
    # applies to the line directly after the "%", so it is cleared even
    # when that line has nothing to strip; otherwise it would linger and
    # eat the indentation of some later line.
    nltrim {
      sub(/^ +/, "")
      nltrim = 0
    }

    # opening tag, i.e. "<details" followed by a non-word character or
    # the end of the line; nested content is two spaces deeper
    /^ *<details$/ || /^ *<details[^A-Za-z0-9_]/ {
      triml = indent + 2
    }

    # ties, then escaped tildes
    !infence {
      $0 = tie($0)
      gsub(/\\~/, "~")
    }

    # TeX-style newline removal
    !infence && /%$/ {
      sub(/%$/, "")
      printf "%s", $0
      nltrim = 1
      next
    }

    # reference to the content of the last fence (e.g. if a previous
    # code block is HTML and we want to render it as an example)
    /^```/ {
      gather  = !gather
      infence = gather
      if (gather) gblock = ""
    }

    !/^```/ && gather { gblock = gblock $0 "\n" }

    !gather && /^ *@LASTFENCE@$/ { print gblock; next }

    { print }

    END { exit(ex) }
  ' "$file"
}
# Generate HTML from post.  Note that `pagetitle' is set just to suppress
# Pandoc warnings about it missing; it is unused.
#
# The file is pre-formatted by prefmt, converted by Pandoc (with the output
# of `src/mkheader' included before the body and `src/footer.tpl.htm' after
# it), passed through `src/h12title', and finally through hgroup-wrap.  The
# resulting HTML is written to stdout.
#
# prefmt is a stage of the pipeline rather than a process substitution so
# that its exit status is not thrown away: with `pipefail' set, a failure
# that it reports makes this function fail as well.
#
#   $1  path of the post's Markdown source; the first ten characters of its
#       base name are taken as the candidate post date
main()
{
  local -r file=${1?Missing file name}

  # Assigned separately from the declaration so that a failure of
  # `basename' is not masked by the exit status of `local'.
  local base
  base=$( basename -- "$file" .md )
  local -r date=${base:0:10}

  prefmt "$file" \
    | pandoc -f"$( pexts )" -thtml5 \
        --standalone --template src/pandoc.tpl \
        --metadata pagetitle:ignoreme \
        --wrap none \
        --highlight-style tango \
        --mathml \
        -B <( src/mkheader post @__PAGE_TITLE__@ ) \
        -A src/footer.tpl.htm \
    | src/h12title @__PAGE_TITLE__@ \
    | hgroup-wrap "$date" "$file"
}
# Entry point: hand the command-line arguments to main unchanged.
main "$@"