2019-01-08 00:11:20 -05:00
|
|
|
#!/usr/bin/gawk -f
|
|
|
|
# Cache post data in metadata recutils file
|
|
|
|
#
|
|
|
|
# Copyright (C) 2019 Mike Gerwitz
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
# Generates database of metadata for a given post in recutils format for use
|
|
|
|
# by other scripts. The post must have already been converted to HTML using
|
|
|
|
# `post2html' or some equivalent means.
|
|
|
|
#
|
|
|
|
# This script is also responsible for determining what constitutes the
|
|
|
|
# abstract, which we consider to be everything after the subject line but
|
|
|
|
# before the end-of-abstract marker "<!-- more -->". If no such marker
|
|
|
|
# exists then the script exits in error.
|
|
|
|
##
|
|
|
|
|
|
|
|
# Output author and post date derived from the file name.
#
# The base name of the input file (everything after the final `/') is left
# in name[0] for use by later rules.  Its first ten characters are emitted
# as the post date (presumably a YYYY-MM-DD prefix; confirm against the
# naming scheme used for posts).
BEGINFILE {
    # gawk runs BEGINFILE even for a file that it failed to open, in which
    # case ERRNO is non-empty.  Emit nothing for such a file so that no
    # bogus record reaches the output; because `nextfile' is not called,
    # gawk still reports its usual fatal error for the unreadable file.
    if ( ERRNO == "" ) {
        match( FILENAME, /[^/]+$/, name )

        # TODO: configurable
        print "author: Mike Gerwitz <mtg@gnu.org>"

        # Leading ten characters of the base name (or the whole name if it
        # is shorter than that).
        printf "date: %s\n", substr( name[0], 1, 10 )
    }
}
|
|
|
|
|
|
|
|
# Everything before the <main> line is the HTML header, so discard input
# until that line has been seen.  Once set, `main' stays set and this rule
# no longer fires.
!main {
    if ( $0 !~ /^ *<main>/ )
        next

    main = 1
}
|
|
|
|
|
|
|
|
|
|
|
|
# A line beginning with "<h1 " after <main> carries the subject/title of
# the post.  This rule emits the subject, the slug, and the first line of
# the abstract, and then sets `a' so that the lines that follow are treated
# as part of the abstract.
main && /^<h1 / {
    # The subject is the header line with every h* tag stripped from it.
    post_subject = $0
    gsub( /<\/?h[^>]+>/, "", post_subject )
    print "subject: " post_subject

    # The slug is derived from the base name of the input file (name[0], set
    # by BEGINFILE), not from the header: NUM-NUM-NUM-REST.ext becomes
    # NUM/NUM/REST, dropping the third number and the extension.  A name
    # that does not have that shape is used unchanged.
    post_slug = name[0]
    if ( match( name[0], /^([0-9]+)-([0-9]+)-[0-9]+-(.*)\.[a-z]+$/, slug_part ) )
        post_slug = slug_part[1] "/" slug_part[2] "/" slug_part[3]

    printf "slug: %s\n", post_slug

    # Discard the line immediately following the header (expected to be the
    # date line) and read the one after it, which is the first line of the
    # abstract.
    getline
    getline

    printf "abstract: %s\n", $0

    # From here on we are inside the abstract.
    a = 1
    next
}
|
|
|
|
|
|
|
|
# The end-of-abstract marker is "<!-- more -->"; reaching it ends
# processing.  Until then, once the abstract has begun (`a' is set), output
# each line prefixed by a `+', which is the recutils line continuation
# marker.
{
    if ( $0 ~ /^<!-- more -->/ )
        exit

    if ( a )
        printf "+ %s\n", $0
}
|
|
|
|
|
|
|
|
# Reaching the end of the file means that no end-of-abstract marker was
# found.  That is treated as an error so that an author cannot forget to
# add one; if the whole post is meant to be the abstract, the marker can
# simply be placed at the end of the post.
ENDFILE {
    printf "%s\n", "error: missing '<!-- more -->'" > "/dev/stderr"
    exit 1
}
|