#!/usr/bin/gawk -f
# Cache post data in metadata recutils file
#
#  Copyright (C) 2019 Mike Gerwitz
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
# Generates database of metadata for a given post in recutils format for use
# by other scripts.  The post must have already been converted to HTML using
# `post2html' or some equivalent means.
#
# This script is also responsible for determining what constitutes the
# abstract, which we consider to be everything after the subject line but
# before the end-of-abstract marker "<!-- more -->".  If no such marker
# exists then the script exits in error.
#
# NOTE(review): this copy of the script was recovered from a version in
# which the line breaks were collapsed and every angle-bracketed literal
# had been stripped.  The literals marked NOTE(review) below are
# reconstructions and must be confirmed against the upstream script and
# the markup that `post2html' actually emits before this is relied upon.
##

# Output author and post date derived from the file name.
BEGINFILE {
  # name[0] receives the final path component of FILENAME (everything
  # after the last slash).
  match( FILENAME, /[^/]+$/, name )

  # TODO: configurable
  # NOTE(review): the address in angle brackets was lost; presumably the
  # author's public address -- confirm.
  print "author: Mike Gerwitz <mtg@gnu.org>"

  # The date is taken to be the first ten characters of the file name.
  printf "date: %s\n", gensub( /^(.{10}).*$/, "\\1", "", name[0] )
}

# Wait until after <main>; everything before it is the HTML header.
# NOTE(review): the tag name was lost; `<main>' is inferred from the name
# of the flag variable -- confirm.
/^ *<main>/ { main=1 }
!main { next }

# The first header represents the subject/title and also contains the
# unique id for this post (as generated by `post2html').
# NOTE(review): the opening of this rule was lost up to the middle of the
# tag-stripping regex; the `<h1 ' pattern and the `subject' field name are
# presumed -- confirm.
main && /^<h1 / {
  # Strip all markup from the header line, leaving only its text.
  printf "subject: %s\n", gensub( /<[^>]+>/, "", "g" )

  # Grab the generated id from the header and use it to
  # generate a complete slug.
  # (As written, the slug is built from the numeric leading fields and
  # trailing name of the file's basename, with the extension dropped.)
  printf "slug: %s\n", \
    gensub( /^([0-9]+)-([0-9]+)-[0-9]+-(.*)\.[a-z]+$/, "\\1/\\2/\\3", "", name[0] )

  # Skip the date line immediately following the header and grab the first
  # line of the abstract.
  getline
  getline
  printf "abstract: %s\n", $0

  # From here on, every line belongs to the abstract.
  a = 1
  next
}

# The end-of-abstract marker is "<!-- more -->".  Until we reach that point,
# output each line of the abstract prefixed by a `+', which is the recutils
# line continuation marker.
# NOTE(review): the marker text was lost here, in the header comment, and
# in the error message below; `<!-- more -->' is presumed -- confirm.
/^<!-- more -->/ { exit }
a { printf "+ %s\n", $0 }

# If we get to this point, that means that there is no end-of-abstract
# marker, which we will consider to be an error just to make sure that the
# author didn't forget to add one.  If the entire post is to be considered
# part of the abstract, then the marker can be added at the end of the post.
ENDFILE {
  print "error: missing '<!-- more -->'" > "/dev/stderr"
  exit 1
}