#!/bin/bash # Daemon for accepting TAME commands (compilers, linker, etc) # # Copyright (C) 2014-2022 Ryan Specialty Group, LLC. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . ## set -euo pipefail declare mypath; mypath=$( dirname "$( readlink -f "$0" )" ) readonly mypath declare -ri EX_RUNNING=1 declare -ri EX_NOTRUNNING=2 # tamed is not running declare -ri EX_RUNTAB_LOCK=3 # failed to acquire aggregate runtab lock declare -ri EX_RUNTAB_OUT=4 # failed to write to aggregate runtab declare -ri EX_USAGE=64 # incorrect usage; sysexits.h declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h # number of seconds of output silence before runners are considered unused # and are subject to termination (see stall-monitor) declare -ri TAMED_STALL_SECONDS="${TAMED_STALL_SECONDS:-1}" # id of process that indirectly spawned tamed (default $PPID) declare -ri TAMED_SPAWNER_PID="${TAMED_SPAWNER_PID:-$PPID}" # options to pass to JVM via dslc declare -r TAMED_JAVA_OPTS="${TAMED_JAVA_OPTS:-}" export JAVA_OPTS="$TAMED_JAVA_OPTS" # set by `main', global for `cleanup' and `runner-report-all' declare root= # non-empty if in TUI (terminal UI) mode (use `in-tui-mode') declare -r TAMED_TUI="${TAMED_TUI:-}" declare tui_mode= # file into which aggregate runner report will be placed (none if empty) declare -r TAMED_RUNTAB_OUT="${TAMED_RUNTAB_OUT:-}" # Create FIFOs for runner # # The FIFOs are intended to be attached to stderr and stdout # of 
# the runner and will be created relative to the given
# root path ROOT.
#
# If a FIFO cannot be created, exit with EX_CANTCREAT.
mkfifos() {
  local -r root="${1?Missing root path}"
  local n

  mkdir -p "$root"

  # note that there's no stderr; see `spawn-runner'
  for n in 0 1; do
    # remove any stale entry at the exact path we are about to create,
    # otherwise mkfifo would fail because the file already exists
    rm -f "$root/$n"

    mkfifo -m 0600 "$root/$n" || {
      log "fatal: failed to create FIFO at $root/$n" >&2
      exit $EX_CANTCREAT
    }
  done

  # keep FIFOs open so we don't get EOF from writers
  tail -f >"$root/0" &
}


# Output a line, clearing the remainder of the line if in TUI mode
#
# All arguments are passed to `echo' as-is, so callers may provide echo
# options (e.g. `log -n ...').
log() {
  if in-tui-mode; then
    echo -en "\e[2K"
  fi

  echo "$@"
}


# Spawn a new runner using the next available runner id
#
# The next id is one greater than the value stored in ROOT/maxid.
# See `spawn-runner' for more information.
spawn-next-runner() {
  local -r root="${1?Missing root path}"

  # get the next available id
  local -ri id=$( < "$root/maxid" )

  spawn-runner "$(( id + 1 ))" "$root"
}


# Spawn a runner
#
# A new runner is created by spawning dslc and attaching
# new FIFOs under the given id ID relative to the given
# run path ROOT.  The PID of the runner will be stored
# alongside the FIFOs in a pidfile `pid'.
#
# Note that the stored PID is that of the background restart loop, not of
# dslc itself.
spawn-runner() {
  local -ri id="${1?Missing id}"
  local -r root="${2?Missing root run path}"

  local -r base="$root/$id"

  mkfifos "$base"

  # flag as available (the client will manipulate these)
  echo 0 > "$base/busy"

  # runtab is used for reporting, which we will optionally aggregate
  > "$base/runtab"
  monitor-runner-runtab "$root" "$base/runtab" &

  # monitor runner usage and kill when inactive
  stall-monitor "$base" &

  # loop to restart runner in case of crash
  while true; do
    declare -i job=0
    # forward a hangup to the current dslc process as an interrupt
    trap 'kill -INT $job' HUP

    # stderr is merged into the stdout FIFO
    "$mypath/dslc" < "$base/0" &> "$base/1" &
    job=$!

    declare -i status=0
    wait -n 2>/dev/null || status=$?
    echo "warning: runner $id exited with code $status; restarting" >&2
  done &

  echo "$!" > "$base/pid"

  # we assume that this is the new largest runner id
  echo "$id" > "$root/maxid"

  log "runner $id ($!): $base"
}


# Monitor the given runner runtab and append to the aggregate runtab
#
# The aggregate runtab is append-only and has a row-level lock to support
# concurrent writes without having to rely on kernel buffering.
#
# This does nothing if TAMED_RUNTAB_OUT is empty.
monitor-runner-runtab() {
  local -r root="${1?Missing root run path}"
  local -r runtab="${2?Missing runtab path}"

  # no use in aggregating if it was not requested
  test -n "$TAMED_RUNTAB_OUT" || return 0

  while ! spawner-dead; do
    # this is a shared file, and while buffering _should_ be sufficient, we
    # may as well avoid potential headaches entirely by locking during the
    # operation
    #
    # IFS is cleared so that rows are copied verbatim, including any
    # leading or trailing whitespace
    tail -f "$runtab" | while IFS= read -r row; do
      # we want to lock _per row write_, since output will be interleaved
      # between all the runners
      (
        local -ri timeout=3

        flock -w $timeout 7 || {
          echo "error: failed to acquire lock on aggregate runtab" >&2
          exit $EX_RUNTAB_LOCK
        }

        # printf rather than echo so that a row can never be mistaken for
        # an echo option
        printf '%s\n' "$row" >&7
      ) 7>> "$TAMED_RUNTAB_OUT"
    done
  done
}


# Check that we can write to the provided runtab, and clear it
#
# This does nothing if TAMED_RUNTAB_OUT is empty.  If the file cannot be
# written, an error is output to stderr and we exit with EX_RUNTAB_OUT.
runtab-check-and-clear() {
  test -n "$TAMED_RUNTAB_OUT" || return 0

  # clear the runtab, and see if we can write to it
  >"$TAMED_RUNTAB_OUT" || {
    echo "error: unable to write to '$TAMED_RUNTAB_OUT' (TAMED_RUNTAB_OUT)" >&2
    exit $EX_RUNTAB_OUT
  }

  echo "tamed: aggregating runner runtabs into '$TAMED_RUNTAB_OUT'"
}


# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
# seconds
#
# This monitors the modification time on the stdout FIFO.  stdin does not
# need to be monitored since dslc immediately echoes back commands it
# receives.
#
# dslc is pretty chatty at the time of writing this, so TAMED_STALL_SECONDS
# can easily be <=30s even for large packages.  This may need to change in
# the future if it becomes too much less chatty.  Increase that environment
# variable if runners stall unexpectedly in the middle of builds.
#
# If the id of the spawning process has been provided then we will never
# consider ourselves to be stalled if that process is still running.  This
# prevents, for example, tamed from killing itself while a parent make
# process is still running.
stall-monitor() {
  local -r base="${1?Missing base}"
  local -i checkpoint mtime

  # poll the output FIFO's modification time once per stall interval
  while :; do
    checkpoint=$( date +%s )
    sleep "$TAMED_STALL_SECONDS"
    mtime=$( stat -c%Y "$base/1" )

    # the FIFO was touched after the checkpoint, so the runner is active
    if (( mtime > checkpoint )); then
      continue
    fi

    # never consider the runner stalled while the spawner is alive
    if ! spawner-dead; then
      continue
    fi

    # silent and orphaned: terminate the process recorded in the pidfile
    local -r runner_pid=$( cat "$base/pid" )
    kill "$runner_pid"
    wait "$runner_pid" 2>/dev/null

    # nothing left for this monitor to watch
    break
  done
}


# Determine whether the spawning process has gone away
#
# Yields a zero status when TAMED_SPAWNER_PID is not a positive pid (no
# spawner to watch) or when `ps' cannot find that pid; yields a non-zero
# status while the spawner is still running.
spawner-dead() {
  if ! test "$TAMED_SPAWNER_PID" -gt 0; then
    return 0
  fi

  if ps "$TAMED_SPAWNER_PID" &>/dev/null; then
    return 1
  fi

  return 0
}


# Refuse to start when a tamed is already running at path ROOT
#
# The pid is read from ROOT/pid.  If that process exists, exit with
# EX_RUNNING.  If the pidfile is missing or does not hold a positive pid,
# do nothing.  Otherwise the pidfile is stale and only a warning is
# output.
abort-if-running() {
  local -r root="${1?Missing root rundir}"

  # an unreadable pidfile leaves the integer at zero
  local -ri tamed_pid=$( cat "$root/pid" 2>/dev/null )

  (( tamed_pid > 0 )) || return 0

  if ps "$tamed_pid" &>/dev/null; then
    log "fatal: tamed is already running at $root (pid $tamed_pid)!" >&2
    exit $EX_RUNNING
  fi

  if [ -n "$tamed_pid" ]; then
    log "warning: clearing stale tamed (pid $tamed_pid)" >&2
  fi
}


# Exit with EX_NOTRUNNING if tamed is not running at path ROOT
#
# ROOT must both exist and contain a `pid` file of a running process.
abort-if-not-running() {
  local -r root="${1?Missing root rundir}"

  test -d "$root" || {
    log "tamed is not running at $root: path does not exist" >&2
    exit $EX_NOTRUNNING
  }

  # an unreadable pidfile leaves the integer at zero, which `ps' rejects
  local -ri pid=$( cat "$root/pid" 2>/dev/null )

  # this should not happen unless bash crashed
  ps "$pid" &>/dev/null || {
    log "tamed is not running at $root: process $pid has terminated" >&2
    exit $EX_NOTRUNNING
  }
}


# Kill running tamed at path ROOT
#
# If no pidfile is found at ROOT, do nothing.  This sends a
# signal only to the parent tamed process, _not_ individual
# runners; the target tamed is expected to clean up itself.
# Consequently, if a tamed terminated abnormally without
# cleaning up, this will not solve that problem.
#
# Note that this is also called by tame to clean up an old tamed
# before spawning a new one.
kill-running() {
  local -r root="${1?Missing root}"

  test -d "$root" || return 0

  # read with `cat' (as the other pidfile readers do): combining the
  # `$(< file)' shorthand with a second redirection is not portable across
  # bash versions and can yield an empty result
  local -r pid=$( cat "$root/pid" 2>/dev/null )

  test -n "$pid" || return 0

  log "killing tamed at $root ($pid)..."
  kill "$pid"
}


# Output a report for every runner of the tamed running at ROOT
#
# Exits with EX_NOTRUNNING (via `abort-if-not-running') if tamed is not
# running at ROOT.
runner-report-all() {
  local -r root="${1?Missing root}"

  abort-if-not-running "$root"

  for-each-runner "$root" runner-report
}


# Invoke CMD once for each runner id from 0 through ROOT/maxid
#
# A summary line is output first.  Each invocation is preceded by an empty
# line and is called as `CMD ROOT [ARGS...] ID', where ARGS are any
# arguments following CMD.
for-each-runner() {
  local -r root="${1?Missing root}"
  local -r cmd="${2?Missing command}"
  shift 2

  local -ri maxid=$(cat "$root/maxid")
  local -i runner

  echo "tamed is running at $root with $((maxid+1)) runner(s)"

  for (( runner = 0; runner <= maxid; runner++ )); do
    # empty line separating each runner's output; `runner-report-line'
    # relies on it to find the end of a record
    echo
    "$cmd" "$root" "$@" "$runner"
  done
}


# Report on the status and current operation of each runner
#
# This report is generated by tamed rather than delegating to the runners
# themselves to avoid the complexity of mitigating output races.
runner-report() {
  local -r root="${1?Missing root}"
  local -ri id="${2?Missing runner id}"

  local -r path="$root/$id"

  # nothing to report without a recorded command line
  test -f "$path/cmdline" || return 0

  local cmdline=$(< "$path/cmdline" )
  local -a cmdstart cmdstart_fmt

  # only the first word of `cmdstart' (a Unix timestamp) is used here
  cmdstart=( $(< "$path/cmdstart" ) )
  cmdstart_fmt=$(date --date=@"${cmdstart[0]}" +%Y-%m-%dT%H:%M:%S)

  local -i now=$(date +%s)

  # NOTE(review): the body of this here-document was lost from the source
  # under review.  The fields below are reconstructed from the values
  # computed above and from the `command:' and `elapsed:' lines that
  # `runner-report-line' parses; confirm names and order against the
  # original before relying on the exact format.
  cat <<EOF
runner: $id
command: $cmdline
start: $cmdstart_fmt
elapsed: $(( now - cmdstart[0] )) seconds
EOF
}


# Shorten the directory components of the paths in BUFFER to fit in COLS
#
# Progressively more aggressive elisions are tried in turn, each applied to
# the original BUFFER, and the first result that is no longer than COLS is
# output (without a trailing newline).  If none fits, the most aggressive
# result is truncated to COLS characters, the last being an ellipsis.
#
# NOTE(review): the header and local declarations of this function were
# lost from the source under review; the signature is reconstructed from
# the call in `runner-report-line' (`elide-paths COLS BUFFER').
elide-paths() {
  local -ri cols="${1?Missing columns}"
  local -r buffer="${2?Missing buffer}"
  local result

  # begin by keeping only the first and the last three characters of each
  # long directory name, as in:
  #   "suppliers/foobarbaz/quux/quuux.xmlo" => "s…ers/f…baz/quux/quuux.xmlo"
  result=$(
    sed 's|\([a-zA-Z0-9_-]\)[a-zA-Z0-9_-]\{3,\}\([a-zA-Z0-9_-]\{3\}\)/|\1…\2/|g' \
      <<<"$buffer"
  )
  if (( ${#result} <= cols )); then
    printf '%s' "$result"
    return
  fi

  # more aggressive: remove all but the first letter of every directory
  # name of at least four characters, as in:
  #   "suppliers/foobarbaz/quux/quuux.xmlo" => "s…/f…/q…/quuux.xmlo"
  # NOTE(review): the original example kept "quux" intact, which suggests
  # a minimum of \{4,\} may have been intended; the expression is unchanged.
  result=$( sed 's|\([a-zA-Z0-9_-]\)[^ /]\{3,\}/|\1…/|g' <<<"$buffer" )
  if (( ${#result} <= cols )); then
    printf '%s' "$result"
    return
  fi

  # even more aggressive: elide all but the filename, as in:
  #   "suppliers/foobarbaz/quux/quuux.xmlo" => "…/quuux.xmlo"
  # (`-' must be last in the bracket expression to be taken literally)
  result=$( sed 's|[a-zA-Z0-9_/-]*/|…/|g' <<<"$buffer" )
  if (( ${#result} <= cols )); then
    printf '%s' "$result"
    return
  fi

  # at this point, it's better to provide _some_ useful information for
  # _some_ runners, so just truncate the previous result (we probably have
  # too many runners for the current terminal width)
  printf '%s' "${result::$((cols-1))}…"
}


# Report of all runners' status on a single line
#
# Idle runners are not output for now, since that increases the likelihood
# that we will not output something when runners are done doing their jobs
# (including overwriting the PS1).
#
# Nothing is output if there is nothing to report.
runner-report-line() {
  local -r root="${1?Missing root}"

  # buffer output so that our report does not get mixed with normal
  # runner output
  #
  # the assignment is intentionally part of the `local' declaration: a
  # failing report then leaves an empty buffer instead of aborting
  local buffer=$(
    runner-report-all "$root" | awk '
      /^command: idle/,/^$/ { next }      # skip idle
      /^command:/ { printf "[%s ", $NF }  # e.g. "[foo/bar.xmlo "
      /^elapsed:/ { printf "%ds] ", $2 }  # e.g. "2s] "
    '
  )

  # ensure proper empty output without formatting if there is no line
  test -n "$buffer" || return 0

  # bash has checkwinsize, but that runs after every command; try to use
  # tput, defaulting to 80.  Note that we have to check this every time, in
  # case the terminal has been resized.
  local -ri cols=$(tput cols || echo 80)

  # rather than worrying about line wrapping, fit to one line
  if (( ${#buffer} > cols )); then
    buffer=$(elide-paths "$cols" "$buffer")
  fi

  # output in bold, overwrite our line that may already be present here, and
  # place cursor at beginning of the line so any runner output will
  # overwrite
  #
  # the buffer is passed as data so that backslashes in it are not
  # interpreted as escapes
  printf '\e[1m%s\e[0m\r' "$buffer"
}


# Clean up child processes before exit
#
# This should be called before exit (perhaps by a trap).  Kills
# the entire process group.
#
# Do not attach this to a SIGTERM trap or it will infinitely
# recurse.
cleanup() {
  rm -rf "$root"
  kill 0
}


# Output usage information and exit
#
# This always exits with EX_USAGE.
usage() {
  # NOTE(review): the opening of this help text (synopsis, description and
  # what appears to have been an example ending in "runtab.csv") was lost
  # from the source under review.  The first two lines below are
  # reconstructed from the option parsing in `main'; restore the original
  # wording if it is available.
  cat <<EOF
Usage: $0 [--help|--kill|--report] [RUNPATH]
Start tamed and its runners at RUNPATH (default /run/user/$UID/tamed).

Options:
  --help    show this message
  --kill    kill a running tamed at path RUNPATH
  --report  display runner report (this is subject to change in
            later versions)

Environment Variables:
  TAMED_STALL_SECONDS  number of seconds of runner inactivity before
                       runner is automatically killed (default 1)
  TAMED_SPAWNER_PID    inhibit stalling while this process is running
                       (default PPID)
  TAMED_JAVA_OPTS      opts to pass to dslc, and in turn, the JVM
  TAMED_TUI            run in TUI mode (provide UI features like a
                       dynamic runner status line)
  TAMED_RUNTAB_OUT     file into which aggregate runner report will be
                       written (otherwise reports are only available
                       per-runner while tamed is running)
EOF

  exit $EX_USAGE
}


# Determine whether to enable TUI mode
#
# TUI (terminal UI) mode will augment the output with features that only
# make sense when running on a user's terminal, such as the runner status
# line.
tui-check() {
  test "$TAMED_TUI" == 1 || return 0

  tui_mode=1
  log "tamed is running in TUI mode (TAMED_TUI=0 to disable)"
}


# Whether we're running in TUI mode
in-tui-mode() {
  test -n "$tui_mode"
}


# If in TUI mode, continuously update the last line of output with runner
# status
#
# This is not an easy undertaking with how our build process currently
# works.  Make is responsible, currently, for echoing lines, and so we must
# frequently re-echo our status line in an attempt to redisplay the line
# after it is overwritten.
#
# Further, most output is unaware that the entire line needs to be
# overwritten; if output is not properly transformed in the Makefile, then
# portions of the status line may remain in the history, partly overwritten
# by build output.
#
# Another concern is that we do not want to keep outputting after the
# process is finished, which would overwrite the PS1.  To try to avoid this,
# we omit idle runner output and only clear the line _once_ when the status
# line is empty, in the hope that all runners will be idle for long enough
# before the build completes, make exits, and the PS1 is output.
#
# If not in TUI mode, this does nothing.
tui-runner-status-line() {
  in-tui-mode || return 0

  local cache= cleared=
  local -i i

  while ! spawner-dead; do
    # this will fail if no runners have been created yet, so just ignore
    # it; if we fail to output the status line, the build will still work
    cache=$(runner-report-line "$root" 2>/dev/null)

    # if the line is empty, clear the output _once_ (to get rid of
    # whatever was there before), but do not do it again, otherwise we
    # risk overwriting lines post-build (like the PS1 or late-stage make
    # targets).
    if [[ -z "$cache" ]]; then
      if [[ -z "$cleared" ]]; then
        log -n ""
        cleared=1
      fi

      # stay quiet for as long as the status line remains empty
      sleep 1
      continue
    fi

    # there is something to show again, so the next empty status line
    # must clear it
    cleared=

    # output the cache frequently to try to overcome build output
    for (( i = 0; i < 10; i++ )); do
      log -n "$cache"
      sleep 0.1
    done
  done
}


# Run tamed
#
# Usage: main [--kill|--report|--help] [RUNPATH]
#
# With `--report' or `--kill', act upon the tamed at RUNPATH and exit.
# Otherwise, start a single runner at RUNPATH, spawn an additional runner
# for each SIGUSR1 received, and wait until a background job exits with a
# status other than that produced by SIGUSR1 or SIGUSR2.
main() {
  local kill= report=

  case "${1:-}" in
    --kill) kill=1; shift;;
    --report) report=1; shift;;
    --help) usage;;
  esac

  root="${1:-/run/user/$UID/tamed}"

  # report requested
  test -z "$report" || {
    runner-report-all "$root"
    exit
  }

  # kill if requested
  test -z "$kill" || {
    kill-running "$root"
    exit
  }

  abort-if-running "$root"

  tui-check
  runtab-check-and-clear

  # clean up background processes before we exit
  trap exit TERM
  trap cleanup EXIT

  # start fresh
  rm -rf "$root"; mkdir -p "$root"

  local -i pid=$$
  echo $pid > "$root/pid"

  # start with a single runner; we'll spawn more if requested
  #
  # the trap body is single-quoted so that the (global) run path is
  # expanded when the signal is handled, regardless of which characters
  # it contains
  spawn-runner 0 "$root"
  trap 'spawn-next-runner "$root"' USR1

  # status line reporting on runners for TUI mode
  tui-runner-status-line &

  # wait for runners to complete or for a signal to be received by this
  # process that terminates `wait'
  local -i status
  while true; do
    wait -n || {
      status=$?

      # ignore USR{1,2} (128 + signal number)
      if (( status != 138 && status != 140 )); then
        exit $status
      fi
    }
  done
}

main "$@"