tame{,d}: Reload runner when unresponsive

This tries to be a bit more resilient in case a runner becomes unresponsive,
rather than waiting for tamed to kill itself.

* bin/tame (RUNNER_CMD_WAITTIME): New variable.
  (command-runner): Tell runner to reload if it does not respond in
    RUNNER_CMD_WAITTIME seconds.
  (verify-runner-ack): New function.
* bin/tamed (mkfifos): Only keep stdin open.  stdout isn't necessary, and
    may have actually been causing subtle issues.
  (spawn-runner): Support restarting dslc on SIGHUP.
master v3.3.3
Mike Gerwitz 2018-10-16 08:53:04 -04:00
parent 5679be281a
commit db1c03dfd9
2 changed files with 43 additions and 6 deletions

View File

@ -24,6 +24,10 @@ declare -r mypath=$( dirname "$( readlink -f "$0" )" )
declare -ri EX_NOTAMED=1 # tried to start tamed but failed
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
# maximum amount of time in seconds to wait for runner to ack
# before forcibly restarting it
declare -ri RUNNER_CMD_WAITTIME=3
# Send a single command to a runner and observe the result
#
@ -48,7 +52,18 @@ command-runner()
trap 'kill -TERM $pid &>/dev/null' INT TERM
# all remaining arguments are passed to the runner
echo "$@" > "$base/0"
echo "$*" > "$base/0"
# we should immediately get a response from the runner;
# if not, then it may have stalled for some reason
verify-runner-ack "$*" < "$base/1" || {
echo "warning: failed runner $id ack; requesting reload" >&2
kill -HUP "$pid"
sleep "$RUNNER_CMD_WAITTIME"
# try once more
verify-runner-ack "$*" < "$base/1" || exit
}
# output lines from runner until we reach a line stating "DONE"
while read line; do
@ -85,6 +100,23 @@ verify-runner()
}
# Wait for command acknowledgment from runner
#
# Reads a single line from stdin, which the caller is expected to redirect
# from the runner's output FIFO.  The runner must respond within
# RUNNER_CMD_WAITTIME seconds; if no line can be read in that time (or
# EOF is reached first), this function returns with a non-zero status.
#
# The runner is expected to echo back the command that was given as
# "COMMAND <cmd>", but a mismatch is not yet treated as a failure (see the
# TODO below), so any line read in time currently counts as an ack.
#
# Arguments: $1 - the command that was sent to the runner (required)
# Globals:   RUNNER_CMD_WAITTIME (read)
verify-runner-ack()
{
  local -r cmd="${1?Missing command}"

  # keep the response local so that it neither leaks into nor clobbers a
  # variable named `ack' in the calling scope
  local ack

  read -t"$RUNNER_CMD_WAITTIME" -r ack || return

  if [ "COMMAND $cmd" != "$ack" ]; then
    # TODO check for ack mismatch once output race condition is fixed
    :
  fi
}
# Wait somewhat impatiently for tamed
#
# Assumes that tamed's runner 0 is running once the pidfile becomes

View File

@ -53,10 +53,10 @@ mkfifos()
echo "fatal: failed to create FIFO at $in"
exit $EX_CANTCREAT
}
# keep FIFOs open so we don't get EOF from writers
tail -f >"$root/$n" &
done
# keep the stdin FIFO open so its reader doesn't get EOF when a writer closes it
tail -f >"$root/0" &
}
@ -80,8 +80,13 @@ spawn-runner()
# loop to restart runner in case of crash
while true; do
"$mypath/dslc" < "$base/0" &> "$base/1"
echo "warning: runner $id exited with code ${PIPESTATUS[0]}; restarting" >&2
declare -i job=0
trap 'kill -INT $job' HUP
"$mypath/dslc" < "$base/0" &> "$base/1" & job=$!
declare -i status=0
wait -n 2>/dev/null || status=$?
echo "warning: runner $id exited with code $status; restarting" >&2
done &
echo "$!" > "$base/pid"