tamed: Add runtab and TAMED_RUNTAB_OUT

This provides logging that can be used to analyze jobs.  See `tamed --help`
for some examples.  More to come.

You'll notice that one of the examples reprents package build time in
_minutes_.  This is why TAMER is necessary; as of the time of writing, the
longest-building package is nearly five and a half minutes, and there are a
number of packages that take a minute or more.  But, there are potentially
other optimizations that can be done.  And this is _after_ many rounds of
optimizations over the years.  (TAME was not originally built for what it is
currently being used for.)
main
Mike Gerwitz 2022-01-19 16:47:12 -05:00
parent 4a3b86f480
commit 6fd570477a
4 changed files with 148 additions and 6 deletions

View File

@ -22,6 +22,11 @@ Compiler
--------
- Input mappings will no longer emit the destination param as a dependency.
- `tamed --report` and `TAMED_TUI` for analyzing build performance.
- Runners now store start time and duration for each command, available in
the runpath for reporting.
- `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a
single file as jobs are completed. See `tamed --help` for more
information and examples.
Documentation
-------------

View File

@ -83,7 +83,7 @@ command-runner()
# log the provided command line and starting time so that we can determine
# what is currently being compiled and how long it is taking
date +%s > "$base/cmdstart"
millis > "$base/cmdstart"
echo "$*" > "$base/cmdline"
# all remaining arguments are passed to the runner
@ -113,7 +113,9 @@ command-runner()
if [ "${line:0:5}" == "DONE " ]; then
read -r _ code _ <<< "$line"
runtab-append "$base"
mark-available "$base"
return "$code"
fi
@ -237,7 +239,56 @@ mark-available()
echo idle > "$base/cmdline"
# this can be used to determine how long the worker has been idle
date +%s > "$base/cmdstart"
millis > "$base/cmdstart"
}
# Output seconds and milliseconds, space-delimited
millis()
{
local date
date=( $(date '+%s %N') )
# %N returns nanoseconds and it may be 0-prefixed, which would be
# interpreted as octal without the explicit base specification
echo "${date[0]}" "$(( 10#"${date[1]}" / 1000000 ))"
}
# Append data to the runner table (runtab)
#
# This takes information about the most recently executed command and
# appends it to a table representing the work that the runner has
# done. This should be done at the end of processing a particular job but
# before marking the runner as available using `mark-available'.
#
# The columns of this report are, tab-delimited:
# 1. Start date (Unix timestamp, seconds);
# 2. Duration (milliseconds); and
# 3. Runner command line
runtab-append()
{
local -r base=${1?Missing runner base path}
local cmd duration
local -a cmdstart now
cmd=$(< "$base/cmdline")
cmdstart=( $(< "$base/cmdstart") )
now=( $(millis) )
# duration consists of seconds and nanoseconds; let's just deal with
# milliseconds, since any greater precision is not useful to us with how
# slow the system is today, and convert it into a decimal for
# reporting. Nanoseconds may be 0-prefixed, which will be interpreted as
# octal without an explicit base specification.
duration=$((
((now[0] * 1000) + now[1])
- ((cmdstart[0] * 1000) + cmdstart[1])
))
# the duration is in milliseconds
printf "%d\t%s\t%s\n" "$cmdstart" "$duration" "$cmd" >> "$base/runtab"
}

View File

@ -24,6 +24,8 @@ readonly mypath
declare -ri EX_RUNNING=1
declare -ri EX_NOTRUNNING=2 # tamed is not running
declare -ri EX_RUNTAB_LOCK=3 # failed to acquire aggregate runtab lock
declare -ri EX_RUNTAB_OUT=4 # failed to write to aggregate runtab
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h
@ -45,6 +47,9 @@ declare root=
declare -r TAMED_TUI="${TAMED_TUI:-}"
declare tui_mode=
# file into which aggregate runner report will be placed (none if empty)
declare -r TAMED_RUNTAB_OUT="${TAMED_RUNTAB_OUT:-}"
# Create FIFOs for runner
#
@ -117,6 +122,10 @@ spawn-runner()
# flag as available (the client will manipulate these)
echo 0 > "$base/busy"
# runtab is used for reporting, which we will optionally aggregate
> "$base/runtab"
monitor-runner-runtab "$root" "$base/runtab" &
# monitor runner usage and kill when inactive
stall-monitor "$base" &
@ -140,6 +149,55 @@ spawn-runner()
}
# Monitor the given runner runtab and append to the aggregate runtab
#
# The aggregate runtab is append-only and has a row-level lock to support
# concurrent writes without having to rely on kernel buffering.
monitor-runner-runtab()
{
local -r root="${1?Missing root run path}"
local -r runtab="${2?Missing runtab path}"
# no use in aggregating if it was not requested
test -n "$TAMED_RUNTAB_OUT" || return 0
while ! spawner-dead; do
# this is a shared file, and while buffering _should_ be sufficient, we
# may as well avoid potential headaches entirely by locking during the
# operation
tail -f "$runtab" | while read -r row; do
# we want to lock _per row write_, since output will be interleaved
# between all the runners
(
local -ri timeout=3
flock -w $timeout 7 || {
echo "error: failed to acquire lock on aggregate runtab" >&2
exit $EX_RUNTAB_LOCK
}
echo "$row" >&7
) 7>> "$TAMED_RUNTAB_OUT"
done
done
}
# Check that we can write to the provided runtab, and clear it
runtab-check-and-clear()
{
test -n "$TAMED_RUNTAB_OUT" || return 0
# clear the runtab, and see if we can write to it
>"$TAMED_RUNTAB_OUT" || {
echo "error: unable to write to '$TAMED_RUNTAB_OUT' (TAMED_RUNTAB_OUT)"
exit $EX_RUNTAB_OUT
}
echo "tamed: aggregating runner runtabs into '$TAMED_RUNTAB_OUT'"
}
# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
# seconds
#
@ -248,12 +306,15 @@ abort-if-not-running()
# runners; the target tamed is expected to clean up itself.
# Consequently, if a tamed terminated abnormally without
# cleaning up, this will not solve that problem.
#
# Note that this is also called by tame to clean up an old tamed
# before spawning a new one.
kill-running()
{
local -r root="${1?Missing root}"
abort-if-not-running "$root" 2>/dev/null
local -r pid=$( cat "$root"/pid 2>/dev/null )
test -d "$root" || return 0
local -r pid=$( < "$root"/pid 2>/dev/null )
test -n "$pid" || return 0
@ -301,14 +362,17 @@ runner-report()
test -f "$path/cmdline" || return 0
local cmdline=$(< "$path/cmdline" )
local cmdstart=$(< "$path/cmdstart" )
local -a cmdstart cmdstart_fmt
cmdstart=( $(< "$path/cmdstart" ) )
cmdstart_fmt=$(date --date=@"${cmdstart[0]}" +%Y-%m-%dT%H:%M:%S)
local -i now=$(date +%s)
cat <<EOF
runner: $id
command: $cmdline
start: $cmdstart ($(date +%Y-%m-%dT%H:%M:%S <<< "$cmdstart"))
start: ${cmdstart[0]}.${cmdstart[1]} ($cmdstart_fmt)
elapsed: $((now - cmdstart)) seconds
EOF
}
@ -430,6 +494,23 @@ to the process id of make itself. It defaults to the actual
parent process id (PPID), so tamed will not kill itself if
run manually on a shell (unless the shell exits first).
TAMED_RUNTAB_OUT can specify a file in which to write job
start times (as seconds from the Unix epoch); durations
(in milliseconds); and commands from each of the runners.
The table is tab-delimited. Here are some useful examples:
# format nicely into columns and view in pager
$ column runtab | less
# sort by runtime descending (second column)
$ sort -rnk2 runtab
# take the runtime and command columns
$ cut -2,3 runtab
# convert milliseconds into minutes (!) and sort desc
$ awk '{ $2 = $2 / 1000 / 60; print } runtab | sort -nrk2
Options:
--help show this message
--kill kill a runing tamed at path RUNPATH
@ -444,6 +525,9 @@ Environment Variables:
TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM
TAMED_TUI run in TUI mode (provide UI features like a
dynamic runner status line)
TAMED_RUNTAB_OUT file into which aggregate runner report will
be written (otherwise reports are only
available per-runner while tamed is running)
EOF
exit $EX_USAGE
@ -550,6 +634,7 @@ main()
abort-if-running "$root"
tui-check
runtab-check-and-clear
# clean up background processes before we exit
trap exit TERM

View File

@ -115,6 +115,7 @@ export TAME_CMD_WAITTIME
export TAMED_STALL_SECONDS
export TAMED_JAVA_OPTS
export TAMED_TUI
export TAMED_RUNTAB_OUT
TAMED_SPAWNER_PID=$(shell echo $$PPID)
export TAMED_SPAWNER_PID