tamed: Add runtab and TAMED_RUNTAB_OUT

This provides logging that can be used to analyze jobs.  See `tamed --help`
for some examples.  More to come.

You'll notice that one of the examples reprents package build time in
_minutes_.  This is why TAMER is necessary; as of the time of writing, the
longest-building package is nearly five and a half minutes, and there are a
number of packages that take a minute or more.  But, there are potentially
other optimizations that can be done.  And this is _after_ many rounds of
optimizations over the years.  (TAME was not originally built for what it is
currently being used for.)
main
Mike Gerwitz 2022-01-19 16:47:12 -05:00
parent 4a3b86f480
commit 6fd570477a
4 changed files with 148 additions and 6 deletions

View File

@ -22,6 +22,11 @@ Compiler
-------- --------
- Input mappings will no longer emit the destination param as a dependency. - Input mappings will no longer emit the destination param as a dependency.
- `tamed --report` and `TAMED_TUI` for analyzing build performance. - `tamed --report` and `TAMED_TUI` for analyzing build performance.
- Runners now store start time and duration for each command, available in
the runpath for reporting.
- `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a
single file as jobs are completed. See `tamed --help` for more
information and examples.
Documentation Documentation
------------- -------------

View File

@ -83,7 +83,7 @@ command-runner()
# log the provided command line and starting time so that we can determine # log the provided command line and starting time so that we can determine
# what is currently being compiled and how long it is taking # what is currently being compiled and how long it is taking
date +%s > "$base/cmdstart" millis > "$base/cmdstart"
echo "$*" > "$base/cmdline" echo "$*" > "$base/cmdline"
# all remaining arguments are passed to the runner # all remaining arguments are passed to the runner
@ -113,7 +113,9 @@ command-runner()
if [ "${line:0:5}" == "DONE " ]; then if [ "${line:0:5}" == "DONE " ]; then
read -r _ code _ <<< "$line" read -r _ code _ <<< "$line"
runtab-append "$base"
mark-available "$base" mark-available "$base"
return "$code" return "$code"
fi fi
@ -237,7 +239,56 @@ mark-available()
echo idle > "$base/cmdline" echo idle > "$base/cmdline"
# this can be used to determine how long the worker has been idle # this can be used to determine how long the worker has been idle
date +%s > "$base/cmdstart" millis > "$base/cmdstart"
}
# Output seconds and milliseconds, space-delimited
millis()
{
local date
date=( $(date '+%s %N') )
# %N returns nanoseconds and it may be 0-prefixed, which would be
# interpreted as octal without the explicit base specification
echo "${date[0]}" "$(( 10#"${date[1]}" / 1000000 ))"
}
# Append data to the runner table (runtab)
#
# This takes information about the most recently executed command and
# appends it to a table representing the work that the runner has
# done. This should be done at the end of processing a particular job but
# before marking the runner as available using `mark-available'.
#
# The columns of this report are, tab-delimited:
# 1. Start date (Unix timestamp, seconds);
# 2. Duration (milliseconds); and
# 3. Runner command line
runtab-append()
{
local -r base=${1?Missing runner base path}
local cmd duration
local -a cmdstart now
cmd=$(< "$base/cmdline")
cmdstart=( $(< "$base/cmdstart") )
now=( $(millis) )
# duration consists of seconds and nanoseconds; let's just deal with
# milliseconds, since any greater precision is not useful to us with how
# slow the system is today, and convert it into a decimal for
# reporting. Nanoseconds may be 0-prefixed, which will be interpreted as
# octal without an explicit base specification.
duration=$((
((now[0] * 1000) + now[1])
- ((cmdstart[0] * 1000) + cmdstart[1])
))
# the duration is in milliseconds
printf "%d\t%s\t%s\n" "$cmdstart" "$duration" "$cmd" >> "$base/runtab"
} }

View File

@ -24,6 +24,8 @@ readonly mypath
declare -ri EX_RUNNING=1 declare -ri EX_RUNNING=1
declare -ri EX_NOTRUNNING=2 # tamed is not running declare -ri EX_NOTRUNNING=2 # tamed is not running
declare -ri EX_RUNTAB_LOCK=3 # failed to acquire aggregate runtab lock
declare -ri EX_RUNTAB_OUT=4 # failed to write to aggregate runtab
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h
@ -45,6 +47,9 @@ declare root=
declare -r TAMED_TUI="${TAMED_TUI:-}" declare -r TAMED_TUI="${TAMED_TUI:-}"
declare tui_mode= declare tui_mode=
# file into which aggregate runner report will be placed (none if empty)
declare -r TAMED_RUNTAB_OUT="${TAMED_RUNTAB_OUT:-}"
# Create FIFOs for runner # Create FIFOs for runner
# #
@ -117,6 +122,10 @@ spawn-runner()
# flag as available (the client will manipulate these) # flag as available (the client will manipulate these)
echo 0 > "$base/busy" echo 0 > "$base/busy"
# runtab is used for reporting, which we will optionally aggregate
> "$base/runtab"
monitor-runner-runtab "$root" "$base/runtab" &
# monitor runner usage and kill when inactive # monitor runner usage and kill when inactive
stall-monitor "$base" & stall-monitor "$base" &
@ -140,6 +149,55 @@ spawn-runner()
} }
# Monitor the given runner runtab and append to the aggregate runtab
#
# The aggregate runtab is append-only and has a row-level lock to support
# concurrent writes without having to rely on kernel buffering.
monitor-runner-runtab()
{
local -r root="${1?Missing root run path}"
local -r runtab="${2?Missing runtab path}"
# no use in aggregating if it was not requested
test -n "$TAMED_RUNTAB_OUT" || return 0
while ! spawner-dead; do
# this is a shared file, and while buffering _should_ be sufficient, we
# may as well avoid potential headaches entirely by locking during the
# operation
tail -f "$runtab" | while read -r row; do
# we want to lock _per row write_, since output will be interleaved
# between all the runners
(
local -ri timeout=3
flock -w $timeout 7 || {
echo "error: failed to acquire lock on aggregate runtab" >&2
exit $EX_RUNTAB_LOCK
}
echo "$row" >&7
) 7>> "$TAMED_RUNTAB_OUT"
done
done
}
# Check that we can write to the provided runtab, and clear it
runtab-check-and-clear()
{
test -n "$TAMED_RUNTAB_OUT" || return 0
# clear the runtab, and see if we can write to it
>"$TAMED_RUNTAB_OUT" || {
echo "error: unable to write to '$TAMED_RUNTAB_OUT' (TAMED_RUNTAB_OUT)"
exit $EX_RUNTAB_OUT
}
echo "tamed: aggregating runner runtabs into '$TAMED_RUNTAB_OUT'"
}
# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS # Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
# seconds # seconds
# #
@ -248,12 +306,15 @@ abort-if-not-running()
# runners; the target tamed is expected to clean up itself. # runners; the target tamed is expected to clean up itself.
# Consequently, if a tamed terminated abnormally without # Consequently, if a tamed terminated abnormally without
# cleaning up, this will not solve that problem. # cleaning up, this will not solve that problem.
#
# Note that this is also called by tame to clean up an old tamed
# before spawning a new one.
kill-running() kill-running()
{ {
local -r root="${1?Missing root}" local -r root="${1?Missing root}"
abort-if-not-running "$root" 2>/dev/null test -d "$root" || return 0
local -r pid=$( cat "$root"/pid 2>/dev/null ) local -r pid=$( < "$root"/pid 2>/dev/null )
test -n "$pid" || return 0 test -n "$pid" || return 0
@ -301,14 +362,17 @@ runner-report()
test -f "$path/cmdline" || return 0 test -f "$path/cmdline" || return 0
local cmdline=$(< "$path/cmdline" ) local cmdline=$(< "$path/cmdline" )
local cmdstart=$(< "$path/cmdstart" ) local -a cmdstart cmdstart_fmt
cmdstart=( $(< "$path/cmdstart" ) )
cmdstart_fmt=$(date --date=@"${cmdstart[0]}" +%Y-%m-%dT%H:%M:%S)
local -i now=$(date +%s) local -i now=$(date +%s)
cat <<EOF cat <<EOF
runner: $id runner: $id
command: $cmdline command: $cmdline
start: $cmdstart ($(date +%Y-%m-%dT%H:%M:%S <<< "$cmdstart")) start: ${cmdstart[0]}.${cmdstart[1]} ($cmdstart_fmt)
elapsed: $((now - cmdstart)) seconds elapsed: $((now - cmdstart)) seconds
EOF EOF
} }
@ -430,6 +494,23 @@ to the process id of make itself. It defaults to the actual
parent process id (PPID), so tamed will not kill itself if parent process id (PPID), so tamed will not kill itself if
run manually on a shell (unless the shell exits first). run manually on a shell (unless the shell exits first).
TAMED_RUNTAB_OUT can specify a file in which to write job
start times (as seconds from the Unix epoch); durations
(in milliseconds); and commands from each of the runners.
The table is tab-delimited. Here are some useful examples:
# format nicely into columns and view in pager
$ column runtab | less
# sort by runtime descending (second column)
$ sort -rnk2 runtab
# take the runtime and command columns
$ cut -2,3 runtab
# convert milliseconds into minutes (!) and sort desc
$ awk '{ $2 = $2 / 1000 / 60; print } runtab | sort -nrk2
Options: Options:
--help show this message --help show this message
--kill kill a runing tamed at path RUNPATH --kill kill a runing tamed at path RUNPATH
@ -444,6 +525,9 @@ Environment Variables:
TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM
TAMED_TUI run in TUI mode (provide UI features like a TAMED_TUI run in TUI mode (provide UI features like a
dynamic runner status line) dynamic runner status line)
TAMED_RUNTAB_OUT file into which aggregate runner report will
be written (otherwise reports are only
available per-runner while tamed is running)
EOF EOF
exit $EX_USAGE exit $EX_USAGE
@ -550,6 +634,7 @@ main()
abort-if-running "$root" abort-if-running "$root"
tui-check tui-check
runtab-check-and-clear
# clean up background processes before we exit # clean up background processes before we exit
trap exit TERM trap exit TERM

View File

@ -115,6 +115,7 @@ export TAME_CMD_WAITTIME
export TAMED_STALL_SECONDS export TAMED_STALL_SECONDS
export TAMED_JAVA_OPTS export TAMED_JAVA_OPTS
export TAMED_TUI export TAMED_TUI
export TAMED_RUNTAB_OUT
TAMED_SPAWNER_PID=$(shell echo $$PPID) TAMED_SPAWNER_PID=$(shell echo $$PPID)
export TAMED_SPAWNER_PID export TAMED_SPAWNER_PID