tamed: Add runtab and TAMED_RUNTAB_OUT
This provides logging that can be used to analyze jobs. See `tamed --help` for some examples. More to come. You'll notice that one of the examples represents package build time in _minutes_. This is why TAMER is necessary; as of the time of writing, the longest-building package is nearly five and a half minutes, and there are a number of packages that take a minute or more. But, there are potentially other optimizations that can be done. And this is _after_ many rounds of optimizations over the years. (TAME was not originally built for what it is currently being used for.)main
parent
4a3b86f480
commit
6fd570477a
|
@ -22,6 +22,11 @@ Compiler
|
|||
--------
|
||||
- Input mappings will no longer emit the destination param as a dependency.
|
||||
- `tamed --report` and `TAMED_TUI` for analyzing build performance.
|
||||
- Runners now store start time and duration for each command, available in
|
||||
the runpath for reporting.
|
||||
- `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a
|
||||
single file as jobs are completed. See `tamed --help` for more
|
||||
information and examples.
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
|
55
bin/tame
55
bin/tame
|
@ -83,7 +83,7 @@ command-runner()
|
|||
|
||||
# log the provided command line and starting time so that we can determine
|
||||
# what is currently being compiled and how long it is taking
|
||||
date +%s > "$base/cmdstart"
|
||||
millis > "$base/cmdstart"
|
||||
echo "$*" > "$base/cmdline"
|
||||
|
||||
# all remaining arguments are passed to the runner
|
||||
|
@ -113,7 +113,9 @@ command-runner()
|
|||
if [ "${line:0:5}" == "DONE " ]; then
|
||||
read -r _ code _ <<< "$line"
|
||||
|
||||
runtab-append "$base"
|
||||
mark-available "$base"
|
||||
|
||||
return "$code"
|
||||
fi
|
||||
|
||||
|
@ -237,7 +239,56 @@ mark-available()
|
|||
echo idle > "$base/cmdline"
|
||||
|
||||
# this can be used to determine how long the worker has been idle
|
||||
date +%s > "$base/cmdstart"
|
||||
millis > "$base/cmdstart"
|
||||
}
|
||||
|
||||
|
||||
# Print the current time as two space-delimited integers
#
# The first integer is the number of whole seconds since the Unix epoch and
# the second is the number of milliseconds elapsed within that second.
millis()
{
  # a single `date' invocation produces both fields so that they describe
  # the same instant; word splitting separates them into array elements
  local -a stamp=( $(date '+%s %N') )

  # %N yields nanoseconds, which may be 0-prefixed; force base 10 so that
  # the value is not interpreted as octal, then truncate to milliseconds
  local -i ms=$(( 10#${stamp[1]} / 1000000 ))

  printf '%s %s\n' "${stamp[0]}" "$ms"
}
|
||||
|
||||
|
||||
# Record the most recently executed command in the runner table (runtab)
#
# The command line and start time are read from `cmdline' and `cmdstart'
# under the runner base path, the elapsed time is computed against the
# current time, and a single row is appended to `runtab' under that same
# path.  Call this once a job has finished, but before the runner is
# flagged as available with `mark-available', since that overwrites both
# files.
#
# Each row is tab-delimited and contains:
#   1. Start date (Unix timestamp, seconds);
#   2. Duration (milliseconds); and
#   3. Runner command line
runtab-append()
{
  local -r base=${1?Missing runner base path}

  local job
  local -a began ended

  job=$(< "$base/cmdline")

  # both timestamps are `seconds milliseconds' pairs as output by `millis'
  began=( $(< "$base/cmdstart") )
  ended=( $(millis) )

  # collapse each pair into a single millisecond count so that the
  # duration is a simple difference
  local -i began_ms=$(( (began[0] * 1000) + began[1] ))
  local -i ended_ms=$(( (ended[0] * 1000) + ended[1] ))
  local -i elapsed_ms=$(( ended_ms - began_ms ))

  # the start is reported in whole seconds; the duration in milliseconds
  printf "%d\t%s\t%s\n" "${began[0]}" "$elapsed_ms" "$job" >> "$base/runtab"
}
|
||||
|
||||
|
||||
|
|
93
bin/tamed
93
bin/tamed
|
@ -24,6 +24,8 @@ readonly mypath
|
|||
|
||||
declare -ri EX_RUNNING=1
|
||||
declare -ri EX_NOTRUNNING=2 # tamed is not running
|
||||
declare -ri EX_RUNTAB_LOCK=3 # failed to acquire aggregate runtab lock
|
||||
declare -ri EX_RUNTAB_OUT=4 # failed to write to aggregate runtab
|
||||
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
|
||||
declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h
|
||||
|
||||
|
@ -45,6 +47,9 @@ declare root=
|
|||
declare -r TAMED_TUI="${TAMED_TUI:-}"
|
||||
declare tui_mode=
|
||||
|
||||
# file into which aggregate runner report will be placed (none if empty)
|
||||
declare -r TAMED_RUNTAB_OUT="${TAMED_RUNTAB_OUT:-}"
|
||||
|
||||
|
||||
# Create FIFOs for runner
|
||||
#
|
||||
|
@ -117,6 +122,10 @@ spawn-runner()
|
|||
# flag as available (the client will manipulate these)
|
||||
echo 0 > "$base/busy"
|
||||
|
||||
# runtab is used for reporting, which we will optionally aggregate
|
||||
> "$base/runtab"
|
||||
monitor-runner-runtab "$root" "$base/runtab" &
|
||||
|
||||
# monitor runner usage and kill when inactive
|
||||
stall-monitor "$base" &
|
||||
|
||||
|
@ -140,6 +149,55 @@ spawn-runner()
|
|||
}
|
||||
|
||||
|
||||
# Monitor the given runner runtab and append to the aggregate runtab
#
# Arguments:
#   1. root run path (required, but not otherwise used here);
#   2. path to the runner's runtab to follow.
#
# This is a no-op unless TAMED_RUNTAB_OUT is non-empty.  Otherwise, every
# row appended to the runner's runtab is copied verbatim to the file named
# by TAMED_RUNTAB_OUT for as long as the spawner is alive.
#
# The aggregate runtab is append-only and has a row-level lock to support
# concurrent writes without having to rely on kernel buffering.  If the
# lock cannot be acquired within the timeout, an error is written to stderr
# and that row is not written; the subshell handling it exits with
# EX_RUNTAB_LOCK.
monitor-runner-runtab()
{
  local -r root="${1?Missing root run path}"
  local -r runtab="${2?Missing runtab path}"

  # seconds to wait for the aggregate runtab lock before giving up on a row
  local -ri timeout=3

  # no use in aggregating if it was not requested
  test -n "$TAMED_RUNTAB_OUT" || return 0

  while ! spawner-dead; do
    # this is a shared file, and while buffering _should_ be sufficient, we
    # may as well avoid potential headaches entirely by locking during the
    # operation
    #
    # IFS is cleared for the read so that rows are copied exactly as
    # written: tab is IFS whitespace, and so a row with an empty leading or
    # trailing column would otherwise lose its delimiter
    tail -f "$runtab" | while IFS= read -r row; do
      # we want to lock _per row write_, since output will be interleaved
      # between all the runners
      (
        flock -w "$timeout" 7 || {
          echo "error: failed to acquire lock on aggregate runtab" >&2
          exit "$EX_RUNTAB_LOCK"
        }

        # printf rather than echo so that the row can never be mistaken
        # for an option or have its contents interpreted
        printf '%s\n' "$row" >&7
      ) 7>> "$TAMED_RUNTAB_OUT"
    done
  done
}
|
||||
|
||||
|
||||
# Check that we can write to the provided runtab, and clear it
#
# This is a no-op unless TAMED_RUNTAB_OUT is non-empty.  Otherwise, the
# file that it names is truncated (and created if necessary).  If that
# fails, an error is written to stderr and the process exits with
# EX_RUNTAB_OUT; on success, a notice naming the file is written to stdout.
runtab-check-and-clear()
{
  test -n "$TAMED_RUNTAB_OUT" || return 0

  # clear the runtab, and see if we can write to it
  >"$TAMED_RUNTAB_OUT" || {
    # diagnostics belong on stderr so that they are not mixed in with (or
    # lost along with) redirected standard output
    echo "error: unable to write to '$TAMED_RUNTAB_OUT' (TAMED_RUNTAB_OUT)" >&2
    exit "$EX_RUNTAB_OUT"
  }

  echo "tamed: aggregating runner runtabs into '$TAMED_RUNTAB_OUT'"
}
|
||||
|
||||
|
||||
# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
|
||||
# seconds
|
||||
#
|
||||
|
@ -248,12 +306,15 @@ abort-if-not-running()
|
|||
# runners; the target tamed is expected to clean up itself.
|
||||
# Consequently, if a tamed terminated abnormally without
|
||||
# cleaning up, this will not solve that problem.
|
||||
#
|
||||
# Note that this is also called by tame to clean up an old tamed
|
||||
# before spawning a new one.
|
||||
kill-running()
|
||||
{
|
||||
local -r root="${1?Missing root}"
|
||||
|
||||
abort-if-not-running "$root" 2>/dev/null
|
||||
local -r pid=$( cat "$root"/pid 2>/dev/null )
|
||||
test -d "$root" || return 0
|
||||
local -r pid=$( < "$root"/pid 2>/dev/null )
|
||||
|
||||
test -n "$pid" || return 0
|
||||
|
||||
|
@ -301,14 +362,17 @@ runner-report()
|
|||
test -f "$path/cmdline" || return 0
|
||||
|
||||
local cmdline=$(< "$path/cmdline" )
|
||||
local cmdstart=$(< "$path/cmdstart" )
|
||||
local -a cmdstart cmdstart_fmt
|
||||
|
||||
cmdstart=( $(< "$path/cmdstart" ) )
|
||||
cmdstart_fmt=$(date --date=@"${cmdstart[0]}" +%Y-%m-%dT%H:%M:%S)
|
||||
|
||||
local -i now=$(date +%s)
|
||||
|
||||
cat <<EOF
|
||||
runner: $id
|
||||
command: $cmdline
|
||||
start: $cmdstart ($(date +%Y-%m-%dT%H:%M:%S <<< "$cmdstart"))
|
||||
start: ${cmdstart[0]}.${cmdstart[1]} ($cmdstart_fmt)
|
||||
elapsed: $((now - cmdstart)) seconds
|
||||
EOF
|
||||
}
|
||||
|
@ -430,6 +494,23 @@ to the process id of make itself. It defaults to the actual
|
|||
parent process id (PPID), so tamed will not kill itself if
|
||||
run manually on a shell (unless the shell exits first).
|
||||
|
||||
TAMED_RUNTAB_OUT can specify a file in which to write job
|
||||
start times (as seconds from the Unix epoch); durations
|
||||
(in milliseconds); and commands from each of the runners.
|
||||
The table is tab-delimited. Here are some useful examples:
|
||||
|
||||
# format nicely into columns and view in pager
|
||||
$ column runtab | less
|
||||
|
||||
# sort by runtime descending (second column)
|
||||
$ sort -rnk2 runtab
|
||||
|
||||
# take the runtime and command columns
|
||||
$ cut -f2,3 runtab
|
||||
|
||||
# convert milliseconds into minutes (!) and sort desc
|
||||
$ awk '{ $2 = $2 / 1000 / 60; print }' runtab | sort -nrk2
|
||||
|
||||
Options:
|
||||
--help show this message
|
||||
--kill kill a running tamed at path RUNPATH
|
||||
|
@ -444,6 +525,9 @@ Environment Variables:
|
|||
TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM
|
||||
TAMED_TUI run in TUI mode (provide UI features like a
|
||||
dynamic runner status line)
|
||||
TAMED_RUNTAB_OUT file into which aggregate runner report will
|
||||
be written (otherwise reports are only
|
||||
available per-runner while tamed is running)
|
||||
EOF
|
||||
|
||||
exit $EX_USAGE
|
||||
|
@ -550,6 +634,7 @@ main()
|
|||
|
||||
abort-if-running "$root"
|
||||
tui-check
|
||||
runtab-check-and-clear
|
||||
|
||||
# clean up background processes before we exit
|
||||
trap exit TERM
|
||||
|
|
|
@ -115,6 +115,7 @@ export TAME_CMD_WAITTIME
|
|||
export TAMED_STALL_SECONDS
|
||||
export TAMED_JAVA_OPTS
|
||||
export TAMED_TUI
|
||||
export TAMED_RUNTAB_OUT
|
||||
|
||||
TAMED_SPAWNER_PID=$(shell echo $$PPID)
|
||||
export TAMED_SPAWNER_PID
|
||||
|
|
Loading…
Reference in New Issue