tamed: Add runtab and TAMED_RUNTAB_OUT
This provides logging that can be used to analyze jobs. See `tamed --help` for some examples. More to come. You'll notice that one of the examples reprents package build time in _minutes_. This is why TAMER is necessary; as of the time of writing, the longest-building package is nearly five and a half minutes, and there are a number of packages that take a minute or more. But, there are potentially other optimizations that can be done. And this is _after_ many rounds of optimizations over the years. (TAME was not originally built for what it is currently being used for.)main
parent
4a3b86f480
commit
6fd570477a
|
@ -22,6 +22,11 @@ Compiler
|
||||||
--------
|
--------
|
||||||
- Input mappings will no longer emit the destination param as a dependency.
|
- Input mappings will no longer emit the destination param as a dependency.
|
||||||
- `tamed --report` and `TAMED_TUI` for analyzing build performance.
|
- `tamed --report` and `TAMED_TUI` for analyzing build performance.
|
||||||
|
- Runners now store start time and duration for each command, available in
|
||||||
|
the runpath for reporting.
|
||||||
|
- `TAMED_RUNTAB_OUT`, if set, will aggregate all runners' runtabs into a
|
||||||
|
single file as jobs are completed. See `tamed --help` for more
|
||||||
|
information and examples.
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
|
55
bin/tame
55
bin/tame
|
@ -83,7 +83,7 @@ command-runner()
|
||||||
|
|
||||||
# log the provided command line and starting time so that we can determine
|
# log the provided command line and starting time so that we can determine
|
||||||
# what is currently being compiled and how long it is taking
|
# what is currently being compiled and how long it is taking
|
||||||
date +%s > "$base/cmdstart"
|
millis > "$base/cmdstart"
|
||||||
echo "$*" > "$base/cmdline"
|
echo "$*" > "$base/cmdline"
|
||||||
|
|
||||||
# all remaining arguments are passed to the runner
|
# all remaining arguments are passed to the runner
|
||||||
|
@ -113,7 +113,9 @@ command-runner()
|
||||||
if [ "${line:0:5}" == "DONE " ]; then
|
if [ "${line:0:5}" == "DONE " ]; then
|
||||||
read -r _ code _ <<< "$line"
|
read -r _ code _ <<< "$line"
|
||||||
|
|
||||||
|
runtab-append "$base"
|
||||||
mark-available "$base"
|
mark-available "$base"
|
||||||
|
|
||||||
return "$code"
|
return "$code"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -237,7 +239,56 @@ mark-available()
|
||||||
echo idle > "$base/cmdline"
|
echo idle > "$base/cmdline"
|
||||||
|
|
||||||
# this can be used to determine how long the worker has been idle
|
# this can be used to determine how long the worker has been idle
|
||||||
date +%s > "$base/cmdstart"
|
millis > "$base/cmdstart"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Output seconds and milliseconds, space-delimited
|
||||||
|
millis()
|
||||||
|
{
|
||||||
|
local date
|
||||||
|
date=( $(date '+%s %N') )
|
||||||
|
|
||||||
|
# %N returns nanoseconds and it may be 0-prefixed, which would be
|
||||||
|
# interpreted as octal without the explicit base specification
|
||||||
|
echo "${date[0]}" "$(( 10#"${date[1]}" / 1000000 ))"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Append data to the runner table (runtab)
|
||||||
|
#
|
||||||
|
# This takes information about the most recently executed command and
|
||||||
|
# appends it to a table representing the work that the runner has
|
||||||
|
# done. This should be done at the end of processing a particular job but
|
||||||
|
# before marking the runner as available using `mark-available'.
|
||||||
|
#
|
||||||
|
# The columns of this report are, tab-delimited:
|
||||||
|
# 1. Start date (Unix timestamp, seconds);
|
||||||
|
# 2. Duration (milliseconds); and
|
||||||
|
# 3. Runner command line
|
||||||
|
runtab-append()
|
||||||
|
{
|
||||||
|
local -r base=${1?Missing runner base path}
|
||||||
|
|
||||||
|
local cmd duration
|
||||||
|
local -a cmdstart now
|
||||||
|
|
||||||
|
cmd=$(< "$base/cmdline")
|
||||||
|
cmdstart=( $(< "$base/cmdstart") )
|
||||||
|
now=( $(millis) )
|
||||||
|
|
||||||
|
# duration consists of seconds and nanoseconds; let's just deal with
|
||||||
|
# milliseconds, since any greater precision is not useful to us with how
|
||||||
|
# slow the system is today, and convert it into a decimal for
|
||||||
|
# reporting. Nanoseconds may be 0-prefixed, which will be interpreted as
|
||||||
|
# octal without an explicit base specification.
|
||||||
|
duration=$((
|
||||||
|
((now[0] * 1000) + now[1])
|
||||||
|
- ((cmdstart[0] * 1000) + cmdstart[1])
|
||||||
|
))
|
||||||
|
|
||||||
|
# the duration is in milliseconds
|
||||||
|
printf "%d\t%s\t%s\n" "$cmdstart" "$duration" "$cmd" >> "$base/runtab"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
93
bin/tamed
93
bin/tamed
|
@ -24,6 +24,8 @@ readonly mypath
|
||||||
|
|
||||||
declare -ri EX_RUNNING=1
|
declare -ri EX_RUNNING=1
|
||||||
declare -ri EX_NOTRUNNING=2 # tamed is not running
|
declare -ri EX_NOTRUNNING=2 # tamed is not running
|
||||||
|
declare -ri EX_RUNTAB_LOCK=3 # failed to acquire aggregate runtab lock
|
||||||
|
declare -ri EX_RUNTAB_OUT=4 # failed to write to aggregate runtab
|
||||||
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
|
declare -ri EX_USAGE=64 # incorrect usage; sysexits.h
|
||||||
declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h
|
declare -ri EX_CANTCREAT=73 # cannot create file; sysexits.h
|
||||||
|
|
||||||
|
@ -45,6 +47,9 @@ declare root=
|
||||||
declare -r TAMED_TUI="${TAMED_TUI:-}"
|
declare -r TAMED_TUI="${TAMED_TUI:-}"
|
||||||
declare tui_mode=
|
declare tui_mode=
|
||||||
|
|
||||||
|
# file into which aggregate runner report will be placed (none if empty)
|
||||||
|
declare -r TAMED_RUNTAB_OUT="${TAMED_RUNTAB_OUT:-}"
|
||||||
|
|
||||||
|
|
||||||
# Create FIFOs for runner
|
# Create FIFOs for runner
|
||||||
#
|
#
|
||||||
|
@ -117,6 +122,10 @@ spawn-runner()
|
||||||
# flag as available (the client will manipulate these)
|
# flag as available (the client will manipulate these)
|
||||||
echo 0 > "$base/busy"
|
echo 0 > "$base/busy"
|
||||||
|
|
||||||
|
# runtab is used for reporting, which we will optionally aggregate
|
||||||
|
> "$base/runtab"
|
||||||
|
monitor-runner-runtab "$root" "$base/runtab" &
|
||||||
|
|
||||||
# monitor runner usage and kill when inactive
|
# monitor runner usage and kill when inactive
|
||||||
stall-monitor "$base" &
|
stall-monitor "$base" &
|
||||||
|
|
||||||
|
@ -140,6 +149,55 @@ spawn-runner()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Monitor the given runner runtab and append to the aggregate runtab
|
||||||
|
#
|
||||||
|
# The aggregate runtab is append-only and has a row-level lock to support
|
||||||
|
# concurrent writes without having to rely on kernel buffering.
|
||||||
|
monitor-runner-runtab()
|
||||||
|
{
|
||||||
|
local -r root="${1?Missing root run path}"
|
||||||
|
local -r runtab="${2?Missing runtab path}"
|
||||||
|
|
||||||
|
# no use in aggregating if it was not requested
|
||||||
|
test -n "$TAMED_RUNTAB_OUT" || return 0
|
||||||
|
|
||||||
|
while ! spawner-dead; do
|
||||||
|
# this is a shared file, and while buffering _should_ be sufficient, we
|
||||||
|
# may as well avoid potential headaches entirely by locking during the
|
||||||
|
# operation
|
||||||
|
tail -f "$runtab" | while read -r row; do
|
||||||
|
# we want to lock _per row write_, since output will be interleaved
|
||||||
|
# between all the runners
|
||||||
|
(
|
||||||
|
local -ri timeout=3
|
||||||
|
|
||||||
|
flock -w $timeout 7 || {
|
||||||
|
echo "error: failed to acquire lock on aggregate runtab" >&2
|
||||||
|
exit $EX_RUNTAB_LOCK
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "$row" >&7
|
||||||
|
) 7>> "$TAMED_RUNTAB_OUT"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Check that we can write to the provided runtab, and clear it
|
||||||
|
runtab-check-and-clear()
|
||||||
|
{
|
||||||
|
test -n "$TAMED_RUNTAB_OUT" || return 0
|
||||||
|
|
||||||
|
# clear the runtab, and see if we can write to it
|
||||||
|
>"$TAMED_RUNTAB_OUT" || {
|
||||||
|
echo "error: unable to write to '$TAMED_RUNTAB_OUT' (TAMED_RUNTAB_OUT)"
|
||||||
|
exit $EX_RUNTAB_OUT
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "tamed: aggregating runner runtabs into '$TAMED_RUNTAB_OUT'"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
|
# Kill runner at BASE when it becomes inactive for TAMED_STALL_SECONDS
|
||||||
# seconds
|
# seconds
|
||||||
#
|
#
|
||||||
|
@ -248,12 +306,15 @@ abort-if-not-running()
|
||||||
# runners; the target tamed is expected to clean up itself.
|
# runners; the target tamed is expected to clean up itself.
|
||||||
# Consequently, if a tamed terminated abnormally without
|
# Consequently, if a tamed terminated abnormally without
|
||||||
# cleaning up, this will not solve that problem.
|
# cleaning up, this will not solve that problem.
|
||||||
|
#
|
||||||
|
# Note that this is also called by tame to clean up an old tamed
|
||||||
|
# before spawning a new one.
|
||||||
kill-running()
|
kill-running()
|
||||||
{
|
{
|
||||||
local -r root="${1?Missing root}"
|
local -r root="${1?Missing root}"
|
||||||
|
|
||||||
abort-if-not-running "$root" 2>/dev/null
|
test -d "$root" || return 0
|
||||||
local -r pid=$( cat "$root"/pid 2>/dev/null )
|
local -r pid=$( < "$root"/pid 2>/dev/null )
|
||||||
|
|
||||||
test -n "$pid" || return 0
|
test -n "$pid" || return 0
|
||||||
|
|
||||||
|
@ -301,14 +362,17 @@ runner-report()
|
||||||
test -f "$path/cmdline" || return 0
|
test -f "$path/cmdline" || return 0
|
||||||
|
|
||||||
local cmdline=$(< "$path/cmdline" )
|
local cmdline=$(< "$path/cmdline" )
|
||||||
local cmdstart=$(< "$path/cmdstart" )
|
local -a cmdstart cmdstart_fmt
|
||||||
|
|
||||||
|
cmdstart=( $(< "$path/cmdstart" ) )
|
||||||
|
cmdstart_fmt=$(date --date=@"${cmdstart[0]}" +%Y-%m-%dT%H:%M:%S)
|
||||||
|
|
||||||
local -i now=$(date +%s)
|
local -i now=$(date +%s)
|
||||||
|
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
runner: $id
|
runner: $id
|
||||||
command: $cmdline
|
command: $cmdline
|
||||||
start: $cmdstart ($(date +%Y-%m-%dT%H:%M:%S <<< "$cmdstart"))
|
start: ${cmdstart[0]}.${cmdstart[1]} ($cmdstart_fmt)
|
||||||
elapsed: $((now - cmdstart)) seconds
|
elapsed: $((now - cmdstart)) seconds
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
@ -430,6 +494,23 @@ to the process id of make itself. It defaults to the actual
|
||||||
parent process id (PPID), so tamed will not kill itself if
|
parent process id (PPID), so tamed will not kill itself if
|
||||||
run manually on a shell (unless the shell exits first).
|
run manually on a shell (unless the shell exits first).
|
||||||
|
|
||||||
|
TAMED_RUNTAB_OUT can specify a file in which to write job
|
||||||
|
start times (as seconds from the Unix epoch); durations
|
||||||
|
(in milliseconds); and commands from each of the runners.
|
||||||
|
The table is tab-delimited. Here are some useful examples:
|
||||||
|
|
||||||
|
# format nicely into columns and view in pager
|
||||||
|
$ column runtab | less
|
||||||
|
|
||||||
|
# sort by runtime descending (second column)
|
||||||
|
$ sort -rnk2 runtab
|
||||||
|
|
||||||
|
# take the runtime and command columns
|
||||||
|
$ cut -2,3 runtab
|
||||||
|
|
||||||
|
# convert milliseconds into minutes (!) and sort desc
|
||||||
|
$ awk '{ $2 = $2 / 1000 / 60; print } runtab | sort -nrk2
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--help show this message
|
--help show this message
|
||||||
--kill kill a runing tamed at path RUNPATH
|
--kill kill a runing tamed at path RUNPATH
|
||||||
|
@ -444,6 +525,9 @@ Environment Variables:
|
||||||
TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM
|
TAMED_JAVA_OPTS opts to pass to dslc, and in turn, the JVM
|
||||||
TAMED_TUI run in TUI mode (provide UI features like a
|
TAMED_TUI run in TUI mode (provide UI features like a
|
||||||
dynamic runner status line)
|
dynamic runner status line)
|
||||||
|
TAMED_RUNTAB_OUT file into which aggregate runner report will
|
||||||
|
be written (otherwise reports are only
|
||||||
|
available per-runner while tamed is running)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
exit $EX_USAGE
|
exit $EX_USAGE
|
||||||
|
@ -550,6 +634,7 @@ main()
|
||||||
|
|
||||||
abort-if-running "$root"
|
abort-if-running "$root"
|
||||||
tui-check
|
tui-check
|
||||||
|
runtab-check-and-clear
|
||||||
|
|
||||||
# clean up background processes before we exit
|
# clean up background processes before we exit
|
||||||
trap exit TERM
|
trap exit TERM
|
||||||
|
|
|
@ -115,6 +115,7 @@ export TAME_CMD_WAITTIME
|
||||||
export TAMED_STALL_SECONDS
|
export TAMED_STALL_SECONDS
|
||||||
export TAMED_JAVA_OPTS
|
export TAMED_JAVA_OPTS
|
||||||
export TAMED_TUI
|
export TAMED_TUI
|
||||||
|
export TAMED_RUNTAB_OUT
|
||||||
|
|
||||||
TAMED_SPAWNER_PID=$(shell echo $$PPID)
|
TAMED_SPAWNER_PID=$(shell echo $$PPID)
|
||||||
export TAMED_SPAWNER_PID
|
export TAMED_SPAWNER_PID
|
||||||
|
|
Loading…
Reference in New Issue