#!/bin/bash
#
# Reports on the progress of a set of distributed jobs.
#
# The batch reported on is the one whose base directory is the directory
# containing this script; the script changes into that directory before
# reading anything.  Relative to it, the script reads:
#   logs/host_<host>_<instance>.log   one per worker; a sibling ".ended"
#                                     file marks the worker as finished
#   work/<job>/                       one directory per job
#   logs/<job>.begun, logs/<job>.ended
#       Only the first line is used by the "jobs" report.  It is read as
#       whitespace-separated fields: field 1 is printed after "on" (and is
#       treated as "host,instance" by "findur"), fields 2 and 3 are handed
#       to "date -d" as the timestamp.
#
# Usage: <script> hosts|jobs|findur
#   hosts   list workers and whether each is RUNNING or has ended
#   jobs    list every job as [pending], =RUNNING= or completed (+ duration)
#   findur  total/mean duration and job count of completed jobs,
#           per host,instance and per host
#
# Exit status: 3 if the base directory cannot be entered; otherwise the
# status of the final "echo" (an unrecognised argument only prints the
# help text, it does not fail).
#
# Requires a "date" that understands "-d" (e.g. GNU coreutils).

#######################################
# Prints one line per logs/host_*.log file: "<host>,<instance>" followed by
# "ended <fields 2 and 3 of the .ended file>" or "RUNNING".
# Outputs: report lines on stdout, in the order find(1) yields them.
#######################################
report_hosts() {
  local logfile label endfile endtime
  find logs/ -name 'host_*.log' \
    | while IFS= read -r logfile; do
        # host_<host>_<instance>.log  ->  <host>,<instance>
        label="$(basename "$logfile" \
          | sed -e 's/host_\([a-z0-9-]*\)_\([0-9]*\).*/\1,\2/')"
        printf "%10s " "$label"
        # Swap only the trailing ".log"; find's -name test guarantees it.
        endfile="${logfile%.log}.ended"
        if [ -f "$endfile" ]; then
          endtime="$(awk '{ printf "%s %s", $2, $3 }' "$endfile")"
          echo "ended $endtime"
        else
          echo "RUNNING"
        fi
      done
}

#######################################
# Prints one line per directory directly under work/, sorted by name:
#   <job>: [pending]                          no logs/<job>.begun
#   <job>: =RUNNING=: since <time>, on <who>  begun but not ended
#   <job>: completed: <time>, on <who> (took <n> s)
# Outputs: report lines on stdout.  report_findur parses the "completed"
#   lines by field position, so their layout must stay in step with it.
#######################################
report_jobs() {
  local jobdir job begfile endfile begtime endtime begsec endsec duration
  find work/ -maxdepth 1 -mindepth 1 -type d | sort \
    | while IFS= read -r jobdir; do
        job="$(basename "$jobdir")"
        printf '%s: ' "$job"
        begfile="logs/$job.begun"
        endfile="logs/$job.ended"

        if [ ! -f "$begfile" ]; then
          echo "[pending]"
          continue
        fi

        begtime="$(head -n1 "$begfile" \
          | awk '{ printf "%s %s, on %s", $2, $3, $1 }')"
        if [ ! -f "$endfile" ]; then
          echo "=RUNNING=: since $begtime"
          continue
        fi

        endtime="$(head -n1 "$endfile" \
          | awk '{ printf "%s %s, on %9s", $2, $3, $1 }')"
        # Everything before the first comma is the bare timestamp.
        begsec="$(date +%s -d "${begtime%%,*}")"
        endsec="$(date +%s -d "${endtime%%,*}")"
        # Bare names inside $(( )) evaluate to 0 when empty, so a failed
        # "date" does not turn into an arithmetic syntax error.
        duration=$((endsec - begsec))
        printf "completed: %s (took %5d s)\n" "$endtime" "$duration"
      done
}

#######################################
# Reads "<key> <seconds>" lines on stdin and prints, for every distinct
# key, the summed seconds, the truncated integer mean and the line count.
# Keys are compared exactly, so "node1" never absorbs "node10".
# Outputs: summary lines on stdout; nothing at all for empty input.
#######################################
summarise_durations() {
  # sort -k 8: the numeric key starts at the eighth field of the line
  # printed below, which is the job count.
  # NOTE(review): the previous comment here said "sorted by duration"
  # (that would be field 3); the long-standing key is kept as it was.
  awk '
    NF >= 2 { total[$1] += $2; count[$1]++ }
    END {
      for (key in total) {
        printf " %10s: total %5d s, mean %5d s, %4d jobs\n",
          key, total[key], int(total[key] / count[key]), count[key]
      }
    }
  ' | sort -n -r -k 8
}

#######################################
# Prints running-time totals of completed jobs, first per host,instance
# ("thread") and then per host with all instances combined.
# Outputs: two headed tables on stdout, each followed by a " " line.
#######################################
report_findur() {
  local completed
  # In a "completed" line of report_jobs, field 6 is the host,instance
  # label and field 8 the duration in seconds.
  completed="$(report_jobs | awk '$2 == "completed:" { print $6, $8 }')"

  echo "Total running time of completed jobs per thread:"
  printf '%s\n' "$completed" | summarise_durations
  echo " "

  echo "Total running time of completed jobs per host:"
  # Drop ",<instance>" so every instance of a host shares one key.
  printf '%s\n' "$completed" \
    | awk 'NF >= 2 { sub(/,.*/, "", $1); print $1, $2 }' \
    | summarise_durations
  echo " "
}

#######################################
# Entry point: enters the script's own directory and dispatches on $1.
# Arguments: $1 - "hosts", "jobs" or "findur"
# Returns:   exits 3 if the base directory cannot be entered.
#######################################
main() {
  local basedir

  echo " "

  basedir="$(dirname "$(realpath "$0")")"
  if ! cd "$basedir"; then
    echo "Problem trying to change directory to "
    echo "$basedir/"
    echo "Exiting."
    exit 3
  fi

  case "$1" in
    hosts)
      report_hosts
      ;;
    jobs)
      report_jobs
      ;;
    findur)
      report_findur
      ;;
    *)
      echo "Argument \"$1\" not recognised."
      echo -e '\t hosts: shows hosts in use'
      echo -e '\t jobs: shows running and ended jobs'
      echo -e '\t findur: shows total running times on hosts'
      ;;
  esac

  echo " "
}

main "$@"