#!/bin/bash 
#
# Starts nproc instances of the local_job_coordinator, in 
# the background, with all stdin, stdout, stderr connected 
# to local files.
# This script therefore returns almost immediately, letting 
# a remote login exit and leave the local_job_coordinator
# running.

# Announce which host is running this script, the time (HH:MM:SS) and
# the path the script was invoked as.
echo "$(hostname), $(date +'%H:%M:%S'):  $0"

# is_count STRING
# Returns 0 when STRING is made up only of decimal digits (at least one),
# i.e. it can be used as an instance count; returns 1 otherwise.
is_count() {
	case "$1" in
		''|*[!0-9]*) return 1 ;;
		*) return 0 ;;
	esac
}

# the value nproc is passed as the first command-line argument;
# an empty/missing argument falls back to 1 with a warning, anything
# that is not a non-negative integer is rejected (exit status 1)
nproc="$1"
if [ -z "$nproc" ]; then
	nproc=1
	echo "WARNING: nproc (argument) was empty: default=1" 1>&2
elif ! is_count "$nproc"; then
	# Without this check a bad value only makes 'seq' fail later on,
	# so nothing is started and the script still exits successfully.
	echo "error: nproc (argument) must be a non-negative integer, got '$nproc'" 1>&2
	echo "exiting without starting local_job_coordinator" 1>&2
	exit 1
fi

# determine this batch's base directory: the directory that contains
# this script, as resolved by realpath.
# The realpath result is checked explicitly: if it failed and produced
# nothing, 'dirname ""' would yield "." and the cd below would succeed
# in whatever directory the caller happened to be in.
if ! script_path="$(realpath "$0")" || [ -z "$script_path" ]
then
	echo "error resolving the location of $0 with realpath" 1>&2
	echo "exiting without starting local_job_coordinator" 1>&2
	exit 1
fi
basedir="$(dirname "$script_path")"

# try to change to this base directory, else exit
if ! cd "$basedir"
then
	echo "error from cd to $basedir/" 1>&2
	echo "exiting without starting local_job_coordinator" 1>&2
	exit 1
fi

# export DISPLAY as an empty string to avoid any attempt at starting
# GUI parts (the variable is emptied, not removed from the environment)
export DISPLAY=""

# the executable for this host's coordinator (relative to the current
# directory)
coordinator_exe="./local_job_coordinator"

# directory that receives one logfile per coordinator instance
log_dir="logs"

# the host name is part of every logfile name and every status line;
# look it up once instead of once per instance
this_host="$(hostname)"

# The launches below are backgrounded, so this script cannot see them
# fail.  Check up front that the coordinator can actually be executed.
if [ ! -f "$coordinator_exe" ] || [ ! -x "$coordinator_exe" ]
then
	echo "error: $coordinator_exe is missing or not executable" 1>&2
	echo "exiting without starting local_job_coordinator" 1>&2
	exit 1
fi

# Make sure the log directory exists; otherwise the output redirection
# of each backgrounded launch fails and no coordinator is run, while a
# pid would still be reported.
if ! mkdir -p "$log_dir"
then
	echo "error creating log directory $log_dir/" 1>&2
	echo "exiting without starting local_job_coordinator" 1>&2
	exit 1
fi

# check that the coordinator hasn't already been started on this host.
# All logfiles are checked BEFORE anything is launched, so that exit
# status 2 really means that no coordinator was started by this run.
for candidate in $(seq 1 "$nproc")
do
	coordinator_logfile="$log_dir/host_${this_host}_$candidate.log"
	if [ -e "$coordinator_logfile" ]
	then
		echo "logfile $coordinator_logfile already exists" 1>&2
		echo "exiting, without starting the coodinator script" 1>&2
		exit 2
	fi
done

# start nproc coordinator processes
for instance in $(seq 1 "$nproc")
do
	coordinator_logfile="$log_dir/host_${this_host}_$instance.log"

	# run the coordinator, with redirections and backgrounding;
	# the instance number is handed over through the environment
	export instance
	"$coordinator_exe" >"$coordinator_logfile" 2>&1 </dev/null &

	# log the process ID of the backgrounded coordinator
	echo "$this_host: $coordinator_exe, instance $instance: pid=$!"

done