Overhaul parallel-slurm to use unit tests and allow resumable jobs #2

Merged on May 7, 2019 (28 commits)
28 commits
cc28386
STY: Pass shellcheck
pan14001 Apr 16, 2019
98a0873
MAINT: Move all code into functions
pan14001 Apr 16, 2019
139ebc2
TST: Cover generate_parallel_opts and is_slurm_env
pan14001 Apr 16, 2019
5dd8169
DEV: Use descriptive function names
pan14001 Apr 17, 2019
c549306
TST: Cover expand_slurm_tasks_per_node
pan14001 Apr 17, 2019
cf502c9
TST: Add failing test for generating machinefile
pan14001 Apr 17, 2019
d5e6b46
DEV: Silence shellcheck warning
pan14001 Apr 19, 2019
359852b
TST: Expect newlines from cpu_host_array()
pan14001 Apr 19, 2019
8e51556
MAINT: Replace python-hostlist with better maintained clustershell
pan14001 Apr 19, 2019
d344965
MAINT: Rewrite environment setup as one liner
pan14001 Apr 19, 2019
b3d0885
BUG: Automatically install clustershell dependency
pan14001 Apr 19, 2019
cf2ffbc
TST: Cover generation of machinefile
pan14001 Apr 19, 2019
38e06b5
MAINT: Clear file without adding leading newline
pan14001 Apr 19, 2019
89450f7
API: Use $parallel_opts, no need to source script anymore
pan14001 Apr 19, 2019
395abcb
API: Directly echo GNU parallel options
pan14001 Apr 19, 2019
b02f16f
DOC: SLURM scripts probably need the module load command
pan14001 Apr 23, 2019
f6ca55b
DOC: Explain why we need cd
pan14001 Apr 23, 2019
4a934ee
DEV: Add trailing newline
pan14001 Apr 23, 2019
cbcd2b4
MAINT: Use absolute path to move around submission scripts
pan14001 Apr 23, 2019
2202a4f
MAINT: Move *.slurm file into examples/ directory
pan14001 Apr 23, 2019
b2f7467
MAINT: Remove = symbols for readability
pan14001 Apr 23, 2019
d362129
ENH: Give user control over resuming and add example
pan14001 Apr 23, 2019
427f794
BUG: Fix brace expansion
pan14001 Apr 23, 2019
1bdd3b8
ENH: Validate example 02 on the cluster
pan14001 Apr 23, 2019
f16aa34
ENH: Add output from example 02
pan14001 Apr 23, 2019
01744ce
DOC: Tweak wording
pan14001 May 7, 2019
46a9ad9
DOC: Properly document the 2 examples
pan14001 May 7, 2019
bca7cb1
Merge branch 'unit-tests' of github.uconn.edu:HPC/parallel-slurm into…
pan14001 May 7, 2019
90 changes: 76 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -9,45 +9,107 @@ the SLURM scheduler. Namely one has to:
and CPU counts that have been assigned by SLURM.
- Export the environment, including the current directory.

The `parallel-slurm-setup.sh` takes care of both these and provides an
environmental variable `$parallel` for you to run the parallel
executable with some sensible options.
The `parallel_opts.sh` script takes care of both of these job setup
steps and adds sensible default options to GNU parallel.
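
For reference, the options the script emits look roughly like the
sketch below, taken from the `parallel_opts` function later in this
PR. The `myjob` prefix is hypothetical; the actual `.sshloginfile`
name is derived from `$SLURM_JOB_NAME` with its extension stripped.

``` sh
# Approximate output of parallel_opts.sh inside a SLURM job
--env _
--sshdelay 0.1
--sshloginfile myjob.sshloginfile
--workdir .
```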

## Usage

Clone this Git repository e.g. in your home directory:
Clone this Git repository into your home directory:

``` sh
# From the command-line
cd
cd # Go to home directory
git clone https://github.uconn.edu/HPC/parallel-slurm.git
```

Add the following 2 lines to your SLURM job submission file
Add the following 3 lines to your SLURM job submission file:

``` sh
# Inside your SLURM submission file
source ~/parallel-slurm/parallel-slurm-setup.sh
$parallel YOUR_PROGRAM ...
parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
module load parallel
parallel $parallel_opts ... YOUR_PROGRAM ...
```

## Example
## Examples

See the `submit.slurm` example file. Run it using:
See the `*.slurm` example files. Run each of them using `sbatch` as
explained below:

### Example 01: Hostname

This minimal example simply outputs the compute node names in
`submit.out`.

``` sh
# From the command-line
sbatch submit.slurm
cd ~/parallel-slurm/examples
sbatch 01-submit-hostname.slurm
touch submit.out && tail -f submit.out
# Hit Ctrl+C to exit
```

You should see the output of the compute node names in submit.out.
For example:
The last few lines of the output should show the nodes on which your
5 CPUs were allocated and the `hostname` command was run; for example:

``` sh
# Inside your submit.out
cn328
cn327
cn327
cn328
cn327
```

### Example 02: Resumable

A typical problem that parallel tasks need to deal with is recovering
from failure. Tasks can fail when they hit the SLURM job time limit,
or due to the stochastic nature of a simulation that intermittently
fails to converge; in other words, simply re-running the job can
produce success.

This example shows how to automatically resume jobs and retry only
failed tasks. This works using the `--joblog` and `--resume-failed`
options of GNU Parallel. The joblog records which tasks completed
successfully, so GNU Parallel skips them and re-runs only the failed
ones instead of needlessly repeating finished work. If for some
reason you need to re-run a completed task, delete the `joblog` file.
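
The joblog itself is a plain tab-separated file whose `Exitval`
column (the seventh) records each task's exit status, so you can
check which tasks would be retried. A minimal sketch; the sample
joblog contents below are invented for illustration:

``` sh
# Write a small sample joblog (columns follow GNU Parallel's format;
# the values here are made up for demonstration).
cat > /tmp/joblog.sample <<'EOF'
Seq	Host	Starttime	JobRuntime	Send	Receive	Exitval	Signal	Command
1	cn327	1557000000	1.0	0	30	1	0	./script_that_sometimes_fails.sh 1
2	cn327	1557000000	2.0	0	30	0	0	./script_that_sometimes_fails.sh 2
3	cn328	1557000000	3.0	0	30	1	0	./script_that_sometimes_fails.sh 3
EOF

# List the commands whose exit value (column 7) was non-zero,
# i.e. the tasks that --resume-failed would re-run.
awk -F'\t' 'NR > 1 && $7 != 0 {print $NF}' /tmp/joblog.sample
# Prints:
# ./script_that_sometimes_fails.sh 1
# ./script_that_sometimes_fails.sh 3
```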

To run the example:

``` sh
# From the command-line
cd ~/parallel-slurm/examples
rm -f joblog submit.out
for i in {1..5}; do sbatch 02-submit-resumable.slurm; done
touch submit.out && tail -f submit.out
# Hit Ctrl+C to exit
```

The output shows some tasks intermittently failing and others
succeeding, but by the 5th job all of them have succeeded.

```
Started SLURM job 2339006
Task 5 started (seed 2339006, random number 0) ... succeeded!
Task 1 started (seed 2339006, random number 1) ... failed!
Task 2 started (seed 2339006, random number 2) ... failed!
Task 3 started (seed 2339006, random number 3) ... failed!
Task 4 started (seed 2339006, random number 4) ... succeeded!
Completed SLURM job 2339006 in 00:00:05
Started SLURM job 2339007
Task 3 started (seed 2339007, random number 1) ... failed!
Task 1 started (seed 2339007, random number 2) ... failed!
Task 2 started (seed 2339007, random number 4) ... succeeded!
Completed SLURM job 2339007 in 00:00:05
Started SLURM job 2339008
Task 1 started (seed 2339008, random number 3) ... failed!
Task 3 started (seed 2339008, random number 4) ... succeeded!
Completed SLURM job 2339008 in 00:00:05
Started SLURM job 2339009
Task 1 started (seed 2339009, random number 4) ... succeeded!
Completed SLURM job 2339009 in 00:00:04
Started SLURM job 2339010
Completed SLURM job 2339010 in 00:00:00
```
13 changes: 13 additions & 0 deletions examples/01-submit-hostname.slurm
@@ -0,0 +1,13 @@
#!/bin/bash -x
#SBATCH --nodes 2
#SBATCH --ntasks 5
#SBATCH --output submit.out

# Overwrite instead of appending to output file.
echo -n > submit.out

parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
module load parallel

# Print the name of each host that GNU Parallel is running on.
parallel $parallel_opts -n0 hostname ::: $(seq $SLURM_NTASKS)
21 changes: 21 additions & 0 deletions examples/02-submit-resumable.slurm
@@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --ntasks 5
#SBATCH --output submit.out

#SBATCH --dependency singleton
#SBATCH --job-name unambiguous-name-for-resumable-job
# Kill job after 15 seconds to show resuming feature.
#SBATCH --time 0:15

parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
module load parallel

# Run a failure prone program.
echo "Started SLURM job $SLURM_JOB_ID"
parallel $parallel_opts \
--joblog joblog \
--resume-failed \
--line-buffer \
./script_that_sometimes_fails.sh \
::: $(seq $SLURM_NTASKS)
echo "Completed SLURM job $SLURM_JOB_ID in $(sacct -nXj $SLURM_JOB_ID -o elapsed)"
40 changes: 40 additions & 0 deletions examples/script_that_sometimes_fails.sh
@@ -0,0 +1,40 @@
#!/bin/bash

seed=$SLURM_JOB_ID
ID=$1

prng () {
# Use the linear congruential generator algorithm:
# https://en.wikipedia.org/wiki/Random_number_generation#Computational_methods
#
# We seed b with the SLURM_JOB_ID so that all tasks of a given
# job independently derive the same seed.

x_n=0
a=1
b=$seed
m=$SLURM_NTASKS
# Recur as many times as the task id to generate different numbers
# for each SLURM task.
for i in $(seq 1 $ID)
do
x_n=$(( $(( a * $((x_n + b)) )) % m))
done
echo $x_n
}

main () {
# Randomly fail half of the tasks.
random_int=$(prng)
echo -n "Task $ID started (seed $seed, random number $random_int) ... "
sleep "$random_int"
if (( $random_int % 4 == 0 ))
then
echo "succeeded!"
exit 0
fi
echo "failed!"
exit 1
}

[ "$0" != "${BASH_SOURCE[0]}" ] || main "$@"
66 changes: 0 additions & 66 deletions parallel-slurm-setup.sh

This file was deleted.

116 changes: 116 additions & 0 deletions parallel_opts.sh
@@ -0,0 +1,116 @@
#!/usr/bin/env bash

# GNU Parallel setup for SLURM
#
# Author: Pariksheet Nanda <hpc@uconn.edu> 2016-2017,2019
#
# License: Public Domain / CC0
#
# To the extent possible under law, Pariksheet Nanda has waived all
# copyright and related or neighboring rights to GNU Parallel setup
# for SLURM.

# This directive applies to the entire script.
# shellcheck disable=2039
true

is_slurm_env () {
if [[ -n "$SLURM_JOB_ID" ]]
then # yes
return 0
else
return 1
fi
}

# Helper to expand hostnames
has_clustershell () {
if python -m ClusterShell.CLI.Nodeset -h &> /dev/null
then
return 0
else
return 1
fi
}

install_clustershell () {
python -m pip install --user clustershell
}

setup_on_cluster () {
# Allow export of environment using `--env` option
if [[ ! -e ~/.parallel/ignored_vars ]]; then
# Create an empty ignored_vars file to pass all the environment
# variables to the SSH instance
mkdir -p ~/.parallel
touch ~/.parallel/ignored_vars
fi
}

# Expand tasks from "2,5(x1),3(x2)" to "2 5 3 3 "
expand_slurm_tasks_per_node () {
[[ -z "${SLURM_TASKS_PER_NODE}" ]] && return

local tasks
# shellcheck disable=2207
tasks=( $(echo "${SLURM_TASKS_PER_NODE}" | tr ',' ' ') )

local num count
for val in ${tasks[*]}; do
num="${val/(*)/}"
if [[ -z "${val%%*)}" ]]; then
count=$(echo "$val" | sed -E 's#[0-9]+\(x([0-9]+)\)#\1#')
else
count=1
fi
# shellcheck disable=2046
printf "$num%.0s " $(seq $count)
done
}

# Make list in the form of "cpu/host"
cpu_host_array () {
local nodeset hosts cpus
nodeset="python -m ClusterShell.CLI.Nodeset"

# shellcheck disable=2207
hosts=( $($nodeset -e "${SLURM_NODELIST}") )
# shellcheck disable=2207
cpus=( $(expand_slurm_tasks_per_node) )
for ((i=0; i<${#hosts[*]}; ++i)); do
echo "${cpus[i]}/${hosts[i]}"
done
}

prefix () {
echo "${SLURM_JOB_NAME%.*}"
}

machinefile () {
echo "$(prefix).sshloginfile"
}

write_machinefile () {
cpu_host_array > "$(machinefile)"
}

parallel_opts () {
local machinefile
machinefile=$(machinefile)

echo "
--env _
--sshdelay 0.1
--sshloginfile $machinefile
--workdir .
"
}

main () {
is_slurm_env && setup_on_cluster
! has_clustershell && install_clustershell
write_machinefile
parallel_opts
}

[[ "$0" != "${BASH_SOURCE[0]}" ]] || main "$@"
11 changes: 0 additions & 11 deletions submit.slurm

This file was deleted.
