diff --git a/README.md b/README.md
index 2d74b25..668441b 100644
--- a/README.md
+++ b/README.md
@@ -9,45 +9,107 @@ the SLURM scheduler. Namely one has to:
   and CPU counts that have been assigned by SLURM.
 - Export the environment, including the current directory.
 
-The `parallel-slurm-setup.sh` takes care of both these and provides an
-environmental variable `$parallel` for you to run the parallel
-executable with some sensible options.
+The `parallel_opts.sh` script takes care of both of these job setup
+steps and adds sensible default options to GNU Parallel.
 
 ## Usage
 
-Clone this Git repository e.g. in your home directory:
+Clone this Git repository into your home directory:
 
 ``` sh
 # From the command-line
-cd
+cd  # Go to home directory
 git clone https://github.uconn.edu/HPC/parallel-slurm.git
 ```
 
-Add the following 2 lines to your SLURM job submission file
+Add the following 3 lines to your SLURM job submission file:
 
 ``` sh
 # Inside your SLURM submission file
-source ~/parallel-slurm/parallel-slurm-setup.sh
-$parallel YOUR_PROGRAM ...
+parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
+module load parallel
+parallel $parallel_opts ... YOUR_PROGRAM ...
 ```
 
-## Example
+## Examples
 
-See the `submit.slurm` example file. Run it using:
+See the `*.slurm` example files. Run each of them using `sbatch` as
+explained below:
+
+### Example 01: Hostname
+
+This minimal example simply outputs the compute node names in
+`submit.out`.
 
 ``` sh
 # From the command-line
-sbatch submit.slurm
+cd ~/parallel-slurm/examples
+sbatch 01-submit-hostname.slurm
+touch submit.out && tail -f submit.out
+# Hit Ctrl+C to exit
 ```
 
-You should see the output of the compute node names in submit.out.
-For example:
+The last few lines of the output should show the nodes on which your
+5 CPUs were allocated and the `hostname` command was run; for example:
 
 ``` sh
-# Inside your submit.out
 cn328
 cn327
 cn327
 cn328
 cn327
 ```
+
+### Example 02: Resumable
+
+A typical problem that parallel tasks need to deal with is recovering
+from failure. Tasks can fail when they hit the SLURM job time limit,
+or they can fail intermittently, such as when a stochastic simulation
+does not converge; in other words, re-running the job can produce
+success.
+
+This example shows how to automatically resume jobs and retry only
+failed tasks. This works using the `--joblog` and `--resume-failed`
+options to GNU Parallel. The joblog records which tasks completed, so
+re-running the job skips them and retries only the tasks that failed.
+If for some reason you need to re-run a completed task, delete the
+joblog file.
+
+To run the example:
+
+``` sh
+# From the command-line
+cd ~/parallel-slurm/examples
+rm -f joblog submit.out
+for i in {1..5}; do sbatch 02-submit-resumable.slurm; done
+touch submit.out && tail -f submit.out
+# Hit Ctrl+C to exit
+```
+
+The output shows some tasks intermittently failing and others
+succeeding, but by the 5th job all of them have succeeded.
+
+```
+Started SLURM job 2339006
+Task 5 started (seed 2339006, random number 0) ... succeeded!
+Task 1 started (seed 2339006, random number 1) ... failed!
+Task 2 started (seed 2339006, random number 2) ... failed!
+Task 3 started (seed 2339006, random number 3) ... failed!
+Task 4 started (seed 2339006, random number 4) ... succeeded!
+Completed SLURM job 2339006 in 00:00:05
+Started SLURM job 2339007
+Task 3 started (seed 2339007, random number 1) ... failed!
+Task 1 started (seed 2339007, random number 2) ... failed!
+Task 2 started (seed 2339007, random number 4) ... succeeded!
+Completed SLURM job 2339007 in 00:00:05
+Started SLURM job 2339008
+Task 1 started (seed 2339008, random number 3) ... failed!
+Task 3 started (seed 2339008, random number 4) ... succeeded!
+Completed SLURM job 2339008 in 00:00:05
+Started SLURM job 2339009
+Task 1 started (seed 2339009, random number 4) ... succeeded!
+Completed SLURM job 2339009 in 00:00:04
+Started SLURM job 2339010
+Completed SLURM job 2339010 in 00:00:00
+```
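+
+To see which tasks have already completed, you can inspect the
+joblog: GNU Parallel writes one line per finished task, and the
+Exitval column records each task's exit value.
+
+``` sh
+# From the command-line
+cat ~/parallel-slurm/examples/joblog
+```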
diff --git a/examples/01-submit-hostname.slurm b/examples/01-submit-hostname.slurm
new file mode 100644
index 0000000..3749b27
--- /dev/null
+++ b/examples/01-submit-hostname.slurm
@@ -0,0 +1,13 @@
+#!/bin/bash -x
+#SBATCH --nodes 2
+#SBATCH --ntasks 5
+#SBATCH --output submit.out
+
+# Overwrite instead of appending to the output file.
+echo -n > submit.out
+
+parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
+module load parallel
+
+# Print the name of each host that GNU Parallel is running on.
+parallel $parallel_opts -n0 hostname ::: $(seq $SLURM_NTASKS)
diff --git a/examples/02-submit-resumable.slurm b/examples/02-submit-resumable.slurm
new file mode 100644
index 0000000..a94a684
--- /dev/null
+++ b/examples/02-submit-resumable.slurm
@@ -0,0 +1,21 @@
+#!/bin/bash
+#SBATCH --ntasks 5
+#SBATCH --output submit.out
+
+#SBATCH --dependency singleton
+#SBATCH --job-name unambiguous-name-for-resumable-job
+# Kill the job after 15 seconds to show the resuming feature.
+#SBATCH --time 0:15
+
+parallel_opts=$(~/parallel-slurm/parallel_opts.sh)
+module load parallel
+
+# Run a failure-prone program.
+echo "Started SLURM job $SLURM_JOB_ID"
+parallel $parallel_opts \
+    --joblog joblog \
+    --resume-failed \
+    --line-buffer \
+    ./script_that_sometimes_fails.sh \
+    ::: $(seq $SLURM_NTASKS)
+echo "Completed SLURM job $SLURM_JOB_ID in $(sacct -nXj $SLURM_JOB_ID -o elapsed)"
diff --git a/examples/script_that_sometimes_fails.sh b/examples/script_that_sometimes_fails.sh
new file mode 100755
index 0000000..9617495
--- /dev/null
+++ b/examples/script_that_sometimes_fails.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+seed=$SLURM_JOB_ID
+ID=$1
+
+prng () {
+    # Use the linear congruential generator algorithm:
+    # https://en.wikipedia.org/wiki/Random_number_generation#Computational_methods
+    #
+    # We seed b with the SLURM_JOB_ID so that all tasks of a given
+    # job independently derive the same seed.
+
+    x_n=0
+    a=1
+    b=$seed
+    m=$SLURM_NTASKS
+    # Recur as many times as the task ID to generate different numbers
+    # for each SLURM task.
+    for i in $(seq 1 $ID)
+    do
+        x_n=$(( $(( a * $((x_n + b)) )) % m ))
+    done
+    echo $x_n
+}
+
+main () {
+    # Pseudo-randomly fail tasks whose number is not divisible by 4.
+    random_int=$(prng)
+    echo -n "Task $ID started (seed $seed, random number $random_int) ... "
+    sleep "$random_int"
+    if (( $random_int % 4 == 0 ))
+    then
+        echo "succeeded!"
+        exit 0
+    fi
+    echo "failed!"
+    exit 1
+}
+
+[ "$0" != "${BASH_SOURCE[0]}" ] || main "$@"
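+
+# Usage sketch (the variable values below are hypothetical; SLURM
+# normally sets them): run one task locally to see the behavior, e.g.
+#
+#     SLURM_JOB_ID=1234 SLURM_NTASKS=5 ./script_that_sometimes_fails.sh 3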
diff --git a/parallel-slurm-setup.sh b/parallel-slurm-setup.sh
deleted file mode 100644
index 15e884d..0000000
--- a/parallel-slurm-setup.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-# GNU Parallel setup for SLURM
-#
-# Author: Pariksheet Nanda 2016-2017
-#
-# License: Public Domain / CC0
-#
-# To the extent possible under law, Pariksheet Nanda has waived all
-# copyright and related or neighboring rights to GNU Parallel setup
-# for SLURM.
-
-# Load the modules
-source /etc/profile.d/modules.sh
-module load parallel
-
-# Allow export of environment using `--env` option
-if [[ ! -e ~/.parallel/ignored_vars ]]; then
-    # Create an empty ignored_vars file to pass all the environment
-    # variables to the SSH instance
-    mkdir -p ~/.parallel
-    touch ~/.parallel/ignored_vars
-fi
-
-# Create the machine file for this job
-prefix=${SLURM_JOB_NAME%.*}
-machine_file=${prefix}.sshloginfile
-
-function expand_slurm_tasks_per_node () {
-    local tasks
-    tasks=( $(echo "${SLURM_TASKS_PER_NODE}" | tr ',' ' ') )
-
-    local num count
-    for val in ${tasks[*]}; do
-        num="${val/(*)/}"
-        if [[ -z "${val%%*)}" ]]; then
-            count=$(echo $val | sed -E 's#[0-9]+\(x([0-9]+)\)#\1#')
-        else
-            count=1
-        fi
-        printf "$num%.0s " $(seq $count)
-    done
-}
-
-# Make list in the form of "cpu/host"
-function cpu_host_array () {
-    local hostlist hosts cpus
-    # The SLURM `hostlist` executable is part of the Python PIP
-    # "python-hostlist" package
-    hostlist=/apps2/python/2.7.6-gcc/bin/hostlist
-    hosts=( $($hostlist -e ${SLURM_NODELIST}) )
-    cpus=( $(expand_slurm_tasks_per_node) )
-    for ((i=0; i<${#hosts[*]}; ++i)); do
-        echo ${cpus[i]}/${hosts[i]}
-    done
-}
-arr=( $(cpu_host_array) )
-printf "%s\n" ${arr[*]} > $machine_file
-
-# Pass default options to GNU Parallel
-parallel="parallel
-    --env _
-    --sshdelay 0.1
-    --sshloginfile $machine_file
-    --workdir .
-"
-# --joblog ${prefix}.joblog
-# --resume
diff --git a/parallel_opts.sh b/parallel_opts.sh
new file mode 100755
index 0000000..fedde1a
--- /dev/null
+++ b/parallel_opts.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# GNU Parallel setup for SLURM
+#
+# Author: Pariksheet Nanda 2016-2017,2019
+#
+# License: Public Domain / CC0
+#
+# To the extent possible under law, Pariksheet Nanda has waived all
+# copyright and related or neighboring rights to GNU Parallel setup
+# for SLURM.
+
+# This directive applies to the entire script.
+# shellcheck disable=2039
+true
+
+is_slurm_env () {
+    if [[ -n "$SLURM_JOB_ID" ]]
+    then  # yes
+        return 0
+    else
+        return 1
+    fi
+}
+
+# Helper to expand hostnames
+has_clustershell () {
+    if python -m ClusterShell.CLI.Nodeset -h &> /dev/null
+    then
+        return 0
+    else
+        return 1
+    fi
+}
+
+install_clustershell () {
+    python -m pip install --user clustershell
+}
+
+setup_on_cluster () {
+    # Allow export of environment using `--env` option
+    if [[ ! -e ~/.parallel/ignored_vars ]]; then
+        # Create an empty ignored_vars file to pass all the environment
+        # variables to the SSH instance
+        mkdir -p ~/.parallel
+        touch ~/.parallel/ignored_vars
+    fi
+}
+
+# Expand tasks from "2,5(x1),3(x2)" to "2 5 3 3 "
+expand_slurm_tasks_per_node () {
+    [[ -z "${SLURM_TASKS_PER_NODE}" ]] && return
+
+    local tasks
+    # shellcheck disable=2207
+    tasks=( $(echo "${SLURM_TASKS_PER_NODE}" | tr ',' ' ') )
+
+    local num count
+    for val in ${tasks[*]}; do
+        num="${val/(*)/}"
+        if [[ -z "${val%%*)}" ]]; then
+            count=$(echo "$val" | sed -E 's#[0-9]+\(x([0-9]+)\)#\1#')
+        else
+            count=1
+        fi
+        # shellcheck disable=2046
+        printf "$num%.0s " $(seq $count)
+    done
+}
+
+# Make list in the form of "cpu/host"
+cpu_host_array () {
+    local nodeset hosts cpus
+    nodeset="python -m ClusterShell.CLI.Nodeset"
+
+    # shellcheck disable=2207
+    hosts=( $($nodeset -e "${SLURM_NODELIST}") )
+    # shellcheck disable=2207
+    cpus=( $(expand_slurm_tasks_per_node) )
+    for ((i=0; i<${#hosts[*]}; ++i)); do
+        echo "${cpus[i]}/${hosts[i]}"
+    done
+}
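+
+# For example, with SLURM_NODELIST="cn[100-102]" and
+# SLURM_TASKS_PER_NODE="2(x3)" (the same values used in the test
+# fixtures), cpu_host_array prints:
+#
+#     2/cn100
+#     2/cn101
+#     2/cn102
+#
+# GNU Parallel's --sshloginfile reads "2/cn100" as "host cn100 with
+# 2 CPUs", so up to 2 jobs run there at a time.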
+
+prefix () {
+    echo "${SLURM_JOB_NAME%.*}"
+}
+
+machinefile () {
+    echo "$(prefix).sshloginfile"
+}
+
+write_machinefile () {
+    cpu_host_array > "$(machinefile)"
+}
+
+parallel_opts () {
+    local machinefile
+    machinefile=$(machinefile)
+
+    echo "
+        --env _
+        --sshdelay 0.1
+        --sshloginfile $machinefile
+        --workdir .
+"
+}
+
+main () {
+    is_slurm_env && setup_on_cluster
+    ! has_clustershell && install_clustershell
+    write_machinefile
+    parallel_opts
+}
+
+[[ "$0" != "${BASH_SOURCE[0]}" ]] || main "$@"
diff --git a/submit.slurm b/submit.slurm
deleted file mode 100644
index f98278c..0000000
--- a/submit.slurm
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -x
-#SBATCH --nodes=2
-#SBATCH --ntasks=5
-#SBATCH --output=submit.out
-
-# Overwrite instead of appending to output file.
-echo > submit.out
-
-# Print the name of each host that GNU Parallel is running on.
-source parallel-slurm-setup.sh
-$parallel -n0 hostname ::: $(seq $SLURM_NTASKS)
diff --git a/tests/parallel.bats b/tests/parallel.bats
new file mode 100644
index 0000000..d09701b
--- /dev/null
+++ b/tests/parallel.bats
@@ -0,0 +1,13 @@
+#!/usr/bin/env bats
+
+# shellcheck disable=1083
+true
+
+# shellcheck source=../parallel_opts.sh
+. "${BATS_TEST_DIRNAME}/../parallel_opts.sh"
+
+@test 'parallel_opts env var is generated' {
+    [[ -z "${parallel_opts}" ]]
+    parallel_opts=$(parallel_opts)
+    [[ -n "${parallel_opts}" ]]
+}
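+
+# A further sanity check (a sketch): the output should contain the
+# default options hard-coded in parallel_opts().
+@test 'parallel_opts contains the default options' {
+    opts=$(parallel_opts)
+    [[ ${opts} == *"--env _"* ]]
+    [[ ${opts} == *"--workdir ."* ]]
+}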
"${BATS_TEST_DIRNAME}/../parallel_opts.sh" + +@test 'machinefile is generated from environment' { + SLURM_JOB_NAME="submit.slurm" + SLURM_NODELIST="cn[100-102]" + SLURM_TASKS_PER_NODE="2(x3)" + machinefile=$(machinefile) + write_machinefile + [[ $(cat "$machinefile") == "2/cn100 +2/cn101 +2/cn102" ]] + rm -f "$machinefile" +} + +@test 'prefix is generated from SLURM_JOB_NAME' { + unset SLURM_JOB_NAME + result=$(prefix) + [[ -z ${result} ]] + + # shellcheck disable=2034 + SLURM_JOB_NAME="submit.slurm" + result=$(prefix) + echo "${result}" + [[ ${result} == "submit" ]] +} + +@test 'SLURM tasks are paired with nodes' { + unset SLURM_NODELIST + result=$(cpu_host_array) + [[ -z "${result}" ]] + + SLURM_TASKS_PER_NODE="2(x3)" + SLURM_NODELIST="cn[100-102]" + result=$(cpu_host_array) + echo "${result}" + [[ -n "${result}" ]] + [[ "${result}" == "2/cn100 +2/cn101 +2/cn102" ]] +} + +@test 'SLURM host list is expanded' { + unset SLURM_TASKS_PER_NODE + result=$(expand_slurm_tasks_per_node) + [[ -z "${result}" ]] + + SLURM_TASKS_PER_NODE="2(x3)" + result=$(expand_slurm_tasks_per_node) + echo "${result}" + [[ -n "${result}" ]] + [[ ${result} == "2 2 2 " ]] + + SLURM_TASKS_PER_NODE="2,5(x1),3(x2)" + result=$(expand_slurm_tasks_per_node) + echo "${result}" + [[ -n "${result}" ]] + [[ ${result} == "2 5 3 3 " ]] +} + +@test 'SLURM environment is recognized' { + unset SLURM_JOB_ID + ! is_slurm_env + SLURM_JOB_ID=12345 + is_slurm_env +}