diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d74b25
--- /dev/null
+++ b/README.md
@@ -0,0 +1,53 @@
+# GNU Parallel setup for SLURM
+
+## Summary
+
+There is a little bit of setup work to get GNU Parallel to work with
+the SLURM scheduler. Namely, one has to:
+
+- Create an `*.sshloginfile` containing a list of hostnames
+  and CPU counts that have been assigned by SLURM.
+- Export the environment, including the current directory.
+
+The `parallel-slurm-setup.sh` script takes care of both of these and
+provides a shell variable `$parallel` for you to run the parallel
+executable with some sensible options.
+
+## Usage
+
+Clone this Git repository, e.g. in your home directory:
+
+``` sh
+# From the command-line
+cd
+git clone https://github.uconn.edu/HPC/parallel-slurm.git
+```
+
+Add the following 2 lines to your SLURM job submission file:
+
+``` sh
+# Inside your SLURM submission file
+source ~/parallel-slurm/parallel-slurm-setup.sh
+$parallel YOUR_PROGRAM ...
+```
+
+## Example
+
+See the `submit.slurm` example file. Run it using:
+
+``` sh
+# From the command-line
+sbatch submit.slurm
+```
+
+You should see the compute node names in `submit.out`.
+For example:
+
+``` sh
+# Inside your submit.out
+cn328
+cn327
+cn327
+cn328
+cn327
+```
diff --git a/parallel-slurm-setup.sh b/parallel-slurm-setup.sh
new file mode 100644
index 0000000..15e884d
--- /dev/null
+++ b/parallel-slurm-setup.sh
@@ -0,0 +1,98 @@
+# GNU Parallel setup for SLURM
+#
+# Author: Pariksheet Nanda 2016-2017
+#
+# License: Public Domain / CC0
+#
+# To the extent possible under law, Pariksheet Nanda has waived all
+# copyright and related or neighboring rights to GNU Parallel setup
+# for SLURM.
+#
+# Usage: `source` this file from a SLURM job script, then run
+# `$parallel YOUR_PROGRAM ...`.  Because it runs inside the caller's
+# shell it deliberately does not change shell options (no `set -e`).
+
+# Load the modules
+source /etc/profile.d/modules.sh
+module load parallel
+
+# Allow export of environment using `--env` option
+if [[ ! -e ~/.parallel/ignored_vars ]]; then
+    # Create an empty ignored_vars file to pass all the environment
+    # variables to the SSH instance
+    mkdir -p ~/.parallel
+    touch ~/.parallel/ignored_vars
+fi
+
+# Create the machine file for this job
+prefix=${SLURM_JOB_NAME%.*}
+machine_file=${prefix}.sshloginfile
+
+# Expand SLURM_TASKS_PER_NODE into one task count per node.
+#
+# SLURM compresses repeated counts: "2(x3),1" means three nodes with
+# 2 tasks each, followed by one node with 1 task.
+#
+# Globals: SLURM_TASKS_PER_NODE (read)
+# Outputs: the counts on stdout, each followed by a space,
+#          e.g. "2 2 2 1 " for the value above.
+function expand_slurm_tasks_per_node () {
+    local -a tasks
+    local val num count i
+    # Kept in a variable so the parentheses are parsed as regex syntax.
+    local re='^([0-9]+)\(x([0-9]+)\)$'
+
+    IFS=',' read -r -a tasks <<< "${SLURM_TASKS_PER_NODE}"
+    for val in "${tasks[@]}"; do
+        if [[ $val =~ $re ]]; then
+            num=${BASH_REMATCH[1]}
+            count=${BASH_REMATCH[2]}
+        else
+            num=$val
+            count=1
+        fi
+        for ((i = 0; i < count; ++i)); do
+            printf '%s ' "$num"
+        done
+    done
+}
+
+# Make list in the form of "cpu/host", one entry per line.
+#
+# Globals: SLURM_NODELIST, SLURM_TASKS_PER_NODE (read)
+# Outputs: lines such as "3/cn327" on stdout
+# Returns: non-zero if the node list could not be expanded
+function cpu_host_array () {
+    local hostlist names i
+    local -a hosts cpus
+    # The `hostlist` executable is part of the Python PIP
+    # "python-hostlist" package.  Where that site-specific install is
+    # missing, fall back to SLURM's own `scontrol show hostnames`.
+    hostlist=/apps2/python/2.7.6-gcc/bin/hostlist
+    if [[ -x $hostlist ]]; then
+        names=$("$hostlist" -e "${SLURM_NODELIST}") || return
+    else
+        names=$(scontrol show hostnames "${SLURM_NODELIST}") || return
+    fi
+    # Word splitting is intended: host names contain no whitespace.
+    hosts=( $names )
+    cpus=( $(expand_slurm_tasks_per_node) )
+    for ((i = 0; i < ${#hosts[@]}; ++i)); do
+        printf '%s/%s\n' "${cpus[i]}" "${hosts[i]}"
+    done
+}
+arr=( $(cpu_host_array) )
+if (( ${#arr[@]} == 0 )); then
+    printf '%s\n' "parallel-slurm-setup: no hosts found (not in a SLURM job?)" >&2
+fi
+printf "%s\n" "${arr[@]}" > "$machine_file"
+
+# Pass default options to GNU Parallel
+parallel="parallel
+    --env _
+    --sshdelay 0.1
+    --sshloginfile $machine_file
+    --workdir .
+"
+# --joblog ${prefix}.joblog
+# --resume
diff --git a/submit.slurm b/submit.slurm
new file mode 100644
index 0000000..f98278c
--- /dev/null
+++ b/submit.slurm
@@ -0,0 +1,11 @@
+#!/bin/bash -x
+#SBATCH --nodes=2
+#SBATCH --ntasks=5
+#SBATCH --output=submit.out
+
+# Overwrite instead of appending to output file.
+echo > submit.out
+
+# Print the name of each host that GNU Parallel is running on.
+source parallel-slurm-setup.sh
+$parallel -n0 hostname ::: $(seq $SLURM_NTASKS)