ENH: Initial commit

HPC · Jan 19, 2017 · ec362b0 · ec362b0
commit ec362b0
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,53 @@
+# GNU Parallel setup for SLURM
+
+## Summary
+
+There is a little bit of setup work to get GNU Parallel to work with
+the SLURM scheduler.  Namely one has to:
+
+- Create an `*.sshloginfile` containing a list of hostnames
+  and CPU counts that have been assigned by SLURM.
+- Export the environment, including the current directory.
+
+The `parallel-slurm-setup.sh` script takes care of both of these and
+provides a shell variable `$parallel` that runs the GNU Parallel
+executable with some sensible default options.
+
+## Usage
+
+Clone this Git repository e.g. in your home directory:
+
+``` sh
+# From the command-line
+cd
+git clone https://github.uconn.edu/HPC/parallel-slurm.git
+```
+
+Add the following two lines to your SLURM job submission file:
+
+``` sh
+# Inside your SLURM submission file
+source ~/parallel-slurm/parallel-slurm-setup.sh
+$parallel YOUR_PROGRAM ...
+```
+
+## Example
+
+See the `submit.slurm` example file.  Run it using:
+
+``` sh
+# From the command-line
+sbatch submit.slurm
+```
+
+You should see the names of the compute nodes in `submit.out`.
+For example:
+
+``` sh
+# Inside your submit.out
+cn328
+cn327
+cn327
+cn328
+cn327
+```
diff --git a/parallel-slurm-setup.sh b/parallel-slurm-setup.sh
@@ -0,0 +1,66 @@
+# GNU Parallel setup for SLURM
+# 
+# Author: Pariksheet Nanda <hpc@uconn.edu> 2016-2017
+#
+# License: Public Domain / CC0
+#
+# To the extent possible under law, Pariksheet Nanda has waived all
+# copyright and related or neighboring rights to GNU Parallel setup
+# for SLURM.
+
+# Load the modules
+source /etc/profile.d/modules.sh
+module load parallel
+
+# Allow export of environment using `--env` option
+if [[ ! -e ~/.parallel/ignored_vars ]]; then
+    # Create an empty ignored_vars file to pass all the environment
+    # variables to the SSH instance
+    mkdir -p ~/.parallel
+    touch ~/.parallel/ignored_vars
+fi
+
+# Create the machine file for this job
+prefix=${SLURM_JOB_NAME%.*}
+machine_file=${prefix}.sshloginfile
+
+# Expand the compressed SLURM_TASKS_PER_NODE list into one task count
+# per node.
+#
+# Each comma-separated entry is either a plain count ("3") or a count
+# with a repeat factor ("2(x3)", i.e. the count 2 repeated 3 times).
+# For example "2(x3),1" is printed as "2 2 2 1 ".
+#
+# Globals:  SLURM_TASKS_PER_NODE (read)
+# Outputs:  the expanded counts on stdout, each followed by one space
+function expand_slurm_tasks_per_node () {
+    local -a tasks
+    local val num count i
+    # Keep the pattern in a variable so the parentheses are taken as
+    # part of the regular expression and not as shell syntax.
+    local repeated='^([0-9]+)\(x([0-9]+)\)$'
+
+    IFS=',' read -r -a tasks <<< "${SLURM_TASKS_PER_NODE}"
+
+    for val in "${tasks[@]}"; do
+        if [[ "$val" =~ $repeated ]]; then
+            num=${BASH_REMATCH[1]}
+            count=${BASH_REMATCH[2]}
+        else
+            num=$val
+            count=1
+        fi
+        # Print the count once for every node it applies to; the value
+        # is passed as an argument, never used as the format string.
+        for ((i = 0; i < count; ++i)); do
+            printf '%s ' "$num"
+        done
+    done
+}
+
+# Make list in the form of "cpu/host"
+#
+# Globals:  SLURM_NODELIST (read); SLURM_TASKS_PER_NODE (read, through
+#           expand_slurm_tasks_per_node)
+# Outputs:  one "cpus/hostname" line per allocated node on stdout
+# Returns:  non-zero when the node list cannot be expanded
+function cpu_host_array () {
+    local hostlist expanded i
+    local -a hosts cpus
+    # The SLURM `hostlist` executable is part of the Python PIP
+    # "python-hostlist" package
+    hostlist=/apps2/python/2.7.6-gcc/bin/hostlist
+    # SLURM_NODELIST is quoted because a value such as "cn[327-328]" is
+    # also a valid glob pattern and must reach the tool unchanged.
+    if [[ -x "$hostlist" ]]; then
+        expanded=$("$hostlist" -e "${SLURM_NODELIST}") || return
+    else
+        # The site-specific hostlist tool is not installed here; let
+        # SLURM expand the node list itself.
+        expanded=$(scontrol show hostnames "${SLURM_NODELIST}") || return
+    fi
+    # Deliberately unquoted: split the expanded list into one array
+    # element per hostname.
+    hosts=( $expanded )
+    cpus=( $(expand_slurm_tasks_per_node) )
+    for ((i = 0; i < ${#hosts[@]}; ++i)); do
+        printf '%s/%s\n' "${cpus[i]}" "${hosts[i]}"
+    done
+}
+# Write one "cpu/host" entry per line to the machine file.  The
+# redirect target is quoted so that a job name containing whitespace
+# does not abort the write with an "ambiguous redirect" error.
+arr=( $(cpu_host_array) )
+printf '%s\n' "${arr[@]}" > "$machine_file"
+
+# Pass default options to GNU Parallel
+# NOTE: $parallel is meant to be expanded unquoted so that it splits
+# into the command and its options; a machine file name containing
+# whitespace would still be split at that point.
+parallel="parallel
+ --env _
+ --sshdelay 0.1
+ --sshloginfile $machine_file
+ --workdir .
+"
+# Further options, currently disabled:
+# --joblog ${prefix}.joblog
+# --resume
diff --git a/submit.slurm b/submit.slurm
@@ -0,0 +1,11 @@
+#!/bin/bash -x
+#SBATCH --nodes=2          
+#SBATCH --ntasks=5
+#SBATCH --output=submit.out
+
+# Overwrite instead of appending to output file.
+echo > submit.out
+
+# Print the name of each host that GNU Parallel is running on.
+source parallel-slurm-setup.sh
+$parallel -n0 hostname ::: $(seq $SLURM_NTASKS)