From 620a1d7d1667d888ad6f142846b3df2a5d1660c4 Mon Sep 17 00:00:00 2001
From: Pariksheet Nanda
Date: Mon, 20 Mar 2017 13:25:14 -0400
Subject: [PATCH] Initial commit

---
 aliases-functions/job-aliases.csh        |  3 ++
 aliases-functions/job-aliases.sh         |  3 ++
 aliases-functions/stail                  | 47 ++++++++++++++++++++++++
 aliases-functions/watch-sbatch.sh        | 44 ++++++++++++++++++++++
 install-apps/R-commands.R                |  7 ++++
 install-apps/R-example                   |  5 +++
 install-apps/python-example              |  5 +++
 slurm-troubleshooting/bash-debug.slurm   |  7 ++++
 slurm-troubleshooting/bash-fail.slurm    |  6 +++
 slurm-troubleshooting/bash-pending.slurm |  6 +++
 slurm-troubleshooting/bash-success.slurm |  6 +++
 11 files changed, 139 insertions(+)
 create mode 100644 aliases-functions/job-aliases.csh
 create mode 100644 aliases-functions/job-aliases.sh
 create mode 100755 aliases-functions/stail
 create mode 100644 aliases-functions/watch-sbatch.sh
 create mode 100644 install-apps/R-commands.R
 create mode 100644 install-apps/R-example
 create mode 100644 install-apps/python-example
 create mode 100644 slurm-troubleshooting/bash-debug.slurm
 create mode 100644 slurm-troubleshooting/bash-fail.slurm
 create mode 100644 slurm-troubleshooting/bash-pending.slurm
 create mode 100644 slurm-troubleshooting/bash-success.slurm

diff --git a/aliases-functions/job-aliases.csh b/aliases-functions/job-aliases.csh
new file mode 100644
index 0000000..23db949
--- /dev/null
+++ b/aliases-functions/job-aliases.csh
@@ -0,0 +1,3 @@
+alias job-cpu 'sacct -o jobid,jobname,ncpus,elapsed,mincpu,cputime%12 -j'
+alias job-disk 'sacct -o jobid,jobname,elapsed,maxdiskread,maxdiskwrite -j'
+alias job-mem 'sacct -o jobid,jobname,maxvmsize,maxrss,averss,maxpages -j'
diff --git a/aliases-functions/job-aliases.sh b/aliases-functions/job-aliases.sh
new file mode 100644
index 0000000..02037bf
--- /dev/null
+++ b/aliases-functions/job-aliases.sh
@@ -0,0 +1,3 @@
+alias job-cpu='sacct -o jobid,jobname,ncpus,elapsed,mincpu,cputime%12 -j'
+alias job-disk='sacct -o jobid,jobname,elapsed,maxdiskread,maxdiskwrite -j'
+alias job-mem='sacct -o jobid,jobname,maxvmsize,maxrss,averss,maxpages -j'
diff --git a/aliases-functions/stail b/aliases-functions/stail
new file mode 100755
index 0000000..2c1f2c8
--- /dev/null
+++ b/aliases-functions/stail
@@ -0,0 +1,47 @@
+#!/bin/csh
+
+# Validate input: one argument naming an existing submission file.
+if ($#argv < 1) then
+    echo "Usage: $0 SLURM_SUBMISSION_FILE"
+    exit 1
+else if (! -e "$argv[1]") then
+    echo "Usage: $0 SLURM_SUBMISSION_FILE"
+    exit 1
+endif
+# Submit job using `sbatch`.
+set sbatch_out = `sbatch "$argv[1]"`
+# Exit if sbatch printed nothing (csh `-z` is a file test, so compare strings).
+if ("$sbatch_out" == "") then
+    exit 1
+endif
+echo "$sbatch_out"
+# Grab job metadata: the id is the last word sbatch printed; StdOut= is its log.
+set job_id = `echo $sbatch_out | awk '{ print $NF }'`
+set out_file = `scontrol -ao show job $job_id | sed -E 's#.* StdOut=([^ ]+).*#\1#'`
+# Poll the job state up to 120 times, one second apart; announce waiting once.
+set message = "Waiting for job $job_id"
+foreach i (`seq 1 120`)
+    set state = `\sacct -PXnj $job_id -o state`
+    if ("$state" == "PENDING") then
+        if ("$message" != "") then
+            echo "$message"
+            set message = ""
+        endif
+    else if ("$state" == "RUNNING") then
+        echo "Job $job_id is running. Showing output file $out_file (hit Ctrl + C to exit):\
+"
+        tail -f "$out_file"
+        break
+    else if ("$state" == "FAILED") then
+        echo "Error: Job $job_id failed. Showing end of output file $out_file :\
+"
+        tail "$out_file"
+        break
+    else if ("$state" == "COMPLETED") then
+        echo "Job $job_id completed. Showing end of output file $out_file :\
+"
+        tail "$out_file"
+        break
+    endif
+    sleep 1
+end
diff --git a/aliases-functions/watch-sbatch.sh b/aliases-functions/watch-sbatch.sh
new file mode 100644
index 0000000..49f01d6
--- /dev/null
+++ b/aliases-functions/watch-sbatch.sh
@@ -0,0 +1,44 @@
+function stail() {
+    # Keep working variables local so they do not leak into the calling shell.
+    local sbatch_out job_id out_file message state i
+    # Validate input: $1 must name an existing submission file.
+    if ! [[ -e "$1" ]]; then
+        echo "Usage: ${FUNCNAME[0]} SLURM_SUBMISSION_FILE"
+        return 1
+    fi
+    # Submit job using `sbatch`.
+    sbatch_out=$(sbatch "$1")
+    # Return if submission is unsuccessful (sbatch printed nothing on stdout).
+    [[ -n "$sbatch_out" ]] || return 1
+    echo "$sbatch_out"
+    # Grab job metadata: the id is the last word sbatch printed; StdOut= is its log.
+    job_id=${sbatch_out##* }
+    out_file=$(scontrol -ao show job "$job_id" | sed -E 's#.* StdOut=([^ ]+).*#\1#')
+    # Poll the job state up to 120 times, one second apart; announce waiting once.
+    message="Waiting for job $job_id"
+    for i in {1..120}; do
+        state=$(command sacct -PXnj "$job_id" -o state)
+        if [[ "$state" == "PENDING" ]]; then
+            if [[ -n "$message" ]]; then
+                echo "$message"
+                message=
+            fi
+        elif [[ "$state" == "RUNNING" ]]; then
+            echo "Job $job_id is running. Showing output file $out_file (hit Ctrl + C to exit):
+"
+            tail -f "$out_file"
+            break
+        elif [[ "$state" == "FAILED" ]]; then
+            echo "Error: Job $job_id failed. Showing end of output file $out_file :
+"
+            tail "$out_file"
+            break
+        elif [[ "$state" == "COMPLETED" ]]; then
+            echo "Job $job_id completed. Showing end of output file $out_file :
+"
+            tail "$out_file"
+            break
+        fi
+        sleep 1
+    done
+}
diff --git a/install-apps/R-commands.R b/install-apps/R-commands.R
new file mode 100644
index 0000000..d2cf188
--- /dev/null
+++ b/install-apps/R-commands.R
@@ -0,0 +1,7 @@
+# The usual `install.packages("rPython")` doesn't work because we also
+# need to interactively at least once tell R to use the user library
+# and to select a mirror.
+
+dir.create(Sys.getenv("R_LIBS_USER"), recursive = TRUE)
+.libPaths(Sys.getenv("R_LIBS_USER"))
+install.packages("rPython", repos = "http://cran.us.r-project.org")
diff --git a/install-apps/R-example b/install-apps/R-example
new file mode 100644
index 0000000..428c7cf
--- /dev/null
+++ b/install-apps/R-example
@@ -0,0 +1,5 @@
+source /etc/profile.d/modules.sh	# Needed for bash
+module purge
+module load r/3.1.1
+Rscript R-commands.R
+# Then we will set up the paths
diff --git a/install-apps/python-example b/install-apps/python-example
new file mode 100644
index 0000000..ae05524
--- /dev/null
+++ b/install-apps/python-example
@@ -0,0 +1,5 @@
+source /etc/profile.d/modules.sh	# Needed for bash
+module purge
+module load python/2.7.6
+pip install --user cutadapt
+# Then we will set up the paths
diff --git a/slurm-troubleshooting/bash-debug.slurm b/slurm-troubleshooting/bash-debug.slurm
new file mode 100644
index 0000000..abd1603
--- /dev/null
+++ b/slurm-troubleshooting/bash-debug.slurm
@@ -0,0 +1,7 @@
+#!/bin/bash -x
+# Submit a 1 minute job.
+#SBATCH --partition=phi
+#SBATCH --time=1:00
+
+scontrol show job "$SLURM_JOB_ID"
+hostname
diff --git a/slurm-troubleshooting/bash-fail.slurm b/slurm-troubleshooting/bash-fail.slurm
new file mode 100644
index 0000000..3e68cdc
--- /dev/null
+++ b/slurm-troubleshooting/bash-fail.slurm
@@ -0,0 +1,6 @@
+#!/bin /bash
+# Submit a 1 minute job.
+#SBATCH --partition=phi
+#SBATCH --time=1:00
+
+hostname
diff --git a/slurm-troubleshooting/bash-pending.slurm b/slurm-troubleshooting/bash-pending.slurm
new file mode 100644
index 0000000..9db0acd
--- /dev/null
+++ b/slurm-troubleshooting/bash-pending.slurm
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Submit a 1 hour job.
+#SBATCH --partition=debug
+#SBATCH --time=1:00:00
+
+hostname
diff --git a/slurm-troubleshooting/bash-success.slurm b/slurm-troubleshooting/bash-success.slurm
new file mode 100644
index 0000000..2b7efa1
--- /dev/null
+++ b/slurm-troubleshooting/bash-success.slurm
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Submit a 1 minute job.
+#SBATCH --partition=phi
+#SBATCH --time=1:00
+
+hostname