diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..70cf1ae
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# GNU parallel dependency.
+apps/
+src/
+
+# Input file.
+copy_input_dirs
+
+# Output files.
+logs/
+copy.joblog
+copy.log
diff --git a/README b/README
new file mode 100644
index 0000000..15c78f1
--- /dev/null
+++ b/README
@@ -0,0 +1,3 @@
+Create full backup of gpfs2 at /gpfs/scratchfs1/gpfs2_full_backup/
+
+Run `copy` to do this.
diff --git a/copy b/copy
new file mode 100755
index 0000000..122eb44
--- /dev/null
+++ b/copy
@@ -0,0 +1,107 @@
+#!/bin/bash
+# -*- mode: sh;-*-
+
+# Copy all gpfs2 home and shared data to /gpfs/scratchfs1/gpfs2_full_backup.
+
+# Global variables
+#-----------------
+file_dirlist=./copy_input_dirs
+file_sentinel=ORIG_COPY_TO_SCRATCH_COMPLETED
+dir_logs=logs
+log=copy.log
+joblog=copy.joblog
+np=6 # parallel rsync processes to run.
+prefix_src=/gpfs/gpfs2
+prefix_dest=/gpfs/scratchfs1/gpfs2_full_backup
+parallel=apps/bin/parallel
+simulate= # set to any non-empty value to test without copying.
+
+# Functions
+#----------
+
+# Write the list of directories to copy (one per line, the unit of
+# parallelism for rsync) and pre-create the home/ and shared/ targets.
+create_dirlist() {
+    local dir src dest
+    # Find does not order directories, so do not generate the file if
+    # it already exists to preserve the copying order and support
+    # predictable resuming.
+    if ! [[ -f $file_dirlist ]]; then
+        find "$prefix_src"/{home,shared} \
+            -mindepth 1 -maxdepth 1 -type d \
+            > "$file_dirlist"
+    fi
+    # Ensure target sub-directories exist with same ownership,
+    # permissions and modification times as source sub-directories.
+    for dir in home shared; do
+        src=$prefix_src/$dir
+        dest=$prefix_dest/$dir
+        mkdir -p "$dest"
+        chown --reference="$src" "$dest"
+        chmod --reference="$src" "$dest"
+        touch --reference="$src" "$dest"
+    done
+}
+
+# Print the message in $1 prefixed with a local-time timestamp.
+msg() {
+    echo "[$(date '+%Y-%m-%dT%H:%M:%S')] $1"
+}
+
+# Copy a single directory.
+copy_dir() {
+    # $1: source directory (one line of $file_dirlist). Returns non-zero
+    # when rsync fails so that GNU parallel records the job as failed.
+    local src=$1 dest path_sentinel
+    dest=${src/#"$prefix_src"/$prefix_dest}
+    path_sentinel=$dest/$file_sentinel
+    msg "Started copying directory $src -> $dest"
+    if [[ -n "$simulate" ]]; then
+        sleep $(( ( RANDOM % 3 ) + 1 ))s
+    elif [[ -f $path_sentinel ]]; then
+        msg "Skipping already copied directory $dest"
+    # Trailing slashes copy the contents of $src into $dest rather than
+    # nesting $src inside it. Only write the sentinel on success.
+    elif rsync -aX "$src/" "$dest/" 2>&1; then
+        touch "$path_sentinel"
+    else
+        msg "ERROR: rsync failed for $src -> $dest"
+        return 1
+    fi
+    msg "Finished copying directory $src -> $dest"
+}
+
+# Use GNU parallel to parallelize copying instead of xargs, because
+# GNU parallel has nicer support for running exported bash functions
+# which allows us to have a single, self-contained script.
+copy_dirs() {
+    # Export what the jobs use. Local jobs inherit the environment, so
+    # no --env is needed (a bare --env swallows the next option).
+    export -f copy_dir msg
+    export simulate prefix_src prefix_dest file_sentinel
+    # Use --line-buffer so that output lines don't split.
+    "$parallel" \
+        --will-cite \
+        --line-buffer \
+        --max-args 1 \
+        --max-procs "$np" \
+        --output-as-files \
+        --results "$dir_logs/{}" \
+        --eta \
+        --joblog "$joblog" \
+        copy_dir < "$file_dirlist"
+}
+
+# Main
+#-----
+
+main() {
+    msg "---"
+    msg "BEGIN copy"
+    create_dirlist
+    copy_dirs
+    msg "END copy"
+}
+
+# Run `main()` only when executed directly, not when sourced.
+[[ "$0" != "$BASH_SOURCE" ]] || main "$@" #|& tee -a $log