Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
ENH: Initial commit
  • Loading branch information
root committed Dec 26, 2017
0 parents commit e3db3d9
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .gitignore
@@ -0,0 +1,11 @@
# GNU parallel dependency.
apps/
src/

# Input file.
copy_input_dirs

# Output files.
logs/
copy.joblog
copy.log
3 changes: 3 additions & 0 deletions README
@@ -0,0 +1,3 @@
Create full backup of gpfs2 at /gpfs/scratchfs1/gpfs2_full_backup/

Run `copy` to do this.
107 changes: 107 additions & 0 deletions copy
@@ -0,0 +1,107 @@
# -*- mode: sh;-*-

# Copy all gpfs2 home and shared data to /gpfs/scratchfs1/gpfs2_full_backup.

# Global variables
#-----------------
# Input list of directories to copy, one path per line.
file_dirlist="./copy_input_dirs"
# Name of the marker file created in a destination directory once it is copied.
file_sentinel="ORIG_COPY_TO_SCRATCH_COMPLETED"
# Directory handed to GNU parallel's --results option.
dir_logs="logs"
# Log file for the optional (currently commented-out) tee at the bottom.
log="copy.log"
# File handed to GNU parallel's --joblog option.
joblog="copy.joblog"
# Number of parallel rsync processes to run.
np="6"
# Source and destination roots; copy_dir swaps one prefix for the other.
prefix_src="/gpfs/gpfs2"
prefix_dest="/gpfs/scratchfs1/gpfs2_full_backup"
# Path to the GNU parallel executable.
parallel="apps/bin/parallel"
# Set to any non-empty value to test without copying.
simulate=""

# Functions
#----------

# Generate the file listing the directories to be copied (needed to
# "parallelize" rsync) and prepare the top-level destination directories.
#
# Globals:   file_dirlist, prefix_src, prefix_dest (read)
# Arguments: none
# Outputs:   writes $file_dirlist when it does not exist yet
# Returns:   status of the last command run
create_dirlist() {
  local dir src dest

  # Find does not order directories, so do not generate the file if
  # it already exists to preserve the copying order and support
  # predictable resuming.
  if ! [[ -f "$file_dirlist" ]]; then
    find "$prefix_src"/{home,shared} \
      -mindepth 1 \
      -maxdepth 1 \
      -type d \
      > "$file_dirlist"
  fi

  # Ensure target sub-directories exist with same ownership,
  # permissions and modification times as source sub-directories.
  for dir in home shared; do
    src=$prefix_src/$dir
    dest=$prefix_dest/$dir
    mkdir -p "$dest"
    chown --reference="$src" "$dest"
    chmod --reference="$src" "$dest"
    touch --reference="$src" "$dest"
  done
}

# Print a message preceded by the current local time in brackets.
#
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DDTHH:MM:SS] <message>" on stdout
msg() {
  local stamp
  stamp=$(date '+%Y-%m-%dT%H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1"
}

# Copy a single directory.
#
# Globals:   prefix_src, prefix_dest, file_sentinel, simulate (read)
# Arguments: $1 - source directory; its leading $prefix_src is replaced
#                 by $prefix_dest to obtain the destination
# Outputs:   progress messages and rsync output (stderr merged) on stdout
# Returns:   0 when the directory was copied, skipped or simulated;
#            rsync's exit status when the copy failed
copy_dir() {
  local src dest path_sentinel rc
  src=$1
  # Quote the pattern so glob characters in the prefix match literally.
  dest=${src/#"$prefix_src"/$prefix_dest}

  msg "Started copying directory $src -> $dest"
  if [[ -n "$simulate" ]]; then
    # Simulation: sleep 1-3 seconds instead of copying.
    sleep $(( (RANDOM % 3) + 1 ))s
  else
    path_sentinel=$dest/$file_sentinel
    if [[ -f "$path_sentinel" ]]; then
      msg "Skipping already copied directory $dest"
    else
      # NOTE(review): the source has no trailing slash, so rsync places the
      # source directory itself inside $dest rather than merging its
      # contents into it -- confirm this layout is intended.
      rc=0
      rsync -aX "$src" "$dest" 2>&1 || rc=$?
      if (( rc != 0 )); then
        # Do not create the sentinel: a failed copy must be retried on the
        # next run instead of being skipped as "already copied".
        msg "Failed copying directory $src -> $dest (rsync exit status $rc)"
        return "$rc"
      fi
      touch "$path_sentinel"
    fi
  fi
  msg "Finished copying directory $src -> $dest"
}

# Run copy_dir for every directory listed in $file_dirlist.
#
# Use GNU parallel to parallelize copying instead of xargs, because
# GNU parallel has nicer support for running exported bash functions
# which allows us to have a single, self-contained script.
#
# Globals:   parallel, file_dirlist, np, dir_logs, joblog (read);
#            copy_dir, msg, simulate, prefix_src, prefix_dest,
#            file_sentinel (exported)
# Arguments: none
# Returns:   exit status of GNU parallel
copy_dirs() {
  local name
  local -a env_args=()

  # Export bash functions and variables for GNU parallel to inherit.
  export -f copy_dir msg
  export simulate prefix_src prefix_dest file_sentinel

  # --env requires a name argument. A bare --env would consume the next
  # option (--line-buffer) as its value, so name every item explicitly.
  for name in copy_dir msg simulate prefix_src prefix_dest file_sentinel; do
    env_args+=(--env "$name")
  done

  # Use --line-buffer so that output lines don't split.
  "$parallel" \
    --will-cite \
    "${env_args[@]}" \
    --line-buffer \
    --max-args 1 \
    --max-procs "$np" \
    --output-as-files \
    --results "$dir_logs/{}" \
    --eta \
    --joblog "$joblog" \
    copy_dir \
    < "$file_dirlist"
}

# Main
#-----

# Entry point: log a start banner, build the directory list, run the
# parallel copy, then log an end banner.
main() {
  local banner
  for banner in "---" "BEGIN copy"; do
    msg "$banner"
  done
  create_dirlist
  copy_dirs
  msg "END copy"
}

# Boilerplate for running `main()`.
# Run main only when this file is executed directly, not when it is sourced.
if [[ "$0" == "$BASH_SOURCE" ]]; then
  main "$@" #|& tee -a $log
fi

0 comments on commit e3db3d9

Please sign in to comment.