#!/usr/bin/env bash
# -*- mode: sh;-*-
# Copy all gpfs2 home and shared data to
# /gpfs/scratchfs1/gpfs2_full_backup.
# Global variables
#-----------------

# Roots of the source tree and of the backup tree.
prefix_src='/gpfs/gpfs2'
prefix_dest='/gpfs/scratchfs1/gpfs2_full_backup'

# File holding the list of directories to copy, and the name of the
# marker file placed in a destination directory after it was copied.
file_dirlist='./copy_input_dirs'
file_sentinel='ORIG_COPY_TO_SCRATCH_COMPLETED'

# Log locations.
dir_logs='logs'
log='copy.log'
joblog='copy.joblog'

# GNU parallel executable and the number of parallel rsync processes
# to run.
parallel='apps/bin/parallel'
np=6

# Set to a non-empty value to test without copying.
simulate=''
# Functions
#----------
# Generate the file listing the directories to be copied (one path per
# line) and prepare the top-level destination directories. Having one
# list entry per directory is what makes it possible to "parallelize"
# rsync.
# Globals:   file_dirlist, prefix_src, prefix_dest (read)
# Arguments: none
# Outputs:   writes $file_dirlist when it does not exist yet
# Returns:   status of the last command executed
create_dirlist() {
  local dir src dest

  # Find does not order directories, so do not generate the file if
  # it already exists to preserve the copying order and support
  # predictable resuming.
  if ! [[ -f "$file_dirlist" ]]; then
    find "$prefix_src/home" "$prefix_src/shared" \
      -mindepth 1 \
      -maxdepth 1 \
      -type d \
      > "$file_dirlist"
  fi

  # Ensure target sub-directories exist with same ownership,
  # permissions and modification times as source sub-directories.
  for dir in home shared; do
    src=$prefix_src/$dir
    dest=$prefix_dest/$dir
    mkdir -p -- "$dest"
    chown --reference="$src" -- "$dest"
    chmod --reference="$src" -- "$dest"
    touch --reference="$src" -- "$dest"
  done
}
# Print a message preceded by the current local time in
# [YYYY-MM-DDTHH:MM:SS] form.
# Arguments: $1 - message text
# Outputs:   one line on stdout
msg() {
  local stamp
  stamp=$(date '+%Y-%m-%dT%H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1"
}
# Copy a single directory from under $prefix_src to the same relative
# path under $prefix_dest.
# Globals:   prefix_src, prefix_dest, file_sentinel, simulate (read)
# Arguments: $1 - source directory, located under $prefix_src
# Outputs:   progress messages and rsync output (stderr merged into
#            stdout)
# Returns:   0 when the directory was copied, skipped or simulated;
#            non-zero when the source is outside $prefix_src or when
#            rsync / creating the sentinel failed
copy_dir() {
  local src dest path_sentinel status
  src=$1

  # Map the source path onto the destination tree. A path outside
  # $prefix_src would map onto itself, so refuse it.
  case $src in
    "$prefix_src"/*)
      dest=$prefix_dest${src#"$prefix_src"}
      ;;
    *)
      msg "Refusing to copy $src: not located under $prefix_src"
      return 1
      ;;
  esac

  msg "Started copying directory $src -> $dest"
  if [[ -n "$simulate" ]]; then
    # Simulation: pretend to work for 1-3 seconds without copying.
    sleep "$(( RANDOM % 3 + 1 ))s"
  else
    path_sentinel=$dest/$file_sentinel
    if [[ -f "$path_sentinel" ]]; then
      msg "Skipping already copied directory $dest"
    else
      # The trailing slash on the source makes rsync copy the directory
      # contents into $dest; without it rsync would create an extra
      # $dest/<basename of src> level.
      status=0
      rsync -aX -- "$src/" "$dest/" 2>&1 || status=$?
      if (( status != 0 )); then
        # Do not create the sentinel, so that a later run retries this
        # directory instead of skipping it.
        msg "Failed copying directory $src -> $dest (rsync exit status $status)"
        return "$status"
      fi
      touch -- "$path_sentinel" || return
    fi
  fi
  msg "Finished copying directory $src -> $dest"
}
# Run copy_dir for every directory listed in $file_dirlist, several at
# a time.
#
# Use GNU parallel to parallelize copying instead of xargs, because
# GNU parallel has nicer support for running exported bash functions
# which allows us to have a single, self-contained script.
# Globals:   parallel, file_dirlist, np, dir_logs, joblog (read);
#            copy_dir, msg, simulate, prefix_src, prefix_dest,
#            file_sentinel (exported)
# Arguments: none
# Returns:   exit status of GNU parallel
copy_dirs() {
  local name
  local -a env_args=()

  # Export bash functions and variables for GNU parallel to inherit
  # with --env.
  export -f copy_dir msg
  export simulate prefix_src prefix_dest file_sentinel

  # --env requires a name as its value; a bare --env would take the
  # next option on the command line as that name and thereby disable
  # it. Pass one --env per exported name instead.
  for name in copy_dir msg simulate prefix_src prefix_dest file_sentinel; do
    env_args+=(--env "$name")
  done

  # Use --line-buffer so that output lines don't split.
  "$parallel" \
    --will-cite \
    "${env_args[@]}" \
    --line-buffer \
    --max-args 1 \
    --max-procs "$np" \
    --output-as-files \
    --results "$dir_logs/{}" \
    --eta \
    --joblog "$joblog" \
    copy_dir \
    < "$file_dirlist"
}
# Main
#-----
# Log the start banner, build the directory list, run the parallel
# copy, then log the end banner.
main() {
  local banner
  for banner in "---" "BEGIN copy"; do
    msg "$banner"
  done

  create_dirlist
  copy_dirs

  msg "END copy"
}
# Call main() only when this file is executed directly; when it is
# sourced, $0 differs from the first BASH_SOURCE entry and nothing runs.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  main "$@" #|& tee -a $log
fi