Skip to content
Permalink
Browse files

BUG: Fix h2o launch waiter

  • Loading branch information
pan14001 committed Jun 23, 2017
1 parent cc8544b commit 9c53962e6a615624afb700692d970ffbed518228
Showing with 10 additions and 3 deletions.
  1. +10 −3 dietslurm-network.sh
@@ -129,7 +129,7 @@ java \
# Poll roughly once per ${increment} seconds until more than ${timeout}
# seconds have been counted, leaving the loop early as soon as one of the
# job checks below fails. ${timeout} is set outside this excerpt.
elapsed_sec=0
# Seconds to sleep between polls; also the step added to elapsed_sec.
increment=1
while [[ $elapsed_sec -le $timeout ]]; do
# `jobs -r %+` asks about the shell's current job, restricted to running
# jobs; a non-zero status ends the wait.
# NOTE(review): this check and the `jobs %2` check below appear together
# here; this looks like a diff rendering in which one line replaced the
# other -- confirm which one is live in the actual script.
jobs -r %+ || break
# `jobs %2` asks about job number 2; a non-zero status (e.g. no such job)
# ends the wait. Note that `jobs` also prints the job entry on every pass.
jobs %2 || break
sleep $increment
# Track elapsed time by counting sleeps rather than reading a clock.
elapsed_sec=$(( $elapsed_sec + $increment ))
done
@@ -148,14 +148,21 @@ See output in ${file_out}"
# Output file from h2o cluster.
# Returns: None
wait_till_h2o_running() {
  # Block until the h2o output file has shown the "cloud formed" message
  # once per node in the SLURM allocation.
  # Globals:   SLURM_JOB_NUM_NODES (read) - expected number of nodes.
  # Arguments: $1 - output file from the h2o cluster.
  # Returns:   0 once the expected number of matching lines has been seen,
  #            or when tail stops producing output (e.g. unreadable file).
  local file_out magic_string node_count line tail_pid
  file_out="$1"
  magic_string="Cloud of size ${SLURM_JOB_NUM_NODES} formed"
  node_count=0

  # Follow the file and wait for all nodes to register as connected.
  # Reading from a process substitution (rather than `tail | while`) keeps
  # the loop in the current shell and lets us stop tail by PID afterwards
  # instead of pattern-matching process names with pkill.
  # `exec` makes the substituted process *be* tail, so $! is tail's own PID.
  while IFS= read -r line; do
    if [[ "${line}" == *"${magic_string}"* ]]; then
      node_count=$(( node_count + 1 ))
      if (( node_count >= SLURM_JOB_NUM_NODES )); then
        break
      fi
    fi
  done < <(exec tail -f -- "${file_out}")
  tail_pid=$!

  # tail -f never exits on its own; stop it explicitly. It may already be
  # gone (e.g. the file could not be opened), so failures here are expected
  # and deliberately ignored.
  kill "${tail_pid}" 2>/dev/null || true
  wait "${tail_pid}" 2>/dev/null || true
  return 0
}

0 comments on commit 9c53962

Please sign in to comment.
You can’t perform that action at this time.