Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
executable file 224 lines (171 sloc) 6.68 KB
#!/usr/bin/python
from __future__ import print_function
import sys, time, getopt, subprocess
import ClusterShell.NodeSet
# Absolute path of the Slurm `sacct` executable; get_idle_nodes() runs it to
# list the nodes that currently have running jobs.
SACCT = "/gpfs/gpfs1/slurm/bin/sacct"
# Absolute path of the `nodeset` command-line tool.
# NOTE(review): not referenced by any code visible in this file.
NODESET = "/usr/bin/nodeset"
# Absolute path of the Slurm `scontrol` executable; used to show, create and
# update reservations.
SCONTROL = "/gpfs/gpfs1/slurm/bin/scontrol"
# Module-wide verbosity flag; main() sets it to True when -v is given and it
# is passed to print_verbose() to decide whether progress lines are printed.
VERBOSE = False
def Usage(msg=None):
    """Print the help text, then an optional message, and exit.

    The synopsis lists every option accepted by the getopt string
    'svm:a:x:' (-v is optional, -m/-a/-x take an argument).

    Args:
        msg: Optional extra line (typically an error description) printed
            after the help text, surrounded by blank lines.

    Raises:
        SystemExit: always; sys.exit() is called without an argument, so
            the process exit status is 0.
    """
    print("""
Usage: reserve_idle [-v] [-m MAXCNT] [-a NODESTR | -x NODESTR] RESERVATION_NAME
Usage: reserve_idle -s
Reserve idle nodes that do not belong to a preemptable partition.
Reservation is named RESERVATION_NAME.
Reservation expires in ten minutes (to extend, see OTHER USEFUL COMMANDS below)
OPTIONS
-v Verbose
-s Show current reservations and quit
-m MAXCNT Only reserve this many nodes
-a NODESTR Only check idle nodes in NODESTR. Cannot combine with -x
-x NODESTR Do not include NODESTR in idle nodes. Cannot combine with -a
OTHER USEFUL COMMANDS:
- To show current reservations
scontrol show reservations
- To remove the reservation before it expires, run
scontrol delete reservation=RESERVATION_NAME
- To extend the time limit for current reservation to 2 hours
scontrol update reservation reservation=RESERVATION_NAME duration=02:00:00
""")
    if msg:
        print()
        print(" ", msg)
        print()
    # Callers rely on Usage() never returning.
    sys.exit()
# Used for Python < 2.7 (which lacks subprocess.check_output)
def _check_output(command, shell=False):
    """Run *command* and return what it wrote to standard output as text.

    Args:
        command: Either an argument list, or a single string. A string is
            split on whitespace into an argument list unless ``shell`` is
            true, in which case it is handed to the shell unchanged.
        shell: Passed through to ``subprocess.Popen``.

    Returns:
        The captured standard output. ``universal_newlines=True`` makes
        this a text string on both Python 2 and Python 3, which is what
        the callers' ``split`` / ``endswith`` calls expect.

    Note:
        Standard error is not captured and the exit status is not checked.
    """
    if isinstance(command, str) and not shell:
        command = command.split()
    p = subprocess.Popen(command, stdout=subprocess.PIPE, shell=shell,
                         universal_newlines=True)
    return p.communicate()[0]
def print_verbose(verbose, *msgs):
    """Print a time-stamped progress line when *verbose* is true.

    Args:
        verbose: When false, nothing is printed.
        *msgs: Message parts; each is converted with ``str()`` and the
            parts are joined with single spaces, so non-string values
            (counts, node sets) can be passed directly.
    """
    if not verbose:
        return
    tstamp = time.strftime("%H:%M:%S")
    print("%s: %s" % (tstamp, " ".join(str(m) for m in msgs)))
def get_idle_nodes():
    """Return the compute nodes that are neither busy nor preemptable.

    The node lists of running jobs are read from ``sacct`` and subtracted,
    together with the ``@preempt`` group, from the ``@compute`` group.

    Returns:
        A ClusterShell NodeSet: ``@compute - active - @preempt``.
    """
    # The column width in 'nodelist%256' must be wide enough: if sacct
    # truncates a node list the joined string is not well-formed and the
    # NodeSet(...) call on it below raises an exception.
    sacct_cmd = [SACCT, '-anX', '-sR', '-o', 'nodelist%256']
    nodestr = ",".join(_check_output(sacct_cmd).split())
    print_verbose(VERBOSE, "Nodestr is %s" % nodestr)

    make_nodeset = ClusterShell.NodeSet.NodeSet
    compute = make_nodeset("@compute")
    preempt = make_nodeset("@preempt")
    active = make_nodeset(nodestr)
    return compute - active - preempt
def remove_nodes(nodes, nodestr):
    """Return *nodes* without the nodes described by the string *nodestr*.

    Args:
        nodes: Node set to filter (supports the ``-`` operator).
        nodestr: Node-set string parsed with ClusterShell's NodeSet.
    """
    return nodes - ClusterShell.NodeSet.NodeSet(nodestr)
def mask_nodes(nodes, nodestr):
    """Return the nodes present both in *nodes* and in the string *nodestr*.

    Args:
        nodes: Node set to filter (must provide ``intersection``).
        nodestr: Node-set string parsed with ClusterShell's NodeSet.
    """
    allowed = ClusterShell.NodeSet.NodeSet(nodestr)
    return nodes.intersection(allowed)
def get_reserved_nodes():
    """Return the nodes that are currently part of any reservation.

    Runs ``scontrol show reservations -o`` (one reservation per line) and
    collects the value of the `` Nodes=`` field from every line.

    Returns:
        A ClusterShell NodeSet built from all collected node lists; empty
        when no line carries a node list.
    """
    reservations = _check_output([SCONTROL, 'show', 'reservations', '-o'])
    marker = " Nodes="
    nodes = []
    for reservation in reservations.split("\n"):
        reservation = reservation.rstrip()
        if not reservation:
            continue
        found = reservation.find(marker)
        if found == -1:
            # Line has no node field (e.g. an informational message):
            # skip it instead of slicing at a bogus offset.
            continue
        start = found + len(marker)
        # Search from `start` itself so an empty value does not swallow
        # the following field.
        end = reservation.find(" ", start)
        if end == -1:
            end = len(reservation)
        value = reservation[start:end]
        # NOTE(review): scontrol appears to print "(null)" when a
        # reservation has no nodes - confirm; it is not a node name.
        if value and value != "(null)":
            nodes.append(value)
    nodestr = ','.join(nodes)
    return ClusterShell.NodeSet.NodeSet(nodestr)
def show_reservations():
    """Return the raw output of ``scontrol show reservations``."""
    return _check_output("%s show reservations" % SCONTROL)
def does_reservation_exist(name):
    """Report whether a reservation called *name* already exists.

    The command is passed as an argument list so that a name containing
    whitespace stays a single ``reservationname=`` argument instead of
    being split into extra scontrol arguments.

    Args:
        name: Reservation name to look up.

    Returns:
        False when scontrol's output ends with ``not found``, True
        otherwise.
    """
    output = _check_output([SCONTROL, "show", "reservationname=%s" % name])
    return not output.rstrip().endswith('not found')
def reserve_nodes(name, nodes):
    """Create a ten-minute reservation called *name* on *nodes*.

    The reservation is created for account ``root`` with
    ``flags=ignore_jobs``, starting now. Arguments are passed as a list
    so that whitespace in *name* cannot be split into additional scontrol
    arguments.

    Args:
        name: Reservation name.
        nodes: Nodes to reserve; formatted with ``%s`` into ``nodes=``.

    Note:
        scontrol's output and exit status are not inspected.
    """
    _check_output([
        SCONTROL, "create", "reservation",
        "reservationname=%s" % name,
        "account=root",
        "flags=ignore_jobs",
        "starttime=now",
        "duration=00:10:00",
        "nodes=%s" % nodes,
    ])
def update_reservation(name, nodes):
    """Replace the node list of the existing reservation *name*.

    Arguments are passed as a list so that whitespace in *name* cannot be
    split into additional scontrol arguments.

    Args:
        name: Reservation name.
        nodes: New node list; formatted with ``%s`` into ``nodes=``.

    Note:
        scontrol's output and exit status are not inspected.
    """
    _check_output([
        SCONTROL, "update", "reservation",
        "reservationname=%s" % name,
        "nodes=%s" % nodes,
    ])
def keep_first(n, nodes):
    """Return a new NodeSet holding at most the first *n* entries of *nodes*.

    Args:
        n: Maximum number of entries to keep.
        nodes: Source node set; iterated with ``nsiter()``.
    """
    kept = ClusterShell.NodeSet.NodeSet()
    remaining = n
    for single in nodes.nsiter():
        # Stop as soon as the quota is used up.
        if remaining <= 0:
            break
        kept.update(single)
        remaining -= 1
    return kept
def get_opts_args(argsin):
    """Parse the command line and validate option combinations.

    Args:
        argsin: Argument list without the program name.

    Returns:
        ``(opts, args)`` where ``opts`` maps each given option (such as
        ``'-m'``) to its value (``''`` for flags) and ``args`` is the list
        of positional arguments.

    Exits through Usage() when an option is unknown or lacks its argument,
    when exactly one positional argument is not supplied (unless -s is
    given), or when -a and -x are combined.
    """
    try:
        opts, args = getopt.getopt(argsin, 'svm:a:x:')
    except getopt.GetoptError as err:
        # Show the help text instead of a traceback; Usage() does not return.
        Usage("ERROR: %s" % err)
    opts = dict(opts)
    if len(args) != 1 and '-s' not in opts:
        Usage()
    if '-a' in opts and '-x' in opts:
        Usage("ERROR: You cannot use both -a and -x")
    return opts, args
def _filter_idle_nodes(nodes, opts, label):
    """Apply the -x (exclude) and -a (restrict) options to *nodes*.

    *label* ("First" / "Still") prefixes the verbose progress messages.
    Returns the filtered node set.
    """
    if '-x' in opts:
        nodes = remove_nodes(nodes, opts['-x'])
        print_verbose(VERBOSE, "%s idle nodes after excludsion (%d): " % (label, len(nodes)), str(nodes))
    if '-a' in opts:
        nodes = mask_nodes(nodes, opts['-a'])
        print_verbose(VERBOSE, "%s idle nodes after map (%d): " % (label, len(nodes)), str(nodes))
    return nodes


def main():
    """Reserve the currently idle nodes under the name given on the command line.

    Steps:
      1. Parse options; with -s just print the current reservations and exit.
      2. Refuse to continue if the reservation name already exists.
      3. Compute the idle nodes, drop nodes that are already reserved and
         apply the -x / -a filters.
      4. Create the reservation, then re-read the idle nodes to catch nodes
         that became active in the meantime.
      5. Keep only nodes idle in both snapshots, apply -m, and update the
         reservation if the final set differs from what was reserved.
      6. Print the final node set.
    """
    global VERBOSE
    opts, args = get_opts_args(sys.argv[1:])

    # Show reservations and quit
    if '-s' in opts:
        print()
        print(show_reservations())
        print()
        sys.exit()

    VERBOSE = '-v' in opts

    # -m is optional; a non-integer value is reported rather than ignored
    # so that a typo cannot silently reserve every idle node.
    maxcnt = None
    if '-m' in opts:
        try:
            maxcnt = int(opts['-m'])
        except ValueError:
            Usage("ERROR: -m expects an integer, got '%s'" % opts['-m'])

    print_verbose(VERBOSE, "Starting ...")
    name = args[0]

    # Does reservation already exist?
    if does_reservation_exist(name):
        print("\n ERROR: Reservation with name '%s' already exists\n" % name)
        sys.exit()

    # Get list of idle nodes, minus nodes already in reservations
    idle1_nodes = get_idle_nodes()
    idle1_nodes -= get_reserved_nodes()
    print_verbose(VERBOSE, "Initial set of idle nodes (%d):" % len(idle1_nodes), str(idle1_nodes))

    idle1_nodes = _filter_idle_nodes(idle1_nodes, opts, "First")
    if len(idle1_nodes) == 0:
        print()
        print("ERROR: There are no idle nodes available")
        print()
        sys.exit()

    # Reserve idle nodes
    reserve_nodes(name, idle1_nodes)

    # Get updated list of idle nodes - in case some nodes became active
    # before they could be reserved
    idle2_nodes = get_idle_nodes()
    freshly_active = idle1_nodes - idle2_nodes
    print_verbose(VERBOSE, "Freshly activiated nodes (%d): " % len(freshly_active), str(freshly_active))

    # Only keep nodes present in BOTH snapshots. This must be a set
    # intersection; the boolean `and` would simply yield idle2_nodes.
    still_idle_nodes = idle1_nodes.intersection(idle2_nodes)
    print_verbose(VERBOSE, "Still idle nodes (%d): " % len(still_idle_nodes), str(still_idle_nodes))

    still_idle_nodes = _filter_idle_nodes(still_idle_nodes, opts, "Still")

    # Reduce number of nodes if requested
    if maxcnt:
        still_idle_nodes = keep_first(maxcnt, still_idle_nodes)
        print_verbose(VERBOSE, "Reduced still idle nodes (%d)" % len(still_idle_nodes), str(still_idle_nodes))

    # Make final reservation if idle nodes have changed.
    # `not a == b` is kept on purpose: it relies only on the set's __eq__.
    if not still_idle_nodes == idle1_nodes:
        update_reservation(name, still_idle_nodes)

    # Print the final list of reserved nodes
    print(str(still_idle_nodes))
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()