Skip to content
Permalink
b93db79cf4
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 224 lines (171 sloc) 6.68 KB
#!/usr/bin/python
from __future__ import print_function
import sys, time, getopt, subprocess
import ClusterShell.NodeSet
# Absolute path to the Slurm `sacct` binary; invoked by get_idle_nodes().
SACCT = "/gpfs/gpfs1/slurm/bin/sacct"
# Path to the ClusterShell `nodeset` command-line tool.
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
NODESET = "/usr/bin/nodeset"
# Absolute path to the Slurm `scontrol` binary; used to show, create and update reservations.
SCONTROL = "/gpfs/gpfs1/slurm/bin/scontrol"
# Module-wide verbosity flag; main() sets it to True when -v is given.
VERBOSE = False
def Usage(msg=None):
    """Print the help text (and optionally an error message), then exit.

    Args:
        msg: Optional message printed, surrounded by blank lines, after the
            help text. Nothing extra is printed when it is None or empty.

    Raises:
        SystemExit: always; this function never returns.
    """
    # The synopsis lists every option accepted by get_opts_args(): -v and -m
    # are optional, and -a / -x each take a NODESTR and exclude each other.
    print("""
Usage: reserve_idle [-v] [-m MAXCNT] [-a NODESTR | -x NODESTR] RESERVATION_NAME
Usage: reserve_idle -s
Reserve idle nodes that do not belong to a preemptable partition.
Reservation is named RESERVATION_NAME.
Reservation expires in ten minutes (to extend, see OTHER USEFUL COMMANDS below)
OPTIONS
-v Verbose
-s Show current reservations and quit
-m MAXCNT Only reserve this many nodes
-a NODESTR Only check idle nodes in NODESTR. Cannot combine with -x
-x NODESTR Do not include NODESTR in idle nodes. Cannot combine with -a
OTHER USEFUL COMMANDS:
- To show current reservations
scontrol show reservations
- To remove the reservation before it expires, run
scontrol delete reservation=RESERVATION_NAME
- To extend the time limit for current reservation to 2 hours
scontrol update reservation reservation=RESERVATION_NAME duration=02:00:00
""")
    if msg:
        print()
        print(" ", msg)
        print()
    sys.exit()
# Used for Python < 2.7
def _check_output(command,shell=False):
if type(command)==type(''):
command = command.split()
p = subprocess.Popen(command, stdout=subprocess.PIPE)
return p.communicate()[0]
def print_verbose(verbose, *msgs):
    """Print a timestamped message when *verbose* is true.

    Args:
        verbose: Nothing is printed when this is false.
        *msgs: Message parts. They are joined with single spaces; parts that
            are not strings are converted to text first.
    """
    if not verbose:
        return
    tstamp = time.strftime("%H:%M:%S")
    # "%s" % (part,) converts any object to text, so callers may pass
    # numbers or node sets directly without wrapping them in str().
    text = " ".join("%s" % (part,) for part in msgs)
    print("%s: %s" % (tstamp, text))
def get_idle_nodes():
    """Return the compute nodes that are neither in use nor preemptable.

    The node lists reported by ``sacct`` (invoked with ``-anX -sR``) are
    merged into one node string; that set and the ``@preempt`` set are then
    subtracted from the ``@compute`` set.
    """
    # The %256 field width must be large enough for the longest node list.
    # If it is too small the node string is not well-formed and building the
    # "in use" NodeSet below raises an exception.
    sacct_cmd = [SACCT, '-anX', '-sR', '-o', 'nodelist%256']
    nodestr = ",".join(_check_output(sacct_cmd).split())
    print_verbose(VERBOSE, "Nodestr is %s" % nodestr)

    make_set = ClusterShell.NodeSet.NodeSet
    all_compute = make_set("@compute")
    preemptable = make_set("@preempt")
    in_use = make_set(nodestr)
    return all_compute - in_use - preemptable
# Drop every node named in the string 'nodestr' from 'nodes'
def remove_nodes(nodes, nodestr):
    """Return *nodes* minus the nodes described by the string *nodestr*."""
    return nodes - ClusterShell.NodeSet.NodeSet(nodestr)
# Keep only the nodes of 'nodes' that are also named in the string 'nodestr'
def mask_nodes(nodes, nodestr):
    """Return the nodes present both in *nodes* and in the string *nodestr*."""
    allowed = ClusterShell.NodeSet.NodeSet(nodestr)
    return nodes.intersection(allowed)
# Get list of nodes that are currently reserved on the system
def get_reserved_nodes():
    """Return a NodeSet of all nodes named by existing reservations.

    Runs ``scontrol show reservations -o`` and, for every output line that
    contains a `` Nodes=`` field, collects the value of that field (the text
    up to the next space or the end of the line).

    Returns:
        A ClusterShell NodeSet built from the comma-joined field values; it
        is built from an empty string when no line has such a field.
    """
    marker = " Nodes="
    output = _check_output([SCONTROL, 'show', 'reservations', '-o'])
    nodes = []
    for line in output.split("\n"):
        line = line.rstrip()
        if not line:
            continue
        # Test the result of find() BEFORE adding the marker length; a line
        # without the field (e.g. an informational message) must be skipped
        # rather than sliced at an arbitrary offset.
        found = line.find(marker)
        if found == -1:
            continue
        start = found + len(marker)
        end = line.find(" ", start)
        if end == -1:
            end = len(line)
        value = line[start:end]
        # An empty field would only add an empty element to the node string.
        if value:
            nodes.append(value)
    return ClusterShell.NodeSet.NodeSet(','.join(nodes))
def show_reservations():
    """Return the text printed by ``scontrol show reservations``."""
    return _check_output("%s show reservations" % SCONTROL)
def does_reservation_exist(name):
    """Return True unless scontrol's reply for *name* ends with 'not found'."""
    query = "%s show reservationname=%s" % (SCONTROL, name)
    reply = _check_output(query).rstrip()
    if reply.endswith('not found'):
        return False
    return True
def reserve_nodes(name, nodes):
    """Create reservation *name* on *nodes* via ``scontrol create reservation``.

    The reservation is created for account root, starting now, with a
    duration of ten minutes and the ignore_jobs flag.
    """
    template = (
        "%s create reservation reservationname=%s account=root "
        "flags=ignore_jobs starttime=now duration=00:10:00 nodes=%s"
    )
    _check_output(template % (SCONTROL, name, nodes))
def update_reservation(name, nodes):
    """Set the node list of reservation *name* to *nodes* via scontrol."""
    template = "%s update reservation reservationname=%s nodes=%s"
    _check_output(template % (SCONTROL, name, nodes))
def keep_first(n, nodes):
    """Return a new NodeSet holding at most the first *n* nodes of *nodes*.

    Nodes are taken in the order produced by ``nodes.nsiter()``.
    """
    kept = ClusterShell.NodeSet.NodeSet()
    remaining = n
    for single in nodes.nsiter():
        # Stop as soon as the quota is used up.
        if remaining <= 0:
            break
        kept.update(single)
        remaining -= 1
    return kept
def get_opts_args(argsin):
    """Parse the command line and validate the option combination.

    Args:
        argsin: Argument list without the program name.

    Returns:
        A tuple ``(opts, args)``: *opts* maps each option flag that was given
        (such as ``'-m'``) to its value (an empty string for flags without an
        argument); *args* is the list of positional arguments.

    Exits through Usage() when an option is unknown or lacks its argument,
    when the number of positional arguments is not exactly one (unless -s is
    given), or when -a and -x are combined.
    """
    try:
        pairs, args = getopt.getopt(argsin, 'svm:a:x:')
    except getopt.GetoptError as err:
        # Show the help text instead of a traceback for a bad option.
        Usage("ERROR: %s" % err)
    opts = dict(pairs)
    if len(args) != 1 and '-s' not in opts:
        Usage()
    if '-a' in opts and '-x' in opts:
        Usage("ERROR: You cannot use both -a and -x")
    return opts, args
def main():
    """Reserve the currently idle nodes under the name given on the command line.

    Steps:
      1. Parse options; with -s just print the current reservations and exit.
      2. Exit if a reservation with the requested name already exists.
      3. Compute the idle nodes, minus nodes that are already reserved,
         filtered by -x / -a; exit if none remain.
      4. Create the reservation on those nodes.
      5. Recompute the idle nodes and keep only those that were also in the
         first set (some may have become active in the meantime), re-apply
         -x / -a, and truncate to -m MAXCNT nodes if requested.
      6. Update the reservation if the node set changed, then print the
         final node set.
    """
    global VERBOSE
    opts, args = get_opts_args(sys.argv[1:])

    # Show reservations and quit
    if '-s' in opts:
        print()
        print(show_reservations())
        print()
        sys.exit()

    VERBOSE = '-v' in opts

    # -m is optional; a value that is not an integer is reported as a usage
    # error instead of surfacing as an uncaught ValueError.
    maxcnt = None
    if '-m' in opts:
        try:
            maxcnt = int(opts['-m'])
        except ValueError:
            Usage("ERROR: -m expects an integer, got '%s'" % opts['-m'])

    print_verbose(VERBOSE, "Starting ...")
    name = args[0]

    # Does reservation already exist?
    if does_reservation_exist(name):
        print("\n ERROR: Reservation with name '%s' already exists\n" % name)
        sys.exit()

    # Get list of idle nodes
    idle1_nodes = get_idle_nodes()

    # Remove nodes already in reservations
    idle1_nodes -= get_reserved_nodes()
    print_verbose(VERBOSE, "Initial set of idle nodes (%d):" % len(idle1_nodes), str(idle1_nodes))

    # Apply the user's exclusion (-x) or restriction (-a)
    if '-x' in opts:
        idle1_nodes = remove_nodes(idle1_nodes, opts['-x'])
        print_verbose(VERBOSE, "First idle nodes after excludsion (%d): " % len(idle1_nodes), str(idle1_nodes))
    if '-a' in opts:
        idle1_nodes = mask_nodes(idle1_nodes, opts['-a'])
        print_verbose(VERBOSE, "First idle nodes after map (%d): " % len(idle1_nodes), str(idle1_nodes))

    if len(idle1_nodes) == 0:
        print()
        print("ERROR: There are no idle nodes available")
        print()
        sys.exit()

    # Reserve idle nodes
    reserve_nodes(name, idle1_nodes)

    # Get updated list of idle nodes - in case some nodes became active
    # before they could be reserved
    idle2_nodes = get_idle_nodes()
    freshly_active = idle1_nodes - idle2_nodes
    print_verbose(VERBOSE, "Freshly activiated nodes (%d): " % len(freshly_active), str(freshly_active))

    # Only keep idle nodes that are in BOTH the original and the updated
    # list. This must be a set intersection: a boolean `and` would simply
    # evaluate to idle2_nodes and could add nodes that were never reserved.
    still_idle_nodes = idle1_nodes.intersection(idle2_nodes)
    print_verbose(VERBOSE, "Still idle nodes (%d): " % len(still_idle_nodes), str(still_idle_nodes))

    # Re-apply the user's exclusion (-x) or restriction (-a)
    if '-x' in opts:
        still_idle_nodes = remove_nodes(still_idle_nodes, opts['-x'])
        print_verbose(VERBOSE, "Still idle nodes after excludsion (%d): " % len(still_idle_nodes), str(still_idle_nodes))
    if '-a' in opts:
        still_idle_nodes = mask_nodes(still_idle_nodes, opts['-a'])
        print_verbose(VERBOSE, "Still idle nodes after map (%d): " % len(still_idle_nodes), str(still_idle_nodes))

    # Reduce number of nodes if requested
    if maxcnt:
        still_idle_nodes = keep_first(maxcnt, still_idle_nodes)
        print_verbose(VERBOSE, "Reduced still idle nodes (%d)" % len(still_idle_nodes), str(still_idle_nodes))

    # Make final reservation if idle nodes have changed.
    # Written as `not ==` so that only the equality operator is relied upon.
    # NOTE(review): still_idle_nodes may be empty here if every node became
    # active; confirm how scontrol handles an update with an empty node list.
    if not still_idle_nodes == idle1_nodes:
        update_reservation(name, still_idle_nodes)

    # Print the final list of reserved idle nodes
    print(str(still_idle_nodes))
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()