Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
reserve_idle/reserve_idle
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
224 lines (171 sloc)
6.68 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function | |
import sys, time, getopt, subprocess | |
import ClusterShell.NodeSet | |
# Absolute path of the Slurm `sacct` executable run by get_idle_nodes().
SACCT = "/gpfs/gpfs1/slurm/bin/sacct"
# Path of the `nodeset` utility; no function visible in this file references it.
NODESET = "/usr/bin/nodeset"
# Absolute path of the Slurm `scontrol` executable used to show, create and
# update reservations.
SCONTROL = "/gpfs/gpfs1/slurm/bin/scontrol"
# Module-wide verbosity flag; main() sets it to True when -v is given.
VERBOSE = False
def Usage(msg=None):
    """Print the command-line help text and terminate the program.

    Args:
        msg: Optional error message. When given, it is printed after the
            help text (surrounded by blank lines) and the process exits with
            status 1 so that calling scripts can detect the failure. Without
            a message the default (success) exit status is used.

    Raises:
        SystemExit: always; this function never returns.
    """
    # The synopsis lists the options the way getopt actually parses them
    # ('svm:a:x:'): -v is optional, -m/-a/-x each take an argument.
    print("""
Usage: reserve_idle [-v] [-m MAXCNT] [-a NODESTR | -x NODESTR] RESERVATION_NAME
Usage: reserve_idle -s
Reserve idle nodes that do not belong to a preemptable partition.
Reservation is named RESERVATION_NAME.
Reservation expires in ten minutes (to extend, see OTHER USEFUL COMMANDS below)
OPTIONS
-v Verbose
-s Show current reservations and quit
-m MAXCNT Only reserve this many nodes
-a NODESTR Only check idle nodes in NODESTR. Cannot combine with -x
-x NODESTR Do not include NODESTR in idle nodes. Cannot combine with -a
OTHER USEFUL COMMANDS:
- To show current reservations
scontrol show reservations
- To remove the reservation before it expires, run
scontrol delete reservation=RESERVATION_NAME
- To extend the time limit for current reservation to 2 hours
scontrol update reservation reservation=RESERVATION_NAME duration=02:00:00
""")
    if msg:
        print()
        print(" ", msg)
        print()
        # An explicit message means the invocation was wrong: report failure.
        sys.exit(1)
    sys.exit()
# Used for Python < 2.7
def _check_output(command, shell=False):
    """Run an external command and return what it wrote to stdout, as text.

    Small stand-in for subprocess.check_output. Unlike check_output it does
    not inspect the exit status, and stderr is not captured.

    Args:
        command: Either an argument list, or a single string. A string is
            split on whitespace into an argument list unless `shell` is true.
        shell: Passed through to subprocess.Popen. When true, a string
            command is handed to the shell unsplit.

    Returns:
        The command's standard output as a text string.
    """
    if isinstance(command, str) and not shell:
        command = command.split()
    # universal_newlines=True makes the pipe return text on Python 3 as well,
    # which callers rely on when they split on "\n" or call endswith('...').
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, shell=shell,
                            universal_newlines=True)
    return proc.communicate()[0]
def print_verbose(verbose, *msgs):
    """Print a time-stamped diagnostic line, but only in verbose mode.

    Args:
        verbose: When false, nothing is printed.
        *msgs: String fragments; they are joined with single spaces and
            prefixed with the current local time as HH:MM:SS.
    """
    if not verbose:
        return
    text = " ".join(msgs)
    now = time.strftime("%H:%M:%S")
    print("%s: %s" % (now, text))
def get_idle_nodes():
    """Return the "@compute" nodes that are neither active nor preemptable.

    The node lists reported by sacct are treated as the active set; that set
    and the "@preempt" group are both subtracted from the "@compute" group.

    Returns:
        A ClusterShell NodeSet holding the remaining (idle) nodes.
    """
    # The %256 field width has to be large enough for every node list sacct
    # prints. If it is too small the joined string is not well-formed and
    # building the NodeSet from it below raises an exception.
    sacct_cmd = [SACCT, '-anX', '-sR', '-o', 'nodelist%256']
    nodestr = ",".join(_check_output(sacct_cmd).split())
    print_verbose(VERBOSE, "Nodestr is %s" % nodestr)

    make_set = ClusterShell.NodeSet.NodeSet
    compute = make_set("@compute")
    preempt = make_set("@preempt")
    active = make_set(nodestr)
    return compute - active - preempt
# Subtract the nodes named by the string 'nodestr' from 'nodes'
def remove_nodes(nodes, nodestr):
    """Return `nodes` without the nodes described by `nodestr`.

    Args:
        nodes: The node set to subtract from.
        nodestr: Node-set string parsed with ClusterShell's NodeSet.

    Returns:
        The result of `nodes - NodeSet(nodestr)`.
    """
    return nodes - ClusterShell.NodeSet.NodeSet(nodestr)
# Keep only the nodes present both in 'nodes' and in the string 'nodestr'
def mask_nodes(nodes, nodestr):
    """Return the intersection of `nodes` with the nodes named by `nodestr`.

    Args:
        nodes: The node set to restrict.
        nodestr: Node-set string parsed with ClusterShell's NodeSet.

    Returns:
        The result of `nodes.intersection(NodeSet(nodestr))`.
    """
    allowed = ClusterShell.NodeSet.NodeSet(nodestr)
    return nodes.intersection(allowed)
# Extract the value of the " Nodes=" field from one reservation line
def _reservation_nodestr(line):
    """Return the node string of one `scontrol show reservations -o` line.

    Args:
        line: One output line, already stripped of trailing whitespace.

    Returns:
        The text between " Nodes=" and the next space (or end of line), or
        None when the line has no " Nodes=" field or the field is empty.
    """
    marker = " Nodes="
    start = line.find(marker)
    # Test the find() result *before* adding the marker length; otherwise the
    # "not found" value -1 turns into a valid-looking positive index.
    if start == -1:
        return None
    start += len(marker)
    end = line.find(" ", start)
    if end == -1:
        end = len(line)
    value = line[start:end]
    # NOTE(review): scontrol is believed to print "(null)" for a reservation
    # without nodes - confirm. It is skipped because it names no node.
    if not value or value == "(null)":
        return None
    return value


# Get list of nodes that are currently reserved on the system
def get_reserved_nodes():
    """Return the nodes that belong to any existing reservation.

    Runs `scontrol show reservations -o` (one reservation per line) and
    collects the " Nodes=" field of every line that has one. Lines without
    that field, such as informational messages, are ignored.

    Returns:
        A ClusterShell NodeSet built from the collected node strings; it is
        built from an empty string when no reservation lists any nodes.
    """
    reservations = _check_output([SCONTROL, 'show', 'reservations', '-o'])
    nodes = []
    for reservation in reservations.split("\n"):
        nodestr = _reservation_nodestr(reservation.rstrip())
        if nodestr is not None:
            nodes.append(nodestr)
    return ClusterShell.NodeSet.NodeSet(','.join(nodes))
def show_reservations():
    """Return the raw output of `scontrol show reservations`."""
    return _check_output("%s show reservations" % SCONTROL)
def does_reservation_exist(name):
    """Report whether scontrol knows a reservation called `name`.

    Args:
        name: Reservation name to look up.

    Returns:
        False when the output of `scontrol show reservationname=NAME`,
        stripped of trailing whitespace, ends with 'not found'; True in
        every other case.
    """
    reply = _check_output("%s show reservationname=%s" % (SCONTROL, name))
    if reply.rstrip().endswith('not found'):
        return False
    return True
def reserve_nodes(name, nodes):
    """Create a ten-minute reservation called `name` on `nodes`.

    The reservation is created through scontrol for account root, starting
    now, with the ignore_jobs flag set.

    Args:
        name: Name of the reservation to create.
        nodes: Nodes to reserve; formatted into the command with %s.
    """
    template = ("%s create reservation reservationname=%s account=root "
                "flags=ignore_jobs starttime=now duration=00:10:00 nodes=%s")
    _check_output(template % (SCONTROL, name, nodes))
def update_reservation(name, nodes):
    """Replace the node list of the existing reservation `name`.

    Args:
        name: Name of the reservation to update.
        nodes: New node list; formatted into the command with %s.
    """
    fields = (SCONTROL, name, nodes)
    _check_output("%s update reservation reservationname=%s nodes=%s" % fields)
def keep_first(n,nodes):
    """Return a new NodeSet holding at most the first `n` entries of `nodes`.

    Args:
        n: Maximum number of entries to keep.
        nodes: Source node set; it is walked with its nsiter() method.

    Returns:
        A fresh ClusterShell NodeSet updated with the first `n` items that
        nsiter() yields (all of them if there are fewer than `n`).
    """
    kept = ClusterShell.NodeSet.NodeSet()
    remaining = n
    for single in nodes.nsiter():
        if remaining <= 0:
            break
        kept.update(single)
        remaining -= 1
    return kept
def get_opts_args(argsin):
    """Parse and validate the command-line arguments.

    Accepted options are -s, -v, -m MAXCNT, -a NODESTR and -x NODESTR.
    Any invalid invocation ends the program through Usage():
      * an unknown option or a missing option argument,
      * anything other than exactly one positional argument, unless -s is
        given,
      * -a and -x used together.

    Args:
        argsin: Argument list without the program name (sys.argv[1:]).

    Returns:
        A tuple (opts, args): `opts` maps each option flag (e.g. '-m') to
        its argument string ('' for flags without one), `args` is the list
        of positional arguments.
    """
    try:
        opts, args = getopt.getopt(argsin, 'svm:a:x:')
    except getopt.GetoptError as err:
        # Show the help text instead of a traceback; Usage() does not return.
        Usage("ERROR: %s" % err)
    opts = dict(opts)
    if len(args) != 1 and '-s' not in opts:
        Usage()
    if '-a' in opts and '-x' in opts:
        Usage("ERROR: You cannot use both -a and -x")
    return opts, args
def _filter_requested_nodes(nodes, opts, label):
    """Apply the -x (exclude) and -a (restrict) options to `nodes`.

    Args:
        nodes: Node set to filter.
        opts: Option dictionary as returned by get_opts_args().
        label: First word of the verbose messages ("First" or "Still").

    Returns:
        The filtered node set.
    """
    if '-x' in opts:
        nodes = remove_nodes(nodes, opts['-x'])
        print_verbose(VERBOSE, "%s idle nodes after excludsion (%d): " % (label, len(nodes)), str(nodes))
    if '-a' in opts:
        nodes = mask_nodes(nodes, opts['-a'])
        print_verbose(VERBOSE, "%s idle nodes after map (%d): " % (label, len(nodes)), str(nodes))
    return nodes


def main():
    """Entry point: reserve the currently idle nodes under the given name.

    Steps:
      1. Parse options; with -s just print the current reservations and exit.
      2. Refuse to continue if the reservation name is already taken.
      3. Compute the idle nodes (minus already reserved ones, then filtered
         by -x / -a) and reserve them all right away.
      4. Query the idle nodes again, keep only nodes that were idle both
         times, re-apply -x / -a and the -m limit, and update the
         reservation if the final set differs from the initial one.
      5. Print the final node set.

    Error conditions print a message and exit with status 1.
    """
    global VERBOSE
    opts, args = get_opts_args(sys.argv[1:])

    # Show reservations and quit
    if '-s' in opts:
        print()
        print(show_reservations())
        print()
        sys.exit()

    VERBOSE = '-v' in opts

    # -m is optional; a value that is not an integer is a usage error rather
    # than an uncaught ValueError.
    maxcnt = None
    if '-m' in opts:
        try:
            maxcnt = int(opts['-m'])
        except ValueError:
            Usage("ERROR: -m requires an integer node count, got '%s'" % opts['-m'])

    print_verbose(VERBOSE, "Starting ...")
    name = args[0]

    # Does reservation already exist?
    if does_reservation_exist(name):
        print("\n ERROR: Reservation with name '%s' already exists\n" % name)
        sys.exit(1)

    # Get list of idle nodes
    idle1_nodes = get_idle_nodes()
    # Remove nodes already in reservations
    idle1_nodes -= get_reserved_nodes()
    print_verbose(VERBOSE,"Initial set of idle nodes (%d):" % len(idle1_nodes), str(idle1_nodes) )

    # Honour -x / -a
    idle1_nodes = _filter_requested_nodes(idle1_nodes, opts, "First")
    if len(idle1_nodes)==0:
        print()
        print("ERROR: There are no idle nodes available")
        print()
        sys.exit(1)

    # Reserve idle nodes
    reserve_nodes(name,idle1_nodes)

    # Get updated list of idle nodes - in case some nodes became active
    # before they could be reserved
    idle2_nodes = get_idle_nodes()
    freshly_active = idle1_nodes - idle2_nodes
    print_verbose(VERBOSE,"Freshly activiated nodes (%d): " % len(freshly_active), str(freshly_active) )

    # Only keep idle nodes that are in the original list AND the updated
    # list. This must be a set intersection: the boolean expression
    # `idle1_nodes and idle2_nodes` would simply evaluate to idle2_nodes.
    still_idle_nodes = idle1_nodes.intersection(idle2_nodes)
    print_verbose(VERBOSE,"Still idle nodes (%d): " % len(still_idle_nodes) , str(still_idle_nodes) )

    # Honour -x / -a again on the refreshed set
    still_idle_nodes = _filter_requested_nodes(still_idle_nodes, opts, "Still")

    # Reduce number of nodes if requested
    if maxcnt:
        still_idle_nodes = keep_first(maxcnt, still_idle_nodes)
        print_verbose(VERBOSE,"Reduced still idle nodes (%d)" % len(still_idle_nodes), str(still_idle_nodes))

    # Make final reservation if idle nodes have changed.
    # `not (a == b)` is kept on purpose: it relies only on NodeSet's equality
    # operator, exactly as the comparison did before.
    if not (still_idle_nodes == idle1_nodes):
        update_reservation(name,still_idle_nodes)

    # Print list of newly reserved idle nodes
    print(str(still_idle_nodes))
# Run the tool only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()