#!/usr/bin/env python3
#from __future__ import print_function
# (_) Verify data is correct - compare to ssgunix
# (_) Add / to -F argument
# (_) Check -F for :
# (_) Add cmd-line options for Other and Stacking
# (_) Add stacking to gnuplot output
# (_) Ability to make synthetic attributes (eg: general node group)
# (_) When using file, default time range to entire file
# (_) Verify that time matches sacct time (local or gmt)
# Imports
# Standard modules
import sys, time, getopt, calendar, subprocess
# Configuration
SACCT = '/gpfs/gpfs1/slurm/bin/sacct'
DEF_START = '7d'
DEF_PLOTSIZE = (1280,360)
REQUIRED_HEADERS_STR = 'jobid,partition,qos,account,user,submit,start,end,ncpus,state,nodelist'
('-t','NO TITLE'),
('-n',DEF_NTOP ),
('-a','partition'), # attribute
('-I',''), # include
('-F',''), # filter
('-C',''), # csv
('-G',''), # gnuplot
# Functions
def Usage(msg=None):
    """Print the command-line help, an optional error message, and exit.

    msg -- optional error text printed after the help.

    Exits with status 1 when msg is given, otherwise 0.

    NOTE(review): the help text was reconstructed from a damaged copy; the
    program name is taken from sys.argv[0], and the '-I' and '-h' entries,
    the filter example and the final exit were restored from how the rest
    of the script uses them -- confirm against the original wording.
    """
    print("""
Usage:
  %(prog)s [-S START] [-E END] -Q
      Write out the sacct command line that is used by the program

  %(prog)s [-S START] [-E END] -W SACCT.DAT
      Write sacct data into file SACCT.DAT for subsequent use

  %(prog)s [-SE] [-g GRAPH.PNG] [SACCT.DAT [SACCT.DAT ...]]
      Read sacct data (from file(s) if given, otherwise from sacct live),
      and write graph on X terminal, or optionally into PNG file -g.

Options:
  -h           Print this help text
  -Q           Write shell command line that calls 'sacct' with
               correct options to generate the output needed for this
               program to parse
  -S START     Start time (see TIME FORMAT below). Default is '%(start)s'
  -E END       End time (see TIME FORMAT below). Default is the time when
               command is run
  -i SPAN      Time interval to divide data. Default is 30 minutes.
               (this is given in "Relative Time", see TIME FORMAT below)
  -W FILE      Read Slurm's sacct and write to FILE, with metadata
               stored in comments.
  -g GRAPH     Name of file to write graph. If unspecified, graph
               displays on X terminal
  -t TITLE     Plot title
  -C FNAME     Write plot data into file FNAME.csv
  -G FNAME     Write Gnuplot plotting file FNAME.gnuplot, and data into
               FNAME.csv
  -a CATEGORY  Draw individual curves for each item in this category.
               CATEGORY can be partition, qos, account, or user.
  -I LIST      Comma-separated list of CATEGORY items to draw, instead
               of ranking the items by usage.
  -F FILTER    Only consider data which passes the filter. For example:
                   user:jar02014,gms02014,alf02013
               Here, data is only used for user equal to jar02014,
               gms02014 or alf02013. User above can be replaced with any
               category. Also, -F can be used multiple times, to apply
               multiple filters.
  -n NTOP      Only print NTOP of the CATEGORY curves, and one additional
               curve called "OTHER" with the sum of the values for
               the non-top categories. If there are no non-top categories,
               then "OTHER" is omitted.

TIME FORMAT
  Relative time: Value is a number appended with either 's','m','h','d'
               For -i, this time is taken to be the time interval.
               For -S and -E, this time is subtracted from the current
               time, and rounded to value of time interval (-i).
  Absolute time: A single string with between 3 and 6 integers, delimited
               by non-numeric characters, like '2017-4-17-12:51'.
               If you specify just a date, the time is 00:00:00.
               Any unspecified integers (i.e. hour, minute, second) default
               to zero.

EXAMPLES
  # Show the 'sacct' command line that would be run
  > %(prog)s -Q -S 30m
  sacct -aXP -S2018-04-22-00:00:00 -E2018-04-23-16:18:50 -o jobid,partition,qos,account,user,submit,start,end,ncpus,state

  # Dump 'sacct data'
  > %(prog)s -S 7d -E 30m -W sacct.dat
""" % {'prog': sys.argv[0], 'start': DEF_START})
    if msg: print("\n"+msg+"\n")
    # Callers treat Usage() as terminal (they continue with option values
    # that are only valid when no error was reported).
    sys.exit(1 if msg else 0)
def get_datapoints(series,first_marker,last_marker):
    """Yield consecutive (previous_point, current_point) pairs of a step series.

    series       -- sequence of (t, v) points
    first_marker -- position where the output starts
    last_marker  -- position of the synthetic final point

    Iteration starts at the first point located after first_marker. The
    "previous" point of the first pair is placed at first_marker and carries
    the value of the point just before it in the series (0 when there is
    none). After the last series point, one closing pair is produced whose
    current point is [last_marker, last_value].

    Points located after last_marker are still yielded unchanged.
    """
    # Index of the first data point inside the marker range. Defaults to
    # "past the end" so that an empty series, or one lying entirely at or
    # before first_marker, produces only the closing pair.
    ix = len(series)
    for pos, point in enumerate(series):
        if point[0] > first_marker:
            ix = pos
            break
    prev_t = first_marker
    prev_v = 0 if ix == 0 else series[ix-1][1]
    for point in series[ix:]:
        yield (prev_t, prev_v), point
        prev_t, prev_v = point
    yield (prev_t, prev_v), [last_marker, prev_v]
def next_point(series,marker,last_marker,span):
    """Yield data points interleaved with regularly spaced markers.

    series      -- sequence of (t, v) points, passed to get_datapoints()
    marker      -- position of the first marker
    last_marker -- position passed to get_datapoints() as the final point
    span        -- distance between two markers; must be positive

    Yields tuples (prev_t, prev_v, t, is_marker):
        prev_t, prev_v -- position and value of the previous point
        t              -- position of the current point
        is_marker      -- True if the current position is a marker,
                          otherwise it is a data position

    Raises ValueError if span is not positive, because the marker position
    could then never catch up with the data.
    """
    if span <= 0:
        raise ValueError("span must be positive")
    for (prev_t, prev_v), (t, _v) in get_datapoints(series,marker,last_marker):
        # Emit every marker that lies at or before the next data point
        while t >= marker + span:
            marker += span
            yield prev_t, prev_v, marker, True
            # The marker becomes the start of the next partial segment
            prev_t = marker
        # Next point is a data point
        yield prev_t, prev_v, t, False
def stepify(series,first_marker,last_marker,span):
    """Resample an irregularly spaced step function onto a regular grid.

    series       -- step function as a sequence [(t,v), (t,v), ...]
    first_marker -- position of the first output interval
    last_marker  -- position where the output ends
    span         -- width of each output interval

    Yields (position, value) pairs, one per interval of width span, where
    value is the integral of the step function over the interval divided by
    span. If the series is empty or does not overlap the marker range, every
    interval is reported as 0.
    """
    # If markers and data series do not overlap, return all zero values
    if not series or last_marker<=series[0][0] or first_marker>=series[-1][0]:
        for i in range(int((last_marker-first_marker)/span)):
            yield (first_marker+i*span,0)
        return
    total = 0    # area accumulated since the previous marker
    for prev_t,prev_v,t,is_marker in next_point(series,first_marker,last_marker,span):
        area = (t-prev_t)*prev_v
        if is_marker:
            # Interval complete: report its average and start a new one
            yield t-span, (total + area)/float(span)
            total = 0
        else:
            total += area
def print_error(msg):
    """Write msg to standard error, prefixed with an ERROR tag."""
    text = " ".join((" ERROR: ", str(msg)))
    sys.stderr.write(text + "\n")
def print_error_plot(title,msg,sizes,plotname):
    """Draw a plot that contains only a title and an error message.

    title    -- text used as the figure title
    msg      -- message drawn in the middle of the figure
    sizes    -- (width, height) of the figure in pixels
    plotname -- output file; if None the figure is shown on screen

    NOTE(review): the lines that draw msg and that save/show the figure are
    missing from the damaged original and were reconstructed to mirror the
    regular plotting routine -- confirm.
    """
    try:
        import matplotlib
        if plotname is not None: matplotlib.use('Agg')  # Do not use the default X backend when writing file
        import matplotlib.pyplot as plt
    except ImportError:
        print_error("Interactive graphics not available. You need tcl loaded first")
        return
    fig = plt.figure()
    fig.suptitle(title,fontsize=32, fontweight='bold')
    fig.text(0.5, 0.5, msg, horizontalalignment='center', verticalalignment='center')
    set_fig_size_pixels(fig, sizes[0], sizes[1])
    if plotname:
        plt.savefig(plotname)
    else:
        plt.show()
# Used for Python < 2.7
def _check_output(command,shell=False):
if type(command)==type(''):
command = command.split()
p = subprocess.Popen(command, stdout=subprocess.PIPE)
return p.communicate()[0]
def set_defaults(args, defaults):
    """Add each (key, value) pair of defaults to args unless the key exists.

    args is modified in place and also returned.
    """
    for key, fallback in defaults:
        args.setdefault(key, fallback)
    return args
def get_pasttime(delstr,now=None,interval=3600*24):
    """Convert a relative time into an epoch time in the past.

    delstr   -- relative time, in any form accepted by parse_reltime()
    now      -- reference time in epoch seconds; defaults to the current
                time at the moment of the call
    interval -- the result is rounded down to a multiple of this many seconds

    Returns now minus the relative time, rounded down to the interval, or
    None if delstr is not a relative time.
    """
    # Resolved per call: a time.time() default in the signature would be
    # evaluated only once, when the function is defined.
    if now is None:
        now = time.time()
    offset = parse_reltime(delstr)
    # A zero offset ("0s") is a valid relative time meaning "now"
    if offset is None:
        return None
    past = now - offset
    return past - (past % interval)
def timekey2sec(timekey):
    """Convert an absolute time string to epoch seconds.

    The string is a series of integers representing year, month, day, hour,
    minute and second, in that order, separated by one or more non-digit
    characters. At least year, month and day are required; missing trailing
    fields are taken as 0.
    Examples:  "2018-01-20"  "1997-7-17T21:17:05"  "1999/12/31 23:59:59"

    Returns the epoch seconds, or None if fewer than three integers are found.

    NOTE(review): the fields are interpreted as GMT (calendar.timegm), while
    sec2timekey() formats in local time by default -- confirm which of the
    two matches the times printed by sacct.
    """
    # Replace every non-digit with a blank, so split() returns the digit runs
    digits_only = "".join(c if '0' <= c <= '9' else ' ' for c in timekey.strip())
    timearray = [int(field) for field in digits_only.split()]
    if len(timearray)<3: return None
    timearray += (9-len(timearray)) * [0]  # Pad array with 0s to make a 9 element time array
    return calendar.timegm(timearray)      # Like time.mktime(), but works correctly when tuple is GMT
def sec2timekey(secs,sacctfmt=False,gm=False):
    """Format epoch seconds as a date-time string.

    secs     -- epoch seconds
    sacctfmt -- if True separate date and time with 'T', otherwise with '-'
    gm       -- if True format in GMT, otherwise in local time
    """
    to_struct = time.gmtime if gm else time.localtime
    fmt = "%Y-%m-%dT%H:%M:%S" if sacctfmt else "%Y-%m-%d-%H:%M:%S"
    return time.strftime(fmt, to_struct(secs))
def parse_reltime(timestr):
    """Convert a relative time (e.g. 1s, 5d, 3m) to seconds.

    timestr -- an integer (returned unchanged), or a string holding an
               integer optionally followed by one unit letter: s, m, h or d
               (upper or lower case). Without a unit, seconds are assumed.

    Returns the number of seconds, or None if timestr is empty or is not a
    relative time.
    """
    if isinstance(timestr, int):
        return timestr
    if not timestr:
        return None
    seconds_per_unit = {'s':1, 'm': 60, 'h': 3600, 'd':86400}
    unitstr = timestr[-1].lower()
    if unitstr in seconds_per_unit:
        number = timestr[:-1]
    else:
        # No unit given: the whole string is the number
        number, unitstr = timestr, 's'
    try:
        return int(number) * seconds_per_unit[unitstr]
    except ValueError:
        return None
def parse_filter_string(filter, filterstr):
    """Add one 'KEY:VALUE,VALUE,...' filter specification to a filter dict.

    filter    -- dict of filters collected so far, or None to start a new one
    filterstr -- filter specification; it is lower-cased before parsing

    Returns the filter dict, which maps each key to its list of accepted
    values. A specification without ':' maps the key to None. Only the first
    ':' separates key from values, so the values may contain ':' themselves.
    A blank specification leaves the filter unchanged.
    """
    if not filter: filter = {}
    spec = filterstr.strip().lower()
    if spec:
        key, colon, values = spec.partition(":")
        filter[key] = values.split(",") if colon else None
    return filter
def entry_passes_filter(filter,entry):
    """Return True if entry satisfies every filter.

    filter -- dict mapping a field name to the list of accepted lower-case
              values, or None/empty for "no filtering"
    entry  -- dict mapping field names to string values

    An entry passes when, for each filtered field, the entry has that field
    and its lower-cased value is one of the accepted values.
    """
    if not filter: return True
    for key, accepted in filter.items():
        if key not in entry:
            return False
        # NOTE(review): a filter given without values is stored as None;
        # it is treated here as "the field only has to exist" -- confirm.
        if accepted is None:
            continue
        if entry[key].lower() not in accepted:
            return False
    return True
def get_opts(argsin):
    """Parse the command line and derive the values used by the program.

    argsin -- list of command-line arguments, without the program name

    Returns (opts, args). opts holds the raw option values under their
    getopt names ('-S', '-a', ...) merged over OPTION_DEFAULTS, plus the
    derived keys 'filter', 'interval', 'start', 'end', 'include' and 'ntop'.
    args holds the remaining positional arguments.

    Prints the usage text and exits on '-h' or on any invalid option value.
    """
    try:
        live_opts, args = getopt.getopt(argsin, 'QS:R:W:i:hg:t:n:a:I:E:F:C:G:')  # a-attributes, I include, F filter
    except getopt.GetoptError as err:
        _usage_error(" ERROR: %s" % err)
    def_start_str = sec2timekey(get_pasttime(DEF_START) or timekey2sec(DEF_START), gm=True)
    opts = dict(OPTION_DEFAULTS)
    # Use live_opts to update filter (convert all -F options)
    filters = None
    for k,v in live_opts:
        if k=='-F':
            filters = parse_filter_string(filters,v.lower())
    opts['filter'] = filters
    # NOTE(review): the default end time is formatted in local time but is
    # parsed back as GMT below -- see the time-zone TODO at the top of the file.
    opts.update( ( ('-S',def_start_str),('-E',sec2timekey(time.time(),sacctfmt=True)) ) )
    opts.update( live_opts)
    if '-h' in opts:
        Usage()
        sys.exit(0)
    # Convert options
    interval = parse_reltime(opts['-i'])
    if not interval or interval < 0:
        _usage_error(" ERROR: Invalid time interval (-i): %s" % opts['-i'])
    opts['interval'] = interval
    for key, flag in (('start', '-S'), ('end', '-E')):
        # A relative time is tried first, then an absolute time
        secs = get_pasttime(opts[flag], interval=interval)
        if secs is None:
            secs = timekey2sec(opts[flag])
        if secs is None:
            _usage_error(" ERROR: Invalid time for %s: %s" % (flag, opts[flag]))
        opts[key] = secs
    opts['-S'] = sec2timekey(opts['start'])
    opts['-E'] = sec2timekey(opts['end'])
    opts['-a'] = opts['-a'].lower()
    opts['include'] = opts['-I'].split(',') if opts['-I'] else None
    try:
        opts['ntop'] = int(opts['-n'])
    except ValueError:
        print_error("Value for NTOP (-n) must be an integer")
        sys.exit(1)
    if opts['start']>opts['end']:
        _usage_error(" ERROR: Start time must precede end time ")
    return opts, args

def _usage_error(msg):
    """Print the usage text followed by msg, and terminate the program."""
    Usage(msg)
    sys.exit(1)  # reached only if Usage() itself returns
def read_next_noncomment(fnames):
    """Yield every non-comment, non-blank line of the given files.

    fnames -- iterable of file names; '-' stands for standard input

    Yields ((name, linecnt, datacnt), line) where name is the file name
    ('stdin' for standard input), linecnt is the 0-based line index within
    the file, datacnt is the 1-based count of yielded lines within the file,
    and line is the unmodified line. A line is a comment when its first
    non-blank character is '#'.
    """
    for fname in fnames:
        from_stdin = fname=='-'
        fname_pretty = 'stdin' if from_stdin else fname
        fin = sys.stdin if from_stdin else open(fname)
        try:
            datacnt = 0
            for linecnt, line in enumerate(fin):
                sline = line.lstrip()
                # reject comments and blank lines
                if not sline or sline[0]=='#': continue
                datacnt += 1
                yield (fname_pretty,linecnt,datacnt), line
        finally:
            # Close only what was opened here; standard input is left open
            if not from_stdin: fin.close()
def read_job_run_entries_from_file(startsec, endsec, fnames, filter=None, limit=None, delimiter="|"):
    """Yield job entries read from saved sacct output files.

    startsec, endsec -- time range in epoch seconds; only jobs overlapping
                        this range are yielded
    fnames    -- files to read ('-' for standard input)
    filter    -- filter dict as used by entry_passes_filter(), or None
    limit     -- if set, stop reading at the data line with this number
    delimiter -- column separator

    The first data line of each file is its column header. Every yielded
    entry is a dict keyed by lower-case column name, where 'start' and 'end'
    are converted to epoch seconds (the original strings are kept in
    'startstr' and 'endstr'). A job whose end is unknown is taken to end at
    endsec; a job whose start is unknown or unparsable is skipped.

    Terminates the program if a file lacks one of the required columns.
    """
    # Convert field labels to lower case
    requested_headers = [f.lower() for f in REQUIRED_HEADERS_STR.split(',')]
    found_headers = {}   # column index -> column name, for requested columns
    # Read non-comments from file(s)
    for (fname,linecnt,datacnt),line in read_next_noncomment(fnames):
        sline = line.rstrip()
        # Get column headers from first non-comment
        if datacnt==1:
            columns = sline.lower().split(delimiter)
            found_headers = dict( (i,f) for i,f in enumerate(columns) if f in requested_headers )
            # Not all columns were found
            missing_fields = [ f for f in requested_headers if f not in found_headers.values() ]
            if missing_fields:
                print_error("Input file is missing following columns:" + " ".join(missing_fields))
                sys.exit(1)
            continue
        if limit and datacnt==limit: break
        if not sline: continue
        # Parse the data line; columns that were not requested are dropped
        entry = dict( (found_headers[i], value) for i,value in enumerate(sline.split(delimiter)) if i in found_headers )
        if 'start' not in entry or 'end' not in entry: continue   # truncated line
        if not entry_passes_filter(filter,entry): continue
        entry['startstr'], entry['endstr'] = entry['start'],entry['end']
        if entry['start']=='Unknown': continue
        entry['start'] = timekey2sec(entry['start'])
        if entry['start'] is None: continue
        end = None if entry['end']=='Unknown' else timekey2sec(entry['end'])
        entry['end'] = endsec if end is None else end
        # Does job entry overlap the time range
        if entry['start']<endsec and entry['end']>=startsec:
            # NOTE(review): this is the length of the requested range, not
            # of the job -- confirm the intended meaning of 'elapsed'.
            entry['elapsed'] = endsec - startsec
            yield entry
def get_sacct_cmd(start,end,in_secs=True):
    """Return the sacct command line covering the range start..end.

    start, end -- range limits; epoch seconds if in_secs is True, otherwise
                  values that are inserted into the command line unchanged
    in_secs    -- whether start and end must be converted with sec2timekey()
    """
    if in_secs:
        start, end = sec2timekey(start), sec2timekey(end)
    return "sacct -aXP -S %s -E %s -o %s" % (start,end,REQUIRED_HEADERS_STR)
def get_sacct_output(startstr,endstr):
    """Run sacct for the given range and return its output as text.

    startstr, endstr -- range limits, already formatted for the sacct
                        command line

    Terminates the program if the sacct executable cannot be found.
    """
    cmd = get_sacct_cmd(startstr,endstr,False).split()
    try:
        return subprocess.check_output(cmd).decode('UTF-8')
    except FileNotFoundError:
        print_error("Sacct does not appear to be installed on this machine")
        # Without sacct there is no data to return
        sys.exit(1)
def read_job_run_entries_from_sacct(startsec,endsec,filter=None):
    """Yield job entries read live from the sacct command.

    startsec, endsec -- time range in epoch seconds
    filter           -- filter dict as used by entry_passes_filter(), or None

    The first output line of sacct is the column header. Every yielded entry
    is a dict keyed by lower-case column name, where 'start' and 'end' are
    converted to epoch seconds. A job whose end is unknown is taken to end at
    endsec; a job whose start is unknown or unparsable is skipped.

    Terminates the program if the sacct executable cannot be found.
    """
    cmd = get_sacct_cmd(startsec,endsec).split()
    try:
        output = subprocess.check_output(cmd).decode('UTF-8')
    except FileNotFoundError:
        print_error("Sacct does not appear to be installed on this machine")
        sys.exit(1)
    headers = []
    for lineno, line in enumerate(output.split("\n")):
        sline = line.rstrip()
        if lineno==0:
            headers = [h.lower() for h in sline.split('|')]
            continue
        if not sline: continue
        # zip() drops any value beyond the last header column
        entry = dict(zip(headers, sline.split('|')))
        if entry.get('start', 'Unknown')=='Unknown': continue
        entry['start'] = timekey2sec(entry['start'])
        if entry['start'] is None: continue
        end_text = entry.get('end', 'Unknown')
        end = None if end_text=='Unknown' else timekey2sec(end_text)
        entry['end'] = endsec if end is None else end
        if not entry_passes_filter(filter,entry): continue
        yield entry
def stack_values(oldvalues):
    """Turn per-time rows of curve values into stacked (cumulative) curves.

    oldvalues -- iterable of rows, one per time point, each holding one value
                 per curve

    Returns an iterator of curves (one tuple per curve, one value per time
    point) in which every curve is the running sum of itself and all curves
    before it.
    """
    values = []
    for points in oldvalues:
        running = 0
        stacked = []
        for value in points:
            running += value
            stacked.append(running)
        values.append(stacked)
    return zip(*values)
def fill_plot(ax, xs, ys, ylevel=0, color='k', label='', filled=True):
    """Draw one curve on ax, either as a filled area or as a line.

    ax     -- object providing fill() and plot(), e.g. a matplotlib Axes
    xs, ys -- coordinates of the curve
    ylevel -- base level that the filled area is closed against; None fills
              the polygon formed by the curve itself
    color  -- colour of the curve
    label  -- legend label of the curve
    filled -- if True draw a filled area, otherwise a plain line
    """
    if not filled:
        ax.plot( xs, ys, color=color, label=label )
    elif ylevel is None:
        ax.fill( xs, ys, color=color, label=label )
    else:
        # Close the polygon by dropping to ylevel at both ends of the curve
        ax.fill( [xs[0]] + list(xs) + [xs[-1]], [ylevel] + list(ys) + [ylevel], color=color, label=label )
def set_fig_size_pixels(fig, x, y):
    """Resize fig to x by y pixels, converting through the figure's dpi."""
    dots_per_inch = float(fig.get_dpi())
    width_in, height_in = (pixels / dots_per_inch for pixels in (x, y))
    fig.set_size_inches(width_in, height_in)
def plot_matplotlib(data, curve_names, sizes=DEF_PLOTSIZE,plotname=None,filled=False,stacked=False,opts=None):
    """Plot the curves with matplotlib, on screen or into a file.

    data        -- plot lines: list of points
                   ( (t0,(y0,y1,y2)), (t1,(y0,y1,y2)), ... ), where each t is
                   a time string in the format "%Y-%m-%d-%H:%M:%S"
    curve_names -- names of curves 0,1,2, etc, used for labels
    sizes       -- (width, height) of the figure in pixels
    plotname    -- output file; if None, draw on screen
    filled      -- if True, draw filled curves
    stacked     -- if True, draw each curve on top of the previous ones
    opts        -- optional dict with the texts 'title', 'xlabel', 'ylabel'

    NOTE(review): applying the date formatter to the x axis and the final
    save/show step are missing from the damaged original and were
    reconstructed -- confirm.
    """
    try:
        import datetime, matplotlib
        if plotname is not None: matplotlib.use('Agg')  # Do not use the default X backend when writing file
        import matplotlib.dates
        import matplotlib.pyplot as plt
    except ImportError:
        print_error("Interactive graphics not available. You need tcl loaded first")
        return
    if opts is None:
        opts = {}
    FILL_COLORS = ['b','r','g','y','k']
    # Reorganize data: from data = ( t1,(a1,b1,c1) ), (t2,(a2,b2,c2)), (t3,(a3,b3,c3)) to
    #    time = (t1,t2,t3), values = ((a1,a2,a3),(b1,b2,b3),(c1,c2,c3))
    times, values = zip(*data)
    # Stack values if requested
    values = list(stack_values(values) if stacked else zip(*values))
    # Convert times to datetime type
    times = [datetime.datetime.strptime(t,"%Y-%m-%d-%H:%M:%S") for t in times]
    # Draw graph, last curve first, so that with stacked filled curves the
    # smaller (earlier) areas are painted over the larger ones
    fig, ax = plt.subplots()
    for cnt in reversed(range(len(values))):
        color = FILL_COLORS[cnt % len(FILL_COLORS)]
        label = curve_names[cnt][:11]
        fill_plot( ax, times, values[cnt], color=color, label=label, filled=filled)
    ax.legend(bbox_to_anchor=(1.00, 0.8), loc=2, borderaxespad=0.0, prop={'size':9})
    # Format labels and plot
    if 'title' in opts:
        title = plt.title(opts['title'])
        plt.setp(title, size='large', color='b')
    if 'xlabel' in opts:
        xlabel = plt.xlabel(opts['xlabel'])
        plt.setp(xlabel, size='medium', weight='bold', color='g')
    if 'ylabel' in opts:
        ylabel = plt.ylabel(opts['ylabel'])
        plt.setp(ylabel, size='medium', weight='bold', color='g')
    # Time stamp of plot creation in the top right corner
    plt.figtext(1.0, 1.0, time.strftime("%Y-%m-%d %H:%M:%S"),color='gray',horizontalalignment='right',verticalalignment='top',size=10)
    # Format x axis
    yearsFmt = matplotlib.dates.DateFormatter('%a\n%b %d')
    ax.xaxis.set_major_formatter(yearsFmt)
    set_fig_size_pixels(fig, sizes[0], sizes[1])
    if plotname:
        plt.savefig(plotname)
    else:
        # Show plot on terminal
        plt.show()
def combine_top0(data,ntop=None,other=None):
    """Select the keys with the largest totals and reorder data accordingly.

    data  -- dict mapping a time to a list of (key, value) pairs
    ntop  -- number of keys to keep; None or 0 keeps all keys
    other -- name of an extra column holding the sum of all dropped keys;
             it is only added when keys are actually dropped

    Returns (names, rows): names is the list of kept keys in ascending order
    of their total (followed by other, if added), and rows is a sorted list
    of (time, values) with one value per name, 0 where a key is absent.
    """
    # Find combined values
    k_sum = {}
    for items in data.values():
        for k,val in items:
            k_sum[k] = k_sum.get(k, 0) + val
    # Sort by sum, get top
    toplist = [k for k,v in sorted(k_sum.items(), key=lambda l: l[1])]
    if ntop and ntop<len(toplist):
        toplist = toplist[-ntop:]
    else:
        # Nothing is dropped, so there is no remainder to report
        other = None
    # Order items
    newdata = []
    for dt, items in data.items():
        items = dict(items)
        top_vals = [ items.get(p,0) for p in toplist ]
        if other:
            top_vals.append( sum(items.values()) - sum(top_vals) )
        newdata.append( (dt, top_vals) )
    if other: toplist.append(other)
    return toplist, sorted( newdata )
def combine_top(category_series,ntop=None,other=None,include=None):
    """Choose the curves to draw and regroup their values per time point.

    category_series -- { category:[v0,v1,v2,..], category:[v0,v1,v2,...], ... }
    ntop    -- number of curves to keep; None or 0 keeps all
    other   -- name of an extra curve holding the sum of the dropped curves;
               it is only added when curves are actually dropped
    include -- optional list of categories to use, in this order, instead of
               ranking all categories by their total. A listed category
               without data is drawn as zeros.

    Returns (top_names, time_points): top_names is the list of curve names
    (ascending by total unless include is given), and time_points holds one
    sequence per time point, (v1,v2,v3,...), with one value per curve name.
    """
    if include:
        top_names = list(include)   # copy, the caller's list is not modified
    else:
        # Find combined values
        sums = [ (category,sum(series)) for category,series in category_series.items() ]
        top_names = [ k for k,v in sorted(sums, key=lambda l: l[1]) ]
    # Re-order items
    npoints = len(next(iter(category_series.values()), ()))
    zeros = (0,) * npoints
    columns = [ category_series.get(category, zeros) for category in top_names ]
    time_points = list(zip(*columns))   #  => [ (v0,v0,v0..), (v1,v1,v1..) .. ]
    # Truncate names and points to the top names
    if ntop and ntop<len(top_names):
        top_names = top_names[-ntop:]
        if other:
            # Add value of 'other' to names, points
            time_points = [ list(values[-ntop:]) + [sum(values)-sum(values[-ntop:])] for values in time_points ]
            top_names.append(other)
        else:
            time_points = [ list(values[-ntop:]) for values in time_points ]
    return top_names, time_points
def filter_entry(entry,opts):
    """Return the value of the attribute that the data is keyed on.

    entry -- dict of one job entry
    opts  -- program options; opts['-a'] names the attribute (data column)

    Terminates the program with an error message if the entry has no such
    attribute.
    """
    attribute = opts['-a']
    if attribute not in entry:
        print_error("Attribute %s does not exist" % attribute)
        # The attribute is a program option, so every entry would fail
        sys.exit(1)
    return entry[attribute]
def csv_quoted_vals(vals):
    """Return vals as one CSV line in which every field is double-quoted.

    vals -- sequence of values; each is converted with str()

    A double quote inside a value is doubled, as CSV requires. The line
    carries no line terminator.
    """
    return '"' + '","'.join([str(v).replace('"', '""') for v in vals]) + '"'
def main():
    """Run the program: read job data, build usage curves, write the output.

    Depending on the options, this prints the sacct command line (-Q), dumps
    the sacct output into a file (-W), or reads job entries (from files given
    as arguments, otherwise live from sacct), converts them into the number
    of cores in use per time interval for every value of the chosen
    attribute, and writes the result as CSV (-C), as gnuplot files (-G), or
    as a matplotlib graph.

    NOTE(review): the output statement of -Q, the exits after -Q and -W, and
    the error plot for the "no data" case are missing from the damaged
    original and were reconstructed -- confirm.
    """
    opts, args = get_opts(sys.argv[1:])
    # Print out sacct command line
    if '-Q' in opts:
        print(get_sacct_cmd(opts['-S'],opts['-E'],False))
        sys.exit()
    # Run sacct and write to file
    if '-W' in opts:
        with open(opts['-W'],"w") as fout:
            print( "# SACCT READ ON: ", time.strftime("%Y-%m-%d %H:%M:%S"), file=fout )
            print( "# SACCT COMMAND: ", get_sacct_cmd(opts['-S'],opts['-E'],False), file=fout )
            print( get_sacct_output(opts['-S'],opts['-E']), file=fout )
        sys.exit()
    if args:
        entry_generator = read_job_run_entries_from_file(opts['start'],opts['end'],args,filter=opts['filter'])
    else:
        entry_generator = read_job_run_entries_from_sacct(opts['start'],opts['end'],filter=opts['filter'])
    # For each attribute value, record +ncpus at job start and -ncpus at job end
    job_changes = {}
    for entry in entry_generator:
        attribute = filter_entry(entry,opts)
        if not attribute: continue
        ncpus = int(entry['ncpus'])
        changes = job_changes.setdefault(attribute, [])
        changes.append( ( entry['start'],  ncpus ) )
        changes.append( ( entry['end'  ], -ncpus ) )
    if not job_changes:
        msg = "No data was eligible for plotting"
        print_error(msg)
        if not opts['-C'] and not opts['-G']:
            print_error_plot(opts['-t'],msg,DEF_PLOTSIZE,opts['-g'])
        sys.exit()  # End program - no graph to make
    # For attribute, combine job start,end info above to get total cores used over time
    timeseries = {}
    startsec = opts['start']
    for attribute, series in job_changes.items():
        points = [(startsec,0)]
        value = 0
        prevt = None
        for t,v in sorted(series):
            value += v
            # This prevents a time value from repeating
            if t==prevt:
                points[-1] = (t,value)
            else:
                points.append( (t,value) )
            prevt = t
        timeseries[attribute] = points
    # Divide irregularly spaced time series data into regularly spaced intervals
    times = ()
    for attribute in timeseries:
        samples = list(stepify(timeseries[attribute],opts['start'],opts['end'],opts['interval']))
        if not samples:
            print_error("Time range is shorter than one time interval")
            sys.exit(1)
        times, timeseries[attribute] = zip( *samples )   # All lists times[] are identical
    times = [sec2timekey(t) for t in times]
    # Determine subgroup order and choose top curves
    curve_names, timeseries = combine_top(timeseries,opts['ntop'],'OTHER',opts['include'])
    # CSV output
    if opts['-C'] or opts['-G']:
        fname = opts['-C'] or opts['-G']
        with open(fname + ".csv", "w") as fout:
            print(csv_quoted_vals(['Date'] + list(curve_names)), file=fout)
            for t,vals in zip(times,timeseries):
                print(csv_quoted_vals([t]+list(vals)), file=fout)
    if opts['-G']:
        # NOTE(review): the CSV above is named after -C when both -C and -G
        # are given, while the plot file below always reads "<-G>.csv"; and
        # the timefmt separates date and time with a blank, while the CSV
        # uses '-' -- confirm both.
        with open(opts['-G'] + ".gnuplot", "w") as fout:
            print("""set datafile separator ","
set timefmt "%%Y-%%m-%%d %%H:%%M:%%S"
set xdata time
set title "%s"
plot for [col=2:%d] "%s.csv" using 1:col with lp ls col title columnhead(col) at end
""" % (opts['-t'], len(curve_names)+1, opts['-G']), file=fout)
    if not opts['-C'] and not opts['-G']:
        # Plot data
        title = opts['-t']
        plot_matplotlib( zip(times,timeseries), curve_names, plotname=opts['-g'],filled=True,stacked=True,opts={'title':title,'ylabel':'Core Hours'})
if __name__=='__main__':