Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
get_inactive_users/get_inactive_users.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
280 lines (229 sloc)
9.35 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function | |
import re, pwd, sys, ldap, time, getopt, subprocess | |
#-----------------------------------------------------------------------
# Configuration
#-----------------------------------------------------------------------
# Create a file named get_inactive_users_config.py that defines the LDAP
# connection settings, formatted like this:
#   LDAP_HOST = "xxxxxxx"
#   LDAP_BINDDN = "xxxxxxx"
#   LDAP_BASE = "xxxxxxx"
#   LDAP_BINDPASS = "xxxxxxx"
from get_inactive_users_config import * | |
#----------------------------------------------------------------------- | |
# LDAP Functions | |
#----------------------------------------------------------------------- | |
def _decode(bstrs): | |
return [ bstr.decode('utf-8') for bstr in bstrs ] | |
def _decode_dict(bdict):
    """Return a new dict with the same keys as `bdict` and decoded values.

    Each value is passed through _decode(), i.e. it is expected to be an
    iterable of byte strings and becomes a list of text strings.
    """
    return {key: _decode(values) for key, values in bdict.items()}
def Search_Netid(netid, attrs):
    """Search LDAP for entries whose samaccountname equals `netid`.

    Parameters:
        netid: account name to look up. It is escaped before being placed
               in the search filter, so filter metacharacters are matched
               literally.
        attrs: attribute name (string) or list of attribute names to fetch.

    Returns:
        A list of (dn, attributes) tuples, where `attributes` maps each
        attribute name to a list of decoded (text) values. Referral
        results, which python-ldap reports with a dn of None and a list of
        URLs in place of the attribute dict, are skipped.

    The connection settings come from LDAP_HOST, LDAP_BINDDN, LDAP_BINDPASS
    and LDAP_BASE. A new connection is opened for every call and is always
    released before returning, including when an LDAP call raises.
    """
    # Local import: ldap.filter belongs to the python-ldap package this file
    # already depends on, but the submodule is not imported at module level.
    from ldap.filter import escape_filter_chars

    if isinstance(attrs, str):  # Convert string to list of one
        attrs = [attrs]

    conn = ldap.initialize("ldap://%s:389" % LDAP_HOST)
    try:
        conn.set_option(ldap.OPT_REFERRALS, 0)
        conn.set_option(ldap.OPT_PROTOCOL_VERSION, 3)
        # NOTE(review): the two TLS options below are kept exactly as they
        # were; confirm whether OPT_X_TLS_REQUIRE_CERT was the intended key.
        conn.set_option(ldap.OPT_X_TLS, ldap.OPT_X_TLS_DEMAND)
        conn.set_option(ldap.OPT_X_TLS_DEMAND, True)
        conn.set_option(ldap.OPT_DEBUG_LEVEL, 255)
        conn.start_tls_s()
        conn.simple_bind_s(LDAP_BINDDN, LDAP_BINDPASS)

        filtr = '(samaccountname=%s)' % escape_filter_chars(netid)
        results = conn.search_s(LDAP_BASE, ldap.SCOPE_SUBTREE, filtr, attrs)
    finally:
        # Without this every lookup leaves a connection open until exit.
        conn.unbind_s()

    # Decode byte values to strings; drop referrals (dn is None), which are
    # not real entries and would otherwise be counted as matches.
    return [(dn, _decode_dict(entry)) for dn, entry in results if dn is not None]
def Is_Netid_Active(netid):
    """Return True when LDAP has at least one entry for `netid`.

    The lookup is delegated to Search_Netid(), requesting only the
    samaccountname attribute; a NetID counts as active as soon as the
    search returns any result.
    """
    matches = Search_Netid(netid, 'samaccountname')
    return len(matches) > 0
#----------------------------------------------------------------------- | |
# Functions | |
#----------------------------------------------------------------------- | |
def Usage(msg=None):
    """Print the usage text, optionally followed by `msg`, then exit.

    Parameters:
        msg: optional error message. When given, the call is treated as an
             error: everything is written to stderr (stdout carries the
             script's real output) and the exit status is 1. Without a
             message the text goes to stdout and the exit status is 0.

    Raises:
        SystemExit: always.
    """
    stream = sys.stderr if msg else sys.stdout
    print("""
Usage: get_inactive_users.py [-v] DAYS
Get list of netids, emails and name for users who have
not run jobs or logged into the cluster in the past DAYS,
*and* who are also inactive in ldap.
OPTIONS
-v Print progress output to stderr
WARNINGS
- It may take 2 minutes or more to return.
- Output is written to stdout
""", file=stream)
    if msg:
        print(file=stream)
        print(msg, file=stream)
        print(file=stream)
    sys.exit(1 if msg else 0)
# nckeck_output("ls -lt | sort -r") | |
# or | |
# ncheck_output(["ls -lt", "sort -r" ]) | |
# or | |
# ncheck_output( [ ["ls", "-lt"], ["sort", "-r" ] ] ) | |
# | |
# will all produce the same output | |
def ncheck_output(cmds, content=None, shell=False): | |
# Following will take either of these three arguments as cmds: | |
# "ls -lt | sort -r" | |
# [ "ls -lt", "sort -r" ] | |
# [ ["ls", "-lt"], ["sort", "-r"] ] | |
# and return the last form. This last form is used for the subprocess.Popen command | |
cmds = [ _split_if_string(cmd) for cmd in _split_if_string(cmds,"|") ] | |
ncmds = len(cmds) | |
ps = ncmds*[None] | |
for i,cmd in enumerate(cmds): | |
if i==0: | |
ps[i] = subprocess.Popen(cmds[i], stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=shell) | |
else: | |
# Note: stdin below is the stdout of the previously run command (ps[i-1]) | |
ps[i] = subprocess.Popen(cmds[i], stdin=ps[i-1].stdout, stdout=subprocess.PIPE, shell=shell) | |
for i in range(ncmds-1): | |
ps[i].stdout.close() | |
# Return standard output: use decode to convert Python3's byte object to string (is Python2 compatible) | |
if content: | |
return ps[-1].communicate(input=content.encode())[0].decode() | |
else: | |
return ps[-1].communicate()[0].decode() | |
def ncheck_error_output(cmds, shell=False):
    """Run a pipeline of commands and return (stderr, stdout) of the last one.

    `cmds` may be given in any of these equivalent forms:
        "ls -lt | sort -r"
        [ "ls -lt", "sort -r" ]
        [ ["ls", "-lt"], ["sort", "-r"] ]
    All of them are normalised to the last form, which is what
    subprocess.Popen receives.

    Parameters:
        cmds:  the pipeline, in one of the forms above.
        shell: forwarded unchanged to every subprocess.Popen call.

    Returns:
        A tuple (stderr_text, stdout_text) for the LAST command only, both
        decoded to strings. Note the order: stderr comes first. The stderr
        of the earlier commands is discarded.
    """
    # Local import so this block does not rely on the module-level imports.
    import os

    pipeline = [_split_if_string(cmd) for cmd in _split_if_string(cmds, "|")]
    PIPE = subprocess.PIPE
    last_index = len(pipeline) - 1

    # Upstream stderr goes to the null device. Capturing it in a pipe and
    # then closing that pipe would kill any upstream command that writes to
    # stderr (SIGPIPE).
    with open(os.devnull, "wb") as devnull:
        procs = []
        upstream = None  # the first command inherits this process's stdin
        for index, argv in enumerate(pipeline):
            err_target = PIPE if index == last_index else devnull
            proc = subprocess.Popen(argv, stdin=upstream, stdout=PIPE,
                                    stderr=err_target, shell=shell)
            if procs:
                # The child now owns the read end; drop our copy.
                procs[-1].stdout.close()
            procs.append(proc)
            upstream = proc.stdout

        last_out, last_err = procs[-1].communicate()

        # Reap the upstream commands so they do not linger as zombies.
        for proc in procs[:-1]:
            proc.wait()

    # communicate() yields bytes on Python 3; callers split on text.
    return last_err.decode(), last_out.decode()
def _is_sequence(var): | |
return type(var) in (type([]),type(())) | |
def _split_if_string(var, sep=None):
    """Return `var` untouched if it is already a sequence, else `var.split(sep)`.

    "Sequence" is whatever _is_sequence() accepts. With the default
    sep=None the split is on runs of whitespace.
    """
    if _is_sequence(var):
        return var
    return var.split(sep)
def print_verbose(verbose, msg):
    """Write a timestamped progress line to stderr when `verbose` is true.

    The line has the form "# VERBOSE:  YYYY-MM-DD HH:MM:SS <msg>".
    Nothing is written when `verbose` is false.
    """
    if not verbose:
        return
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print("# VERBOSE: ", stamp, msg, file=sys.stderr)
def get_slurm_users(days_str):
    """Return the sorted user names reported by sacct for the last `days_str` days.

    `days_str` is the number of days as a string; it is converted with
    int(). The start date handed to sacct is the local date that many days
    before now. Each non-blank output line, stripped of surrounding
    whitespace, is taken as one user name; duplicates are removed.
    """
    seconds_per_day = 24 * 3600
    cutoff = time.time() - int(days_str) * seconds_per_day
    since = time.strftime("%Y-%m-%d", time.localtime(cutoff))

    # Sacct command to list all users for jobs submitted since 'since'
    sacct_cmd = [['sacct', '-anX', '-S', since, '-o', 'user%16']]
    listing = ncheck_output(sacct_cmd)

    names = {line.strip() for line in listing.split("\n")}
    names.discard("")  # blank lines carry no user
    return sorted(names)
def get_login_users(days_str):
    """Return the sorted user names seen in `lastlog -t days_str` on the login nodes.

    The command is run through clush against the '@login' node group. Each
    output line is split on whitespace; lines with fewer than three fields
    are ignored, the second field is taken as the user name, and the
    'Username' header entries are dropped.

    Parameters:
        days_str: number of days, as a string, passed to `lastlog -t`.
    """
    cmd = [['clush', '-w', '@login', 'lastlog', '-t', days_str]]
    _err, out = ncheck_error_output(cmd)

    # The helper may hand back raw bytes; splitting bytes on a str separator
    # raises TypeError on Python 3, so normalise to text first.
    if isinstance(out, bytes):
        out = out.decode()

    users = set()
    for line in out.split("\n"):
        fields = line.split(None, 2)
        if len(fields) < 3:
            continue  # blank or malformed line
        user = fields[1]
        if user != 'Username':
            users.add(user)
    return sorted(users)
def get_user_info(users):
    """Return sorted (user, email, name) tuples for the given user names.

    Duplicate names are collapsed. A user is skipped when it has no passwd
    entry, or when its GECOS field does not contain " {". Otherwise the
    text before the first " {" is the name, and the text after it, with
    trailing spaces and "}" characters stripped, is the email.
    """
    records = []
    # Use set() to remove duplicates
    for login in set(users):
        try:
            gecos = pwd.getpwnam(login).pw_gecos
        except KeyError:
            continue  # unknown account

        name, marker, remainder = gecos.partition(" {")
        if not marker:
            continue  # GECOS is not in the "name {email}" form

        records.append((login, remainder.rstrip(" }"), name))

    records.sort()
    return records
def is_netid_format(name):
    """Return True when `name` has the NetID shape: three letters, then five digits.

    Example of a matching name: "abc12345". Letters may be upper or lower
    case.
    """
    return re.match('^[a-zA-Z]{3}[0-9]{5}$', name) is not None
def get_all_users():
    """Return the sorted names of all passwd accounts that look like NetIDs.

    An account is included when its uid is at least 1000 and its name
    passes is_netid_format(). No activity check is made here, so the list
    holds active and inactive users alike.
    """
    return sorted(
        rec.pw_name
        for rec in pwd.getpwall()
        if rec.pw_uid >= 1000 and is_netid_format(rec.pw_name)
    )
def check_inactive_ldap(users):
    """Return, sorted, the members of `users` for which Is_Netid_Active() is false."""
    missing = [user for user in users if not Is_Netid_Active(user)]
    missing.sort()
    return missing
#----------------------------------------------------------------------- | |
# Main | |
#----------------------------------------------------------------------- | |
def main():
    """Command-line entry point: print users who are inactive everywhere.

    Parses `[-v] DAYS` from sys.argv, then narrows the list of all
    NetID-style accounts down to those that
      1. have no Slurm jobs in the past DAYS,
      2. have no login on the login nodes in the past DAYS, and
      3. have no entry in LDAP,
    and prints one "netid email name" line per remaining user to stdout.
    Progress messages go to stderr when -v is given. Invalid arguments
    end the program through Usage().
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'v')
    except getopt.GetoptError as err:
        Usage("  ERROR: %s" % err)
    verbose = '-v' in dict(opts)

    if not args:
        Usage()

    # Check for valid day string
    days_str = args[0]
    try:
        days = int(days_str)
    except ValueError:
        Usage("  ERROR: Could not parse the number of days (%s)" % days_str)
    if days < 0:
        Usage("  ERROR: The number of days must not be negative (%s)" % days_str)

    # Get all users. This will include both active and inactive users.
    print_verbose(verbose, "Reading list of all users (active and inactive)")
    users_all = get_all_users()
    print_verbose(verbose, "Number of users (active and inactive) %d" % len(users_all))

    # Get list of users who have submitted slurm jobs in the past days
    print_verbose(verbose, "Reading slurm users (this may take a minute or two)")
    users_slurm = get_slurm_users(days_str)
    print_verbose(verbose, "Number of slurm users %d" % len(users_slurm))

    # Get list of users who have logged into one of the login nodes in the past days
    print_verbose(verbose, "Reading login node users")
    users_login = get_login_users(days_str)
    print_verbose(verbose, "Number of login users %d" % len(users_login))

    # Filter out users who have run jobs or performed logins
    print_verbose(verbose, "Filter out active users (those who have run jobs or logged in)")
    users_inactive = set(users_all) - set(users_slurm) - set(users_login)
    print_verbose(verbose, "Number of inactive users %d" % len(users_inactive))

    # Check inactive users, return those with no ldap entry
    print_verbose(verbose, "Filter out ldap-active users (those who are active in ldap)")
    users_inactive_no_ldap = check_inactive_ldap(users_inactive)
    print_verbose(verbose, "Number of inactive users with no ldap account %d" % len(users_inactive_no_ldap))

    # Get user info; accounts without a usable passwd/GECOS record are dropped
    print_verbose(verbose, "Get info for final inactive users")
    userinfo = get_user_info(users_inactive_no_ldap)

    print("# List of users who have not used cluster in %s days and are not in ldap" % days_str)
    for netid, email, name in userinfo:
        print(netid, email, name)


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == "__main__":
    main()