Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add option to exclude top-level directories
  • Loading branch information
root committed Dec 18, 2020
1 parent a4a6a4f commit b47605d
Showing 1 changed file with 65 additions and 8 deletions.
73 changes: 65 additions & 8 deletions gpfs-scan-files.py
Expand Up @@ -33,6 +33,11 @@ DEF_OUTDIR = "/gpfs/scratchfs1/admin/gpfs-scan-files/tmp"
THRESHOLD = 1000000000000 # Highlight sizes over 1TB


# Top-level directory names that are always excluded from mmapplypolicy when
# the list of directories to include is generated automatically (see
# get_ok_dirs). Entries are compared against bare names, not full paths.
ALWAYS_EXCLUDE_DIRS = ['.snapshots']


#-----------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------
Expand Down Expand Up @@ -69,7 +74,34 @@ def _ncheck_output(cmds):
# Return the node name of the machine this script is running on
def get_hostname():
    uname_fields = os.uname()
    # Field 1 of the uname result is the node (host) name
    return uname_fields[1]

def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, include_dir_file, cmdonly=False):
# Write every entry of include_file_list, one per line, to the file
# "include_file_list.txt" inside out_dir, and return that file's path
def write_include_file_list(out_dir, include_file_list):
    list_path = out_dir + "/include_file_list.txt"
    with open(list_path, "w") as handle:
        # One entry per line, each terminated by a newline
        handle.writelines(str(entry) + "\n" for entry in include_file_list)
    return list_path

# Return list of strings from file, one per line. Blank lines and comment
# lines (first non-blank character is '#') are omitted. A trailing
# "# comment" is removed together with the whitespace in front of it.
def read_file_list(fname):
    file_list = []
    with open(fname) as fin:
        for line in fin:
            # Cut at the first '#' and only then trim, so that an entry such
            # as "dir   # note" yields "dir" and not "dir   "
            entry = line.split("#", 1)[0].strip()
            if entry:
                file_list.append(entry)
    return file_list

# Return the paths of the real (non-symlink) directories directly under
# top_dir whose names are neither in exclude_dirs nor in ALWAYS_EXCLUDE_DIRS.
# Trailing slashes on top_dir are ignored. Paths are returned in the order
# os.listdir() reports the entries.
def get_ok_dirs(top_dir, exclude_dirs):
    trimmed = top_dir.rstrip("/")
    if top_dir and not trimmed:
        # top_dir consisted only of slashes, i.e. it names the filesystem
        # root; it must not collapse to "" because os.listdir("") fails
        trimmed = "/"
    ok_dirs = []
    for item in os.listdir(trimmed):
        # Test the name first so that excluded entries are never stat()ed
        if item in exclude_dirs or item in ALWAYS_EXCLUDE_DIRS:
            continue
        path = os.path.join(trimmed, item)
        if os.path.isdir(path) and not os.path.islink(path):
            ok_dirs.append(path)
    return ok_dirs

def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, include_file_list, cmdonly=False):
# Ensure we are running mmapplypolicy on the correct host
hostname = get_hostname()
if not hostname in MMAPPLY_HOSTS and not cmdonly:
Expand Down Expand Up @@ -99,10 +131,12 @@ def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, inclu
# -i File list list of directories to include. Must exclude .snapshot from this list. We also
# exclude /gpfs/scratchfs/BROKEN, which contains borked tmp/ and tmp_ard/ - these are broken
# and cause mmapplypolicy to hang indefinitely.
# -x File list of top level directories to include.
# -a4 Number of threads to use. Default is 2.
if include_dir_file==None:
if not include_file_list:
cmd = "%(exefile)s %(target_dir)s -s %(temp_dir)s -I defer -P %(policy_file)s -f %(out_dir)s -a4" % locals()
else:
include_dir_file = write_include_file_list(out_dir, include_file_list)
cmd = "%(exefile)s %(target_dir)s -s %(temp_dir)s -I defer -P %(policy_file)s -f %(out_dir)s -i %(include_dir_file)s -a4" % locals()
if cmdonly:
print ("mmapplypolicy command is (%s)" % cmd)
Expand Down Expand Up @@ -262,7 +296,7 @@ def add_date(datetime, days):
# Read arguments from command line
def parse_args(args):
try:
opts,args = getopt.getopt(sys.argv[1:],'s:p:D:C:H:hId:o:i:q')
opts,args = getopt.getopt(sys.argv[1:],'s:p:D:C:H:hId:o:i:qx:')
except getopt.GetoptError as e:
Usage(" ERROR: " + str(e))
opts = dict(opts)
Expand All @@ -277,6 +311,8 @@ def parse_args(args):
Usage("You must specify a database using -D when using -I")
if '-D' in opts and not ('-C' in opts or '-H' in opts or '-I' in opts or '-p' in opts):
Usage("You must use either -C, -H, -p, or -I, when using -D")
if '-i' in opts and '-x' in opts:
Usage("You cannot use both -i and -x")
opts['writedb'] = (('-s' in opts) or ('-p' in opts)) and ('-D' in opts)
opts['readdb' ] = ('-D' in opts) and ('-C' in opts or '-H' in opts)
return opts, args
Expand All @@ -298,6 +334,9 @@ def Usage(msg=None):
-o Output directory for mmapplypolicy, must be on scratch partition.
Only valid with -s option. Default value is %s
-q With -s, print mmapplypolicy command only
-x DIR[,DIR,...] List of directories under DIRECTORY (-s option) that will be excluded.
-i FILE File with list of top-level directories under DIRECTORY (-s option) that
will be searched. Others top-level directories are ignored.
""" % DEF_OUTDIR )
if msg:
print();print(msg);print()
Expand Down Expand Up @@ -327,31 +366,49 @@ def read_compare(dbfile,datetime):
def main():

opts, args = parse_args(sys.argv[1:])
# Read DB records corresponding to given date/time
if '-d' in opts:
datetime = opts['-d']
db_read_datetime = datetime # used to retreive DB data
# Read most recent DB records
else:
datetime = time.strftime("%Y-%m-%d %H:%M:%S")
db_read_datetime = None


# Initialize database
# Initialize a new, empty database
if '-I' in opts:
import DB
db = DB.DB(opts['-D'])
db.create_tables()

# Scan directories with mmapplypolicy, and write to list.all-files in out_dir
if '-s' in opts:
out_dir = opts['-o'] if '-o' in opts else tempfile.mkdtemp(DEF_OUTDIR)
# Write output into opts['-o']
if '-o' in opts:
out_dir = opts['-o']
# Write output into default location
else:
if not os.path.isdir(DEF_OUTDIR):
os.makedirs(DEF_OUTDIR)
out_dir = DEF_OUTDIR
temp_dir = out_dir + "/tmp"
policy_file = out_dir + "/policy_file"
target_dir = opts['-s']
include_dir_file = opts['-i'] if '-i' in opts else None
rc = run_mmapplypolicy(MMAPPLYPOLICY,target_dir,policy_file,temp_dir,out_dir, include_dir_file, '-q' in opts)
#rc = run_test(temp_dir,out_dir)
# Read user-supplied file with list of directories to include in search
if '-i' in opts:
include_file_list = read_file_list(opts['-i'])
# Build list of directories to include in search by excluding the
# directories in opts['-x'] from the top level of 'target_dir'
elif '-x' in opts:
include_file_list = get_ok_dirs(target_dir, opts['-x'].split(','))
# Call mmapplypolicy - this is where 99% of the execution time is spent
# Results are written to out_dir
rc = run_mmapplypolicy(MMAPPLYPOLICY,target_dir,policy_file,temp_dir,out_dir,include_file_list, '-q' in opts)
#rc = run_test(temp_dir,out_dir) # For testing
if rc>0: print_warn(" mmapplypolicy return code is (%d)" % rc)

# Process file from previous section to produce usage per user summary
if '-p' in opts:
# Get list of total files sizes per owner
inputfile = opts['-p']
Expand Down

0 comments on commit b47605d

Please sign in to comment.