diff --git a/gpfs-scan-files.py b/gpfs-scan-files.py index 251f08b..28743c3 100755 --- a/gpfs-scan-files.py +++ b/gpfs-scan-files.py @@ -33,6 +33,11 @@ DEF_OUTDIR = "/gpfs/scratchfs1/admin/gpfs-scan-files/tmp" THRESHOLD = 1000000000000 # Highlight sizes over 1TB +# This directory is always excluded from mmapplypolicy when using an +# automatically generated list of directories to include +ALWAYS_EXCLUDE_DIRS = ['.snapshots'] + + #----------------------------------------------------------------------- # Functions #----------------------------------------------------------------------- @@ -69,7 +74,34 @@ def _ncheck_output(cmds): def get_hostname(): return os.uname()[1] -def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, include_dir_file, cmdonly=False): +# Write list of files in include_file_list to file in out_dir and return file name +def write_include_file_list(out_dir, include_file_list): + fpath = out_dir + "/include_file_list.txt" + with open(fpath,"w") as fout: + for fname in include_file_list: + print(fname, file=fout) + return fpath + +# Return list of strings from file; each line a string, omit blanks and comment lines +def read_file_list(fname): + file_list = [] + with open(fname) as fin: + for line in fin: + sline = line.strip() + if not sline or sline[0]=='#': continue + file_list.append(sline.split("#")[0]) # Remove trailing comments; NOTE(review): whitespace before the '#' is kept + return file_list + +def get_ok_dirs(top_dir, exclude_dirs): + top_dir = top_dir.rstrip("/") + ok_dirs = [] + for item in os.listdir(top_dir): + path = top_dir + "/" + item + if os.path.isdir(path) and not os.path.islink(path) and not item in exclude_dirs and not item in ALWAYS_EXCLUDE_DIRS: + ok_dirs.append(path) + return ok_dirs + +def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, include_file_list, cmdonly=False): # Ensure we are runnning mmapplypolicy on the correct host hostname = get_hostname() if not hostname in MMAPPLY_HOSTS and not cmdonly: @@ -99,10 +131,12 @@ def 
run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir, inclu # -i File list list of directories to include. Must exclude .snapshot from this list. We also # exclude /gpfs/scratchfs/BROKEN, which contains borked tmp/ and tmp_ard/ - these are broken # and cause mmapplypolicy to hang indefinitely. + # Note: this script's own -x option (directories to exclude) is not passed to mmapplypolicy; it only shapes the -i file list. # -a4 Number of threads to use. Default is 2. - if include_dir_file==None: + if not include_file_list: cmd = "%(exefile)s %(target_dir)s -s %(temp_dir)s -I defer -P %(policy_file)s -f %(out_dir)s -a4" % locals() else: + include_dir_file = write_include_file_list(out_dir, include_file_list) cmd = "%(exefile)s %(target_dir)s -s %(temp_dir)s -I defer -P %(policy_file)s -f %(out_dir)s -i %(include_dir_file)s -a4" % locals() if cmdonly: print ("mmapplypolicy command is (%s)" % cmd) @@ -262,7 +296,7 @@ def add_date(datetime, days): # Read arguments from command line def parse_args(args): try: - opts,args = getopt.getopt(sys.argv[1:],'s:p:D:C:H:hId:o:i:q') + opts,args = getopt.getopt(sys.argv[1:],'s:p:D:C:H:hId:o:i:qx:') except getopt.GetoptError as e: Usage(" ERROR: " + str(e)) opts = dict(opts) @@ -277,6 +311,8 @@ def parse_args(args): Usage("You must specify a database using -D when using -I") if '-D' in opts and not ('-C' in opts or '-H' in opts or '-I' in opts or '-p' in opts): Usage("You must use either -C, -H, -p, or -I, when using -D") + if '-i' in opts and '-x' in opts: + Usage("You cannot use both -i and -x") opts['writedb'] = (('-s' in opts) or ('-p' in opts)) and ('-D' in opts) opts['readdb' ] = ('-D' in opts) and ('-C' in opts or '-H' in opts) return opts, args @@ -298,6 +334,9 @@ def Usage(msg=None): -o Output directory for mmapplypolicy, must be on scratch partition. Only valid with -s option. Default value is %s -q With -s, print mmapplypolicy command only + -x DIR[,DIR,...] List of directories under DIRECTORY (-s option) that will be excluded. 
+ -i FILE File with list of top-level directories under DIRECTORY (-s option) that + will be searched. Others top-level directories are ignored. """ % DEF_OUTDIR ) if msg: print();print(msg);print() @@ -327,15 +366,17 @@ def read_compare(dbfile,datetime): def main(): opts, args = parse_args(sys.argv[1:]) + # Read DB records corresponding to given date/time if '-d' in opts: datetime = opts['-d'] db_read_datetime = datetime # used to retreive DB data + # Read most recent DB records else: datetime = time.strftime("%Y-%m-%d %H:%M:%S") db_read_datetime = None - # Initialize database + # Initialize a new, empty database if '-I' in opts: import DB db = DB.DB(opts['-D']) @@ -343,15 +384,31 @@ def main(): # Scan directories with mmapplypolicy, and write to list.all-files in out_dir if '-s' in opts: - out_dir = opts['-o'] if '-o' in opts else tempfile.mkdtemp(DEF_OUTDIR) + # Write output into opts['-o'] + if '-o' in opts: + out_dir = opts['-o'] + # Write output into default location + else: + if not os.path.isdir(DEF_OUTDIR): + os.makedirs(DEF_OUTDIR) + out_dir = DEF_OUTDIR temp_dir = out_dir + "/tmp" policy_file = out_dir + "/policy_file" target_dir = opts['-s'] - include_dir_file = opts['-i'] if '-i' in opts else None - rc = run_mmapplypolicy(MMAPPLYPOLICY,target_dir,policy_file,temp_dir,out_dir, include_dir_file, '-q' in opts) - #rc = run_test(temp_dir,out_dir) + # Read user-supplied file with list of directories to include in search + if '-i' in opts: + include_file_list = read_file_list(opts['-i']) + # Build list of directories to include in search, by + # excluding directories in opts['-x'] from top level of 'target_dir' + elif '-x' in opts: + include_file_list = get_ok_dirs(target_dir, opts['-x'].split(',')) + # Call mmapplypolicy - this is where 99% of execution time is spent + # Results are written to out_dir. NOTE(review): include_file_list looks unset here when neither -i nor -x is given - confirm + rc = run_mmapplypolicy(MMAPPLYPOLICY,target_dir,policy_file,temp_dir,out_dir,include_file_list, '-q' in opts) + #rc = 
run_test(temp_dir,out_dir) # For testing if rc>0: print_warn(" mmapplypolicy return code is (%d)" % rc) + # Process file from previous section to produce usage per user summary if '-p' in opts: # Get list of total files sizes per owner inputfile = opts['-p']