diff --git a/DB.py b/DB.py new file mode 100644 index 0000000..e542f29 --- /dev/null +++ b/DB.py @@ -0,0 +1,51 @@ +# Provide stateful DB access accessible across functions +import sqlite3 +class DB: + def __init__(self,dbpath): + self.db = sqlite3.connect(dbpath) + self.dbc = self.db.cursor() + def write(self,sqls): + # If sqls is a single string, convert to list of one + if not type(sqls) in ( type(()), type([]) ): sqls = [ sqls ] + for sql in sqls: + self.dbc.execute(sql) + self.db.commit() + def read(self,sql): + self.dbc.execute(sql) + return self.dbc.fetchall() + def close(self): + self.db.close() + # Only needed when creating the database + def create_tables(self): + CREATE_TABLE_SQLS = [ + """CREATE TABLE IF NOT EXISTS user_footprint ( + datetime text, + userid integer, + user text, + rank integer, + count integer, + footprint integer, + primary key (datetime,user) + ); + """, + """CREATE TABLE IF NOT EXISTS homedir_footprint ( + datetime text, + dir text, + rank integer, + count integer, + footprint integer, + primary key (datetime,dir) + ); + """, + """CREATE TABLE IF NOT EXISTS shareddir_footprint ( + datetime text, + dir text, + rank integer, + count integer, + footprint integer, + primary key (datetime,dir) + ); + """ + ] + self.write(CREATE_TABLE_SQLS) + diff --git a/User.py b/User.py new file mode 100644 index 0000000..f235278 --- /dev/null +++ b/User.py @@ -0,0 +1,34 @@ +#!/usr/bin/python + +import subprocess +def _ncheck_output(cmds): + if type(cmds )!=type([]): cmds = [cmds.split()] + if type(cmds[0])!=type([]): cmds = [cmds] + ncmds = len(cmds) + ps = ncmds*[None] + for i,cmd in enumerate(cmds): + if i==0: + ps[i] = subprocess.Popen(cmds[i], stdout=subprocess.PIPE, shell=False) + else: + ps[i] = subprocess.Popen(cmds[i], stdin=ps[i-1].stdout, stdout=subprocess.PIPE, shell=False) + for i in range(ncmds-1): + ps[i].stdout.close() + return ps[-1].communicate()[0] + +class User: + def __init__(self): + self.data = {} + self.is_loaded = False + def 
get_user(self,uid): + if not self.data: + self.load_data() + return self.data.get(int(uid),"N/A-" + str(uid)) + def load_data(self): + for line in _ncheck_output('ssh head1 getent passwd').split("\n"): + sline = line.strip() + if not sline or sline[0]=='#': continue + fields = sline.split(":") + self.data[int(fields[2])] = fields[0] + self.is_loaded = True + + diff --git a/gpfs-scan-files.py b/gpfs-scan-files.py new file mode 100755 index 0000000..cea3ac3 --- /dev/null +++ b/gpfs-scan-files.py @@ -0,0 +1,387 @@ +#!/usr/bin/python + +from __future__ import print_function + + +#----------------------------------------------------------------------- +# Todo +#----------------------------------------------------------------------- +# +# Learn how to redirect stdout and stderr into files +# + +#----------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------- +# Standard modules +import os,sys,time,getopt,tempfile,subprocess + +# Custom modules +import DB, User + + +#----------------------------------------------------------------------- +# Configuration +#----------------------------------------------------------------------- +MMAPPLYPOLICY = '/usr/lpp/mmfs/bin/mmapplypolicy' +TOTAL = '/usr/local/bin/total' +HTMLTEMPLATE = '/gpfs/scratchfs1/admin/gpfs-scan-files/html.template' +MMAPPLY_HOSTS = ['ddnnsd1.storrs.hpc.uconn.edu', 'ddnnsd2.storrs.hpc.uconn.edu'] + +DEF_OUTDIR = "/gpfs/scratchfs1/admin/gpfs-scan-files/tmp" + +THRESHOLD = 1000000000000 # Highlight sizes over 1TB + + +#----------------------------------------------------------------------- +# Functions +#----------------------------------------------------------------------- + +def print_error(msg): + print() + print(" ERROR:",msg) + print() + sys.exit() + +def print_warn(msg): + print() + print(" WARNING:",msg) + print() + +# Accepts either +# a single command represented by a single string +# a single command 
represented by an array of string components +# a series of commands represented by an array of an array of string components +def _ncheck_output(cmds): + if type(cmds )!=type([]): cmds = [cmds.split()] + if type(cmds[0])!=type([]): cmds = [cmds] + ncmds = len(cmds) + ps = ncmds*[None] + for i,cmd in enumerate(cmds): + if i==0: + ps[i] = subprocess.Popen(cmds[i], stdout=subprocess.PIPE, shell=False) + else: + ps[i] = subprocess.Popen(cmds[i], stdin=ps[i-1].stdout, stdout=subprocess.PIPE, shell=False) + for i in range(ncmds-1): + ps[i].stdout.close() + return ps[-1].communicate()[0] + +def get_hostname(): + return os.uname()[1] + +def run_mmapplypolicy(exefile, target_dir, policy_file, temp_dir, out_dir): + # Ensure we are runnning mmapplypolicy on the correct host + hostname = get_hostname() + if not hostname in MMAPPLY_HOSTS: + print_error(" When using the -s option, you must run on one of the DDN NSD nodes (%s)" % ",".join(MMAPPLY_HOSTS)) + # Create output directories + if not os.path.isdir(out_dir ): os.makedirs(out_dir ) + if not os.path.isdir(temp_dir): os.makedirs(temp_dir) + # Create policy file + with open(policy_file,"w") as fout: + print("RULE 'listall' list 'all-files'\nSHOW( varchar(access_time) || ' ' || varchar(kb_allocated) || ' ' || varchar(file_size) || ' ' || varchar(user_id) || ' ' || fileset_name )", file=fout) + # Run GPFS utility 'mmapplypolicy' + # Notes about mmapplypolicy + # - mmapplypolicy is the fastest way to get list of all files on scratch partition + # - mmapplypolicy reads the RULES file that we wrote to temp file above + # - Name of output data file is hard-coded in mmapplypolicy as "list.all-files" + # - Status message are written to stdout and stderr + # - It takes about 1.25 hours for mmapplypolicy to list all files on scratch disk + # - Column output in list.all-files is + # Inode_number, gen_number, snapshot_id, access_time, kb_allocated, file_size, "--", filename + # The first three and last two columns are always present, 
the other columns are determined + # from the SHOW policy command + cmd = "%(exefile)s %(target_dir)s -s %(temp_dir)s -I defer -P %(policy_file)s -f %(out_dir)s" % locals() + p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = p.communicate() + with open(out_dir + "/mm.out", "w") as fout: print(out,file=fout) + with open(out_dir + "/mm.err", "w") as ferr: print(err,file=ferr) + return p.returncode + +# Dummy version of run_mmapplypolicy(). Instead of generating new output file, it copies one +# from TEST_OUTPUT. So, you need to have that in place beforehand. +def run_test(temp_dir, out_dir): + TEST_OUTPUT = '/gpfs/scratchfs1/admin/gpfs-scan-files/testfiles/list.all-files' + # Create output directories + if not os.path.isdir(out_dir ): os.makedirs(out_dir ) + if not os.path.isdir(temp_dir): os.makedirs(temp_dir) + cmd = [ 'cp', TEST_OUTPUT, out_dir+"/list.all-files" ] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = p.communicate() + with open(out_dir + "/mm.out", "w") as fout: print(out,file=fout) + with open(out_dir + "/mm.err", "w") as ferr: print(err,file=ferr) + return p.returncode + +def csvrow(row): + return ",".join(['"%s"' % str(col) for col in row]) + +def htmlheader(headerstr): + row = [ str(col) for col in headerstr.split() ] + return "" + "".join(row) + "\n" + +def htmlrow(*row,**args): + tclass = args.get('tclass','t') + #newrow = [ str(col) for col in row ] + #return "" + "".join(newrow) + "" + return "" + ''.join( ["%s" % (tclass,i+1,str(col)) for i,col in enumerate(row)] ) + "\n" + +def ReadDB(dbfile,datetime=None): + db = DB.DB(dbfile) + # Get most recent time + if datetime==None: + [[datetime]] = db.read('select max(datetime) from user_footprint;') + file_list = db.read("select footprint,count,userid,user from user_footprint where datetime='%s' order by footprint desc;" % datetime) + homedirs = db.read("select footprint,count,dir from homedir_footprint where 
def WriteDB(dbfile, datetime, file_list, homedirs, shareddirs):
    """Persist one snapshot of the three footprint tables.

    Values are interpolated into the SQL text, matching the raw-string
    interface of DB.write().  Rank is the 1-based position in each
    (already sorted) input list.
    """
    db = DB.DB(dbfile)
    # Table user_footprint
    for idx, (size, count, uid, username) in enumerate(file_list, start=1):
        db.write("insert into user_footprint values('%s','%s','%s','%s','%s','%s');"
                 % (datetime, uid, username, idx, count, size))
    # Table dir_footprint
    for idx, (size, count, dirname) in enumerate(homedirs, start=1):
        db.write("insert into homedir_footprint values('%s','%s','%s','%s','%s');"
                 % (datetime, dirname, idx, count, size))
    for idx, (size, count, dirname) in enumerate(shareddirs, start=1):
        db.write("insert into shareddir_footprint values('%s','%s','%s','%s','%s');"
                 % (datetime, dirname, idx, count, size))
    db.close()


def WriteCSV(csvfile, datetime, file_list, growths, homedirs, shareddirs):
    """Write the three footprint tables, one after another, into csvfile.

    growths maps username -> byte growth since the previous snapshot;
    users without an entry are shown as '-'.
    """
    with open(csvfile, 'w') as fcsv:
        # Table user_footprint
        print('"User Space Used"', file=fcsv)
        print(csvrow('DateTime Rank UID User Count Growth Size'.split()), file=fcsv)
        for idx, (size, count, uid, username) in enumerate(file_list, start=1):
            print(csvrow([datetime, idx, uid, username, count,
                          growths.get(username, '-'), size]), file=fcsv)
        print(file=fcsv)

        # Table homedir_footprint
        print('"Home Directories"', file=fcsv)
        print(csvrow('DateTime Rank Home_Directory Count Size'.split()), file=fcsv)
        for idx, (size, count, dirname) in enumerate(homedirs, start=1):
            print(csvrow([datetime, idx, dirname, count, size]), file=fcsv)
        print(file=fcsv)

        # Table shareddir_footprint
        print('"Shared Directories"', file=fcsv)
        print(csvrow('DateTime Rank Shared_Directory Count Size'.split()), file=fcsv)
        for idx, (size, count, dirname) in enumerate(shareddirs, start=1):
            print(csvrow([datetime, idx, dirname, count, size]), file=fcsv)
        print(file=fcsv)
def WriteHTML(htmlfile, datetime, file_list, growths, homedirs, shareddirs, topn=100):
    """Render the three footprint tables into HTMLTEMPLATE and write htmlfile.

    Only the first topn users are listed individually; a final summary row
    totals every user.  NOTE(review): several markup literals below appear
    stripped to empty/whitespace strings (the table open/close tags) --
    recover them from history before trusting the HTML output.  Also assumes
    growths values are numeric; a '-' placeholder would raise in
    hilite_size() under Python 3.
    """
    with open(HTMLTEMPLATE) as fin:
        htmltemplate = fin.read()
    # Set datetime
    htmltemplate = htmltemplate.replace('{{TIMESTAMP}}', datetime)
    # Table user_footprint
    msg = "\n"
    msg += htmlheader('Rank UID User Count Growth Size Cumulative')
    cumsize = cumcount = 0
    for rank, (size, count, uid, username) in enumerate(file_list):
        cumsize += size
        cumcount += count
        growth = growths.get(username, '-')
        if rank + 1 <= topn:
            msg += htmlrow(rank + 1, uid, username, comma(count),
                           hilite_size(comma(growth), growth, THRESHOLD),
                           comma(size), comma(cumsize), tclass='t1')
    cumgrowth = sum(growths.values())
    # Summary row covering every user, not just the topn shown above
    msg += htmlrow(len(file_list), 'ALL', '', comma(cumcount), comma(cumgrowth), 0,
                   comma(cumsize), tclass='t1')
    msg += "\n"
    htmltemplate = htmltemplate.replace('{{USER_TABLE}}', msg)
    # Table homedir_footprint
    msg = "\n"
    msg += htmlheader('Rank Home_Directory Count Size Cumulative')
    cumsize = 0
    for rank, (size, count, dirname) in enumerate(homedirs):
        cumsize += size
        msg += htmlrow(rank + 1, dirname, comma(count), comma(size), comma(cumsize), tclass='t2')
    msg += "\n"
    htmltemplate = htmltemplate.replace('{{HOMEDIR_TABLE}}', msg)
    # Table shareddir_footprint
    msg = "\n"
    msg += htmlheader('Rank Shared_Directory Count Size Cumulative')
    cumsize = 0
    for rank, (size, count, dirname) in enumerate(shareddirs):
        cumsize += size
        msg += htmlrow(rank + 1, dirname, comma(count), comma(size), comma(cumsize), tclass='t3')
    msg += "\n"
    htmltemplate = htmltemplate.replace('{{SHAREDDIR_TABLE}}', msg)
    # Configure date navigation links
    htmltemplate = htmltemplate.replace('{{BEFORE}}', add_date(datetime, -1))
    htmltemplate = htmltemplate.replace('{{AFTER}}', add_date(datetime, 1))
    # Write final version of page
    with open(htmlfile, "w") as fout:
        fout.write(htmltemplate)


def comma(s):
    """Insert thousands separators into an int or a numeric string.

    Handles negative values and passes non-numeric placeholders such as
    '-' (used for "no growth data") through unchanged.
    """
    new = ''
    s = str(s)
    # Walk the characters right-to-left, excluding the first one (which may
    # be a sign), inserting a comma after every third digit.
    for i, c in enumerate(s[-1:0:-1]):
        new = c + new
        if (i % 3) == 2:
            new = "," + new
    # Re-attach the first character and drop a comma that may have landed
    # directly after a leading minus sign.
    return (s[0] + new).replace("-,", "-")


# Make string bold if size reaches or exceeds threshold
def hilite_size(strval, size, threshold):
    """Return strval wrapped in emphasis markup when |size| >= threshold.

    NOTE(review): the markup literals look stripped (the wrappers are empty
    strings), so currently every branch returns strval unchanged; recover
    the original tags (the comment above suggests <b>) from history.
    """
    if size >= threshold:
        return '' + strval + ''
    elif size <= -threshold:
        return '' + strval + ''
    else:
        return strval


# Reads in datetime string "%Y-%m-%d %H:%M:%S", but returns date string
def add_date(datetime, days):
    """Shift a 'YYYY-mm-dd HH:MM:SS' timestamp by +/-days; return 'YYYY-mm-dd'.

    Uses local time; a DST transition inside the shifted interval can move
    the result by an hour, which is harmless for day-level navigation links.
    """
    secs = time.mktime(time.strptime(datetime, "%Y-%m-%d %H:%M:%S")) + days * 24 * 3600
    return time.strftime("%Y-%m-%d", time.localtime(secs))


# Read arguments from command line
def parse_args(args):
    """Parse command-line options into a dict and validate combinations.

    Fix: the original parsed sys.argv[1:] and silently ignored the `args`
    parameter; the parameter is now honoured (main() already passes
    sys.argv[1:], so the script's behaviour is unchanged).
    Adds derived boolean keys 'writedb' and 'readdb' to the returned dict.
    """
    try:
        opts, args = getopt.getopt(args, 's:p:D:C:H:hId:o:')
    except getopt.GetoptError as e:
        Usage(" ERROR: " + str(e))
    opts = dict(opts)
    if not opts or '-h' in opts:
        Usage()  # Print usage
    if '-p' in opts and not (('-D' in opts) + ('-C' in opts) + ('-H' in opts)):
        Usage(" You must use -p with at least one of -D, -C and -H")
    if '-o' in opts and '-s' not in opts:
        Usage(" You can only use -o with the -s option")
    if '-I' in opts and '-D' not in opts:
        Usage("You must specify a database using -D when using -I")
    if '-D' in opts and not ('-C' in opts or '-H' in opts or '-I' in opts or '-p' in opts):
        Usage("You must use either -C, -H, -p, or -I, when using -D")
    # Derived flags: write to the DB after a scan/parse; read back for reports.
    opts['writedb'] = (('-s' in opts) or ('-p' in opts)) and ('-D' in opts)
    opts['readdb'] = ('-D' in opts) and ('-C' in opts or '-H' in opts)
    return opts, args
def Usage(msg=None):
    """Print the usage banner (and optional message), then exit.

    Exits with status 1 when a message (i.e. an error) is given, 0 for a
    plain -h/usage request.
    """
    print("""
    Usage: gpfs-scan-files.py [-s directory] [-p filelist] [-D dbname] [-C csvfile] [-H htmlfile] [-h] [-I]

    Options:
    -d DATETIME   Set datetime (format YYYY-mm-dd HH:MM:SS) to write to DB, CSV or HTML
                  when using -s and/or -D.
    -s DIRECTORY  Scan directory recursively using mmapplypolicy. Mmapplypolicy output
                  will be written to tmp/list.all-files, where tmp is a directory
                  created under the current directory (i.e., not the /tmp directory)
    -p MMOUTPUT   Parse mmapplyoutput and write to DB, CSV and/or HTML. MMOUTPUT
                  is the file created from the -s option above.
    -D DBNAME     Name of database file
    -I            Create new empty database file in DBNAME given by -D
    -o            Output directory for mmapplypolicy, must be on scratch partition.
                  Only valid with -s option. Default value is %s
""" % DEF_OUTDIR)
    if msg:
        print(); print(msg); print()
    sys.exit(1 if msg else 0)


def read_compare(dbfile, datetime):
    """Return {user: footprint growth} between `datetime` and the snapshot before it.

    Growth is computed only for users present in both snapshots.
    Fix: the original loop silently compared against the *last* snapshot
    when the requested datetime was the earliest (or missing); that is now
    a hard error.  The DB connection is also closed (it was leaked).
    """
    db = DB.DB(dbfile)
    # Get all datetimes to find the datetime previous to the requested one
    datetimes = db.read("select distinct(datetime) from user_footprint order by datetime;")
    prev_datetime = None
    found = False
    for [curr_datetime] in datetimes:
        if curr_datetime == datetime:
            found = True
            break
        prev_datetime = curr_datetime
    if not found or prev_datetime is None:
        db.close()
        print_error("Could not find previous datetime to %s" % datetime)
    # Get user footprints for the two datetimes
    files0 = dict(db.read("select user,footprint from user_footprint where datetime='%s' order by footprint desc;" % prev_datetime))
    files1 = dict(db.read("select user,footprint from user_footprint where datetime='%s' order by footprint desc;" % datetime))
    db.close()
    # Combine the two lists: growth per user seen in both snapshots
    return dict([(user, files1[user] - files0[user])
                 for user in set(files0.keys()).intersection(files1.keys())])


#-----------------------------------------------------------------------
# Main
#-----------------------------------------------------------------------
def main():
    """Drive scan (-s), parse (-p), DB init (-I) and report (-C/-H) phases."""
    opts, args = parse_args(sys.argv[1:])
    if '-d' in opts:
        datetime = opts['-d']
        db_read_datetime = datetime  # used to retrieve DB data
    else:
        datetime = time.strftime("%Y-%m-%d %H:%M:%S")
        db_read_datetime = None

    # Initialize database (local `import DB` removed -- already imported at top)
    if '-I' in opts:
        db = DB.DB(opts['-D'])
        db.create_tables()
        db.close()

    # Scan directories with mmapplypolicy, writing list.all-files in out_dir
    if '-s' in opts:
        # Fix: mkdtemp(DEF_OUTDIR) passed the output dir as the *suffix*,
        # so the temp dir was not created under DEF_OUTDIR at all.
        out_dir = opts['-o'] if '-o' in opts else tempfile.mkdtemp(dir=DEF_OUTDIR)
        temp_dir = out_dir + "/tmp"
        policy_file = out_dir + "/policy_file"
        target_dir = opts['-s']
        rc = run_mmapplypolicy(MMAPPLYPOLICY, target_dir, policy_file, temp_dir, out_dir)
        #rc = run_test(temp_dir, out_dir)
        if rc > 0:
            print_warn(" mmapplypolicy return code is (%d)" % rc)

    if '-p' in opts:
        # Get list of total file sizes per owner
        inputfile = opts['-p']
        file_list = []
        user = User.User()
        # .decode() added so the bytes from Popen split cleanly under Python 3
        for line in _ncheck_output([[TOTAL, "-s1r", "8", "6,n", inputfile]]).decode().split("\n"):
            sline = line.strip()
            if not sline or sline[0] == '#':
                continue
            uid, kb_allocated, count = sline.split()
            # Size is number of kb blocks allocated, plus 4096 per inode
            # (assume #files == #inodes)
            size = int(kb_allocated) * 1024 + int(count) * 4096
            file_list.append((size, count, uid, user.get_user(uid)))
        file_list.sort(key=lambda x: x[0], reverse=True)

        # Get total kb_allocated per home and shared directory
        homedirs, shareddirs = [], []
        for line in _ncheck_output([['sed', 's#/# #g', inputfile],
                                    [TOTAL, '-s1r', '13,14', '6,n', '-']]).decode().split("\n"):
            sline = line.strip()
            if not sline or sline[0] == '#':
                continue
            home_or_shared, name, kb_allocated, count = sline.split()
            size = int(kb_allocated) * 1024 + int(count) * 4096
            if home_or_shared == 'home':
                homedirs.append((size, count, name))
            elif home_or_shared == 'shared':
                shareddirs.append((size, count, name))
        homedirs.sort(key=lambda x: x[0], reverse=True)
        shareddirs.sort(key=lambda x: x[0], reverse=True)

    # Fix: default growth so -C/-H work even when no DB comparison ran
    # (previously a NameError for e.g. `-p file -C out.csv` without -D).
    growth = {}

    # Write data to database
    if opts['writedb']:
        WriteDB(opts['-D'], datetime, file_list, homedirs, shareddirs)

    # Read data from database
    if opts['readdb']:
        datetime, file_list, homedirs, shareddirs = ReadDB(opts['-D'], datetime=db_read_datetime)
        growth = read_compare(opts['-D'], datetime)  # Use datetime from previous search

    # Write three tables in one csv file/sheet
    if '-C' in opts:
        WriteCSV(opts['-C'], datetime, file_list, growth, homedirs, shareddirs)

    # Write three tables in HTML file
    if '-H' in opts:
        WriteHTML(opts['-H'], datetime, file_list, growth, homedirs, shareddirs)


if __name__ == "__main__":
    main()