From 4f47bc1d0b9b99a3e00d77cd83fb0221c50cd472 Mon Sep 17 00:00:00 2001 From: Jerry Shi Date: Tue, 15 Nov 2022 22:16:25 -0500 Subject: [PATCH] remove hard coded file name. update readme --- cleanup-roster.py | 11 ++++++----- readme.MD | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/cleanup-roster.py b/cleanup-roster.py index 9efc0e5..89ab14e 100644 --- a/cleanup-roster.py +++ b/cleanup-roster.py @@ -5,9 +5,9 @@ def is_hot_major(m, checksecond = False): if checksecond: + # some records are appended " as Second" and some " Second" if not m.endswith("Second"): return "" - m = m[-10:] if m.startswith("Computer Science & Engineering"): major = "CSE" @@ -24,8 +24,9 @@ def is_hot_major(m, checksecond = False): return major def get_major_from_program(prog): - m = re.findall(r"[-/]([^/]+)", prog) + m = re.findall(r"(?: -|/)([^/]+)", prog) + print(m) assert len(m) > 0 major = is_hot_major(m[0]) @@ -68,6 +69,7 @@ def add_header(self, line): def add(self, line): # replace & + logging.debug(line) line1 = re.sub(r"&", "&", line) # remove br tag # s = re.sub(r"", "", s) @@ -134,7 +136,7 @@ def is_tr(s): def is_end_of_row(s): return s.startswith('') or s.startswith('') -parser = argparse.ArgumentParser(description='Clean up downloaded roster from HuskyCT') +parser = argparse.ArgumentParser(description='Clean up downloaded roster from HuskyCT. Nov 2022.') parser.add_argument('infiles', nargs='*', default=[], help='Input files.') parser.add_argument('-o', nargs='?', default='', const='', help='Output file.') parser.add_argument('-f', nargs='+', default=[], help='Add the list of fields to the default list') @@ -155,8 +157,7 @@ def is_end_of_row(s): state = State.START row = '' try: -# for line in fileinput.input(args.infiles): - for line in fileinput.input("ps.xls"): + for line in fileinput.input(args.infiles): # remove spaces at the end of the line, including newline line = line.rstrip() diff --git a/readme.MD b/readme.MD index fc36fca..fda41e3 100644 --- a/readme.MD +++ b/readme.MD @@ -46,3 +46,22 @@ cat *.xls | py cleanup-roster.py -o all.csv # save student records in separate csv files dir *.xls | foreach { py cleanup-roster.py $_ -o ($_.name -replace '.xls','.csv') } ``` + +## Notes + +### Nov 2022 + +The format of downloaded files keeps changing, mainly in "Program and Plan" field. +The format in Nov 2022 is: + + - <\r> + - <\r>/ + - <\r>// + - <\r>/ as Second + - <\r>/ Second + +The school can be : + + Engineering + Liberal Arts & Sciences + CCS Non-Degree