diff --git a/cleanup-roster-html.py b/cleanup-roster-html.py
deleted file mode 100644
index 0623a40..0000000
--- a/cleanup-roster-html.py
+++ /dev/null
@@ -1,187 +0,0 @@
-#!/usr/bin/python3
-import sys, argparse, fileinput, logging
-import csv, re
-from enum import Enum, auto
-
-def is_hot_major(m, checksecond = False):
- if checksecond:
- # some records are appended " as Second" and some " Second"
- if not m.endswith("Second"):
- return ""
-
- if m.startswith("Computer Science & Engineering"):
- major = "CSE"
- elif m.startswith("Computer Science"):
- major = "CS"
- elif m.startswith("Computer Engineering"):
- major = "CompE"
- elif m.startswith("Electrical Engineering"):
- major = "EE"
- else:
- if m.startswith("Computer"):
- logging.warning(f"Program '{m}' starts with Computer.")
- major = ""
- return major
-
-def get_major_from_program(prog):
- m = re.findall(r"(?: -|/)([^/]+)", prog)
-
- assert len(m) > 0
-
- major = is_hot_major(m[0])
-
- if major == "":
- if len(m) > 1:
- for p in m[1:]:
- major = is_hot_major(p, True)
- if major:
- break
- if major == "":
- major = m[0]
- logging.debug(f"{m} ==> {major}")
- return major
-
-class State(Enum):
- START = auto()
- ROW = auto()
-
-class Students:
-
- def __init__(self):
- self.name_to_idx = {}
- self.student_list = []
- self.field_list = []
- self.output_fields = ["section", "id", "netid", "name", "email"]
-
- def add_header(self, line):
- if len(self.name_to_idx):
- return
- m = re.findall(r'
]*>(.*?) | ', line, re.IGNORECASE)
- idx = 0
- self.field_list = []
- for f in m:
- shortname = f.split(maxsplit=1)[0].lower()
- self.name_to_idx[shortname] = idx
- self.field_list.append(shortname)
- idx += 1
- logging.info("Field names are: " + ','.join(self.field_list))
-
- def add(self, line):
- # replace &
- logging.debug(line)
- line1 = re.sub(r"&", "&", line)
- # remove br tag
- # s = re.sub(r"
", "", s)
- m = re.findall(r']*>(.*?) | ', line1, re.IGNORECASE)
- if m:
- if len(m) != len(self.field_list):
- logging.warning("The number of fields does not seem correct: "
- + ','.join(m))
- self.student_list.append(m)
- else:
- self.add_header(line1)
-
- def __iter__(self):
- self.idx = 0
- return self
-
- def __next__(self):
- if self.idx >= len(self.student_list):
- raise StopIteration
- # print(self.idx, self.student_list[self.idx])
- record = [ self.student_list[self.idx][self.name_to_idx[f]] for f in self.output_fields ]
- self.idx += 1
- return record
-
- def __str__(self):
- # show all fields
- return '\n'.join([ str(s) for s in self.student_list])
-
- def set_major(self):
- idx = self.name_to_idx['program']
- self.name_to_idx['major'] = len(self.field_list)
- self.field_list.append('major')
- for s in self.student_list:
- s.append(get_major_from_program(s[idx]))
-
- def set_fields(self, fsel):
- if len(fsel) == 0:
- return
-
- # output all fields
- if len(fsel) == 1 and fsel[0] == 'all':
- self.output_fields = list(self.field_list)
- return
-
- self.output_fields = []
- self.add_fields(fsel)
-
- def add_fields(self, fields):
- for f in [ f.lower() for f in fields]:
- assert f in self.field_list
- self.output_fields.append(f)
-
-def write_csv(file, students, nl = None):
- if nl is None:
- csvwriter = csv.writer(file)
- else:
- csvwriter = csv.writer(file, lineterminator=nl)
- for s in students:
- csvwriter.writerow(s)
-
-def is_tr(s):
- return s.startswith('') or s.startswith('')
-
-parser = argparse.ArgumentParser(description='Clean up downloaded roster from HuskyCT. Nov 2022.')
-parser.add_argument('infiles', nargs='*', default=[], help='Input files.')
-parser.add_argument('-o', nargs='?', default='', const='', help='Output file.')
-parser.add_argument('-f', nargs='+', default=[], help='Add the list of fields to the default list')
-parser.add_argument('--fields', nargs='+', default=[], help='Set the list of fields to print.')
-parser.add_argument("-v", action='count', default=0, help='Verbose level.')
-
-args = parser.parse_args()
-
-if args.v == 1:
- logging.basicConfig(level=logging.INFO)
-elif args.v > 1:
- logging.basicConfig(level=logging.DEBUG)
-
-logging.debug(args)
-
-students = Students()
-
-state = State.START
-row = ''
-try:
- for line in fileinput.input(args.infiles):
- # remove spaces at the end of the line, including newline
- line = line.rstrip()
-
- if state == State.ROW:
- if is_end_of_row(line):
- students.add(row)
- state = State.START
- else:
- row += line
- continue
-
- # looking for a row
- if is_tr(line):
- row = line
- state = State.ROW
-
-except FileNotFoundError as e:
- exit(1)
-
-students.set_major()
-students.set_fields(args.fields)
-students.add_fields(args.f)
-
-if args.o != '':
- with open(args.o, 'w', newline='') as csvfile:
- write_csv(csvfile, students)
-else:
- write_csv(sys.stdout, students, '\n')