#!/usr/bin/python3
"""Clean up a roster downloaded from HuskyCT and write it out as CSV.

The input is an HTML table whose rows are spread over several lines: a line
starting a row, lines holding the cells, and a line closing the row.  The
first row without data cells is taken as the header; each header cell's first
word (lower-cased) becomes the field name.  A derived ``major`` field is
computed from the ``program`` field.

Provenance: this text was recovered from the body of a deletion patch for
``cleanup-roster-html.py`` (commit subject "rm the old version").

NOTE(review): in the recovered text every HTML-like substring inside string
literals had been stripped (the cell regexes, the row-tag literals, the
``&amp;`` entity) and ``is_end_of_row`` was missing altogether.  The literals
below are reconstructed from what the surrounding code requires -- confirm
them against the original blob (index 0623a40) before relying on them.
"""
import argparse
import csv
import fileinput
import logging
import re
import sys
from enum import Enum, auto

# NOTE(review): reconstructed patterns, see module docstring.
HEADER_CELL_RE = re.compile(r'<th[^>]*>(.*?)</th>', re.IGNORECASE)
DATA_CELL_RE = re.compile(r'<td[^>]*>(.*?)</td>', re.IGNORECASE)

# Splits a program string into the parts that follow " -" or "/".
PROGRAM_PART_RE = re.compile(r"(?: -|/)([^/]+)")

# Prefix -> abbreviation.  Order matters: the longer "Computer Science &
# Engineering" prefix must be tested before "Computer Science".
_HOT_MAJORS = (
    ("Computer Science & Engineering", "CSE"),
    ("Computer Science", "CS"),
    ("Computer Engineering", "CompE"),
    ("Electrical Engineering", "EE"),
)


def is_hot_major(m, checksecond=False):
    """Return the abbreviation of a recognised major, or "" if none matches.

    Args:
        m: One part of a program string, e.g. "Computer Science".
        checksecond: When true, only accept parts that end with "Second"
            (some records are appended " as Second" and some " Second").

    Returns:
        "CSE", "CS", "CompE", "EE", or "" when *m* is not one of those.
    """
    if checksecond and not m.endswith("Second"):
        return ""

    for prefix, abbreviation in _HOT_MAJORS:
        if m.startswith(prefix):
            return abbreviation

    # Flag unknown "Computer ..." programs so new variants get noticed.
    if m.startswith("Computer"):
        logging.warning(f"Program '{m}' starts with Computer.")
    return ""


def get_major_from_program(prog):
    """Derive the major to report from a full program string.

    The first part is preferred when it is a recognised major; otherwise the
    remaining parts are searched for a recognised *second* major; otherwise
    the first part is returned verbatim.

    Raises:
        ValueError: If *prog* contains no " -" or "/" separated part.
    """
    parts = PROGRAM_PART_RE.findall(prog)
    if not parts:
        # Was an ``assert``; raise so the check survives ``python -O``.
        raise ValueError(f"Cannot find a major in program '{prog}'")

    major = is_hot_major(parts[0])
    if not major:
        for part in parts[1:]:
            major = is_hot_major(part, True)
            if major:
                break
    if not major:
        major = parts[0]
    logging.debug(f"{parts} ==> {major}")
    return major


class State(Enum):
    """Parser state while scanning the input lines."""

    START = auto()  # looking for the beginning of a row
    ROW = auto()    # accumulating the lines of the current row


class Students:
    """Roster rows plus the header-derived field names.

    Iterating over an instance yields, for every student, the list of values
    of the currently selected output fields.
    """

    def __init__(self):
        self.name_to_idx = {}    # field name -> column index
        self.student_list = []   # one list of cell strings per student
        self.field_list = []     # field names in column order
        self.output_fields = ["section", "id", "netid", "name", "email"]

    def add_header(self, line):
        """Record field names from a header row; later headers are ignored."""
        if self.name_to_idx:
            return
        self.field_list = []
        for idx, cell in enumerate(HEADER_CELL_RE.findall(line)):
            words = cell.split(maxsplit=1)
            # An empty header cell used to raise IndexError; name it "".
            shortname = words[0].lower() if words else ""
            self.name_to_idx[shortname] = idx
            self.field_list.append(shortname)
        logging.info("Field names are: " + ','.join(self.field_list))

    def add(self, line):
        """Add one complete table row (all of its lines joined together).

        A row containing data cells is stored as a student record; any other
        row is offered to :meth:`add_header`.
        """
        logging.debug(line)
        # NOTE(review): the entity literal is reconstructed (see module
        # docstring); the recovered text replaced "&" with "&".
        line1 = line.replace("&amp;", "&")
        cells = DATA_CELL_RE.findall(line1)
        if cells:
            if len(cells) != len(self.field_list):
                logging.warning("The number of fields does not seem correct: "
                                + ','.join(cells))
            self.student_list.append(cells)
        else:
            self.add_header(line1)

    def __iter__(self):
        """Yield each student's values for the selected output fields.

        Implemented as a generator so every ``for`` loop gets its own cursor.

        Raises:
            KeyError: If a selected field is not a known field name.
        """
        columns = [self.name_to_idx[f] for f in self.output_fields]
        for student in self.student_list:
            yield [student[c] for c in columns]

    def __str__(self):
        # show all fields
        return '\n'.join(str(s) for s in self.student_list)

    def set_major(self):
        """Append a derived ``major`` field to every student.

        Raises:
            KeyError: If the header has no ``program`` field.
        """
        idx = self.name_to_idx['program']
        self.name_to_idx['major'] = len(self.field_list)
        self.field_list.append('major')
        for s in self.student_list:
            s.append(get_major_from_program(s[idx]))

    def set_fields(self, fsel):
        """Replace the output fields with *fsel*; ``['all']`` selects all.

        An empty *fsel* leaves the current selection untouched.
        """
        if not fsel:
            return

        # output all fields
        if len(fsel) == 1 and fsel[0] == 'all':
            self.output_fields = list(self.field_list)
            return

        self.output_fields = []
        self.add_fields(fsel)

    def add_fields(self, fields):
        """Append *fields* (case-insensitive) to the output fields.

        Raises:
            ValueError: If a name is not one of the known fields.
        """
        for f in (name.lower() for name in fields):
            if f not in self.field_list:
                # Was an ``assert``; raise so the check survives ``python -O``.
                raise ValueError(
                    f"Unknown field '{f}'; known fields are: "
                    + ','.join(self.field_list))
            self.output_fields.append(f)


def write_csv(file, students, nl=None):
    """Write every student record to *file* as CSV.

    Args:
        file: A writable text file object.
        students: Iterable of rows (lists of strings).
        nl: Line terminator; ``None`` keeps the csv module default.
    """
    if nl is None:
        csvwriter = csv.writer(file)
    else:
        csvwriter = csv.writer(file, lineterminator=nl)
    csvwriter.writerows(students)


def is_tr(s):
    """Return True if line *s* starts a table row."""
    # NOTE(review): reconstructed literals, see module docstring.
    return s.startswith(('<tr>', '<tr '))


def is_end_of_row(s):
    """Return True if line *s* closes a table row."""
    # NOTE(review): this function was absent from the recovered text although
    # the driver calls it; reconstructed -- confirm against the original.
    return s.startswith('</tr>')


def load_students(lines):
    """Scan *lines* of roster HTML and return the populated Students.

    A row starts at a line for which :func:`is_tr` is true and ends at a line
    for which :func:`is_end_of_row` is true; the lines in between are joined
    (after stripping trailing whitespace) and handed to :meth:`Students.add`.
    """
    students = Students()
    state = State.START
    row = ''
    for line in lines:
        # remove spaces at the end of the line, including newline
        line = line.rstrip()

        if state == State.ROW:
            if is_end_of_row(line):
                students.add(row)
                state = State.START
            else:
                row += line
            continue

        # looking for a row
        if is_tr(line):
            row = line
            state = State.ROW
    return students


def build_parser():
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(
        description='Clean up downloaded roster from HuskyCT. Nov 2022.')
    parser.add_argument('infiles', nargs='*', default=[], help='Input files.')
    parser.add_argument('-o', nargs='?', default='', const='',
                        help='Output file.')
    parser.add_argument('-f', nargs='+', default=[],
                        help='Add the list of fields to the default list')
    parser.add_argument('--fields', nargs='+', default=[],
                        help='Set the list of fields to print.')
    parser.add_argument("-v", action='count', default=0,
                        help='Verbose level.')
    return parser


def main(argv=None):
    """Run the clean-up; return the process exit status.

    Args:
        argv: Argument list; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    if args.v == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.v > 1:
        logging.basicConfig(level=logging.DEBUG)

    logging.debug(args)

    try:
        # An empty file list makes fileinput read standard input.
        with fileinput.input(files=args.infiles) as lines:
            students = load_students(lines)
    except FileNotFoundError as e:
        # Previously swallowed silently; say which file is missing.
        logging.error("Cannot open input file: %s", e)
        return 1

    if 'program' not in students.name_to_idx:
        logging.error("No 'program' column found in the roster header.")
        return 1

    students.set_major()
    try:
        students.set_fields(args.fields)
        students.add_fields(args.f)
    except ValueError as e:
        logging.error("%s", e)
        return 2

    if args.o != '':
        with open(args.o, 'w', newline='') as csvfile:
            write_csv(csvfile, students)
    else:
        write_csv(sys.stdout, students, '\n')
    return 0


if __name__ == "__main__":
    sys.exit(main())