"""Clean up a roster downloaded from HuskyCT and write selected columns as CSV.

The downloaded file is actually an HTML file although Excel can extract
tables from it. The format may have changed recently: a student's info is
now spread over multiple lines (one table cell per line). This script reads
a downloaded file and writes selected columns into a CSV file.

Usage (assume all files are under the current directory).

Print the info on the screen::

    python cleanup-roster.py downloaded.xls

The ``-o`` option specifies an output file::

    python cleanup-roster.py downloaded.xls -o section1.csv

The ``-f`` option specifies the fields to be included. ``all`` means all
fields. Without the ``-f`` option, only a few most useful fields are
printed::

    python cleanup-roster.py downloaded.xls -f 1 2
    python cleanup-roster.py downloaded.xls -f all

NOTE(review): the HTML tag literals used below (``<tr``, ``</tr``,
``<td ...>...</td>``, ``&amp;``, ``<br>``) were reconstructed because the
copy of this script under review had them stripped out. Confirm them
against a real HuskyCT export.
"""
import argparse
import csv
import re
import sys

# Matches <br>, <br/>, <br /> in any letter case.
_BR_TAG = re.compile(r"<br\s*/?>", re.IGNORECASE)

# One table cell occupying a whole (right-stripped) line; group 1 is its text.
_CELL = re.compile(r"^<td[^>]*>(.*)</td>")

# A line starting with any of these ends the student record being collected.
_ROW_BOUNDARIES = ("<tr", "</tr", "</table")


class Students:
    """One roster entry, filled cell by cell in column order."""

    # Column index (0-based position of the cell in its row) -> attribute
    # that mirrors that cell.
    _COLUMN_ATTRS = {
        1: "id",
        2: "name",
        5: "program",
        8: "netid",
        10: "email",
        13: "section",
    }

    def __init__(self):
        self.id = ""
        self.name = "NameNotSet"
        self.program = ""
        self.netid = ""
        self.email = ""
        self.section = ""
        self.fields = []  # every cell seen so far, in column order

    def add_field(self, s):
        """Append the next cell's text and mirror it to a named attribute.

        ``&amp;`` entities are decoded to ``&`` and ``<br>`` tags are
        removed before the value is stored.
        """
        s = s.replace("&amp;", "&")
        s = _BR_TAG.sub("", s)
        self.fields.append(s)
        attr = self._COLUMN_ATTRS.get(len(self.fields) - 1)
        if attr is not None:
            setattr(self, attr, s)

    def get_fields(self, fdno):
        """Return the values selected by *fdno*.

        * empty *fdno*: the short tuple
          ``(section, id, netid, name, email)``;
        * ``['all']``: the list of every collected cell;
        * otherwise: the cells at the given column numbers (items are
          converted with ``int``).

        Raises:
            ValueError: an item of *fdno* is not an integer.
            IndexError: a column number is out of range for this entry.
        """
        if not fdno:
            return (self.section, self.id, self.netid, self.name, self.email)
        if len(fdno) == 1 and fdno[0] == 'all':
            return self.fields
        return [self.fields[int(col)] for col in fdno]


def write_csv(file, student_list, fields):
    """Write one CSV row per student to the writable text object *file*.

    *fields* is passed unchanged to ``Students.get_fields``.
    """
    csv.writer(file).writerows(s.get_fields(fields) for s in student_list)


def parse_roster(lines):
    """Build the list of ``Students`` from an iterable of HTML text lines.

    A new record starts with the first cell line after a row boundary and
    is stored when the next boundary line (or the end of input) is reached.
    Lines that are neither a boundary nor a complete ``<td>`` cell are
    ignored.
    """
    roster = []
    current = None
    for raw in lines:
        if raw.startswith(_ROW_BOUNDARIES):
            if current is not None:
                roster.append(current)
            current = None
            continue

        # Remove spaces at the end of the line, including the newline.
        cell = _CELL.search(raw.rstrip())
        if cell is None:
            continue
        if current is None:
            current = Students()
        current.add_field(cell.group(1))

    # Keep the last record even if the input ends without a closing tag.
    if current is not None:
        roster.append(current)
    return roster


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser(
        description='Clean up downloaded roster from HuskyCT')
    parser.add_argument('infile', nargs='?', help='Input file.')
    parser.add_argument('-o', nargs='?', default='', const='',
                        help='Output file.')
    parser.add_argument('-f', nargs='+', default='',
                        help='List of field numbers.')
    args = parser.parse_args(argv)

    # 'infile' is optional on the command line; fall back to standard input
    # instead of failing inside open(None).
    if args.infile is None:
        student_list = parse_roster(sys.stdin)
    else:
        with open(args.infile, 'r') as file:
            student_list = parse_roster(file)

    if args.o != '':
        with open(args.o, 'w', newline='') as csvfile:
            write_csv(csvfile, student_list, args.f)
    else:
        write_csv(sys.stdout, student_list, args.f)


if __name__ == "__main__":
    main()