Skip to content

Commit

Permalink
remove hard coded file name. update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
Jerry Shi committed Nov 16, 2022
1 parent 3e74c27 commit 4f47bc1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
11 changes: 6 additions & 5 deletions cleanup-roster.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

def is_hot_major(m, checksecond = False):
if checksecond:
# some records are appended " as Second" and some " Second"
if not m.endswith("Second"):
return ""
m = m[-10:]

if m.startswith("Computer Science & Engineering"):
major = "CSE"
Expand All @@ -24,8 +24,9 @@ def is_hot_major(m, checksecond = False):
return major

def get_major_from_program(prog):
m = re.findall(r"[-/]([^/]+)", prog)
m = re.findall(r"(?: -|/)([^/]+)", prog)

print(m)
assert len(m) > 0

major = is_hot_major(m[0])
Expand Down Expand Up @@ -68,6 +69,7 @@ def add_header(self, line):

def add(self, line):
# replace &
logging.debug(line)
line1 = re.sub(r"&", "&", line)
# remove br tag
# s = re.sub(r"<br\s*/>", "", s)
Expand Down Expand Up @@ -134,7 +136,7 @@ def is_tr(s):
def is_end_of_row(s):
return s.startswith('<tr>') or s.startswith('</table>')

parser = argparse.ArgumentParser(description='Clean up downloaded roster from HuskyCT')
parser = argparse.ArgumentParser(description='Clean up downloaded roster from HuskyCT. Nov 2022.')
parser.add_argument('infiles', nargs='*', default=[], help='Input files.')
parser.add_argument('-o', nargs='?', default='', const='', help='Output file.')
parser.add_argument('-f', nargs='+', default=[], help='Add the list of fields to the default list')
Expand All @@ -155,8 +157,7 @@ def is_end_of_row(s):
state = State.START
row = ''
try:
# for line in fileinput.input(args.infiles):
for line in fileinput.input("ps.xls"):
for line in fileinput.input(args.infiles):
# remove spaces at the end of the line, including newline
line = line.rstrip()

Expand Down
19 changes: 19 additions & 0 deletions readme.MD
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,22 @@ cat *.xls | py cleanup-roster.py -o all.csv
# save student records in separate csv files
dir *.xls | foreach { py cleanup-roster.py $_ -o ($_.name -replace '.xls','.csv') }
```

## Notes

### Nov 2022

The format of the downloaded files keeps changing, mainly in the "Program and Plan" field.
The format in Nov 2022 is:

<school> - <\r><major>
<school> - <\r><major>/<minor>
<school> - <\r><major>/<minor1>/<minor2>
<school> - <\r><major>/<major2> as Second
<school> - <\r><major>/<major2> Second

The school can be:

Engineering
Liberal Arts & Sciences
CCS Non-Degree

0 comments on commit 4f47bc1

Please sign in to comment.