# traffic_dataproc/scr_006_gps_rids.py
"""
Write the region IDs for each point in the GPS data.

For each row of the pickled DataFrame given by --filename (e.g.
"./data/gps_part-r-000.pkl"), write the comma-separated list of IDs of the
regions containing that row's (lon, lat) point as one line of the file
given by --outname / -o.
"""
import argparse
from math import floor, ceil

import numpy as np
import pandas as pd
from matplotlib import path
def process_line(line):
    """Parse one comma-split record of the region file into its polyline.

    Args:
        line: Sequence of string fields. The first two fields are returned
            untouched; every remaining field must be parseable by ``float``
            and the fields are read as alternating first/second coordinates.

    Returns:
        A ``(val1, val2, polyline)`` tuple, where ``val1`` and ``val2`` are
        ``line[0]`` and ``line[1]`` and ``polyline`` is a float array of
        shape ``(n_points, 2)``.

    Raises:
        IndexError: If ``line`` has fewer than two fields.
        ValueError: If a coordinate field cannot be converted to ``float``.
    """
    # NOTE(review): the meaning of the two leading fields is not established
    # anywhere in this script (presumably a region id and a vertex count) --
    # confirm against the area.txt format.
    val1 = line[0]
    val2 = line[1]

    coords = [float(v) for v in line[2:]]
    # Pair consecutive values; zip stops at the shorter slice, so a trailing
    # unpaired value is dropped.
    pairs = list(zip(coords[0::2], coords[1::2]))
    # reshape keeps the result two-dimensional even when there are no pairs,
    # so callers can always rely on an (n_points, 2) array.
    polyline = np.array(pairs, dtype=float).reshape(-1, 2)
    return val1, val2, polyline
def point_to_rids(point, polypaths, radius=0.0):
    """Return the indices of every entry in *polypaths* that contains *point*.

    Args:
        point: The point to test; passed unchanged to ``contains_point``.
        polypaths: Iterable of objects exposing
            ``contains_point(point, radius=...)`` (this script passes
            ``matplotlib.path.Path`` instances).
        radius: Forwarded unchanged to each ``contains_point`` call.

    Returns:
        A list of the zero-based positions, in iteration order, of the
        entries whose ``contains_point`` returned a truthy value. The list
        is empty when nothing matches and may hold several indices when
        regions overlap.
    """
    return [
        rid
        for rid, polypath in enumerate(polypaths)
        if polypath.contains_point(point, radius=radius)
    ]
if __name__ == "__main__":
    # Script entry point: load the region polygons, then write one line of
    # matching region IDs per row of the input DataFrame.
    #
    # Kept as flat module-level statements (rather than a main() function)
    # so that running with `python -i` leaves df, polypaths, etc. in the
    # interactive namespace.
    parser = argparse.ArgumentParser()
    # Both options are required: each is indexed with [0] below, which would
    # otherwise fail with a TypeError on None when the option is omitted.
    parser.add_argument("--filename", "-f",
                        help="Filename of OD data to look at.",
                        type=str, nargs=1, required=True)
    parser.add_argument("--outname", "-o",
                        help="Filename to explort CSV of RIDs to",
                        type=str, nargs=1, required=True)
    args = parser.parse_args()

    fn = args.filename[0]
    print(f"Processing {fn}")

    # One region per line of area.txt; see process_line for the field layout.
    with open("./DataForUConn/DecideRegion/area.txt") as f:
        area = [process_line(line.split(",")) for line in f.read().strip().split("\n")]
    # NOTE(review): the first coordinate pair of every polyline is skipped
    # here ([1:]); presumably it is not a boundary vertex -- confirm against
    # the area.txt format.
    polypaths = [path.Path(line[2][1:]) for line in area]

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle(fn)
    n_points = len(df)

    # The context manager guarantees the output is flushed and closed even
    # if processing fails part-way through.
    with open(args.outname[0], "w") as RIDs_outfile:
        # Iterate positionally over the columns: label-based df["lon"][ii]
        # breaks (or picks the wrong row) when the index is not 0..n-1.
        for ii, point in enumerate(zip(df["lon"], df["lat"])):
            if ii % 10000 == 0:
                print(f" Processing point {ii} of {n_points}...")

            # Indices of all regions containing this point; the list may be
            # empty or hold several IDs.
            list_of_RIDs = point_to_rids(point, polypaths=polypaths)

            # str(list)[1:-1] drops the surrounding brackets, giving
            # "1, 2" (or an empty line when no region matched).
            RIDs_outfile.write(str(list_of_RIDs)[1:-1])
            RIDs_outfile.write("\n")
""" | |
Usage:
    python -i scr_006_gps_rids.py -f ./data/gps_part-r-000.pkl -o ./data/gps_rid-000.csv
    python -i scr_006_gps_rids.py -f ./data/gps_part-r-001.pkl -o ./data/gps_rid-001.csv
"""