Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quantizing code\n",
"\n",
"### Demand\n",
"\n",
"Demand is a lot easier to get. Demand is the number of pickups in a given timeslot in a given region. We can process each OD row one by one, incrementing the corresponding cell indexed by (timestep, regionID).\n",
"\n",
"\n",
"### Status\n",
"\n",
"We want to get the **status** of cars (occupied, vacant, low-battery, or charging) for a given timeslot for a given region ID. This is going to be a bit difficult to handle! Specific questions...\n",
"\n",
" * To clarify: We want to record status per timeslot and per region ID, correct?\n",
" * Can we just report occupied, and infer vacant? (Otherwise, we'd need to track per-car.)\n",
" * How do we infer low-battery?\n",
" * We don't have what we need yet to infer charging.\n",
" * Should we update region ID per car ID by the `gps` files?\n",
"\n",
"\n",
"### Supply\n",
"\n",
"Supply is going to be more difficult to process. I should expect to hear from Sihong with details about this.\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Our codebase so far: shared paths, headers, and time helpers from previous notebooks.\n",
"\n",
"import time  # required by formattime below — was missing, causing a NameError on a fresh kernel\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from math import floor, ceil\n",
"from matplotlib import path\n",
"\n",
"# Just some code from our previous files...\n",
"\n",
"# List of filenames of the original data\n",
"original_od_fns = [\"./DataForUConn/201407OD/201407.gz/part-m-{x}\".format(x=str(x).rjust(5,\"0\")) for x in range(12)]\n",
"original_gps_fns = [\"./DataForUConn/201407GPS/part-r-{x}\".format(x=str(x).rjust(5,\"0\")) for x in range(2)]\n",
"\n",
"# Time string to unix time converter (e.g. \"2014-07-03T04:00:00.000Z\" -> seconds since epoch)\n",
"formattime = lambda timestr: int(time.mktime(time.strptime(timestr, \"%Y-%m-%dT%H:%M:%S.000Z\")))\n",
"\n",
"# Headers\n",
"od_headers = [\"id\", \"ptime\", \"dtime\", \"plon\", \"plat\", \"dlon\", \"dlat\"]\n",
"gps_headers = [\"id\", \"color\", \"lon\", \"lat\", \"time\", \"speed\", \"noMeaning\"]\n",
"\n",
"# Filenames of our pickles\n",
"gps_fns = [\"./data/gps_part-r-{x}.pkl\".format(x=str(x).rjust(3, \"0\")) for x in range(2)]\n",
"od_fns = [\"./data/od_part-m-{x}.pkl\".format(x=str(x).rjust(3, \"0\")) for x in range(12)]\n",
"\n",
"# Region IDs corresponding to each row of a given dataframe.\n",
"# These were processed in scr_005 and scr_006\n",
"# e.g. od_pick-004.csv row 12345 is an integer RegionID, mapping the lat/lon of row 12345 of dataframe od_part-m-004.pkl\n",
"gps_rid_fns = [\"./data/gps_rid-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(2)]\n",
"od_pick_rid_fns = [\"./data/od_pick-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]\n",
"od_drop_rid_fns = [\"./data/od_drop-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]\n",
"\n",
"# Start, end times of the study window (unix seconds)\n",
"start_time = 1404360000\n",
"end_time = 1405828798\n",
"\n",
"def time_to_index(tt, st = 1404360000, divisor=60):\n",
"    \"\"\"Map a unix timestamp `tt` to a 0-based timeslot index of `divisor`-second bins, starting at `st`.\"\"\"\n",
"    return floor((tt-st)/divisor)\n",
"\n",
"# Total number of one-minute timeslots in the window\n",
"NN = ceil((end_time - start_time)/60)\n",
"\n",
"# Read the processed CSVs\n",
"def _safe_to_int(cell):\n",
"    # some cells are empty, so, return None instead of int\n",
"    try:\n",
"        return int(cell)\n",
"    except ValueError:\n",
"        return None\n",
"\n",
"def read_rid_csv(fn):\n",
"    \"\"\"Read a region-ID CSV into a list of rows; each cell is an int or None (blank cell).\"\"\"\n",
"    with open(fn) as f:\n",
"        return [[_safe_to_int(cell) for cell in line.split(\",\")] for line in f.read().strip().split(\"\\n\")]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Our (timestep, regionID) array!\n",
"# 492 — presumably the number of regions produced by the scr_005/scr_006 partition; TODO confirm\n",
"demand = np.zeros((NN, 492))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def get_demand_from_files(od_fn, pick_rid_fn, number_of_timesteps, number_of_regions, verbose=True):\n",
"    \"\"\"Build a (timestep, region) pickup-count matrix from one OD pickle and its region-ID CSV.\n",
"\n",
"    od_fn: path to a pickled OD dataframe with a unix-time 'ptime' column.\n",
"    pick_rid_fn: path to the row-aligned pickup region-ID CSV (see read_rid_csv).\n",
"    Returns a numpy array of shape (number_of_timesteps, number_of_regions).\n",
"    \"\"\"\n",
"    oddf = pd.read_pickle(od_fn)\n",
"    pick_rids = read_rid_csv(pick_rid_fn)\n",
"\n",
"    # They should have the same amount of rows!\n",
"    assert len(oddf) == len(pick_rids), \"OD DF and PICK RIDs don't match! What gives?\"\n",
"\n",
"    demand = np.zeros((number_of_timesteps, number_of_regions))\n",
"\n",
"    for row_idx in range(len(oddf)):\n",
"        if verbose and row_idx % 50000 == 0:\n",
"            print(f\" Processing row {row_idx} of {len(oddf)}\")\n",
"        ts = time_to_index(oddf[\"ptime\"][row_idx])\n",
"        rid = pick_rids[row_idx][0]  # Choose the first region ID in the list.\n",
"\n",
"        # Skip rows with no region ID, and rows whose pickup time falls outside the\n",
"        # window: a negative ts would otherwise wrap around via numpy negative\n",
"        # indexing and silently credit the pickup to the wrong timeslot.\n",
"        if rid is not None and 0 <= ts < number_of_timesteps:\n",
"            demand[ts, rid] += 1\n",
"\n",
"    return demand"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing ./data/od_part-m-000.pkl\n",
" Processing row 0 of 753817\n",
" Processing row 50000 of 753817\n",
" Processing row 100000 of 753817\n",
" Processing row 150000 of 753817\n",
" Processing row 200000 of 753817\n",
" Processing row 250000 of 753817\n",
" Processing row 300000 of 753817\n",
" Processing row 350000 of 753817\n",
" Processing row 400000 of 753817\n",
" Processing row 450000 of 753817\n",
" Processing row 500000 of 753817\n",
" Processing row 550000 of 753817\n",
" Processing row 600000 of 753817\n",
" Processing row 650000 of 753817\n",
" Processing row 700000 of 753817\n",
" Processing row 750000 of 753817\n",
"Processing ./data/od_part-m-001.pkl\n",
" Processing row 0 of 573391\n",
" Processing row 50000 of 573391\n",
" Processing row 100000 of 573391\n",
" Processing row 150000 of 573391\n",
" Processing row 200000 of 573391\n",
" Processing row 250000 of 573391\n",
" Processing row 300000 of 573391\n",
" Processing row 350000 of 573391\n",
" Processing row 400000 of 573391\n",
" Processing row 450000 of 573391\n",
" Processing row 500000 of 573391\n",
" Processing row 550000 of 573391\n",
"Processing ./data/od_part-m-002.pkl\n",
" Processing row 0 of 505989\n",
" Processing row 50000 of 505989\n",
" Processing row 100000 of 505989\n",
" Processing row 150000 of 505989\n",
" Processing row 200000 of 505989\n",
" Processing row 250000 of 505989\n",
" Processing row 300000 of 505989\n",
" Processing row 350000 of 505989\n",
" Processing row 400000 of 505989\n",
" Processing row 450000 of 505989\n",
" Processing row 500000 of 505989\n",
"Processing ./data/od_part-m-003.pkl\n",
" Processing row 0 of 476765\n",
" Processing row 50000 of 476765\n",
" Processing row 100000 of 476765\n",
" Processing row 150000 of 476765\n",
" Processing row 200000 of 476765\n",
" Processing row 250000 of 476765\n",
" Processing row 300000 of 476765\n",
" Processing row 350000 of 476765\n",
" Processing row 400000 of 476765\n",
" Processing row 450000 of 476765\n",
"Processing ./data/od_part-m-004.pkl\n",
" Processing row 0 of 474205\n",
" Processing row 50000 of 474205\n",
" Processing row 100000 of 474205\n",
" Processing row 150000 of 474205\n",
" Processing row 200000 of 474205\n",
" Processing row 250000 of 474205\n",
" Processing row 300000 of 474205\n",
" Processing row 350000 of 474205\n",
" Processing row 400000 of 474205\n",
" Processing row 450000 of 474205\n",
"Processing ./data/od_part-m-005.pkl\n",
" Processing row 0 of 466073\n",
" Processing row 50000 of 466073\n",
" Processing row 100000 of 466073\n",
" Processing row 150000 of 466073\n",
" Processing row 200000 of 466073\n",
" Processing row 250000 of 466073\n",
" Processing row 300000 of 466073\n",
" Processing row 350000 of 466073\n",
" Processing row 400000 of 466073\n",
" Processing row 450000 of 466073\n",
"Processing ./data/od_part-m-006.pkl\n",
" Processing row 0 of 460057\n",
" Processing row 50000 of 460057\n",
" Processing row 100000 of 460057\n",
" Processing row 150000 of 460057\n",
" Processing row 200000 of 460057\n",
" Processing row 250000 of 460057\n",
" Processing row 300000 of 460057\n",
" Processing row 350000 of 460057\n",
" Processing row 400000 of 460057\n",
" Processing row 450000 of 460057\n",
"Processing ./data/od_part-m-007.pkl\n",
" Processing row 0 of 450821\n",
" Processing row 50000 of 450821\n",
" Processing row 100000 of 450821\n",
" Processing row 150000 of 450821\n",
" Processing row 200000 of 450821\n",
" Processing row 250000 of 450821\n",
" Processing row 300000 of 450821\n",
" Processing row 350000 of 450821\n",
" Processing row 400000 of 450821\n",
" Processing row 450000 of 450821\n",
"Processing ./data/od_part-m-008.pkl\n",
" Processing row 0 of 450109\n",
" Processing row 50000 of 450109\n",
" Processing row 100000 of 450109\n",
" Processing row 150000 of 450109\n",
" Processing row 200000 of 450109\n",
" Processing row 250000 of 450109\n",
" Processing row 300000 of 450109\n",
" Processing row 350000 of 450109\n",
" Processing row 400000 of 450109\n",
" Processing row 450000 of 450109\n",
"Processing ./data/od_part-m-009.pkl\n",
" Processing row 0 of 445630\n",
" Processing row 50000 of 445630\n",
" Processing row 100000 of 445630\n",
" Processing row 150000 of 445630\n",
" Processing row 200000 of 445630\n",
" Processing row 250000 of 445630\n",
" Processing row 300000 of 445630\n",
" Processing row 350000 of 445630\n",
" Processing row 400000 of 445630\n",
"Processing ./data/od_part-m-010.pkl\n",
" Processing row 0 of 433209\n",
" Processing row 50000 of 433209\n",
" Processing row 100000 of 433209\n",
" Processing row 150000 of 433209\n",
" Processing row 200000 of 433209\n",
" Processing row 250000 of 433209\n",
" Processing row 300000 of 433209\n",
" Processing row 350000 of 433209\n",
" Processing row 400000 of 433209\n",
"Processing ./data/od_part-m-011.pkl\n",
" Processing row 0 of 428812\n",
" Processing row 50000 of 428812\n",
" Processing row 100000 of 428812\n",
" Processing row 150000 of 428812\n",
" Processing row 200000 of 428812\n",
" Processing row 250000 of 428812\n",
" Processing row 300000 of 428812\n",
" Processing row 350000 of 428812\n",
" Processing row 400000 of 428812\n"
]
}
],
"source": [
"demand = np.zeros((NN, 492))\n",
"\n",
"# Accumulate pickup counts across every OD part file.\n",
"# get_demand_from_files already loads each pickle/CSV pair and asserts that the\n",
"# dataframe and region-ID CSV are row-aligned, so we must not re-read them here —\n",
"# the previous version loaded every file twice per iteration just to repeat that assert.\n",
"for od_fn, pick_rid_fn in zip(od_fns, od_pick_rid_fns):\n",
"    print(f\"Processing {od_fn}\")\n",
"    demand += get_demand_from_files(od_fn, pick_rid_fn, NN, 492)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Persist the demand matrix for downstream notebooks (np.save appends .npy -> ./data/demand.npy)\n",
"np.save(\"./data/demand\", demand)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Also export as a CSV of integer counts, for easy inspection outside Python\n",
"pd.DataFrame(demand).astype(int).to_csv(\"./data/demand.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}