{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from math import floor, ceil\n",
"from matplotlib import path\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Some useful tools/numbers we have of the time\n",
"start_time = 1404360000\n",
"end_time = 1405828798\n",
"\n",
"def time_to_index(tt, st = 1404360000, divisor=60):\n",
" return floor((tt-st)/divisor)\n",
"\n",
"NN = ceil((end_time - start_time)/60)\n",
"\n",
"# Our combined dataframes\n",
"# Index on `id`. For a given DataFrame.loc[id], the values will be sorted by time / pickup time\n",
"\n",
"OD = pd.read_pickle(\"./data/OD_combined.pkl\")\n",
"GPS = pd.read_pickle(\"./data/GPS_combined.pkl\")\n",
"\n",
"def _from_traffic_dataframe_get_dict_of_ids_to_dataframe(DF, time_key=\"time\"):\n",
" IDs = list(set(DF.index))\n",
" \n",
" dict_of_ids_to_dataframe = {}\n",
" \n",
" for key in IDs:\n",
" try:\n",
" dict_of_ids_to_dataframe[key] = DF.loc[key].set_index(keys=time_key)\n",
" except:\n",
" # If it doesn't work, it means the key is only associated with one entry.\n",
" # So, just don't include it.\n",
" pass\n",
" \n",
" return dict_of_ids_to_dataframe\n",
"\n",
"# Get IDs\n",
"OD_IDs = set(OD.index)\n",
"GPS_IDs = set(GPS.index)\n",
"IDs = OD_IDs.intersection(GPS_IDs)\n",
"\n",
"# Dict of ID to dataframe\n",
"OD_ddf = _from_traffic_dataframe_get_dict_of_ids_to_dataframe(OD, time_key=\"ptime\")\n",
"GPS_ddf = _from_traffic_dataframe_get_dict_of_ids_to_dataframe(GPS, time_key=\"time\")\n",
"\n",
"# Warning: Will take about 4.2GB"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13398\n",
"664\n",
"645\n"
]
}
],
"source": [
"print(len(OD_IDs))\n",
"print(len(GPS_IDs))\n",
"print(len(IDs))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7f48000e28d0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# example plot\n",
"an_id = list(IDs)[123]\n",
"a_gps_df = GPS_ddf[an_id]\n",
"an_od_df = OD_ddf[an_id]\n",
"\n",
"plt.plot(a_gps_df[\"lat\"], a_gps_df[\"lon\"], linewidth=.1, alpha=.5, color=\"black\")\n",
"plt.scatter(an_od_df[\"plat\"], an_od_df[\"plon\"], s=2, color=\"magenta\", alpha=.5)\n",
"plt.scatter(an_od_df[\"dlat\"], an_od_df[\"dlon\"], s=2, color=\"cyan\", alpha=.5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dtime</th>\n",
" <th>plon</th>\n",
" <th>plat</th>\n",
" <th>dlon</th>\n",
" <th>dlat</th>\n",
" <th>pRID</th>\n",
" <th>dRID</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ptime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1404361394</th>\n",
" <td>1404361974</td>\n",
" <td>114.107201</td>\n",
" <td>22.612200</td>\n",
" <td>114.119202</td>\n",
" <td>22.554600</td>\n",
" <td>196</td>\n",
" <td>273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1404368281</th>\n",
" <td>1404368589</td>\n",
" <td>114.138397</td>\n",
" <td>22.545000</td>\n",
" <td>114.125999</td>\n",
" <td>22.549200</td>\n",
" <td>172</td>\n",
" <td>178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1404385739</th>\n",
" <td>1404386227</td>\n",
" <td>114.132896</td>\n",
" <td>22.579599</td>\n",
" <td>114.118698</td>\n",
" <td>22.562500</td>\n",
" <td>191</td>\n",
" <td>276</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1404393106</th>\n",
" <td>1404393498</td>\n",
" <td>114.040802</td>\n",
" <td>22.531300</td>\n",
" <td>114.024101</td>\n",
" <td>22.539000</td>\n",
" <td>237</td>\n",
" <td>251</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1404393609</th>\n",
" <td>1404394342</td>\n",
" <td>114.025002</td>\n",
" <td>22.538700</td>\n",
" <td>114.079002</td>\n",
" <td>22.540600</td>\n",
" <td>251</td>\n",
" <td>284</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405731621</th>\n",
" <td>1405732016</td>\n",
" <td>114.115097</td>\n",
" <td>22.580099</td>\n",
" <td>114.110199</td>\n",
" <td>22.598499</td>\n",
" <td>200</td>\n",
" <td>188</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405735246</th>\n",
" <td>1405735979</td>\n",
" <td>114.081398</td>\n",
" <td>22.556000</td>\n",
" <td>114.047699</td>\n",
" <td>22.535000</td>\n",
" <td>213</td>\n",
" <td>235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405736910</th>\n",
" <td>1405737634</td>\n",
" <td>114.042297</td>\n",
" <td>22.558599</td>\n",
" <td>114.088501</td>\n",
" <td>22.543200</td>\n",
" <td>292</td>\n",
" <td>283</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405738016</th>\n",
" <td>1405738598</td>\n",
" <td>114.081497</td>\n",
" <td>22.540199</td>\n",
" <td>114.043602</td>\n",
" <td>22.527599</td>\n",
" <td>283</td>\n",
" <td>237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1405739091</th>\n",
" <td>1405740116</td>\n",
" <td>114.047997</td>\n",
" <td>22.535000</td>\n",
" <td>114.046204</td>\n",
" <td>22.597200</td>\n",
" <td>235</td>\n",
" <td>340</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>370 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" dtime plon plat dlon dlat pRID \\\n",
"ptime \n",
"1404361394 1404361974 114.107201 22.612200 114.119202 22.554600 196 \n",
"1404368281 1404368589 114.138397 22.545000 114.125999 22.549200 172 \n",
"1404385739 1404386227 114.132896 22.579599 114.118698 22.562500 191 \n",
"1404393106 1404393498 114.040802 22.531300 114.024101 22.539000 237 \n",
"1404393609 1404394342 114.025002 22.538700 114.079002 22.540600 251 \n",
"... ... ... ... ... ... ... \n",
"1405731621 1405732016 114.115097 22.580099 114.110199 22.598499 200 \n",
"1405735246 1405735979 114.081398 22.556000 114.047699 22.535000 213 \n",
"1405736910 1405737634 114.042297 22.558599 114.088501 22.543200 292 \n",
"1405738016 1405738598 114.081497 22.540199 114.043602 22.527599 283 \n",
"1405739091 1405740116 114.047997 22.535000 114.046204 22.597200 235 \n",
"\n",
" dRID \n",
"ptime \n",
"1404361394 273 \n",
"1404368281 178 \n",
"1404385739 276 \n",
"1404393106 251 \n",
"1404393609 284 \n",
"... ... \n",
"1405731621 188 \n",
"1405735246 235 \n",
"1405736910 283 \n",
"1405738016 237 \n",
"1405739091 340 \n",
"\n",
"[370 rows x 7 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ID = list(IDs)[0]\n",
"\n",
"df = OD_ddf[ID]\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def get_demand_from_df(df):\n",
" demand = np.zeros((NN, 492))\n",
" demand.dtype = np.int\n",
" for row_idx in range(len(df)):\n",
" ts = time_to_index(df.index[row_idx])\n",
" rid = df[\"pRID\"].iloc[row_idx]\n",
" demand[ts][rid] += 1\n",
" \n",
" return demand"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" ...,\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0],\n",
" [0, 0, 0, ..., 0, 0, 0]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_demand_from_df(OD_ddf[ID])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"370"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sum(get_demand_from_df(OD_ddf[ID])) # should equal number of rows. good!"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Operating on id 0 of 645\n",
"Operating on id 100 of 645\n",
"Operating on id 200 of 645\n",
"Operating on id 300 of 645\n",
"Operating on id 400 of 645\n",
"Operating on id 500 of 645\n",
"Operating on id 600 of 645\n"
]
}
],
"source": [
"# Let's redo the demand processing\n",
"demand = np.zeros((NN, 492))\n",
"\n",
"for idx, ID in enumerate(list(IDs)):\n",
" if idx % 100 == 0:\n",
" print(f\"Operating on id {idx} of 645\")\n",
" \n",
" demand += get_demand_from_df(OD_ddf[ID])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"222787.0"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sum(demand) # let's see what this sums to"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# write to csv\n",
"pd.DataFrame(demand).astype(int).to_csv(\"./data/demand_only-overlapping-ids.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO - REPROCESS DEMAND USING ONLY THESE IDs\n",
"# TODO - TRAFFIC EVENT EXTRACTION"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}