Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Quantizing code\n",
"\n",
"### Demand\n",
"\n",
"Demand is a lot easier to get. Demand is the number of pickups in a given timeslot in a given region. We can process each OD row one by one, incrementing the corresponding cell indexed by (timestep, regionID).\n",
"\n",
"\n",
"### Status\n",
"\n",
"We want to get the **status** of cars (occupied, vacant, low-battery, or charging) for a given timeslot for a given region ID. This is going to be a bit difficult to handle! Specific questions...\n",
"\n",
" * To clarify: We want to record status per timeslot and per region ID, correct?\n",
" * Can we just report occupied, and infer vacant? (Otherwise, we'd need to track per-car.)\n",
" * How do we infer low-battery?\n",
" * We don't have what we need yet to infer charging.\n",
" * Should we update region ID per car ID by the `gps` files?\n",
"\n",
"\n",
"### Supply\n",
"\n",
"Supply is going to be more difficult to process. I should expect to hear from Sihong with details about this.\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Our codebase so far: shared paths, headers, and time helpers from previous notebooks.\n",
"\n",
"import time  # required by formattime below — was missing, causing a NameError on a fresh kernel\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from math import floor, ceil\n",
"from matplotlib import path\n",
"\n",
"# Just some code from our previous files...\n",
"\n",
"# List of filenames of the original data\n",
"original_od_fns = [\"./DataForUConn/201407OD/201407.gz/part-m-{x}\".format(x=str(x).rjust(5,\"0\")) for x in range(12)]\n",
"original_gps_fns = [\"./DataForUConn/201407GPS/part-r-{x}\".format(x=str(x).rjust(5,\"0\")) for x in range(2)]\n",
"\n",
"# Time string to unix time converter (e.g. \"2014-07-03T04:00:00.000Z\" -> seconds since epoch)\n",
"formattime = lambda timestr: int(time.mktime(time.strptime(timestr, \"%Y-%m-%dT%H:%M:%S.000Z\")))\n",
"\n",
"# Headers\n",
"od_headers = [\"id\", \"ptime\", \"dtime\", \"plon\", \"plat\", \"dlon\", \"dlat\"]\n",
"gps_headers = [\"id\", \"color\", \"lon\", \"lat\", \"time\", \"speed\", \"noMeaning\"]\n",
"\n",
"# Filenames of our pickles\n",
"gps_fns = [\"./data/gps_part-r-{x}.pkl\".format(x=str(x).rjust(3, \"0\")) for x in range(2)]\n",
"od_fns = [\"./data/od_part-m-{x}.pkl\".format(x=str(x).rjust(3, \"0\")) for x in range(12)]\n",
"\n",
"# Region IDs corresponding to each row of a given dataframe.\n",
"# These were processed in scr_005 and scr_006\n",
"# e.g. od_pick-004.csv row 12345 is an integer RegionID, mapping the lat/lon of row 12345 of dataframe od_part-m-004.pkl\n",
"gps_rid_fns = [\"./data/gps_rid-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(2)]\n",
"od_pick_rid_fns = [\"./data/od_pick-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]\n",
"od_drop_rid_fns = [\"./data/od_drop-{x}.csv\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]\n",
"\n",
"# Start, end times of the study window (unix seconds)\n",
"start_time = 1404360000\n",
"end_time = 1405828798\n",
"\n",
"def time_to_index(tt, st = 1404360000, divisor=60):\n",
"    \"\"\"Map a unix timestamp `tt` to a 0-based timeslot index of `divisor`-second bins, starting at `st`.\"\"\"\n",
"    return floor((tt-st)/divisor)\n",
"\n",
"# Total number of one-minute timeslots in the window\n",
"NN = ceil((end_time - start_time)/60)\n",
"\n",
"# Read the processed CSVs\n",
"def _safe_to_int(cell):\n",
"    # some cells are empty, so, return None instead of int\n",
"    try:\n",
"        return int(cell)\n",
"    except ValueError:\n",
"        return None\n",
"\n",
"def read_rid_csv(fn):\n",
"    \"\"\"Read a region-ID CSV into a list of rows; each cell is an int or None (blank cell).\"\"\"\n",
"    with open(fn) as f:\n",
"        return [[_safe_to_int(cell) for cell in line.split(\",\")] for line in f.read().strip().split(\"\\n\")]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Our (timestep, regionID) array!\n",
"# 492 — presumably the number of regions produced by the scr_005/scr_006 partition; TODO confirm\n",
"demand = np.zeros((NN, 492))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def get_demand_from_files(od_fn, pick_rid_fn, number_of_timesteps, number_of_regions, verbose=True):\n",
"    \"\"\"Build a (timestep, region) pickup-count matrix from one OD pickle and its region-ID CSV.\n",
"\n",
"    od_fn: path to a pickled OD dataframe with a unix-time 'ptime' column.\n",
"    pick_rid_fn: path to the row-aligned pickup region-ID CSV (see read_rid_csv).\n",
"    Returns a numpy array of shape (number_of_timesteps, number_of_regions).\n",
"    \"\"\"\n",
"    oddf = pd.read_pickle(od_fn)\n",
"    pick_rids = read_rid_csv(pick_rid_fn)\n",
"\n",
"    # They should have the same amount of rows!\n",
"    assert len(oddf) == len(pick_rids), \"OD DF and PICK RIDs don't match! What gives?\"\n",
"\n",
"    demand = np.zeros((number_of_timesteps, number_of_regions))\n",
"\n",
"    for row_idx in range(len(oddf)):\n",
"        if verbose and row_idx % 50000 == 0:\n",
"            print(f\" Processing row {row_idx} of {len(oddf)}\")\n",
"        ts = time_to_index(oddf[\"ptime\"][row_idx])\n",
"        rid = pick_rids[row_idx][0]  # Choose the first region ID in the list.\n",
"\n",
"        # Skip rows with no region ID, and rows whose pickup time falls outside the\n",
"        # window: a negative ts would otherwise wrap around via numpy negative\n",
"        # indexing and silently credit the pickup to the wrong timeslot.\n",
"        if rid is not None and 0 <= ts < number_of_timesteps:\n",
"            demand[ts, rid] += 1\n",
"\n",
"    return demand"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing ./data/od_part-m-000.pkl\n",
" Processing row 0 of 753817\n",
" Processing row 50000 of 753817\n",
" Processing row 100000 of 753817\n",
" Processing row 150000 of 753817\n",
" Processing row 200000 of 753817\n",
" Processing row 250000 of 753817\n",
" Processing row 300000 of 753817\n",
" Processing row 350000 of 753817\n",
" Processing row 400000 of 753817\n",
" Processing row 450000 of 753817\n",
" Processing row 500000 of 753817\n",
" Processing row 550000 of 753817\n",
" Processing row 600000 of 753817\n",
" Processing row 650000 of 753817\n",
" Processing row 700000 of 753817\n",
" Processing row 750000 of 753817\n",
"Processing ./data/od_part-m-001.pkl\n",
" Processing row 0 of 573391\n",
" Processing row 50000 of 573391\n",
" Processing row 100000 of 573391\n",
" Processing row 150000 of 573391\n",
" Processing row 200000 of 573391\n",
" Processing row 250000 of 573391\n",
" Processing row 300000 of 573391\n",
" Processing row 350000 of 573391\n",
" Processing row 400000 of 573391\n",
" Processing row 450000 of 573391\n",
" Processing row 500000 of 573391\n",
" Processing row 550000 of 573391\n",
"Processing ./data/od_part-m-002.pkl\n",
" Processing row 0 of 505989\n",
" Processing row 50000 of 505989\n",
" Processing row 100000 of 505989\n",
" Processing row 150000 of 505989\n",
" Processing row 200000 of 505989\n",
" Processing row 250000 of 505989\n",
" Processing row 300000 of 505989\n",
" Processing row 350000 of 505989\n",
" Processing row 400000 of 505989\n",
" Processing row 450000 of 505989\n",
" Processing row 500000 of 505989\n",
"Processing ./data/od_part-m-003.pkl\n",
" Processing row 0 of 476765\n",
" Processing row 50000 of 476765\n",
" Processing row 100000 of 476765\n",
" Processing row 150000 of 476765\n",
" Processing row 200000 of 476765\n",
" Processing row 250000 of 476765\n",
" Processing row 300000 of 476765\n",
" Processing row 350000 of 476765\n",
" Processing row 400000 of 476765\n",
" Processing row 450000 of 476765\n",
"Processing ./data/od_part-m-004.pkl\n",
" Processing row 0 of 474205\n",
" Processing row 50000 of 474205\n",
" Processing row 100000 of 474205\n",
" Processing row 150000 of 474205\n",
" Processing row 200000 of 474205\n",
" Processing row 250000 of 474205\n",
" Processing row 300000 of 474205\n",
" Processing row 350000 of 474205\n",
" Processing row 400000 of 474205\n",
" Processing row 450000 of 474205\n",
"Processing ./data/od_part-m-005.pkl\n",
" Processing row 0 of 466073\n",
" Processing row 50000 of 466073\n",
" Processing row 100000 of 466073\n",
" Processing row 150000 of 466073\n",
" Processing row 200000 of 466073\n",
" Processing row 250000 of 466073\n",
" Processing row 300000 of 466073\n",
" Processing row 350000 of 466073\n",
" Processing row 400000 of 466073\n",
" Processing row 450000 of 466073\n",
"Processing ./data/od_part-m-006.pkl\n",
" Processing row 0 of 460057\n",
" Processing row 50000 of 460057\n",
" Processing row 100000 of 460057\n",
" Processing row 150000 of 460057\n",
" Processing row 200000 of 460057\n",
" Processing row 250000 of 460057\n",
" Processing row 300000 of 460057\n",
" Processing row 350000 of 460057\n",
" Processing row 400000 of 460057\n",
" Processing row 450000 of 460057\n",
"Processing ./data/od_part-m-007.pkl\n",
" Processing row 0 of 450821\n",
" Processing row 50000 of 450821\n",
" Processing row 100000 of 450821\n",
" Processing row 150000 of 450821\n",
" Processing row 200000 of 450821\n",
" Processing row 250000 of 450821\n",
" Processing row 300000 of 450821\n",
" Processing row 350000 of 450821\n",
" Processing row 400000 of 450821\n",
" Processing row 450000 of 450821\n",
"Processing ./data/od_part-m-008.pkl\n",
" Processing row 0 of 450109\n",
" Processing row 50000 of 450109\n",
" Processing row 100000 of 450109\n",
" Processing row 150000 of 450109\n",
" Processing row 200000 of 450109\n",
" Processing row 250000 of 450109\n",
" Processing row 300000 of 450109\n",
" Processing row 350000 of 450109\n",
" Processing row 400000 of 450109\n",
" Processing row 450000 of 450109\n",
"Processing ./data/od_part-m-009.pkl\n",
" Processing row 0 of 445630\n",
" Processing row 50000 of 445630\n",
" Processing row 100000 of 445630\n",
" Processing row 150000 of 445630\n",
" Processing row 200000 of 445630\n",
" Processing row 250000 of 445630\n",
" Processing row 300000 of 445630\n",
" Processing row 350000 of 445630\n",
" Processing row 400000 of 445630\n",
"Processing ./data/od_part-m-010.pkl\n",
" Processing row 0 of 433209\n",
" Processing row 50000 of 433209\n",
" Processing row 100000 of 433209\n",
" Processing row 150000 of 433209\n",
" Processing row 200000 of 433209\n",
" Processing row 250000 of 433209\n",
" Processing row 300000 of 433209\n",
" Processing row 350000 of 433209\n",
" Processing row 400000 of 433209\n",
"Processing ./data/od_part-m-011.pkl\n",
" Processing row 0 of 428812\n",
" Processing row 50000 of 428812\n",
" Processing row 100000 of 428812\n",
" Processing row 150000 of 428812\n",
" Processing row 200000 of 428812\n",
" Processing row 250000 of 428812\n",
" Processing row 300000 of 428812\n",
" Processing row 350000 of 428812\n",
" Processing row 400000 of 428812\n"
]
}
],
"source": [
"demand = np.zeros((NN, 492))\n",
"\n",
"# Accumulate pickup counts across every OD part file.\n",
"# get_demand_from_files already loads each pickle/CSV pair and asserts that the\n",
"# dataframe and region-ID CSV are row-aligned, so we must not re-read them here —\n",
"# the previous version loaded every file twice per iteration just to repeat that assert.\n",
"for od_fn, pick_rid_fn in zip(od_fns, od_pick_rid_fns):\n",
"    print(f\"Processing {od_fn}\")\n",
"    demand += get_demand_from_files(od_fn, pick_rid_fn, NN, 492)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Persist the demand matrix for downstream notebooks (np.save appends .npy -> ./data/demand.npy)\n",
"np.save(\"./data/demand\", demand)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Also export as a CSV of integer counts, for easy inspection outside Python\n",
"pd.DataFrame(demand).astype(int).to_csv(\"./data/demand.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}