Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9a3d1d4d-54e0-4725-933b-8337450f2df3",
"metadata": {
"id": "9a3d1d4d-54e0-4725-933b-8337450f2df3"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import torch.nn as nn\n",
"import torch\n",
"from datetime import datetime, timedelta\n",
"import numpy as np"
]
},
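{
"cell_type": "markdown",
"id": "cc2e45cc-2981-4ff4-bbdd-b640b3bcbc37",
"metadata": {
"id": "cc2e45cc-2981-4ff4-bbdd-b640b3bcbc37"
},
"source": [
"## Data processing\n",
"\n",
"Load the 2020 e-scooter trip records, index the community areas, and build hourly 8x8 grids of trip-start and trip-end counts."
]
},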
{
"cell_type": "code",
"execution_count": null,
"id": "c421468f-c578-4583-8747-a30b5e2e8bf6",
"metadata": {
"id": "c421468f-c578-4583-8747-a30b5e2e8bf6"
},
"outputs": [],
"source": [
"!pwd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b298662f-dadb-451e-9ee7-8c6f2989d3b9",
"metadata": {
"id": "b298662f-dadb-451e-9ee7-8c6f2989d3b9"
},
"outputs": [],
"source": [
"df = pd.read_csv(\"./data/scooter/E-Scooter_Trips_-_2020.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19665919-c190-4d8b-af25-b50998df6e20",
"metadata": {
"id": "19665919-c190-4d8b-af25-b50998df6e20"
},
"outputs": [],
"source": [
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f056dce-a831-45de-8248-7e02fd953a7d",
"metadata": {
"id": "3f056dce-a831-45de-8248-7e02fd953a7d"
},
"outputs": [],
"source": [
"df = df.dropna()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e90ac011-2b64-4d37-9ac8-aa1385fd3122",
"metadata": {
"id": "e90ac011-2b64-4d37-9ac8-aa1385fd3122"
},
"outputs": [],
"source": [
"df = df.astype({\"Start Community Area Number\": int, \"End Community Area Number\": int})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f97344e1-77e2-4319-8ee1-183e735702da",
"metadata": {
"id": "f97344e1-77e2-4319-8ee1-183e735702da"
},
"outputs": [],
"source": [
"# Every community area that appears as either a trip start or a trip end.\n",
"# Each `.unique()` result is turned into a set before combining: `+` on the two\n",
"# arrays is element-wise arithmetic, not a union.\n",
"communities = set(df[\"Start Community Area Number\"].unique()) | set(df[\"End Community Area Number\"].unique())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c403e385-1f00-4dbc-a867-25d2e11b07d5",
"metadata": {
"id": "c403e385-1f00-4dbc-a867-25d2e11b07d5"
},
"outputs": [],
"source": [
"len(communities)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f38f6c51-859e-4a55-b6a5-20e68b18252a",
"metadata": {
"id": "f38f6c51-859e-4a55-b6a5-20e68b18252a"
},
"outputs": [],
"source": [
"df[\"Start Time\"] = pd.to_datetime(df[\"Start Time\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0b94d62-a159-4b8c-98eb-a11b4ff4adcc",
"metadata": {
"id": "b0b94d62-a159-4b8c-98eb-a11b4ff4adcc"
},
"outputs": [],
"source": [
"df[\"End Time\"] = pd.to_datetime(df[\"End Time\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f960381b-e875-4d8e-bcf8-60f786128e28",
"metadata": {
"id": "f960381b-e875-4d8e-bcf8-60f786128e28",
"scrolled": true
},
"outputs": [],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dde242e4-6720-40b4-9ddc-6f54d93cbb20",
"metadata": {
"id": "dde242e4-6720-40b4-9ddc-6f54d93cbb20"
},
"outputs": [],
"source": [
"df[\"Start Time\"].min(), df[\"Start Time\"].max(), df[\"End Time\"].min(), df[\"End Time\"].max()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e11d210a-d174-4529-925a-29ddc86bb9d6",
"metadata": {
"id": "e11d210a-d174-4529-925a-29ddc86bb9d6"
},
"outputs": [],
"source": [
"# Map each community area number to a dense 0-based index.\n",
"# Sort before enumerating so an area gets the same index on every run;\n",
"# the iteration order of a set is an implementation detail.\n",
"community_map = {area: idx for idx, area in enumerate(sorted(communities))}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f782954d-48ec-4c09-a3cb-2b2927cc0c98",
"metadata": {
"id": "f782954d-48ec-4c09-a3cb-2b2927cc0c98"
},
"outputs": [],
"source": [
"# Bounds of the analysis window, both at midnight.\n",
"# `datetime` is already imported in the first cell, so it is not re-imported here.\n",
"start = datetime(2020, 8, 12)\n",
"end = datetime(2020, 12, 12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d07ede9b-7a8d-4a1e-9eaf-432d2478f22b",
"metadata": {
"id": "d07ede9b-7a8d-4a1e-9eaf-432d2478f22b"
},
"outputs": [],
"source": [
"start, end"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a9b13f8-2a1f-4eef-8503-993a753e5606",
"metadata": {
"id": "1a9b13f8-2a1f-4eef-8503-993a753e5606"
},
"outputs": [],
"source": [
"(end - start).total_seconds()/(60 * 60)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3dedc91-bf17-4f7a-81c2-a3d3648a4d72",
"metadata": {
"id": "f3dedc91-bf17-4f7a-81c2-a3d3648a4d72"
},
"outputs": [],
"source": [
"2928 + 24"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ad49821-b152-4227-81fb-40b2687d71c8",
"metadata": {
"id": "8ad49821-b152-4227-81fb-40b2687d71c8"
},
"outputs": [],
"source": [
"df[\"start_midx\"] = df[\"Start Community Area Number\"].map(community_map)\n",
"df[\"end_midx\"] = df[\"End Community Area Number\"].map(community_map)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93d4c5cb-a009-49ab-9631-0b90dd06dad0",
"metadata": {
"id": "93d4c5cb-a009-49ab-9631-0b90dd06dad0"
},
"outputs": [],
"source": [
"# Number of trips per (start time, start-area index), kept as a one-column frame.\n",
"start_trips = df.groupby([\"Start Time\", \"start_midx\"])[\"Trip ID\"].count().to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b543c975-204b-4c2c-acd8-75bab929da80",
"metadata": {
"id": "b543c975-204b-4c2c-acd8-75bab929da80"
},
"outputs": [],
"source": [
"# Number of trips per (end time, end-area index), kept as a one-column frame.\n",
"end_trips = df.groupby([\"End Time\", \"end_midx\"])[\"Trip ID\"].count().to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9651eea-5181-4ae5-a342-6e622076ae02",
"metadata": {
"id": "f9651eea-5181-4ae5-a342-6e622076ae02"
},
"outputs": [],
"source": [
"# Sanity-check a (time, area index) lookup on the grouped counts.\n",
"# The key is taken from the index itself so the lookup cannot miss, and only\n",
"# names defined earlier in this notebook are used.\n",
"sample_key = start_trips.index[0]\n",
"start_trips.loc[sample_key, \"Trip ID\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe68a2f9-034d-4aea-9fa8-ac147f88a1c8",
"metadata": {
"id": "fe68a2f9-034d-4aea-9fa8-ac147f88a1c8"
},
"outputs": [],
"source": [
"in_map = np.zeros(64)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e118c0a4-60e4-413a-8d12-39ca32f646aa",
"metadata": {
"id": "e118c0a4-60e4-413a-8d12-39ca32f646aa"
},
"outputs": [],
"source": [
"in_map.reshape(8,8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "175845f6-2893-49f6-a7a8-4f24f17db903",
"metadata": {
"id": "175845f6-2893-49f6-a7a8-4f24f17db903"
},
"outputs": [],
"source": [
"# Build one (check-in grid, check-out grid) pair per hourly slot.\n",
"# `np`, `timedelta`, `start`, `end`, `start_trips` and `end_trips` come from earlier cells.\n",
"N_AREAS = 64  # length of the flat per-slot vector; must equal the product of GRID_SHAPE\n",
"GRID_SHAPE = (8, 8)\n",
"# Hourly slots from `start` through 24 hours past `end` (2928 + 24 = 2952 for the dates above).\n",
"n_slots = int((end - start) / timedelta(hours=1)) + 24\n",
"\n",
"# Plain dicts keyed by (timestamp, area index). A missing key means no trips were\n",
"# recorded for that slot and area, so `.get(..., 0)` replaces exception handling.\n",
"start_counts = start_trips[\"Trip ID\"].to_dict()\n",
"end_counts = end_trips[\"Trip ID\"].to_dict()\n",
"\n",
"timeslots = []\n",
"time_clips = []\n",
"\n",
"for hour_offset in range(n_slots):\n",
"    slot = start + timedelta(hours=hour_offset)\n",
"    timeslots.append(slot)\n",
"    check_in_map = np.zeros(N_AREAS)\n",
"    check_out_map = np.zeros(N_AREAS)\n",
"    for area in range(N_AREAS):\n",
"        # The two tables are looked up independently, so an area with no trip\n",
"        # starts in this slot still receives its trip-end count.\n",
"        check_in_map[area] = start_counts.get((slot, area), 0)\n",
"        check_out_map[area] = end_counts.get((slot, area), 0)\n",
"\n",
"    time_clips.append((check_in_map.reshape(GRID_SHAPE), check_out_map.reshape(GRID_SHAPE)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fac14eaa-6b8a-404c-a3a7-6d2342836646",
"metadata": {
"id": "fac14eaa-6b8a-404c-a3a7-6d2342836646"
},
"outputs": [],
"source": [
"timeslots[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cc1205f-8246-425e-9435-0b9011cca921",
"metadata": {
"id": "1cc1205f-8246-425e-9435-0b9011cca921"
},
"outputs": [],
"source": [
"data = np.array(time_clips)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35867474-d779-4eaa-b0ad-864bb2a41e65",
"metadata": {
"id": "35867474-d779-4eaa-b0ad-864bb2a41e65"
},
"outputs": [],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74442b4c-ed9a-4baa-8bb6-6b2527ff08fc",
"metadata": {
"id": "74442b4c-ed9a-4baa-8bb6-6b2527ff08fc"
},
"outputs": [],
"source": [
"np.save(\"./scooter_data\", data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44960cb7-8949-4df2-99a5-d00865e94004",
"metadata": {
"id": "44960cb7-8949-4df2-99a5-d00865e94004"
},
"outputs": [],
"source": [
"data = np.load(\"./scooter_data.npy\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "237ec1ab-7fb7-41b1-b9bf-07808cf2e38d",
"metadata": {
"id": "237ec1ab-7fb7-41b1-b9bf-07808cf2e38d"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "py310",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}