Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"spatio_data_file = \"./data/SpatialFeatures.csv\"\n",
"# Number of identical rows in the broadcast feature matrix built below.\n",
"# NOTE(review): presumably the number of time steps in the trip data - confirm.\n",
"N_BOARD_ROWS = 1248\n",
"\n",
"raw_spatio_feature = pd.read_csv(spatio_data_file)\n",
"\n",
"# Rows with a missing BikeLane_miles value are counted as 0.\n",
"bikeLane_matrix = raw_spatio_feature[\"BikeLane_miles\"]\n",
"bikeLane_matrix = bikeLane_matrix.fillna(0).to_numpy()\n",
"\n",
"# Scale by the column maximum; skip the division when the maximum is 0 so an\n",
"# all-zero column stays all zeros instead of turning into NaN.\n",
"bikeLane_max = bikeLane_matrix.max()\n",
"if bikeLane_max != 0:\n",
"    bikeLane_matrix = bikeLane_matrix/bikeLane_max\n",
"\n",
"# Repeat the 1-D vector along a new leading axis:\n",
"# result shape is (N_BOARD_ROWS, len(bikeLane_matrix)), every row identical.\n",
"bikeLane_matrix_board = bikeLane_matrix[np.newaxis,:] * np.ones((N_BOARD_ROWS,1))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Persist the raw spatial feature table so later cells can reload it from disk.\n",
"raw_spatio_feature.to_pickle(\"./data/raw_feature.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# NOTE(security): unpickling can execute arbitrary code. Only load these files\n",
"# when they were produced by a trusted pipeline.\n",
"raw_data = pd.read_pickle(\"./data/raw_feature.pkl\")\n",
"X_train = pd.read_pickle(\"./data/X_train.pkl\")\n",
"y_train = pd.read_pickle(\"./data/y_train.pkl\")\n",
"X_test = pd.read_pickle(\"./data/X_test.pkl\")\n",
"y_test = pd.read_pickle(\"./data/y_test.pkl\")\n",
"\n",
"# Average the training targets over axis 0 once and reuse the result for both slices.\n",
"# NOTE(review): going by the variable names, index 0 holds inbound and index 1\n",
"# outbound trips - confirm against the code that writes y_train.pkl.\n",
"trip_avg = y_train.mean(axis=0)\n",
"trip_avg_in = trip_avg[0]\n",
"trip_avg_out = trip_avg[1]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x17de86c4190>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt \n",
"# Render trip_avg_in (slice 0 of the axis-0 mean of y_train) as an image.\n",
"plt.imshow(trip_avg_in)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x17de87647d0>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Render trip_avg_out (slice 1 of the axis-0 mean of y_train) as an image.\n",
"plt.imshow(trip_avg_out)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Join_Count</th>\n",
" <th>TARGET_FID</th>\n",
" <th>BlockID</th>\n",
" <th>BikeLane_miles</th>\n",
" <th>AADT</th>\n",
" <th>SpeedLimit</th>\n",
" <th>AvgSpeed</th>\n",
" <th>Pop2019</th>\n",
" <th>Pop_Density</th>\n",
" <th>Shape_Length</th>\n",
" <th>Shape_Area</th>\n",
" <th>StationNum</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>32638.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>7840.490288</td>\n",
" <td>3.787844e+06</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>7840.686448</td>\n",
" <td>3.787843e+06</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0.807055</td>\n",
" <td>21234.741935</td>\n",
" <td>25.0</td>\n",
" <td>15.0</td>\n",
" <td>1049.0</td>\n",
" <td>0.029594</td>\n",
" <td>7840.882970</td>\n",
" <td>3.787843e+06</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2.096221</td>\n",
" <td>5915.125000</td>\n",
" <td>25.0</td>\n",
" <td>19.0</td>\n",
" <td>1816.0</td>\n",
" <td>0.030252</td>\n",
" <td>7841.080477</td>\n",
" <td>3.787844e+06</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0.984320</td>\n",
" <td>23940.078947</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1892.0</td>\n",
" <td>0.022647</td>\n",
" <td>7841.277327</td>\n",
" <td>3.787843e+06</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>3</td>\n",
" <td>124</td>\n",
" <td>124</td>\n",
" <td>1.955707</td>\n",
" <td>9050.487805</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1986.0</td>\n",
" <td>0.046045</td>\n",
" <td>7840.280937</td>\n",
" <td>3.787843e+06</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>3</td>\n",
" <td>125</td>\n",
" <td>125</td>\n",
" <td>1.440449</td>\n",
" <td>9121.500000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1198.0</td>\n",
" <td>0.039726</td>\n",
" <td>7840.085072</td>\n",
" <td>3.787843e+06</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>2</td>\n",
" <td>126</td>\n",
" <td>126</td>\n",
" <td>0.985704</td>\n",
" <td>8770.901961</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>555.0</td>\n",
" <td>0.015125</td>\n",
" <td>7839.890518</td>\n",
" <td>3.787843e+06</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>4</td>\n",
" <td>127</td>\n",
" <td>127</td>\n",
" <td>0.228593</td>\n",
" <td>12266.085714</td>\n",
" <td>25.0</td>\n",
" <td>16.0</td>\n",
" <td>814.0</td>\n",
" <td>0.031125</td>\n",
" <td>7839.696621</td>\n",
" <td>3.787844e+06</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>4</td>\n",
" <td>128</td>\n",
" <td>128</td>\n",
" <td>1.619940</td>\n",
" <td>11077.037037</td>\n",
" <td>25.0</td>\n",
" <td>16.0</td>\n",
" <td>1385.0</td>\n",
" <td>0.059671</td>\n",
" <td>7839.501980</td>\n",
" <td>3.787843e+06</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>128 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" Join_Count TARGET_FID BlockID BikeLane_miles AADT \\\n",
"0 0 1 1 NaN 32638.666667 \n",
"1 0 2 2 NaN NaN \n",
"2 3 3 3 0.807055 21234.741935 \n",
"3 3 4 4 2.096221 5915.125000 \n",
"4 3 5 5 0.984320 23940.078947 \n",
".. ... ... ... ... ... \n",
"123 3 124 124 1.955707 9050.487805 \n",
"124 3 125 125 1.440449 9121.500000 \n",
"125 2 126 126 0.985704 8770.901961 \n",
"126 4 127 127 0.228593 12266.085714 \n",
"127 4 128 128 1.619940 11077.037037 \n",
"\n",
" SpeedLimit AvgSpeed Pop2019 Pop_Density Shape_Length Shape_Area \\\n",
"0 NaN NaN 0.0 0.000000 7840.490288 3.787844e+06 \n",
"1 NaN NaN 0.0 0.000000 7840.686448 3.787843e+06 \n",
"2 25.0 15.0 1049.0 0.029594 7840.882970 3.787843e+06 \n",
"3 25.0 19.0 1816.0 0.030252 7841.080477 3.787844e+06 \n",
"4 NaN NaN 1892.0 0.022647 7841.277327 3.787843e+06 \n",
".. ... ... ... ... ... ... \n",
"123 NaN NaN 1986.0 0.046045 7840.280937 3.787843e+06 \n",
"124 NaN NaN 1198.0 0.039726 7840.085072 3.787843e+06 \n",
"125 NaN NaN 555.0 0.015125 7839.890518 3.787843e+06 \n",
"126 25.0 16.0 814.0 0.031125 7839.696621 3.787844e+06 \n",
"127 25.0 16.0 1385.0 0.059671 7839.501980 3.787843e+06 \n",
"\n",
" StationNum \n",
"0 0 \n",
"1 0 \n",
"2 3 \n",
"3 3 \n",
"4 3 \n",
".. ... \n",
"123 3 \n",
"124 3 \n",
"125 2 \n",
"126 4 \n",
"127 4 \n",
"\n",
"[128 rows x 12 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import pearsonr\n",
"\n",
"\n",
"def get_feature_in_2d(df, feature_name, grid_shape=(16, 8)):\n",
"    \"\"\"Arrange one column of ``df`` on a 2-D grid.\n",
"\n",
"    Missing values are replaced by 0, the column is reshaped row-major to\n",
"    ``grid_shape`` and the row order is then reversed (last grid row first).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Table holding the feature column.\n",
"    feature_name : str\n",
"        Name of the column to arrange.\n",
"    grid_shape : tuple of int, default (16, 8)\n",
"        Target (rows, columns); rows * columns must equal ``len(df)``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        Array of shape ``grid_shape``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If ``feature_name`` is not a column of ``df``.\n",
"    ValueError\n",
"        If the column length does not match ``grid_shape``.\n",
"    \"\"\"\n",
"    # Convert to a plain ndarray before reshaping instead of handing the\n",
"    # pandas Series to np.reshape.\n",
"    feature = df[feature_name].fillna(0).to_numpy()\n",
"    # NOTE(review): the row flip presumably aligns the grid with the\n",
"    # orientation of the trip matrices - confirm.\n",
"    return feature.reshape(grid_shape)[::-1, :]\n",
"\n",
"\n",
"def cal_pearson(feature_matrix, trip_matrix, filter_zero=False, log_text=None):\n",
"    \"\"\"Print the Pearson correlation between two equally sized arrays.\n",
"\n",
"    Both inputs are flattened before being passed to ``scipy.stats.pearsonr``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    feature_matrix : array-like\n",
"        Feature values.\n",
"    trip_matrix : array-like\n",
"        Values to correlate with; must have as many elements as\n",
"        ``feature_matrix``.\n",
"    filter_zero : bool, default False\n",
"        When True, positions where the *feature* value equals 0 are dropped\n",
"        from both arrays before correlating.\n",
"    log_text : object, optional\n",
"        Printed on its own line before the results when not None.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None\n",
"        The coefficient and p-value are only printed.\n",
"    \"\"\"\n",
"    f_array = np.asarray(feature_matrix).flatten()\n",
"    t_array = np.asarray(trip_matrix).flatten()\n",
"    if filter_zero:\n",
"        # Build the mask once, from the feature values only, and apply it to\n",
"        # both arrays so the pairs stay aligned.\n",
"        keep = f_array != 0\n",
"        f_array = f_array[keep]\n",
"        t_array = t_array[keep]\n",
"    correlation, p_value = pearsonr(f_array, t_array)\n",
"    if log_text is not None:\n",
"        print(log_text)\n",
"    print(\"Pearson correlation coefficient:\", correlation)\n",
"    print(\"P-value:\", p_value)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"file_path = \"pearson_output_filter0.txt\"\n",
"\n",
"# Temporarily point sys.stdout at the report file so the print() calls inside\n",
"# cal_pearson are written to it. The try/finally guarantees that stdout is\n",
"# restored even when a correlation raises for one of the columns; otherwise\n",
"# stdout would be left pointing at a closed file.\n",
"with open(file_path,\"w\") as file:\n",
"    original_stdout = sys.stdout\n",
"    sys.stdout = file\n",
"    try:\n",
"        feature_name_list = raw_data.columns\n",
"        for feature_name in feature_name_list:\n",
"            temp_feature = get_feature_in_2d(raw_data,feature_name)\n",
"            # filter_zero=True: cells whose feature value is 0 are excluded.\n",
"            cal_pearson(temp_feature,trip_avg_in,log_text=feature_name,filter_zero=True)\n",
"            # Blank line between the per-feature entries of the report.\n",
"            print(\"\")\n",
"    finally:\n",
"        sys.stdout = original_stdout"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# feature_bikeLane = raw_data[\"BikeLane_miles\"]\n",
"# feature_bikeLane\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x17dff3d47d0>"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Arrange the BikeLane_miles column on the 2-D grid once and reuse it for the plot.\n",
"feature_bikeLane = get_feature_in_2d(raw_data,\"BikeLane_miles\")\n",
"# Same array as feature_bikeLane; it used to be recomputed with an identical call.\n",
"plot_data = feature_bikeLane\n",
"\n",
"plt.imshow(plot_data)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pearson correlation coefficient: 0.6651100883626\n",
"P-value: 1.1054968365962953e-17\n"
]
}
],
"source": [
"# Calculate the Pearson correlation between the bike-lane grid and trip_avg_out.\n",
"# Both arrays are flattened first; zero-valued cells are kept.\n",
"\n",
"correlation, p_value = pearsonr(feature_bikeLane.flatten(), trip_avg_out.flatten())\n",
"\n",
"print(\"Pearson correlation coefficient:\", correlation)\n",
"print(\"P-value:\", p_value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pearson correlation coefficient: 0.9981870630298622\n",
"P-value: 1.174508784681668e-155\n"
]
}
],
"source": [
"# Correlate the two averaged trip arrays with each other, keeping zero-valued cells.\n",
"cal_pearson(trip_avg_out,trip_avg_in,filter_zero=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}