-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add PHM08 and an initial notebook to test it. Also add .gitignore
- Loading branch information
RussellBentley
committed
Apr 12, 2024
1 parent
87662c8
commit ed2cd79
Showing
8 changed files
with
131,225 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.ipynb_checkpoints |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "d93bf412-ff4d-4f04-a8af-3d101b2b1e48", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import math\n", | ||
"\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from matplotlib.colors import ListedColormap\n", | ||
"from matplotlib import ticker\n", | ||
"import matplotlib as mpl\n", | ||
"\n", | ||
"from sklearn.datasets import make_circles, make_classification, make_moons\n", | ||
"from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier\n", | ||
"from sklearn.inspection import DecisionBoundaryDisplay\n", | ||
"from sklearn.tree import DecisionTreeClassifier\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"\n", | ||
"from imblearn.ensemble import BalancedRandomForestClassifier" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "fb307961-d858-45e9-80ae-505806ec4063", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"train data raw: (45918, 26)\n", | |
"test data raw: (29820, 26)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Load the raw train and test tables and report their shapes.\n", | |
"train_data_raw = np.loadtxt(\"Challenge_Data/train.txt\")\n", | |
"print(f\"train data raw: {train_data_raw.shape}\")\n", | |
"\n", | |
"test_data_raw = np.loadtxt(\"Challenge_Data/test.txt\")\n", | |
"# Report the shape of the test array itself, not the train array.\n", | |
"print(f\"test data raw: {test_data_raw.shape}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "86dba2b5-498f-4cf0-977c-867fb81406ee", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Two preprocessing steps are needed:\n", | |
"# - strip the unit number (column 0)\n", | |
"# - generate the labels\n", | |
"def process_raw_data(data):\n", | |
"    \"\"\"Split a raw 2-D table into feature columns and per-row labels.\n", | |
"\n", | |
"    A row is labelled 1 when it is the last row of the table or when its\n", | |
"    column-0 value (the unit number) differs from the next row's, i.e. it\n", | |
"    is the final row of its unit. Every other row is labelled 0.\n", | |
"\n", | |
"    Returns the tuple (data[:, 1:], labels), where labels is a list of\n", | |
"    ints with one entry per row of data.\n", | |
"    \"\"\"\n", | |
"    n_rows, _ = data.shape\n", | |
"    final_row = n_rows - 1\n", | |
"    # The `or` short-circuits on the final row, so row + 1 is never read past the end.\n", | |
"    labels = [\n", | |
"        int(row == final_row or data[row][0] != data[row + 1][0])\n", | |
"        for row in range(n_rows)\n", | |
"    ]\n", | |
"\n", | |
"    return (data[:, 1:], labels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "0afb4e8b-0e22-4296-8e7f-5dd2e7019722", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x train: (45918, 25), y train: 45918, # pos: 218\n", | |
"x test: (29820, 25), y test: 29820, # pos: 218\n" | |
] | |
} | |
], | |
"source": [ | |
"# Build the feature matrix and labels for each split, then report\n", | |
"# the sizes and the number of positive (label 1) rows.\n", | |
"x_train, y_train = process_raw_data(train_data_raw)\n", | |
"print(f\"x train: {x_train.shape}, y train: {len(y_train)}, # pos: {np.sum(y_train)}\")\n", | |
"\n", | |
"x_test, y_test = process_raw_data(test_data_raw)\n", | |
"print(f\"x test: {x_test.shape}, y test: {len(y_test)}, # pos: {np.sum(y_test)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"id": "205561a6-7d65-422e-ba47-79f4a4e4398c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Compute the performance metrics of a trained classifier.\n", | |
"# Inputs:\n", | |
"# - clf: a trained classifier\n", | |
"# - x, y: test data\n", | |
"# Returns one label per sample:\n", | |
"# 1 -> true positive\n", | |
"# 2 -> true negative\n", | |
"# 3 -> false positive\n", | |
"# 4 -> false negative\n", | |
"def score_classifier(clf, x, y):\n", | |
"    \"\"\"Print confusion counts and derived metrics; return per-sample labels.\n", | |
"\n", | |
"    clf must provide score(x, y) and predict(x). Class 1 is treated as\n", | |
"    positive and class 0 as negative. The returned list holds one code\n", | |
"    (1-4, see the legend above) for every sample predicted as 0 or 1.\n", | |
"    \"\"\"\n", | |
"    score = clf.score(x, y)\n", | |
"    predictions = clf.predict(x)\n", | |
"\n", | |
"    true_pos = 0\n", | |
"    true_neg = 0\n", | |
"    false_pos = 0\n", | |
"    false_neg = 0\n", | |
"    labels = []\n", | |
"    # Pair each true label with its prediction. The feature rows are not\n", | |
"    # needed here, so x never has to support integer row indexing.\n", | |
"    for y_i, y_h in zip(y, predictions):\n", | |
"        if y_i == y_h and y_h == 1:\n", | |
"            true_pos += 1\n", | |
"            labels.append(1)\n", | |
"        elif y_i == y_h and y_h == 0:\n", | |
"            true_neg += 1\n", | |
"            labels.append(2)\n", | |
"        elif y_i != y_h and y_h == 1:\n", | |
"            false_pos += 1\n", | |
"            labels.append(3)\n", | |
"        elif y_i != y_h and y_h == 0:\n", | |
"            false_neg += 1\n", | |
"            labels.append(4)\n", | |
"\n", | |
"    # Metrics from Table 2 of paper; each ratio stays 0 when its denominator is 0.\n", | |
"    tnr = 0\n", | |
"    if true_neg + false_pos != 0:\n", | |
"        tnr = true_neg / (true_neg + false_pos)\n", | |
"\n", | |
"    tpr = 0\n", | |
"    if true_pos + false_neg != 0:\n", | |
"        tpr = true_pos / (true_pos + false_neg)\n", | |
"\n", | |
"    g_mean = math.sqrt(tnr * tpr)\n", | |
"\n", | |
"    precision = 0\n", | |
"    if true_pos + false_pos != 0:\n", | |
"        precision = true_pos / (true_pos + false_pos)\n", | |
"\n", | |
"    f_measure = 0\n", | |
"    if precision + tpr != 0:\n", | |
"        f_measure = (2 * precision * tpr) / (precision + tpr)\n", | |
"\n", | |
"    print(f\"TP: {true_pos}, TN: {true_neg}, FP: {false_pos}, FN: {false_neg}\")\n", | |
"    print(f\"tnr: {tnr}, tpr: {tpr}, g_mean: {g_mean}, precision: {precision}, f_measure: {f_measure}, score: {score}\")\n", | |
"\n", | |
"    return labels" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "65e24363-933c-4bf6-bf3d-3a2c524a7dd2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"random forest\n", | |
"TP: 0, TN: 29602, FP: 0, FN: 218\n", | |
"tnr: 1.0, tpr: 0.0, g_mean: 0.0, precision: 0, f_measure: 0, score: 0.9926894701542589\n", | |
"weighted random forest\n", | |
"TP: 0, TN: 29602, FP: 0, FN: 218\n", | |
"tnr: 1.0, tpr: 0.0, g_mean: 0.0, precision: 0, f_measure: 0, score: 0.9926894701542589\n", | |
"balanced forest\n", | |
"TP: 5, TN: 29531, FP: 71, FN: 213\n", | |
"tnr: 0.997601513411256, tpr: 0.022935779816513763, g_mean: 0.15126390400958672, precision: 0.06578947368421052, f_measure: 0.034013605442176874, score: 0.9904761904761905\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fixed seed so the three forests give the same result on every re-run.\n", | |
"SEED = 42\n", | |
"\n", | |
"# Baseline: plain random forest. score_classifier computes and prints the\n", | |
"# accuracy itself, so no separate .score() call is needed.\n", | |
"rf_clf = RandomForestClassifier(random_state=SEED)\n", | |
"rf_clf.fit(x_train, y_train)\n", | |
"print(\"random forest\")\n", | |
"rf_labels = score_classifier(rf_clf, x_test, y_test)\n", | |
"\n", | |
"# Same forest with the rare positive class (1) weighted far above class 0.\n", | |
"clf2 = RandomForestClassifier(class_weight={0: 0.01, 1: 0.99}, random_state=SEED)\n", | |
"clf2.fit(x_train, y_train)\n", | |
"print(\"weighted random forest\")\n", | |
"wf_labels = score_classifier(clf2, x_test, y_test)\n", | |
"\n", | |
"# Balanced random forest from imbalanced-learn.\n", | |
"bf_clf = BalancedRandomForestClassifier(\n", | |
"    sampling_strategy='all',\n", | |
"    replacement=True,\n", | |
"    bootstrap=False,\n", | |
"    random_state=SEED,\n", | |
")\n", | |
"bf_clf.fit(x_train, y_train)\n", | |
"print(\"balanced forest\")\n", | |
"bf_labels = score_classifier(bf_clf, x_test, y_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "076b6141-37c7-44b0-898c-12d0651691d3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"balanced forest\n", | |
"TP: 8, TN: 29539, FP: 63, FN: 210\n", | |
"tnr: 0.9978717654212553, tpr: 0.03669724770642202, g_mean: 0.1913613005675611, precision: 0.11267605633802817, f_measure: 0.05536332179930796, score: 0.9908450704225352\n" | |
] | |
} | |
], | |
"source": [ | |
"# Balanced forest again, now with max_features=25 (x_train has 25 feature\n", | |
"# columns) and class_weight='balanced_subsample'.\n", | |
"bf_clf = BalancedRandomForestClassifier(\n", | |
"    sampling_strategy='all',\n", | |
"    replacement=True,\n", | |
"    bootstrap=False,\n", | |
"    max_features=25,\n", | |
"    class_weight='balanced_subsample',\n", | |
")\n", | |
"bf_clf.fit(x_train, y_train)\n", | |
"print(\"balanced forest\")\n", | |
"bf_labels = score_classifier(bf_clf, x_test, y_test)" | |
] | |
}, | |
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "2dbb5d04-32d4-4d78-bdf5-66809cf526c1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Binary file not shown.
Oops, something went wrong.