Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
binary-classification-of-tweets-about-protest/grid_search.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
83 lines (77 sloc)
3.07 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import product

from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

from utils import get_data, get_model_spec
""" | |
Best accuracy: 0.7871077184054284 | |
Min Child Weight: 1 | |
Gamma: 0.001 | |
Subsample 1.0 | |
Colsample_bytree: 0.5 | |
Max depth: 40 | |
Eta 0.35 | |
""" | |
def get_model_spec(model, X_train, X_test, Y_train, Y_test):
    """Train a freshly built model and return its accuracy on the test split.

    Args:
        model: Zero-argument callable that returns a new, unfitted estimator
            exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``fit``.
        X_test: Features passed to ``predict``.
        Y_train: Training labels passed to ``fit``.
        Y_test: Reference labels the predictions are scored against.

    Returns:
        The value of ``accuracy_score(Y_test, predictions)``.
    """
    estimator = model()
    estimator.fit(X_train, Y_train)
    predictions = estimator.predict(X_test)
    return accuracy_score(Y_test, predictions)
# Exhaustive grid search over XGBClassifier hyper-parameters.
# Each list below holds the candidate values for one keyword argument; a list
# with a single entry pins that parameter for the current run.
min_child_weight = [0.002]
gamma = [0.001]
l = [1, 5, 10, 20, 30]  # candidates for XGBClassifier's reg_lambda
eta = [0.30]  # 0.35
subsample = [1]
colsample_bytree = [0.01]
max_depth = [35]  # 35

# The meaning of the (1, 1, 8) arguments is defined by utils.get_data.
X_train, X_test, Y_train, Y_test = get_data(1, 1, 8)

best_accuracy = 0

# product() varies its right-most iterable fastest, so combinations are
# visited in the same order as nested loops with min_child_weight outermost
# and the reg_lambda candidates innermost.
for (
    chosen_min_child_weight,
    chosen_gamma,
    chosen_subsample,
    chosen_colsample_bytree,
    chosen_max_depth,
    chosen_eta,
    chosen_lambda,
) in product(
    min_child_weight,
    gamma,
    subsample,
    colsample_bytree,
    max_depth,
    eta,
    l,
):
    # get_model_spec takes a zero-argument factory and calls it right away,
    # so the lambda reading the loop variables at call time is safe here.
    model = lambda: XGBClassifier(
        min_child_weight=chosen_min_child_weight,
        gamma=chosen_gamma,
        subsample=chosen_subsample,
        colsample_bytree=chosen_colsample_bytree,
        max_depth=chosen_max_depth,
        eta=chosen_eta,
        reg_lambda=chosen_lambda,
    )
    accuracy = get_model_spec(model, X_train, X_test, Y_train, Y_test)

    # Report each new best as soon as it is found; the last report printed
    # is therefore the best combination of the whole search.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        print("Best accuracy:", accuracy)
        print("Min Child Weight:", chosen_min_child_weight)
        print("Gamma: ", chosen_gamma)
        print("Subsample", chosen_subsample)
        print("Colsample_bytree:", chosen_colsample_bytree)
        print("Max depth:", chosen_max_depth)
        print("Eta", chosen_eta)
        print("LAmbda: ", chosen_lambda)
""" | |
X_train, X_test, Y_train, Y_test = get_data(1, 1, 4) | |
best_accuracy = 0 | |
tol = [0.1,0.15,0.2,0.3,0.35,0.4,0.20] | |
c = [1] | |
solver = ['sag'] | |
for chosen_tol in tol: | |
for chosen_c in c: | |
for chosen_solver in solver: | |
model = lambda: LogisticRegression( | |
tol=chosen_tol, | |
C=chosen_c, | |
solver=chosen_solver) | |
accuracy = get_model_spec(model,X_train,X_test,Y_train,Y_test) | |
if(accuracy > best_accuracy): | |
best_accuracy = accuracy | |
print("Accuracy: ",accuracy) | |
print("Tol: ", chosen_tol) | |
print("C:" ,chosen_c) | |
print("solver:", chosen_solver) | |
""" |