# grid_search.py

from itertools import product

from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

from utils import get_data, get_model_spec
"""
Best accuracy: 0.7871077184054284
Min Child Weight: 1
Gamma:  0.001
Subsample 1.0
Colsample_bytree: 0.5
Max depth: 40
Eta 0.35
"""
def get_model_spec(model, X_train, X_test, Y_train, Y_test):
    """Fit a freshly built model and return its accuracy on the test split.

    Note: this definition shadows the ``get_model_spec`` name imported from
    ``utils`` at the top of the file.

    Args:
        model: Zero-argument callable that returns a new estimator exposing
            ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``fit``.
        X_test: Features passed to ``predict``.
        Y_train: Training labels passed to ``fit``.
        Y_test: Reference labels the predictions are scored against.

    Returns:
        The value of ``accuracy_score(Y_test, predictions)``.
    """
    estimator = model()
    estimator.fit(X_train, Y_train)
    predictions = estimator.predict(X_test)
    return accuracy_score(Y_test, predictions)

# Candidate values for each XGBClassifier hyper-parameter. Single-element
# lists pin a parameter; with the values below only `l` (forwarded as
# reg_lambda) is actually swept.
min_child_weight = [0.002]
gamma = [0.001]
l = [1, 5, 10, 20, 30]
eta = [0.30]  # 0.35
subsample = [1]
colsample_bytree = [0.01]
max_depth = [35]  # 35

# NOTE(review): the meaning of the (1, 1, 8) arguments is defined by
# utils.get_data, which is not visible here -- confirm before changing them.
X_train, X_test, Y_train, Y_test = get_data(1, 1, 8)

best_accuracy = 0
# product() yields the combinations in the same order as one nested loop per
# list (the last list varies fastest), without seven levels of indentation.
for (
    chosen_min_child_weight,
    chosen_gamma,
    chosen_subsample,
    chosen_colsample_bytree,
    chosen_max_depth,
    chosen_eta,
    chosen_lambda,
) in product(
    min_child_weight,
    gamma,
    subsample,
    colsample_bytree,
    max_depth,
    eta,
    l,
):
    # get_model_spec invokes the factory immediately, so closing over the
    # loop variables is safe here.
    model = lambda: XGBClassifier(
        min_child_weight=chosen_min_child_weight,
        gamma=chosen_gamma,
        subsample=chosen_subsample,
        colsample_bytree=chosen_colsample_bytree,
        max_depth=chosen_max_depth,
        eta=chosen_eta,
        reg_lambda=chosen_lambda,
    )
    accuracy = get_model_spec(model, X_train, X_test, Y_train, Y_test)
    if accuracy > best_accuracy:
        # Report every strictly better combination as soon as it is found.
        best_accuracy = accuracy
        print("Best accuracy:", accuracy)
        print("Min Child Weight:", chosen_min_child_weight)
        print("Gamma: ", chosen_gamma)
        print("Subsample", chosen_subsample)
        print("Colsample_bytree:", chosen_colsample_bytree)
        print("Max depth:", chosen_max_depth)
        print("Eta", chosen_eta)
        print("LAmbda: ", chosen_lambda)


"""
X_train, X_test, Y_train, Y_test = get_data(1, 1, 4)
best_accuracy = 0
tol = [0.1,0.15,0.2,0.3,0.35,0.4,0.20]
c = [1]
solver = ['sag']
for chosen_tol in tol:
    for chosen_c in c:
        for chosen_solver in solver:
            model = lambda: LogisticRegression(
                tol=chosen_tol,
                C=chosen_c,
                solver=chosen_solver)
            accuracy = get_model_spec(model,X_train,X_test,Y_train,Y_test)
            if(accuracy > best_accuracy):
                best_accuracy = accuracy
                print("Accuracy: ",accuracy)
                print("Tol: ", chosen_tol)
                print("C:" ,chosen_c)
                print("solver:", chosen_solver)
"""
	from pandas import read_csv
	from utils import get_data, get_model_spec
	from xgboost import XGBClassifier
	from sklearn.metrics import accuracy_score
	from sklearn.linear_model import LogisticRegression
	"""
	Best accuracy: 0.7871077184054284
	Min Child Weight: 1
	Gamma: 0.001
	Subsample 1.0
	Colsample_bytree: 0.5
	Max depth: 40
	Eta 0.35
	"""
	def get_model_spec(model, X_train, X_test, Y_train, Y_test):
	    """Fit a freshly built model and return its accuracy on the test split.

	    Args:
	        model: Zero-argument callable that returns a new estimator
	            exposing ``fit(X, y)`` and ``predict(X)``.
	        X_train: Training features passed to ``fit``.
	        X_test: Features passed to ``predict``.
	        Y_train: Training labels passed to ``fit``.
	        Y_test: Reference labels the predictions are scored against.

	    Returns:
	        The value of ``accuracy_score(Y_test, predictions)``.
	    """
	    best_model = model()
	    best_model.fit(X_train, Y_train)
	    y_pred = best_model.predict(X_test)
	    return accuracy_score(Y_test, y_pred)

	# Candidate values for each XGBClassifier hyper-parameter. Single-element
	# lists pin a parameter; with the values below only `l` (forwarded as
	# reg_lambda) is actually swept.
	min_child_weight = [0.002]
	gamma = [0.001]
	l = [1, 5, 10, 20, 30]
	eta = [0.30]  # 0.35
	subsample = [1]
	colsample_bytree = [0.01]
	max_depth = [35]  # 35

	# NOTE(review): the meaning of the (1, 1, 8) arguments is defined by
	# utils.get_data, which is not visible here -- confirm before changing them.
	X_train, X_test, Y_train, Y_test = get_data(1, 1, 8)

	best_accuracy = 0
	# product() yields the combinations in the same order as one nested loop
	# per list (the last list varies fastest), so no deep nesting is needed.
	for (
	    chosen_min_child_weight,
	    chosen_gamma,
	    chosen_subsample,
	    chosen_colsample_bytree,
	    chosen_max_depth,
	    chosen_eta,
	    chosen_lambda,
	) in product(
	    min_child_weight,
	    gamma,
	    subsample,
	    colsample_bytree,
	    max_depth,
	    eta,
	    l,
	):
	    # get_model_spec invokes the factory immediately, so closing over the
	    # loop variables is safe here.
	    model = lambda: XGBClassifier(
	        min_child_weight=chosen_min_child_weight,
	        gamma=chosen_gamma,
	        subsample=chosen_subsample,
	        colsample_bytree=chosen_colsample_bytree,
	        max_depth=chosen_max_depth,
	        eta=chosen_eta,
	        reg_lambda=chosen_lambda,
	    )
	    accuracy = get_model_spec(model, X_train, X_test, Y_train, Y_test)
	    if accuracy > best_accuracy:
	        # Report every strictly better combination as soon as it is found.
	        best_accuracy = accuracy
	        print("Best accuracy:", accuracy)
	        print("Min Child Weight:", chosen_min_child_weight)
	        print("Gamma: ", chosen_gamma)
	        print("Subsample", chosen_subsample)
	        print("Colsample_bytree:", chosen_colsample_bytree)
	        print("Max depth:", chosen_max_depth)
	        print("Eta", chosen_eta)
	        print("LAmbda: ", chosen_lambda)


	"""
	X_train, X_test, Y_train, Y_test = get_data(1, 1, 4)
	best_accuracy = 0
	tol = [0.1,0.15,0.2,0.3,0.35,0.4,0.20]
	c = [1]
	solver = ['sag']
	for chosen_tol in tol:
	for chosen_c in c:
	for chosen_solver in solver:
	model = lambda: LogisticRegression(
	tol=chosen_tol,
	C=chosen_c,
	solver=chosen_solver)
	accuracy = get_model_spec(model,X_train,X_test,Y_train,Y_test)
	if(accuracy > best_accuracy):
	best_accuracy = accuracy
	print("Accuracy: ",accuracy)
	print("Tol: ", chosen_tol)
	print("C:" ,chosen_c)
	print("solver:", chosen_solver)
	"""