Bayesian optimization (BO) is widely used for Machine Learning hyperparameter tuning. BO relies mainly on two components: a probabilistic model of the objective function, called the surrogate (generally a Gaussian process approximating the objective), which is refined sequentially as new evaluations arrive, and an acquisition function used to select the next point to evaluate in the sequential optimization procedure.
In this post, I will show how to use conformalized surrogates to tune the hyperparameters of machine learning models. In this setting, instead of relying on the closed-form posterior distribution of a Gaussian process, any conformalized surrogate can serve as a probabilistic approximation of the objective function. And since there is no closed-form expression for the acquisition function (here, the Expected Improvement over the current optimum), a Monte Carlo approximation of this expectation is used, based on simulations of the conformalized surrogate. The simulation approach is similar to the one used in this post, except that the sequential ordering does not matter here.
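To make the acquisition step more concrete, here is a minimal sketch (not GPopt's actual implementation) of how the Expected Improvement can be approximated by Monte Carlo from simulations of a conformalized surrogate. The names surrogate_sims (simulated objective values, one row per candidate point and one column per replication) and y_best (the current minimum of the objective) are placeholders introduced for illustration only.

import numpy as np

def expected_improvement_mc(surrogate_sims, y_best):
    # improvement of each simulated draw over the current best (the objective is minimized)
    improvement = np.maximum(y_best - surrogate_sims, 0.0)
    # average over the replications: one approximate EI value per candidate point
    return improvement.mean(axis=1)

# the next point to evaluate is the candidate with the highest approximate EI
# next_idx = np.argmax(expected_improvement_mc(surrogate_sims, y_best))

In the code below, the replications argument of ns.CustomRegressor plays the role of the number of simulated draws used in this averaging, and method="mc" in gp_opt.optimize triggers the Monte Carlo evaluation of the acquisition.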
0 – Install and load packages
!pip install nnetsauce
!pip install git+https://github.com/Techtonique/GPopt.git --upgrade --no-cache-dir
import GPopt as gp
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import metrics
from time import time
1 – Cross-validation and hyperparameter tuning
def ridge2_cv(X_train, y_train,
              lambda1=0.1,
              lambda2=0.1,
              n_hidden_features=5,
              n_clusters=5,
              dropout=0.8,
              solver="L-BFGS-B"):

    estimator = ns.Ridge2Classifier(lambda1=lambda1,
                                    lambda2=lambda2,
                                    n_hidden_features=n_hidden_features,
                                    n_clusters=n_clusters,
                                    dropout=dropout,
                                    solver=solver)

    return -cross_val_score(estimator, X_train, y_train,
                            scoring='accuracy',
                            cv=5, n_jobs=None,
                            verbose=0).mean()


def optimize_ridge2(X_train, y_train, solver="L-BFGS-B", surrogate="rf"):

    # objective function for hyperparameter tuning
    def crossval_objective(x):
        return ridge2_cv(X_train=X_train,
                         y_train=y_train,
                         lambda1=10**x[0],
                         lambda2=10**x[1],
                         n_hidden_features=int(x[2]),
                         n_clusters=int(x[3]),
                         dropout=x[4],
                         solver=solver)

    if surrogate == "rf":
        gp_opt = gp.GPOpt(objective_func=crossval_objective,
                          lower_bound=np.array([-10, -10, 3, 2, 0.6]),
                          upper_bound=np.array([10, 10, 100, 5, 1]),
                          surrogate_obj=ns.CustomRegressor(obj=RandomForestRegressor(),  # it's a conformalized quasi-randomized network
                                                           replications=250,  # number of simulations for evaluating the expected improvement
                                                           type_pi="kde"),  # Kernel Density Estimation is used for simulation
                          acquisition="ei",  # expected improvement by simulation
                          params_names=["lambda1", "lambda2", "n_hidden_features", "n_clusters", "dropout"],
                          n_init=10,
                          n_iter=90,
                          seed=3137)
    elif surrogate == "enet":
        gp_opt = gp.GPOpt(objective_func=crossval_objective,
                          lower_bound=np.array([-10, -10, 3, 2, 0.6]),
                          upper_bound=np.array([10, 10, 100, 5, 1]),
                          surrogate_obj=ns.CustomRegressor(obj=ElasticNetCV(),  # the model is nonlinear: a conformalized quasi-randomized network
                                                           replications=250,  # number of simulations for evaluating the expected improvement
                                                           type_pi="kde"),  # Kernel Density Estimation is used for simulation
                          acquisition="ei",  # expected improvement by simulation
                          params_names=["lambda1", "lambda2", "n_hidden_features", "n_clusters", "dropout"],
                          n_init=10,
                          n_iter=90,
                          seed=3137)

    return gp_opt.optimize(method="mc", verbose=2, abs_tol=1e-3)  # Monte Carlo computation of expected improvement
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3137)

# hyperparameter tuning, surrogate = conformalized Random Forest
res_opt1 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B", surrogate="rf")
print(res_opt1)

# hyperparameter tuning with different starting values for the optimization algorithm, surrogate = conformalized Random Forest
res_opt2 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B-lstsq", surrogate="rf")
print(res_opt2)

# hyperparameter tuning, surrogate = conformalized ElasticNet
res_opt3 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B", surrogate="enet")
print(res_opt3)

# hyperparameter tuning with different starting values for the optimization algorithm, surrogate = conformalized ElasticNet
res_opt4 = optimize_ridge2(X_train, y_train, solver="L-BFGS-B-lstsq", surrogate="enet")
print(res_opt4)
res_opt1.best_params["lambda1"] = 10**(res_opt1.best_params["lambda1"]) res_opt1.best_params["lambda2"] = 10**(res_opt1.best_params["lambda2"]) res_opt1.best_params["n_hidden_features"] = int(res_opt1.best_params["n_hidden_features"]) res_opt1.best_params["n_clusters"] = int(res_opt1.best_params["n_clusters"]) print(res_opt1.best_params) res_opt2.best_params["lambda1"] = 10**(res_opt2.best_params["lambda1"]) res_opt2.best_params["lambda2"] = 10**(res_opt2.best_params["lambda2"]) res_opt2.best_params["n_hidden_features"] = int(res_opt2.best_params["n_hidden_features"]) res_opt2.best_params["n_clusters"] = int(res_opt2.best_params["n_clusters"]) print(res_opt2.best_params) res_opt3.best_params["lambda1"] = 10**(res_opt3.best_params["lambda1"]) res_opt3.best_params["lambda2"] = 10**(res_opt3.best_params["lambda2"]) res_opt3.best_params["n_hidden_features"] = int(res_opt3.best_params["n_hidden_features"]) res_opt3.best_params["n_clusters"] = int(res_opt3.best_params["n_clusters"]) print(res_opt3.best_params) res_opt4.best_params["lambda1"] = 10**(res_opt4.best_params["lambda1"]) res_opt4.best_params["lambda2"] = 10**(res_opt4.best_params["lambda2"]) res_opt4.best_params["n_hidden_features"] = int(res_opt4.best_params["n_hidden_features"]) res_opt4.best_params["n_clusters"] = int(res_opt4.best_params["n_clusters"]) print(res_opt4.best_params)
{'lambda1': 0.2143160456513889, 'lambda2': 99.32768474363539, 'n_hidden_features': 3, 'n_clusters': 4, 'dropout': 0.80830078125}
{'lambda1': 1.19372502075462e-10, 'lambda2': 0.0003873778332245682, 'n_hidden_features': 5, 'n_clusters': 3, 'dropout': 0.8306396484375}
{'lambda1': 0.03853145684685379, 'lambda2': 0.0020254361391973223, 'n_hidden_features': 91, 'n_clusters': 4, 'dropout': 0.75242919921875}
{'lambda1': 1.19372502075462e-10, 'lambda2': 0.0003873778332245682, 'n_hidden_features': 5, 'n_clusters': 3, 'dropout': 0.8306396484375}
2 – Out-of-sample scores
from time import time

clf1 = ns.Ridge2Classifier(**res_opt1.best_params, solver="L-BFGS-B")
start = time()
clf1.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(f"Test set accuracy: {clf1.score(X_test, y_test)}")

clf2 = ns.Ridge2Classifier(**res_opt2.best_params, solver="L-BFGS-B-lstsq")
start = time()
clf2.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(f"Test set accuracy: {clf2.score(X_test, y_test)}")

clf3 = ns.Ridge2Classifier(**res_opt3.best_params, solver="L-BFGS-B")
start = time()
clf3.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(f"Test set accuracy: {clf3.score(X_test, y_test)}")

clf4 = ns.Ridge2Classifier(**res_opt4.best_params, solver="L-BFGS-B-lstsq")
start = time()
clf4.fit(X_train, y_train)
print(f"Elapsed: {time()-start}")
print(f"Test set accuracy: {clf4.score(X_test, y_test)}")
Elapsed: 1.5195319652557373
Test set accuracy: 0.9736842105263158
Elapsed: 1.8859667778015137
Test set accuracy: 0.9736842105263158
Elapsed: 0.5796549320220947
Test set accuracy: 0.9736842105263158
Elapsed: 0.6930491924285889
Test set accuracy: 0.9736842105263158
# confusion matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix

y_pred = clf2.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(cm, annot=True, cmap='Blues', fmt='g',
                 xticklabels=np.arange(0, 2),
                 yticklabels=np.arange(0, 2))
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()