from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
# Assuming `model`, `X_train`, `y_train`, `kf`, and the `mae_scorer` used below are defined earlier in your code.
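# If they are not, a minimal self-contained sketch (placeholder data and
# settings, not part of the original pipeline):
from sklearn.datasets import make_regression
X_train, y_train = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# scikit-learn scorers follow a "greater is better" convention, so MAE is
# negated here; the objective below flips the sign back into a positive loss.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)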
# Define the search space for hyperopt
search_space = {
    'max_depth': hp.quniform('max_depth', 3, 6, 1),  # quniform samples floats; cast to int before use
    'min_samples_leaf': hp.uniform('min_samples_leaf', 0.04, 0.08),
    'max_features': hp.uniform('max_features', 0.2, 0.8)
}
# Define the objective function to minimize
def objective(params):
    params['max_depth'] = int(params['max_depth'])  # quniform samples floats; max_depth must be an int
    model.set_params(**params)
    scores = cross_val_score(model, X_train, y_train, cv=kf, scoring=mae_scorer)
    # mae_scorer returns negated MAE (scorers are "greater is better"), so flip
    # the sign to get a positive loss for fmin to minimize
    return {'loss': -np.mean(scores), 'status': STATUS_OK}
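# Quick smoke test (sketch): the objective should return a finite loss for a
# hand-picked point from the search space before handing it to fmin.
# objective({'max_depth': 4.0, 'min_samples_leaf': 0.05, 'max_features': 0.5})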
# Initialize Trials to store optimization results
trials = Trials()
# Use fmin from hyperopt to perform Bayesian optimization
best_hyperparams = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=50,  # adjust the number of evaluations as needed
    trials=trials,
    verbose=1,
    rstate=np.random.default_rng(42)  # hyperopt >= 0.2.7 expects a Generator, not a RandomState
)
# Get the best hyperparameters (fmin returns raw values, so quniform
# parameters come back as floats and must be cast to int)
best_bayes_hyperparams = {key: best_hyperparams[key] for key in search_space}
best_bayes_hyperparams['max_depth'] = int(best_bayes_hyperparams['max_depth'])
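# Alternative (sketch): hyperopt's space_eval maps fmin's raw output back
# through the search-space definitions; quniform values still come back as
# floats, so the int cast above is needed either way.
# from hyperopt import space_eval
# best_bayes_hyperparams = space_eval(search_space, best_hyperparams)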
# Set the best hyperparameters to the model
model.set_params(**best_bayes_hyperparams)
# Fit the model with the best hyperparameters
model.fit(X_train, y_train)
# Save the best model
joblib.dump(model, 'my_best_bayes_model.pkl')
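# Reloading later (usage sketch; the path matches the dump above):
# loaded_model = joblib.load('my_best_bayes_model.pkl')
# loaded_model.predict(X_train[:5])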
# Access the results from hyperopt (the loss is the cross-validated MAE,
# so lower is better; labelling it 'Accuracy' would be misleading here)
hyperopt_results = pd.DataFrame(
    {key: [t['misc']['vals'][key][0] for t in trials.trials] for key in search_space}
)
hyperopt_results['MAE'] = trials.losses()
# Visualize one parameter's contribution to the optimal MAE (scatterplot; a kdeplot works too)
plt.scatter(hyperopt_results['max_features'], hyperopt_results['MAE'], s=100, alpha=0.5)
plt.xlabel('max_features')
plt.ylabel('Cross-validated MAE')
plt.show()
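# Extension (sketch): one panel per searched hyperparameter makes it easier to
# see which parameter drives the MAE; assumes hyperopt_results holds one
# numeric column per parameter, as built above.
fig, axes = plt.subplots(1, len(search_space), figsize=(15, 4), sharey=True)
for ax, param in zip(axes, search_space):
    ax.scatter(hyperopt_results[param], hyperopt_results['MAE'], s=100, alpha=0.5)
    ax.set_xlabel(param)
axes[0].set_ylabel('Cross-validated MAE')
plt.show()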