How to tune hyperparameters over a hyperparameter space using Bayesian Optimization (in Python)?
I am trying to use Bayesian Optimization to tune the hyperparameters of a random forest regression over a hyperparameter space, using the code below, but I get the error
TypeError: __init__() got an unexpected keyword argument 'min_samples'
I get this error when running the following code:
# Import packages
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
# Create datasets
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
#Create Hyperparameter space
space = {'n_estimators': hp.choice('n_estimators', range(2, 150, 1)),
         'min_samples': hp.choice('min_samples', range(2, 100, 1)),
         'max_features': hp.choice('max_features', range(2, 100, 1)),
         'max_samples': hp.choice('max_samples', range(2, 100, 1)),
         }
#Define Objective Function
def objective(space):
    rf = RandomForestRegressor(**space)
    # fit Training model
    rf.fit(x_train, y_train)
    # Making predictions and find RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
#Surrogate Fn
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)
print(best)
print(trials.results)
I also tried listing the hyperparameters as arguments of the objective function with the code below, but I got the following error
TypeError: objective() missing 3 required positional arguments: 'min_samples', 'max_features', and 'max_samples'
#Define Objective Function
def objective(n_estimators, min_samples, max_features, max_samples):
    rf = RandomForestRegressor(n_estimators, min_samples, max_features, max_samples)
    # fit Training model
    rf.fit(x_train, y_train)
    # Making predictions and find RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
Can you tell me how to fix my code?
I was able to tune a single hyperparameter using the following code:
# Import packages
import numpy as np
import time
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
from collections import OrderedDict
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
space = hp.choice('num_leaves', range(2, 100, 1))
def objective(num_leaves):
    rf = RandomForestRegressor(num_leaves)
    rf.fit(x_train, y_train)
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)
print(best)
print(trials.results)
The problem is that RandomForestRegressor has no parameter named min_samples (see the scikit-learn documentation). You probably meant min_samples_leaf.
Just keep the upper bound of min_samples_leaf within the number of samples in your dataset.
Other than that, there is nothing wrong with your code.
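As for the second TypeError: fmin calls the objective with a single argument per trial, namely the sampled point (here a dict), so an objective declared with four required positional parameters cannot be called that way. A minimal sketch of unpacking that dict explicitly, assuming the corrected key names used in the code below and reusing the training/test arrays defined above:

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

def objective(params):
    # fmin passes one dict per trial; unpack it into keyword arguments
    rf = RandomForestRegressor(n_estimators=params['n_estimators'],
                               min_samples_leaf=params['min_samples_leaf'],
                               max_features=params['max_features'],
                               max_samples=params['max_samples'])
    rf.fit(x_train, y_train)
    y_pred = rf.predict(x_test)
    return np.sqrt(mean_squared_error(y_test, y_pred))

This is equivalent to RandomForestRegressor(**params), just more explicit about which keys the search space must provide.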
import matplotlib.pyplot as plt
# Import packages
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
# Create datasets
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
#Create Hyperparameter space
space = {'n_estimators': hp.choice('n_estimators', range(2, 150, 1)),
         'min_samples_leaf': hp.choice('min_samples', range(2, 50, 1)),
         'max_features': hp.choice('max_features', range(2, 10, 1)),
         'max_samples': hp.choice('max_samples', range(2, 50, 1)),
         }
#Define Objective Function
def objective(space):
    rf = RandomForestRegressor(**space)
    # fit Training model
    rf.fit(x_train, y_train)
    # Making predictions and find RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
#Surrogate Fn
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=2,
            trials=trials)
print(best)
print(trials.results)
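Two optional refinements, not required for the fix: since STATUS_OK is already imported, the objective can return a dict with 'loss' and 'status' so that trials.results holds structured entries, and because hp.choice makes fmin report the index of the chosen option rather than the value itself, hyperopt's space_eval can translate best back into actual parameter values. A sketch under those assumptions, reusing the space, data, and imports from the code above:

from hyperopt import space_eval

def objective(params):
    rf = RandomForestRegressor(**params)
    rf.fit(x_train, y_train)
    rmse = np.sqrt(mean_squared_error(y_test, rf.predict(x_test)))
    # Returning a dict lets hyperopt record the loss alongside a status flag
    return {'loss': rmse, 'status': STATUS_OK}

trials = Trials()
best = fmin(objective, space=space, algo=tpe.suggest, max_evals=2, trials=trials)
print(space_eval(space, best))  # actual hyperparameter values, not hp.choice indices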