GridSearchCV 没有属性 best_estimator_
GridSearchCV has no attribute best_estimator_
大约 20 分钟前开始,我一直收到有关该属性的错误。我不确定问题出在哪里:当我在另一个单独的笔记本中运行同样的代码时,它可以正常运行,GridSearchCV 也一切顺利。我需要更新 Scikit-Learn 吗?我贴出了全部代码,因为我认为这样才不会遗漏任何细节。感谢任何帮助。
import pandas as pd
# Read the three input tables from CSV. test_survived is reduced further down
# to whatever remains after its "PassengerId" column is dropped.
train_data = pd.read_csv("~/Desktop/Personal/Data/train.csv")
test_features = pd.read_csv("~/Desktop/Personal/Data/test.csv")
test_survived = pd.read_csv("~/Desktop/Personal/Data/gender_submission.csv")
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
def data_process(data):
    """Return a cleaned copy of a passenger table ready for modelling.

    The unused columns are removed, ``Sex`` is replaced by integer labels,
    and missing values in the numeric columns are filled with the column
    mean rounded to the nearest whole number.

    Args:
        data: DataFrame containing the columns dropped, encoded and filled
            below.

    Returns:
        A new DataFrame. ``drop`` returns a new frame, so the caller's
        object is not modified.
    """
    # Use the ``columns=`` keyword: passing the axis positionally
    # (``drop("Cabin", 1)``) was deprecated in pandas 1.3 and removed in 2.0.
    data = data.drop(columns=["Cabin", "Embarked", "Ticket", "Name", "PassengerId"])
    data["Sex"] = LabelEncoder().fit_transform(data["Sex"])
    numerical_attr = ["Age", "Pclass", "SibSp", "Parch", "Fare"]
    for attr in numerical_attr:
        # Assign the filled column back. ``fillna(inplace=True)`` on a column
        # selection is chained assignment and may operate on a temporary
        # copy, leaving the frame unchanged.
        data[attr] = data[attr].fillna(round(data[attr].mean(), 0))
    return data
# Clean both tables with the same preprocessing routine.
train_data = data_process(train_data)
test_features = data_process(test_features).to_numpy()
# ``columns=`` replaces the positional axis argument, which pandas 2.0 no
# longer accepts.
test_survived = test_survived.drop(columns="PassengerId").to_numpy()

# Full training set: every column except the label is a feature.
full_train_features = train_data.drop(columns="Survived").to_numpy()
# Select the label column directly instead of dropping every feature column
# by name; the result is the same 1-D array and does not break if the set of
# feature columns changes.
full_train_survived = train_data["Survived"].to_numpy()

# Hold out 30% of the training rows as a validation set (fixed seed).
train_set, test_set = train_test_split(train_data, test_size=0.3, random_state=1)
part_train_set_features = train_set.drop(columns="Survived").to_numpy()
part_train_set_survived = train_set["Survived"].to_numpy()
val_set_features = test_set.drop(columns="Survived").to_numpy()
val_set_survived = test_set["Survived"].to_numpy()
# Baseline: logistic regression with the liblinear solver and otherwise
# default settings, trained on the partial training set.
log_reg = LogisticRegression(solver="liblinear")
log_reg.fit(part_train_set_features, part_train_set_survived)
predict_log_reg_base = log_reg.predict(val_set_features)
# accuracy_score is documented as (y_true, y_pred). Accuracy is symmetric, so
# the value is unchanged, but the conventional order avoids surprises if the
# metric is later swapped for an asymmetric one.
accuracy_log_reg_base = accuracy_score(val_set_survived, predict_log_reg_base)
print(accuracy_log_reg_base)
# Candidate values for C: 0.1, 0.2, ..., 2.0.
fixed_range1 = range(1,21)
c_values = [i/10 for i in fixed_range1]
# Candidate values for max_iter: 100, 110, ..., 200.
fixed_range2 = range(10,21)
max_iter_values = [i*10 for i in fixed_range2]
# Grid over C, penalty type and max_iter.
parameters_log_reg = {'C' : c_values, 'penalty' : ['l1', 'l2'], 'max_iter' : max_iter_values}
# The search object is only constructed here; fit() is never called on it.
log_reg_best = GridSearchCV(LogisticRegression(solver = 'liblinear'), parameters_log_reg, return_train_score = True)
# NOTE(review): this is the access the question reports as failing with
# "GridSearchCV has no attribute best_estimator_" -- no fit() call precedes it.
final_log_reg = log_reg_best.best_estimator_
您需要先对它进行拟合(调用 fit):
# Step 1: build the search object (nothing is trained yet).
log_reg_best = GridSearchCV(
    estimator=LogisticRegression(solver="liblinear"),
    param_grid=parameters_log_reg,
    return_train_score=True,
)
# Step 2: run the search on the training split.
log_reg_best.fit(part_train_set_features, part_train_set_survived)
# Step 3: only now can the best model found by the search be read back.
final_log_reg = log_reg_best.best_estimator_
大约 20 分钟前开始,我一直收到有关该属性的错误。我不确定问题出在哪里:当我在另一个单独的笔记本中运行同样的代码时,它可以正常运行,GridSearchCV 也一切顺利。我需要更新 Scikit-Learn 吗?我贴出了全部代码,因为我认为这样才不会遗漏任何细节。感谢任何帮助。
import pandas as pd
# Read the three input tables from CSV. test_survived is reduced further down
# to whatever remains after its "PassengerId" column is dropped.
train_data = pd.read_csv("~/Desktop/Personal/Data/train.csv")
test_features = pd.read_csv("~/Desktop/Personal/Data/test.csv")
test_survived = pd.read_csv("~/Desktop/Personal/Data/gender_submission.csv")
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
def data_process(data):
    """Return a cleaned copy of a passenger table ready for modelling.

    The unused columns are removed, ``Sex`` is replaced by integer labels,
    and missing values in the numeric columns are filled with the column
    mean rounded to the nearest whole number.

    Args:
        data: DataFrame containing the columns dropped, encoded and filled
            below.

    Returns:
        A new DataFrame. ``drop`` returns a new frame, so the caller's
        object is not modified.
    """
    # Use the ``columns=`` keyword: passing the axis positionally
    # (``drop("Cabin", 1)``) was deprecated in pandas 1.3 and removed in 2.0.
    data = data.drop(columns=["Cabin", "Embarked", "Ticket", "Name", "PassengerId"])
    data["Sex"] = LabelEncoder().fit_transform(data["Sex"])
    numerical_attr = ["Age", "Pclass", "SibSp", "Parch", "Fare"]
    for attr in numerical_attr:
        # Assign the filled column back. ``fillna(inplace=True)`` on a column
        # selection is chained assignment and may operate on a temporary
        # copy, leaving the frame unchanged.
        data[attr] = data[attr].fillna(round(data[attr].mean(), 0))
    return data
# Clean both tables with the same preprocessing routine.
train_data = data_process(train_data)
test_features = data_process(test_features).to_numpy()
# ``columns=`` replaces the positional axis argument, which pandas 2.0 no
# longer accepts.
test_survived = test_survived.drop(columns="PassengerId").to_numpy()

# Full training set: every column except the label is a feature.
full_train_features = train_data.drop(columns="Survived").to_numpy()
# Select the label column directly instead of dropping every feature column
# by name; the result is the same 1-D array and does not break if the set of
# feature columns changes.
full_train_survived = train_data["Survived"].to_numpy()

# Hold out 30% of the training rows as a validation set (fixed seed).
train_set, test_set = train_test_split(train_data, test_size=0.3, random_state=1)
part_train_set_features = train_set.drop(columns="Survived").to_numpy()
part_train_set_survived = train_set["Survived"].to_numpy()
val_set_features = test_set.drop(columns="Survived").to_numpy()
val_set_survived = test_set["Survived"].to_numpy()
# Baseline: logistic regression with the liblinear solver and otherwise
# default settings, trained on the partial training set.
log_reg = LogisticRegression(solver="liblinear")
log_reg.fit(part_train_set_features, part_train_set_survived)
predict_log_reg_base = log_reg.predict(val_set_features)
# accuracy_score is documented as (y_true, y_pred). Accuracy is symmetric, so
# the value is unchanged, but the conventional order avoids surprises if the
# metric is later swapped for an asymmetric one.
accuracy_log_reg_base = accuracy_score(val_set_survived, predict_log_reg_base)
print(accuracy_log_reg_base)
# Candidate values for C: 0.1, 0.2, ..., 2.0.
fixed_range1 = range(1,21)
c_values = [i/10 for i in fixed_range1]
# Candidate values for max_iter: 100, 110, ..., 200.
fixed_range2 = range(10,21)
max_iter_values = [i*10 for i in fixed_range2]
# Grid over C, penalty type and max_iter.
parameters_log_reg = {'C' : c_values, 'penalty' : ['l1', 'l2'], 'max_iter' : max_iter_values}
# The search object is only constructed here; fit() is never called on it.
log_reg_best = GridSearchCV(LogisticRegression(solver = 'liblinear'), parameters_log_reg, return_train_score = True)
# NOTE(review): this is the access the question reports as failing with
# "GridSearchCV has no attribute best_estimator_" -- no fit() call precedes it.
final_log_reg = log_reg_best.best_estimator_
您需要先对它进行拟合(调用 fit):
# Step 1: build the search object (nothing is trained yet).
log_reg_best = GridSearchCV(
    estimator=LogisticRegression(solver="liblinear"),
    param_grid=parameters_log_reg,
    return_train_score=True,
)
# Step 2: run the search on the training split.
log_reg_best.fit(part_train_set_features, part_train_set_survived)
# Step 3: only now can the best model found by the search be read back.
final_log_reg = log_reg_best.best_estimator_