AttributeError: 'DecisionTreeClassifier' object has no attribute 'precision_score'
AttributeError: 'DecisionTreeClassifier' object has no attribute 'precision_score'
我最近才开始学习数据科学。这是我写的:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score
import numpy as np
# Compare a decision tree and a logistic regression on the Titanic data
# using 5-fold cross-validation, reporting mean accuracy/precision/recall.

# Load the data and derive a boolean 'male' feature from the 'Sex' column.
df = pd.read_csv('titanic.csv')
df['male'] = df['Sex'] == 'male'
X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
y = df['Survived'].values

# Shuffle with a fixed seed so the fold assignment is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=10)
tree_scores = {'accuracy_scores': [], 'precision_scores': [], 'recall_scores': []}
logistic_scores = {'accuracy_scores': [], 'precision_scores': [], 'recall_scores': []}

# Fit both models on every fold and record their scores on the held-out part.
for train_indexes, test_indexes in kf.split(X):
    X_train, X_test = X[train_indexes], X[test_indexes]
    y_train, y_test = y[train_indexes], y[test_indexes]

    tree = DecisionTreeClassifier()
    tree.fit(X_train, y_train)
    tree_scores['accuracy_scores'].append(tree.score(X_test, y_test))
    tree_prediction = tree.predict(X_test)
    # precision_score and recall_score are functions imported from
    # sklearn.metrics, not estimator methods, so the two commented-out calls
    # below raise AttributeError. Call the functions directly instead.
    #tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
    #tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))
    tree_scores['precision_scores'].append(precision_score(y_test, tree_prediction))
    tree_scores['recall_scores'].append(recall_score(y_test, tree_prediction))

    logistic = LogisticRegression()
    logistic.fit(X_train, y_train)
    logistic_scores['accuracy_scores'].append(logistic.score(X_test, y_test))
    logistic_prediction = logistic.predict(X_test)
    logistic_scores['precision_scores'].append(precision_score(y_test, logistic_prediction))
    logistic_scores['recall_scores'].append(recall_score(y_test, logistic_prediction))

# Report the mean of each metric across the folds.
print("Decision Tree")
print(" accuracy:", np.mean(tree_scores['accuracy_scores']))
print(" precision:", np.mean(tree_scores['precision_scores']))
print(" recall:", np.mean(tree_scores['recall_scores']))
print("Logistic Regression")
print(" accuracy:", np.mean(logistic_scores['accuracy_scores']))
print(" precision:", np.mean(logistic_scores['precision_scores']))
print(" recall:", np.mean(logistic_scores['recall_scores']))
for 循环中被注释掉的两行在计算精确率(precision)和召回率(recall)时都会报错,我不知道为什么。可是之前我运行的时候,精确率和召回率都能正常计算。而且我也找不到任何拼写错误。
我想知道不同的 Python 写法是否会干扰 sklearn?因为有一次我尝试了这样的写法:
# NOTE: a label slice passed directly to .loc selects rows, not columns;
# selecting a range of columns needs df.loc[:, 'first':'last'].
# 'Plass' also differs from the 'Pclass' column name used in the script above.
X = df.loc['Plass':'Fare'].values
y = df.Survived.values
它给出了错误,但是当我使用正常的预期方式时它工作正常。
(注意:代码的缩进可能有误,我是第一次使用 Stack Exchange。)
DecisionTreeClassifier
确实没有 precision_score 和 recall_score 这样的方法;它们是从 sklearn.metrics 导入的函数。
您需要更改:
# Raises AttributeError: the estimator has no precision_score / recall_score attribute.
tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))
改为:
# Call the functions imported from sklearn.metrics directly,
# passing the true labels first and the predictions second.
tree_scores['precision_scores'].append(precision_score(y_test,tree_prediction))
tree_scores['recall_scores'].append(recall_score(y_test,tree_prediction))
这样就可以了。
我最近才开始学习数据科学。这是我写的:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score
import numpy as np
# Compare a decision tree and a logistic regression on the Titanic data
# using 5-fold cross-validation, reporting mean accuracy/precision/recall.

# Load the data and derive a boolean 'male' feature from the 'Sex' column.
df = pd.read_csv('titanic.csv')
df['male'] = df['Sex'] == 'male'
X = df[['Pclass', 'male', 'Age', 'Siblings/Spouses', 'Parents/Children', 'Fare']].values
y = df['Survived'].values

# Shuffle with a fixed seed so the fold assignment is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=10)
tree_scores = {'accuracy_scores': [], 'precision_scores': [], 'recall_scores': []}
logistic_scores = {'accuracy_scores': [], 'precision_scores': [], 'recall_scores': []}

# Fit both models on every fold and record their scores on the held-out part.
for train_indexes, test_indexes in kf.split(X):
    X_train, X_test = X[train_indexes], X[test_indexes]
    y_train, y_test = y[train_indexes], y[test_indexes]

    tree = DecisionTreeClassifier()
    tree.fit(X_train, y_train)
    tree_scores['accuracy_scores'].append(tree.score(X_test, y_test))
    tree_prediction = tree.predict(X_test)
    # precision_score and recall_score are functions imported from
    # sklearn.metrics, not estimator methods, so the two commented-out calls
    # below raise AttributeError. Call the functions directly instead.
    #tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
    #tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))
    tree_scores['precision_scores'].append(precision_score(y_test, tree_prediction))
    tree_scores['recall_scores'].append(recall_score(y_test, tree_prediction))

    logistic = LogisticRegression()
    logistic.fit(X_train, y_train)
    logistic_scores['accuracy_scores'].append(logistic.score(X_test, y_test))
    logistic_prediction = logistic.predict(X_test)
    logistic_scores['precision_scores'].append(precision_score(y_test, logistic_prediction))
    logistic_scores['recall_scores'].append(recall_score(y_test, logistic_prediction))

# Report the mean of each metric across the folds.
print("Decision Tree")
print(" accuracy:", np.mean(tree_scores['accuracy_scores']))
print(" precision:", np.mean(tree_scores['precision_scores']))
print(" recall:", np.mean(tree_scores['recall_scores']))
print("Logistic Regression")
print(" accuracy:", np.mean(logistic_scores['accuracy_scores']))
print(" precision:", np.mean(logistic_scores['precision_scores']))
print(" recall:", np.mean(logistic_scores['recall_scores']))
for 循环中被注释掉的两行在计算精确率(precision)和召回率(recall)时都会报错,我不知道为什么。可是之前我运行的时候,精确率和召回率都能正常计算。而且我也找不到任何拼写错误。
我想知道不同的 Python 写法是否会干扰 sklearn?因为有一次我尝试了这样的写法:
# NOTE: a label slice passed directly to .loc selects rows, not columns;
# selecting a range of columns needs df.loc[:, 'first':'last'].
# 'Plass' also differs from the 'Pclass' column name used in the script above.
X = df.loc['Plass':'Fare'].values
y = df.Survived.values
它给出了错误,但是当我使用正常的预期方式时它工作正常。
(注意:代码的缩进可能有误,我是第一次使用 Stack Exchange。)
DecisionTreeClassifier
确实没有 precision_score 和 recall_score 这样的方法;它们是从 sklearn.metrics 导入的函数。
您需要更改:
# Raises AttributeError: the estimator has no precision_score / recall_score attribute.
tree_scores['precision_scores'].append(tree.precision_score(y_test,tree_prediction))
tree_scores['recall_scores'].append(tree.recall_score(y_test,tree_prediction))
改为:
# Call the functions imported from sklearn.metrics directly,
# passing the true labels first and the predictions second.
tree_scores['precision_scores'].append(precision_score(y_test,tree_prediction))
tree_scores['recall_scores'].append(recall_score(y_test,tree_prediction))
这样就可以了。