比较 ML 中不同分类模型的准确性
Comparing Accuracy of different classification models in ML
我正在做一个项目来测试我在分类模型方面的技能,但似乎我犯了一个错误,我不确定如何解决它。以下是我的代码和错误:
要求:实现决策树、逻辑回归和支持向量机等不同算法,看看哪种算法的准确率更高。比较各算法的结果,并理解各模型的行为。
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn import metrics
def main():
    """Load the data, score the three classifiers and print the comparison."""
    features, target = loadData()
    split = trainTest(features, target)
    # Every model is trained and evaluated on the same
    # (x_train, x_test, y_train, y_test) split.
    lr_score = logisticRegressionModel(*split)
    tree_score = decisionTreeModel(*split)
    svm_score = svmModel(*split)
    printComparisonResults(lr_score, tree_score, svm_score)
def loadData(path=r'D:\Tutorials\Resources\creditcard.csv'):
    """Read the credit-card CSV and separate the features from the target.

    Args:
        path: Location of the CSV file. Defaults to the path that used to be
            hard-coded; it is a raw string so the backslashes are never
            interpreted as escape sequences.

    Returns:
        A tuple ``(x, y)``: ``x`` is a DataFrame with the ``Time``,
        ``V1``..``V28`` and ``Amount`` columns, ``y`` is the ``Class`` Series.
    """
    df = pd.read_csv(path)
    # Generate the V1..V28 names instead of typing them out: the hand-written
    # list contained 'V5' twice and was missing 'V4' and 'V15'.
    feature_columns = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']
    x = df[feature_columns]
    y = df['Class']
    return x, y
def trainTest(x, y):
    """Split features and labels into training and test partitions.

    A quarter of the rows is held out for testing, using a fixed
    ``random_state`` of 0.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    split_options = {'test_size': 0.25, 'random_state': 0}
    train_x, test_x, train_y, test_y = train_test_split(x, y, **split_options)
    return train_x, test_x, train_y, test_y
# LogisticRegression model
def logisticRegressionModel(x_train, x_test, y_train, y_test):
    """Fit a default LogisticRegression and return its accuracy on the test split."""
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def decisionTreeModel(x_train, x_test, y_train, y_test):
    """Fit a default DecisionTreeClassifier and return its accuracy on the test split."""
    tree = DecisionTreeClassifier().fit(x_train, y_train)
    predictions = tree.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def svmModel(x_train, x_test, y_train, y_test):
    """Fit a linear-kernel SVC on the training split and return an accuracy score."""
    svm_model = svm.SVC(kernel='linear')
    svm_model = svm_model.fit(x_train, y_train)
    # NOTE(review): predict() receives the labels (y_test) here instead of the
    # features (x_test). This looks like the cause of the
    # "Expected 2D array, got 1D array" ValueError reported for this listing.
    y_pred = svm_model.predict(y_test)
    #evaluate model
    acc = metrics.accuracy_score(y_test, y_pred)
    return acc
def printComparisonResults(logisticAccuracy, dectreeAccuracy, svmAccuracy):
    """Print which of the three classifiers reached the highest accuracy.

    Args:
        logisticAccuracy: Accuracy score of the logistic regression model.
        dectreeAccuracy: Accuracy score of the decision tree model.
        svmAccuracy: Accuracy score of the SVM model.

    The model with the highest score is reported. On a tie the first model in
    the order logistic regression, decision tree, SVM wins.
    """
    print("Printing Results")
    lr = "Logistic Regression"
    dt = "Decision Tree"
    # This label carries an "is Accurate" suffix; it is kept verbatim so the
    # printed lines stay exactly as before.
    sv = "SVM is Accurate"
    # The previous test `a > (b and c)` only compared against one operand,
    # because `b and c` evaluates to a single value. Compare against the real
    # maximum of all three scores instead.
    best = max(logisticAccuracy, dectreeAccuracy, svmAccuracy)
    if logisticAccuracy == best:
        print(lr, "is accurate than", dt, "and", sv)
    elif dectreeAccuracy == best:
        print(dt, "is accurate than", lr, "and", sv)
    else:
        print(sv, "is accurate than", lr, "and", dt)
    print("Done")
# Run the comparison only when this file is executed as a script. The dunder
# names are required: a bare `name` is undefined and raises NameError.
if __name__ == '__main__':
    main()
我遇到的错误:
ValueError: Expected 2D array, got 1D array instead:
array=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
我已将数组重塑为 x = np.array(df[[...]]).reshape((1,-1))
但仍然出现错误:
raise ValueError("Found input variables with inconsistent numbers of"
ValueError: Found input variables with inconsistent numbers of samples: [1, 2470]
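欢迎来到 SO!您需要考虑以下几项更改:(1) 在 svmModel 中应对 x_test 而不是 y_test 调用 predict,这正是 “Expected 2D array, got 1D array” 错误的原因;(2) 入口判断应写成 if __name__ == '__main__':;(3) 用 .values 将特征和标签转换为 NumPy 数组;(4) 在 train_test_split 中加入 stratify=y,使训练集和测试集中的类别比例保持一致。修改后的完整代码如下: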
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn import metrics
import sys
def main():
    """Load the data, score the three classifiers and print the comparison."""
    features, target = loadData()
    split = trainTest(features, target)
    # Every model is trained and evaluated on the same
    # (x_train, x_test, y_train, y_test) split.
    lr_score = logisticRegressionModel(*split)
    tree_score = decisionTreeModel(*split)
    svm_score = svmModel(*split)
    printComparisonResults(lr_score, tree_score, svm_score)
def loadData(path=r'.\creditcard.csv'):
    """Read the credit-card CSV and return features and target as NumPy arrays.

    Args:
        path: Location of the CSV file. Defaults to the path that used to be
            hard-coded; it is a raw string so the backslash is never
            interpreted as an escape sequence.

    Returns:
        A tuple ``(x, y)``: ``x`` is a 2-D array with the ``Time``,
        ``V1``..``V28`` and ``Amount`` columns, ``y`` is the ``Class`` column
        reshaped to ``(n_rows, 1)``.
    """
    df = pd.read_csv(path)
    # Generate the V1..V28 names instead of typing them out: the hand-written
    # list contained 'V5' twice and was missing 'V4' and 'V15'.
    feature_columns = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']
    x = df[feature_columns].values
    # NOTE(review): y is returned as an (n, 1) column to keep the existing
    # return shape; scikit-learn estimators generally expect a 1-D target, so
    # confirm whether flattening it would be preferable.
    y = df['Class'].values.reshape(-1, 1)
    return x, y
def trainTest(x, y):
    """Split features and labels into stratified training and test partitions.

    A quarter of the rows is held out for testing, using a fixed
    ``random_state`` of 0 and stratifying on ``y``.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    split_options = {'test_size': 0.25, 'random_state': 0, 'stratify': y}
    train_x, test_x, train_y, test_y = train_test_split(x, y, **split_options)
    return train_x, test_x, train_y, test_y
# LogisticRegression model
def logisticRegressionModel(x_train, x_test, y_train, y_test):
    """Fit a default LogisticRegression and return its accuracy on the test split."""
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def decisionTreeModel(x_train, x_test, y_train, y_test):
    """Fit a default DecisionTreeClassifier and return its accuracy on the test split."""
    tree = DecisionTreeClassifier().fit(x_train, y_train)
    predictions = tree.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def svmModel(x_train, x_test, y_train, y_test):
    """Fit a linear-kernel SVC and return its accuracy on the test split."""
    classifier = svm.SVC(kernel='linear').fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def printComparisonResults(logisticAccuracy, dectreeAccuracy, svmAccuracy):
    """Print which of the three classifiers reached the highest accuracy.

    Args:
        logisticAccuracy: Accuracy score of the logistic regression model.
        dectreeAccuracy: Accuracy score of the decision tree model.
        svmAccuracy: Accuracy score of the SVM model.

    The model with the highest score is reported. On a tie the first model in
    the order logistic regression, decision tree, SVM wins.
    """
    print("Printing Results")
    lr = "Logistic Regression"
    dt = "Decision Tree"
    # This label carries an "is Accurate" suffix; it is kept verbatim so the
    # printed lines stay exactly as before.
    sv = "SVM is Accurate"
    # The previous test `a > (b and c)` only compared against one operand,
    # because `b and c` evaluates to a single value. Compare against the real
    # maximum of all three scores instead.
    best = max(logisticAccuracy, dectreeAccuracy, svmAccuracy)
    if logisticAccuracy == best:
        print(lr, "is accurate than", dt, "and", sv)
    elif dectreeAccuracy == best:
        print(dt, "is accurate than", lr, "and", sv)
    else:
        print(sv, "is accurate than", lr, "and", dt)
    print("Done")
# Entry point: call main() only when the file is run directly, not on import.
if __name__ == '__main__':
    main()
输出:
Printing Results
Logistic Regression is accurate than Decision Tree and SVM is Accurate
Done
按上述修改后,您的实现即可正常运行 :)。我使用的是可公开获取的 creditcard.csv 数据。
我正在做一个项目来测试我在分类模型方面的技能,但似乎我犯了一个错误,我不确定如何解决它。以下是我的代码和错误:
要求:实现决策树、逻辑回归和支持向量机等不同算法,看看哪种算法的准确率更高。比较各算法的结果,并理解各模型的行为。
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn import metrics
def main():
    """Load the data, score the three classifiers and print the comparison."""
    features, target = loadData()
    split = trainTest(features, target)
    # Every model is trained and evaluated on the same
    # (x_train, x_test, y_train, y_test) split.
    lr_score = logisticRegressionModel(*split)
    tree_score = decisionTreeModel(*split)
    svm_score = svmModel(*split)
    printComparisonResults(lr_score, tree_score, svm_score)
def loadData(path=r'D:\Tutorials\Resources\creditcard.csv'):
    """Read the credit-card CSV and separate the features from the target.

    Args:
        path: Location of the CSV file. Defaults to the path that used to be
            hard-coded; it is a raw string so the backslashes are never
            interpreted as escape sequences.

    Returns:
        A tuple ``(x, y)``: ``x`` is a DataFrame with the ``Time``,
        ``V1``..``V28`` and ``Amount`` columns, ``y`` is the ``Class`` Series.
    """
    df = pd.read_csv(path)
    # Generate the V1..V28 names instead of typing them out: the hand-written
    # list contained 'V5' twice and was missing 'V4' and 'V15'.
    feature_columns = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']
    x = df[feature_columns]
    y = df['Class']
    return x, y
def trainTest(x, y):
    """Split features and labels into training and test partitions.

    A quarter of the rows is held out for testing, using a fixed
    ``random_state`` of 0.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    split_options = {'test_size': 0.25, 'random_state': 0}
    train_x, test_x, train_y, test_y = train_test_split(x, y, **split_options)
    return train_x, test_x, train_y, test_y
# LogisticRegression model
def logisticRegressionModel(x_train, x_test, y_train, y_test):
    """Fit a default LogisticRegression and return its accuracy on the test split."""
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def decisionTreeModel(x_train, x_test, y_train, y_test):
    """Fit a default DecisionTreeClassifier and return its accuracy on the test split."""
    tree = DecisionTreeClassifier().fit(x_train, y_train)
    predictions = tree.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def svmModel(x_train, x_test, y_train, y_test):
    """Fit a linear-kernel SVC on the training split and return an accuracy score."""
    svm_model = svm.SVC(kernel='linear')
    svm_model = svm_model.fit(x_train, y_train)
    # NOTE(review): predict() receives the labels (y_test) here instead of the
    # features (x_test). This looks like the cause of the
    # "Expected 2D array, got 1D array" ValueError reported for this listing.
    y_pred = svm_model.predict(y_test)
    #evaluate model
    acc = metrics.accuracy_score(y_test, y_pred)
    return acc
def printComparisonResults(logisticAccuracy, dectreeAccuracy, svmAccuracy):
    """Print which of the three classifiers reached the highest accuracy.

    Args:
        logisticAccuracy: Accuracy score of the logistic regression model.
        dectreeAccuracy: Accuracy score of the decision tree model.
        svmAccuracy: Accuracy score of the SVM model.

    The model with the highest score is reported. On a tie the first model in
    the order logistic regression, decision tree, SVM wins.
    """
    print("Printing Results")
    lr = "Logistic Regression"
    dt = "Decision Tree"
    # This label carries an "is Accurate" suffix; it is kept verbatim so the
    # printed lines stay exactly as before.
    sv = "SVM is Accurate"
    # The previous test `a > (b and c)` only compared against one operand,
    # because `b and c` evaluates to a single value. Compare against the real
    # maximum of all three scores instead.
    best = max(logisticAccuracy, dectreeAccuracy, svmAccuracy)
    if logisticAccuracy == best:
        print(lr, "is accurate than", dt, "and", sv)
    elif dectreeAccuracy == best:
        print(dt, "is accurate than", lr, "and", sv)
    else:
        print(sv, "is accurate than", lr, "and", dt)
    print("Done")
# Run the comparison only when this file is executed as a script. The dunder
# names are required: a bare `name` is undefined and raises NameError.
if __name__ == '__main__':
    main()
我遇到的错误:
ValueError: Expected 2D array, got 1D array instead:
array=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
我已将数组重塑为 x = np.array(df[[...]]).reshape((1,-1))
但仍然出现错误:
raise ValueError("Found input variables with inconsistent numbers of"
ValueError: Found input variables with inconsistent numbers of samples: [1, 2470]
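欢迎来到 SO!您需要考虑以下几项更改:(1) 在 svmModel 中应对 x_test 而不是 y_test 调用 predict,这正是 “Expected 2D array, got 1D array” 错误的原因;(2) 入口判断应写成 if __name__ == '__main__':;(3) 用 .values 将特征和标签转换为 NumPy 数组;(4) 在 train_test_split 中加入 stratify=y,使训练集和测试集中的类别比例保持一致。修改后的完整代码如下: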
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn import metrics
import sys
def main():
    """Load the data, score the three classifiers and print the comparison."""
    features, target = loadData()
    split = trainTest(features, target)
    # Every model is trained and evaluated on the same
    # (x_train, x_test, y_train, y_test) split.
    lr_score = logisticRegressionModel(*split)
    tree_score = decisionTreeModel(*split)
    svm_score = svmModel(*split)
    printComparisonResults(lr_score, tree_score, svm_score)
def loadData(path=r'.\creditcard.csv'):
    """Read the credit-card CSV and return features and target as NumPy arrays.

    Args:
        path: Location of the CSV file. Defaults to the path that used to be
            hard-coded; it is a raw string so the backslash is never
            interpreted as an escape sequence.

    Returns:
        A tuple ``(x, y)``: ``x`` is a 2-D array with the ``Time``,
        ``V1``..``V28`` and ``Amount`` columns, ``y`` is the ``Class`` column
        reshaped to ``(n_rows, 1)``.
    """
    df = pd.read_csv(path)
    # Generate the V1..V28 names instead of typing them out: the hand-written
    # list contained 'V5' twice and was missing 'V4' and 'V15'.
    feature_columns = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']
    x = df[feature_columns].values
    # NOTE(review): y is returned as an (n, 1) column to keep the existing
    # return shape; scikit-learn estimators generally expect a 1-D target, so
    # confirm whether flattening it would be preferable.
    y = df['Class'].values.reshape(-1, 1)
    return x, y
def trainTest(x, y):
    """Split features and labels into stratified training and test partitions.

    A quarter of the rows is held out for testing, using a fixed
    ``random_state`` of 0 and stratifying on ``y``.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)``.
    """
    split_options = {'test_size': 0.25, 'random_state': 0, 'stratify': y}
    train_x, test_x, train_y, test_y = train_test_split(x, y, **split_options)
    return train_x, test_x, train_y, test_y
# LogisticRegression model
def logisticRegressionModel(x_train, x_test, y_train, y_test):
    """Fit a default LogisticRegression and return its accuracy on the test split."""
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def decisionTreeModel(x_train, x_test, y_train, y_test):
    """Fit a default DecisionTreeClassifier and return its accuracy on the test split."""
    tree = DecisionTreeClassifier().fit(x_train, y_train)
    predictions = tree.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def svmModel(x_train, x_test, y_train, y_test):
    """Fit a linear-kernel SVC and return its accuracy on the test split."""
    classifier = svm.SVC(kernel='linear').fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    # Score the predictions against the held-out labels.
    return metrics.accuracy_score(y_test, predictions)
def printComparisonResults(logisticAccuracy, dectreeAccuracy, svmAccuracy):
    """Print which of the three classifiers reached the highest accuracy.

    Args:
        logisticAccuracy: Accuracy score of the logistic regression model.
        dectreeAccuracy: Accuracy score of the decision tree model.
        svmAccuracy: Accuracy score of the SVM model.

    The model with the highest score is reported. On a tie the first model in
    the order logistic regression, decision tree, SVM wins.
    """
    print("Printing Results")
    lr = "Logistic Regression"
    dt = "Decision Tree"
    # This label carries an "is Accurate" suffix; it is kept verbatim so the
    # printed lines stay exactly as before.
    sv = "SVM is Accurate"
    # The previous test `a > (b and c)` only compared against one operand,
    # because `b and c` evaluates to a single value. Compare against the real
    # maximum of all three scores instead.
    best = max(logisticAccuracy, dectreeAccuracy, svmAccuracy)
    if logisticAccuracy == best:
        print(lr, "is accurate than", dt, "and", sv)
    elif dectreeAccuracy == best:
        print(dt, "is accurate than", lr, "and", sv)
    else:
        print(sv, "is accurate than", lr, "and", dt)
    print("Done")
# Entry point: call main() only when the file is run directly, not on import.
if __name__ == '__main__':
    main()
输出:
Printing Results
Logistic Regression is accurate than Decision Tree and SVM is Accurate
Done
按上述修改后,您的实现即可正常运行 :)。我使用的是可公开获取的 creditcard.csv 数据。