SVM 卡在拟合模型中

SVM stuck in fitting the model

我正在使用 SVM,我有一个问题,程序的执行停留在 model.fit(X_test, y_test),这对应于拟合 SVM 模型。如何解决?这是我的代码:

# Make Predictions with Naive Bayes On The Iris Dataset
import collections
from csv import reader
from math import sqrt, exp, pi

from IPython.display import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set()

from sklearn.cross_validation import train_test_split 
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.externals.six import StringIO
from sklearn.feature_selection import SelectKBest, chi2
from sklearn import datasets, metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Function to split the dataset 
def splitdataset(balance_data, column_count): 

    # Separating the target variable 
    X = balance_data.values[:, 1:column_count] 
    Y = balance_data.values[:, 0] 

    # Splitting the dataset into train and test 
    X_train, X_test, y_train, y_test = train_test_split( 
    X, Y, test_size = 0.3, random_state = 100) 

    return X, Y, X_train, X_test, y_train, y_test 

def importdata(): 
    balance_data = pd.read_csv( 'dataExtended.txt', sep= ',') 
    row_count, column_count = balance_data.shape

    # Printing the dataswet shape 
    print ("Dataset Length: ", len(balance_data)) 
    print ("Dataset Shape: ", balance_data.shape) 
    print("Number of columns ", column_count)

    # Printing the dataset obseravtions 
    print ("Dataset: ",balance_data.head()) 
    balance_data['gold'] = balance_data['gold'].astype('category').cat.codes
    balance_data['Program'] = balance_data['Program'].astype('category').cat.codes

    return balance_data, column_count 


# Driver code 
def main(): 
    print("hey")

    # Building Phase 
    data,column_count = importdata() 
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data, column_count) 

    #Create a svm Classifier
    model = svm.SVC(kernel='linear',probability=True) # Linear Kernel

    print('before fitting')

    model.fit(X_test, y_test)
    print('fitting over')
    predicted = model.predict(X_test)   
    print('prediction over')
    print(metrics.classification_report(y_test, predicted))
    print('classification over')
    print(metrics.confusion_matrix(y_test, predicted))
    probs = model.predict_proba(X_test)
    probs_list = list(probs)
    y_pred=[None]*len(y_test)
    y_pred_list = list(y_pred)
    y_test_list = list(y_test)
    i=0
    threshold=0.7
    while i<len(probs_list):
            #print('probs ',probs_list[i][0])
            if (probs_list[i][0]>=threshold) & (probs_list[i][1]<threshold):
                   y_pred_list[i]=0
                   i=i+1

            elif (probs_list[i][0]<threshold) & (probs_list[i][1]>=threshold):
                   y_pred_list[i]=1
                   i=i+1
            else: 
                   #print(y_pred[i])
                   #print('i==> ',i, ' probs length ', len(probs_list), ' ', len(y_pred_list), ' ', len(y_test_list))
                   y_pred_list.pop(i)
                   y_test_list.pop(i)
                   probs_list.pop(i)




    #print(y_pred_list)
    print('confusion matrix\n',confusion_matrix(y_test_list,y_pred_list))
    print('classification report\n', classification_report(y_test_list,y_pred_list))
    print('accuracy score', accuracy_score(y_test_list, y_pred_list))

    print('Mean Absolute Error:', metrics.mean_absolute_error(y_test_list, y_pred_list))
    print('Mean Squared Error:', metrics.mean_squared_error(y_test_list, y_pred_list))
    print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test_list, y_pred_list)))

if __name__=="__main__": 
    main() 

这很可能是由于初始化模型时将参数 probability 设置为 True。正如您在 docs:

中看到的那样

probability: bool, default=False

Whether to enable probability estimates. This must be enabled prior to calling fit, will slow down that method as it internally uses 5-fold cross-validation, and predict_proba may be inconsistent with predict.

这个问题已在 Whosebug 上讨论 and