用 Matplotlib 绘制 SVM?
Plot SVM with Matplotlib?
我有一些有趣的用户数据,其中包含用户被要求执行的某些任务的及时性信息。我想知道 `late` 这个变量——它表示用户是准时(0)、稍微迟到(1)还是严重迟到(2)——是否可以被预测或解释。`late` 是我根据提供交通灯信息的那一列生成的(绿色 = 未迟到,红色 = 严重迟到)。
这是我的做法:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import svm
import sklearn.metrics as sm
# Load the user data. Malformed rows are skipped with a warning:
# on_bad_lines='warn' is the replacement for error_bad_lines=False,
# which was removed in pandas 2.0.
df = pd.read_csv('April.csv', on_bad_lines='warn', encoding='iso8859_15', delimiter=';')

# The same four timestamp columns are converted twice below:
# first object -> datetime, then datetime -> numeric.
cols = ['Planned Start', 'Actual Start', 'Planned End', 'Actual End']

# Parse the timestamp columns as day-first dates, then re-attach the other columns.
# NOTE(review): errors='ignore' is deprecated in recent pandas releases -- confirm
# the target pandas version before relying on it.
df = df[cols].apply(
    pd.to_datetime, dayfirst=True, errors='ignore'
).join(df.drop(columns=cols))  # keyword form; positional axis was removed in pandas 2.0

# Turn the datetime columns into plain numbers so they can be scaled and fed to the SVM.
df = df[cols].apply(
    pd.to_numeric, errors='ignore'
).join(df.drop(columns=cols))

# Encode the traffic light as an ordinal label: 0 by default, 1 for Yellow, 2 for Red.
# .loc replaces the removed .ix indexer; a boolean mask plus a column label behaves the same.
df['late'] = 0
df.loc[df['End Time Traffic Light'].isin(['Yellow']), 'late'] = 1
df.loc[df['End Time Traffic Light'].isin(['Red']), 'late'] = 2

# Supervised learning: build the feature matrix X and the label vector y.
# .values replaces the removed DataFrame.as_matrix().
X = df[[
    'Planned Start', 'Actual Start', 'Planned End', 'Actual End',
    'Measure Package', 'Measure', 'Responsible User',
]].values
y = np.array(df['late'])

# Standardize the features before fitting.
X = preprocessing.scale(X)

# Support Vector Machine with one-vs-one decision function shape.
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(X, y)

# Accuracy on the training data itself (not a held-out score).
print(clf.score(X, y))
我现在想弄清楚如何绘制决策边界。我的目标是用 `Actual End` 和 `Planned End` 绘制一张双变量散点图。我自然查阅了文档(参见此处),但还是没能弄明白。这该怎么实现?
作为对未来的提醒,如果您提供公开可用的数据集和您尝试的绘图代码,您通常会得到更快(更好)的响应,因为我们没有 'April.csv'。您还可以省略 'April.csv' 的数据整理代码。话虽如此...
Sebastian Raschka 创建了 mlxtend 包,它有一个非常棒的绘图功能来执行此操作。它在底层使用 matplotlib。
import numpy as np
import pandas as pd
from sklearn import svm
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
# Create an arbitrary dataset for the example: two continuous features and a 3-class label.
df = pd.DataFrame({
    'Planned_End': np.random.uniform(low=-5, high=5, size=50),
    'Actual_End': np.random.uniform(low=-1, high=1, size=50),
    # np.random.random_integers is deprecated; randint's upper bound is exclusive,
    # so high=3 yields the same label set {0, 1, 2}.
    'Late': np.random.randint(low=0, high=3, size=50),
})

# Fit the Support Vector Machine classifier on the two plotted features only,
# so the decision regions can be drawn in 2-D.
X = df[['Planned_End', 'Actual_End']]
y = df['Late']
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(X.values, y.values)

# Plot the decision regions with mlxtend (draws onto the current matplotlib axes).
plot_decision_regions(X=X.values,
                      y=y.values,
                      clf=clf,
                      legend=2)

# Label the axes with the feature names and add a title.
plt.xlabel(X.columns[0], size=14)
plt.ylabel(X.columns[1], size=14)
plt.title('SVM Decision Region Boundary', size=16)

# Display the figure when run as a plain script (no-op concern in inline notebooks).
plt.show()
我有一些有趣的用户数据,其中包含用户被要求执行的某些任务的及时性信息。我想知道 `late` 这个变量——它表示用户是准时(0)、稍微迟到(1)还是严重迟到(2)——是否可以被预测或解释。`late` 是我根据提供交通灯信息的那一列生成的(绿色 = 未迟到,红色 = 严重迟到)。
这是我的做法:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import svm
import sklearn.metrics as sm
# Load the user data. Malformed rows are skipped with a warning:
# on_bad_lines='warn' is the replacement for error_bad_lines=False,
# which was removed in pandas 2.0.
df = pd.read_csv('April.csv', on_bad_lines='warn', encoding='iso8859_15', delimiter=';')

# The same four timestamp columns are converted twice below:
# first object -> datetime, then datetime -> numeric.
cols = ['Planned Start', 'Actual Start', 'Planned End', 'Actual End']

# Parse the timestamp columns as day-first dates, then re-attach the other columns.
# NOTE(review): errors='ignore' is deprecated in recent pandas releases -- confirm
# the target pandas version before relying on it.
df = df[cols].apply(
    pd.to_datetime, dayfirst=True, errors='ignore'
).join(df.drop(columns=cols))  # keyword form; positional axis was removed in pandas 2.0

# Turn the datetime columns into plain numbers so they can be scaled and fed to the SVM.
df = df[cols].apply(
    pd.to_numeric, errors='ignore'
).join(df.drop(columns=cols))

# Encode the traffic light as an ordinal label: 0 by default, 1 for Yellow, 2 for Red.
# .loc replaces the removed .ix indexer; a boolean mask plus a column label behaves the same.
df['late'] = 0
df.loc[df['End Time Traffic Light'].isin(['Yellow']), 'late'] = 1
df.loc[df['End Time Traffic Light'].isin(['Red']), 'late'] = 2

# Supervised learning: build the feature matrix X and the label vector y.
# .values replaces the removed DataFrame.as_matrix().
X = df[[
    'Planned Start', 'Actual Start', 'Planned End', 'Actual End',
    'Measure Package', 'Measure', 'Responsible User',
]].values
y = np.array(df['late'])

# Standardize the features before fitting.
X = preprocessing.scale(X)

# Support Vector Machine with one-vs-one decision function shape.
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(X, y)

# Accuracy on the training data itself (not a held-out score).
print(clf.score(X, y))
我现在想弄清楚如何绘制决策边界。我的目标是用 `Actual End` 和 `Planned End` 绘制一张双变量散点图。我自然查阅了文档(参见此处),但还是没能弄明白。这该怎么实现?
作为对未来的提醒,如果您提供公开可用的数据集和您尝试的绘图代码,您通常会得到更快(更好)的响应,因为我们没有 'April.csv'。您还可以省略 'April.csv' 的数据整理代码。话虽如此...
Sebastian Raschka 创建了 mlxtend 包,它有一个非常棒的绘图功能来执行此操作。它在底层使用 matplotlib。
import numpy as np
import pandas as pd
from sklearn import svm
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
# Create an arbitrary dataset for the example: two continuous features and a 3-class label.
df = pd.DataFrame({
    'Planned_End': np.random.uniform(low=-5, high=5, size=50),
    'Actual_End': np.random.uniform(low=-1, high=1, size=50),
    # np.random.random_integers is deprecated; randint's upper bound is exclusive,
    # so high=3 yields the same label set {0, 1, 2}.
    'Late': np.random.randint(low=0, high=3, size=50),
})

# Fit the Support Vector Machine classifier on the two plotted features only,
# so the decision regions can be drawn in 2-D.
X = df[['Planned_End', 'Actual_End']]
y = df['Late']
clf = svm.SVC(decision_function_shape='ovo')
clf.fit(X.values, y.values)

# Plot the decision regions with mlxtend (draws onto the current matplotlib axes).
plot_decision_regions(X=X.values,
                      y=y.values,
                      clf=clf,
                      legend=2)

# Label the axes with the feature names and add a title.
plt.xlabel(X.columns[0], size=14)
plt.ylabel(X.columns[1], size=14)
plt.title('SVM Decision Region Boundary', size=16)

# Display the figure when run as a plain script (no-op concern in inline notebooks).
plt.show()