如何计算并绘制两列之间的准确率
How to calculate and plot accuracy between two columns
我想使用 matplotlib 绘制条形图,显示每个字母的准确率。
示例数据集
# Sample data from the question: every predicted letter equals the actual letter.
letters = ['U', 'A', 'X', 'P', 'C', 'R', 'C', 'U', 'J', 'D']
row_ids = [10113, 19164, 12798, 12034, 17719, 17886, 4624, 6047, 15608, 11815]
data = {'Actual Letter': list(letters), 'Predicted Letter': list(letters)}
df = pd.DataFrame(data, index=row_ids)
Actual Letter Predicted Letter
10113 U U
19164 A A
12798 X X
12034 P P
17719 C C
17886 R R
4624 C C
6047 U U
15608 J J
11815 D D
# Fails with "TypeError: no numeric data to plot": both columns hold only strings.
df.plot(kind='bar')
- 错误
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-a5f21be4f14b> in <module>
3 df = pd.DataFrame(data, index=[10113, 19164, 12798, 12034, 17719, 17886, 4624, 6047, 15608, 11815])
4
----> 5 df.plot(kind='bar')
e:\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
970 data.columns = label_name
971
--> 972 return plot_backend.plot(data, kind=kind, **kwargs)
973
974 __call__.__doc__ = __doc__
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
284 def generate(self):
285 self._args_adjust()
--> 286 self._compute_plot_data()
287 self._setup_subplots()
288 self._make_plot()
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _compute_plot_data(self)
451 # no non-numeric frames or series allowed
452 if is_empty:
--> 453 raise TypeError("no numeric data to plot")
454
455 self.data = numeric_data.apply(self._convert_to_ndarray)
TypeError: no numeric data to plot
我想要一个像这样的条形图。但是我不知道该怎么做。
导入和示例 DataFrame
import pandas as pd
import numpy as np # for sample data only
import string # for sample data only
# Build a reproducible test frame of random uppercase letters.
np.random.seed(365)  # fixed seed so the sample is the same on every run
rows = 1100
alphabet = list(string.ascii_uppercase)
actual = np.random.choice(alphabet, size=rows)
predicted = np.random.choice(alphabet, size=rows)  # drawn independently of `actual`
data = {'Actual': actual, 'Predicted': predicted}
df = pd.DataFrame(data)
计算和绘图
已更新
- 下面的实现比较简洁;删除了不必要的步骤。
1. 根据 'Predicted' 和 'Actual' 之间是否匹配,创建一个布尔值 'Match' 列
2. 对 'Actual' 执行 .groupby,用 .mean() 聚合,乘以 100 并四舍五入,得到百分比。
   - 每个字母的组将对布尔值求和并除以计数。对于 'A',总和为 1,因为有 1 个 True,除以该组的总数 33。因此,1/33 = 0.030303030303030304
3. 使用 pandas.DataFrame.plot 绘制所选数据的条形图
- 请注意,步骤 (1) 和 (2) 可以简化并合并为以下内容:
# Both steps in one pass: percent of correct predictions per actual letter.
is_match = df['Predicted'] == df['Actual']
dfa = (is_match.groupby(df['Actual']).mean() * 100).round(2)
# Flag each row True when the prediction agrees with the actual letter.
df['Match'] = df['Predicted'] == df['Actual']
# display(df.head())
Actual Predicted Match
0 S Z False
1 U J False
2 B L False
3 M V False
4 F C False
# Mean of the boolean flags per actual letter, scaled to a percentage (2 decimals).
match_rate = df.groupby('Actual')['Match'].mean()
dfa = (match_rate * 100).round(2)
# display(dfa.head())
Actual
A 3.03
B 2.63
C 4.44
D 6.82
E 5.77
Name: Match, dtype: float64
# plot
# `dfa` is a Series: its index (the letters) supplies the x-axis and its values
# the bar heights, so the DataFrame-only `x=`/`y=` column selectors are omitted
# (there is no '%' column here).
ax = dfa.plot(kind='bar', rot=0, legend=False, grid=True, figsize=(8, 5),
              ylabel='Percent %', xlabel='Letter', title='Accuracy Rate % per letter')
原代码
- 这也有效
# Encode a correct prediction as 1 and an incorrect one as 0.
df['Match'] = (df['Predicted'] == df['Actual']).astype(int)
# Share of 0s and 1s within each actual letter, as a percentage with 2 decimals.
match_share = df.groupby('Actual')['Match'].value_counts(normalize=True)
dfg = (match_share * 100).round(2).reset_index(name='%')
# Keep only the rows that describe correct predictions (Match == 1).
df_accuracy = dfg[dfg['Match'] == 1]
# display(df_accuracy.head())
Actual Match %
1 A 1 3.03
3 B 1 2.63
5 C 1 4.44
7 D 1 6.82
9 E 1 5.77
# Bar chart with one bar per letter; the bar height comes from the '%' column.
bar_options = dict(kind='bar', x='Actual', y='%', rot=0, legend=False, grid=True,
                   figsize=(8, 5), ylabel='Percent %', xlabel='Letter',
                   title='Accuracy Rate % per letter')
ax = df_accuracy.plot(**bar_options)
- 已按您描述的格式生成模拟数据
- 如果先计算百分比,图表会非常简单
import numpy as np
import pandas as pd
# simulate some data...
alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# The first candidate is the true letter (weight 0.95); the remaining 0.05 is
# spread evenly over all 26 letters.
weights = tuple([0.95] + [0.05 / 26] * 26)


def _simulate_prediction(actual):
    """Draw one predicted letter for the given actual letter."""
    candidates = [actual] + alphabet
    return np.random.choice(candidates, 1, p=weights)[0]


df = pd.DataFrame({"Actual Letter": np.random.choice(alphabet, 200)})
df["Predicted Letter"] = df["Actual Letter"].apply(_simulate_prediction)
# Fraction (0-1) of rows per actual letter where the prediction is correct.
# The columns are compared as a whole and then grouped, rather than using
# groupby(...).apply(lambda ...): that lambda read the grouping column
# "Actual Letter" inside each group, which pandas >= 2.2 deprecates because
# grouping columns are being excluded from the frames passed to apply.
is_correct = df["Actual Letter"].eq(df["Predicted Letter"])
accuracy = is_correct.groupby(df["Actual Letter"]).mean()
# graph it...
accuracy.plot(kind="bar")
我想使用 matplotlib 绘制条形图,显示每个字母的准确率。
示例数据集
# Sample data from the question: every predicted letter equals the actual letter.
letters = ['U', 'A', 'X', 'P', 'C', 'R', 'C', 'U', 'J', 'D']
row_ids = [10113, 19164, 12798, 12034, 17719, 17886, 4624, 6047, 15608, 11815]
data = {'Actual Letter': list(letters), 'Predicted Letter': list(letters)}
df = pd.DataFrame(data, index=row_ids)
Actual Letter Predicted Letter
10113 U U
19164 A A
12798 X X
12034 P P
17719 C C
17886 R R
4624 C C
6047 U U
15608 J J
11815 D D
# Fails with "TypeError: no numeric data to plot": both columns hold only strings.
df.plot(kind='bar')
- 错误
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-a5f21be4f14b> in <module>
3 df = pd.DataFrame(data, index=[10113, 19164, 12798, 12034, 17719, 17886, 4624, 6047, 15608, 11815])
4
----> 5 df.plot(kind='bar')
e:\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
970 data.columns = label_name
971
--> 972 return plot_backend.plot(data, kind=kind, **kwargs)
973
974 __call__.__doc__ = __doc__
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
284 def generate(self):
285 self._args_adjust()
--> 286 self._compute_plot_data()
287 self._setup_subplots()
288 self._make_plot()
e:\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _compute_plot_data(self)
451 # no non-numeric frames or series allowed
452 if is_empty:
--> 453 raise TypeError("no numeric data to plot")
454
455 self.data = numeric_data.apply(self._convert_to_ndarray)
TypeError: no numeric data to plot
我想要一个像这样的条形图。但是我不知道该怎么做。
导入和示例 DataFrame
import pandas as pd
import numpy as np # for sample data only
import string # for sample data only
# Build a reproducible test frame of random uppercase letters.
np.random.seed(365)  # fixed seed so the sample is the same on every run
rows = 1100
alphabet = list(string.ascii_uppercase)
actual = np.random.choice(alphabet, size=rows)
predicted = np.random.choice(alphabet, size=rows)  # drawn independently of `actual`
data = {'Actual': actual, 'Predicted': predicted}
df = pd.DataFrame(data)
计算和绘图
已更新
- 下面的实现比较简洁;删除了不必要的步骤。
1. 根据 'Predicted' 和 'Actual' 之间是否匹配,创建一个布尔值 'Match' 列
2. 对 'Actual' 执行 .groupby,用 .mean() 聚合,乘以 100 并四舍五入,得到百分比。
   - 每个字母的组将对布尔值求和并除以计数。对于 'A',总和为 1,因为有 1 个 True,除以该组的总数 33。因此,1/33 = 0.030303030303030304
3. 使用 pandas.DataFrame.plot 绘制所选数据的条形图
- 请注意,步骤 (1) 和 (2) 可以简化并合并为以下内容:
# Both steps in one pass: percent of correct predictions per actual letter.
is_match = df['Predicted'] == df['Actual']
dfa = (is_match.groupby(df['Actual']).mean() * 100).round(2)
# Flag each row True when the prediction agrees with the actual letter.
df['Match'] = df['Predicted'] == df['Actual']
# display(df.head())
Actual Predicted Match
0 S Z False
1 U J False
2 B L False
3 M V False
4 F C False
# Mean of the boolean flags per actual letter, scaled to a percentage (2 decimals).
match_rate = df.groupby('Actual')['Match'].mean()
dfa = (match_rate * 100).round(2)
# display(dfa.head())
Actual
A 3.03
B 2.63
C 4.44
D 6.82
E 5.77
Name: Match, dtype: float64
# plot
# `dfa` is a Series: its index (the letters) supplies the x-axis and its values
# the bar heights, so the DataFrame-only `x=`/`y=` column selectors are omitted
# (there is no '%' column here).
ax = dfa.plot(kind='bar', rot=0, legend=False, grid=True, figsize=(8, 5),
              ylabel='Percent %', xlabel='Letter', title='Accuracy Rate % per letter')
原代码
- 这也有效
# Encode a correct prediction as 1 and an incorrect one as 0.
df['Match'] = (df['Predicted'] == df['Actual']).astype(int)
# Share of 0s and 1s within each actual letter, as a percentage with 2 decimals.
match_share = df.groupby('Actual')['Match'].value_counts(normalize=True)
dfg = (match_share * 100).round(2).reset_index(name='%')
# Keep only the rows that describe correct predictions (Match == 1).
df_accuracy = dfg[dfg['Match'] == 1]
# display(df_accuracy.head())
Actual Match %
1 A 1 3.03
3 B 1 2.63
5 C 1 4.44
7 D 1 6.82
9 E 1 5.77
# Bar chart with one bar per letter; the bar height comes from the '%' column.
bar_options = dict(kind='bar', x='Actual', y='%', rot=0, legend=False, grid=True,
                   figsize=(8, 5), ylabel='Percent %', xlabel='Letter',
                   title='Accuracy Rate % per letter')
ax = df_accuracy.plot(**bar_options)
- 已按您描述的格式生成模拟数据
- 如果先计算百分比,图表会非常简单
import numpy as np
import pandas as pd
# simulate some data...
alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# The first candidate is the true letter (weight 0.95); the remaining 0.05 is
# spread evenly over all 26 letters.
weights = tuple([0.95] + [0.05 / 26] * 26)


def _simulate_prediction(actual):
    """Draw one predicted letter for the given actual letter."""
    candidates = [actual] + alphabet
    return np.random.choice(candidates, 1, p=weights)[0]


df = pd.DataFrame({"Actual Letter": np.random.choice(alphabet, 200)})
df["Predicted Letter"] = df["Actual Letter"].apply(_simulate_prediction)
# Fraction (0-1) of rows per actual letter where the prediction is correct.
# The columns are compared as a whole and then grouped, rather than using
# groupby(...).apply(lambda ...): that lambda read the grouping column
# "Actual Letter" inside each group, which pandas >= 2.2 deprecates because
# grouping columns are being excluded from the frames passed to apply.
is_correct = df["Actual Letter"].eq(df["Predicted Letter"])
accuracy = is_correct.groupby(df["Actual Letter"]).mean()
# graph it...
accuracy.plot(kind="bar")