Python - 如何使用 python pandas 交叉表创建混淆矩阵统计
Python - How to create confusion matrix statistics using python pandas crosstab
以下是我的 Python 脚本,它生成以下混淆矩阵:
# /usr/bin/python -tt
from __future__ import division
import csv
import os
import pandas as pd
#----------------------------------------------------------------------------
def get_tcp_variant(filepath):
    """Classify the TCP congestion-control variant recorded in a CSV trace.

    The first column of every row is parsed as an integer congestion window;
    any further columns are ignored. Whenever the window is smaller than the
    one on the previous row, the ratio ``current / previous`` (rounded to two
    decimals) is recorded as a beta sample. The mean of all samples is then
    mapped onto a variant name.

    Args:
        filepath: Path of the CSV trace to read.

    Returns:
        "Cubic", "Reno" or "BIC" according to the mean beta, or the string
        "None" when the mean lies outside every range or the trace contains
        no window decrease at all.

    Raises:
        ValueError: If a first-column value is not an integer.
    """

    def tcp_congestion_variant(beta):
        """Map a mean beta value onto a variant label."""
        print('predict({}; abs({})'.format(beta, abs(beta)))
        if 0.61 < beta <= 0.75:
            return "Cubic"
        # The upper bound is inclusive so that a mean of exactly 0.61 no
        # longer falls into the gap between the Reno and Cubic ranges.
        # NOTE(review): assigning 0.61 to Reno follows the "upper bound
        # inclusive" pattern of the other ranges - confirm.
        if 0.45 <= beta <= 0.61:
            return "Reno"
        if 0.75 < beta <= 0.99:
            return "BIC"
        return "None"

    beta_values = []
    cwnd_loss = 0  # window seen on the previous row
    with open(filepath, "r") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            value = int(row[0])
            if value < cwnd_loss:
                beta_values.append(round(value / cwnd_loss, 2))
            cwnd_loss = value

    if not beta_values:
        # No decrease observed: avoid dividing by zero when averaging.
        return "None"
    return tcp_congestion_variant(sum(beta_values) / len(beta_values))
# Report header: the title framed by two rules of asterisks.
print("{0}\n{1}\n{0}".format("*********************************************",
                             "Confusion matrix "))
# Two parallel label lists, filled with one entry per trace file.
matrix = dict(actual=[], predict=[])
# Directory that is scanned for the CSV traces.
path = './csv_files'
#----------------------------------------------------------------------------
def get_variant_predict(filename):
    """Derive a TCP variant label from a trace file's name.

    The name is searched case-insensitively for 'cubic', 'reno' and 'bic',
    in that order; 'cubic' has to be tested first because it contains 'bic'.

    Args:
        filename: Name of the trace file.

    Returns:
        'Cubic', 'Reno' or 'BIC' when the matching keyword occurs in the
        name; otherwise the first character of ``filename`` (an empty string
        for an empty name).
    """
    lowered = filename.lower()
    for keyword, label in (('cubic', 'Cubic'), ('reno', 'Reno'), ('bic', 'BIC')):
        if keyword in lowered:
            return label
    # Slicing instead of indexing keeps an empty name from raising IndexError.
    return filename[:1]
#----------------------------------------------------------------------------
# Collect one label pair per trace file found in `path`.
# NOTE(review): the filename-derived label is stored under 'predict' and the
# beta-based classification under 'actual'; this looks swapped - confirm
# before interpreting rows vs. columns of the table.
for filename in sorted(os.listdir(path)):  # sorted: deterministic debug output
    filepath = os.path.join(path, filename)
    # os.listdir also returns sub-directories, which cannot be opened as CSV.
    if not os.path.isfile(filepath):
        continue
    matrix['predict'].append(get_variant_predict(filename))
    matrix['actual'].append(get_tcp_variant(filepath))

# Cross-tabulate the two label lists into the confusion matrix.
# Passing margins=True to crosstab would add the "All" totals.
data_frame = pd.crosstab(pd.Series(matrix['actual'], name='Actual'),
                         pd.Series(matrix['predict'], name=' Predicted'))
print (" ")
print(data_frame)
我们如何为 Python pandas 交叉表添加混淆矩阵统计(例如:accuracy)?如果手动计算,accuracy 将是 (4+24+21)/(4+24+4+1+1+21),但我想自动生成这些统计数据。
# Accuracy = sum of the diagonal (matching labels) / total number of samples.
# `df` is the square crosstab and `np` is numpy; the identity mask is sized
# from the table itself instead of being hard-coded to three classes.
(df * np.eye(len(df))).values.sum() / df.values.sum()
输出:
0.89090909090909087
看看这个sklearn.metrics.classification_report
# Per-class report from sklearn.metrics.classification_report.
# The labels are already strings, so they serve as the row names directly.
# A fixed three-entry target_names list would stop matching as soon as an
# extra label (e.g. "None") shows up in either list.
print(classification_report(matrix['actual'],
                            matrix['predict']))
以下是我的 Python 脚本,它生成以下混淆矩阵:
# /usr/bin/python -tt
from __future__ import division
import csv
import os
import pandas as pd
#----------------------------------------------------------------------------
def get_tcp_variant(filepath):
    """Classify the TCP congestion-control variant recorded in a CSV trace.

    The first column of every row is parsed as an integer congestion window;
    any further columns are ignored. Whenever the window is smaller than the
    one on the previous row, the ratio ``current / previous`` (rounded to two
    decimals) is recorded as a beta sample. The mean of all samples is then
    mapped onto a variant name.

    Args:
        filepath: Path of the CSV trace to read.

    Returns:
        "Cubic", "Reno" or "BIC" according to the mean beta, or the string
        "None" when the mean lies outside every range or the trace contains
        no window decrease at all.

    Raises:
        ValueError: If a first-column value is not an integer.
    """

    def tcp_congestion_variant(beta):
        """Map a mean beta value onto a variant label."""
        print('predict({}; abs({})'.format(beta, abs(beta)))
        if 0.61 < beta <= 0.75:
            return "Cubic"
        # The upper bound is inclusive so that a mean of exactly 0.61 no
        # longer falls into the gap between the Reno and Cubic ranges.
        # NOTE(review): assigning 0.61 to Reno follows the "upper bound
        # inclusive" pattern of the other ranges - confirm.
        if 0.45 <= beta <= 0.61:
            return "Reno"
        if 0.75 < beta <= 0.99:
            return "BIC"
        return "None"

    beta_values = []
    cwnd_loss = 0  # window seen on the previous row
    with open(filepath, "r") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            value = int(row[0])
            if value < cwnd_loss:
                beta_values.append(round(value / cwnd_loss, 2))
            cwnd_loss = value

    if not beta_values:
        # No decrease observed: avoid dividing by zero when averaging.
        return "None"
    return tcp_congestion_variant(sum(beta_values) / len(beta_values))
# Report header: the title framed by two rules of asterisks.
print("{0}\n{1}\n{0}".format("*********************************************",
                             "Confusion matrix "))
# Two parallel label lists, filled with one entry per trace file.
matrix = dict(actual=[], predict=[])
# Directory that is scanned for the CSV traces.
path = './csv_files'
#----------------------------------------------------------------------------
def get_variant_predict(filename):
    """Derive a TCP variant label from a trace file's name.

    The name is searched case-insensitively for 'cubic', 'reno' and 'bic',
    in that order; 'cubic' has to be tested first because it contains 'bic'.

    Args:
        filename: Name of the trace file.

    Returns:
        'Cubic', 'Reno' or 'BIC' when the matching keyword occurs in the
        name; otherwise the first character of ``filename`` (an empty string
        for an empty name).
    """
    lowered = filename.lower()
    for keyword, label in (('cubic', 'Cubic'), ('reno', 'Reno'), ('bic', 'BIC')):
        if keyword in lowered:
            return label
    # Slicing instead of indexing keeps an empty name from raising IndexError.
    return filename[:1]
#----------------------------------------------------------------------------
# Collect one label pair per trace file found in `path`.
# NOTE(review): the filename-derived label is stored under 'predict' and the
# beta-based classification under 'actual'; this looks swapped - confirm
# before interpreting rows vs. columns of the table.
for filename in sorted(os.listdir(path)):  # sorted: deterministic debug output
    filepath = os.path.join(path, filename)
    # os.listdir also returns sub-directories, which cannot be opened as CSV.
    if not os.path.isfile(filepath):
        continue
    matrix['predict'].append(get_variant_predict(filename))
    matrix['actual'].append(get_tcp_variant(filepath))

# Cross-tabulate the two label lists into the confusion matrix.
# Passing margins=True to crosstab would add the "All" totals.
data_frame = pd.crosstab(pd.Series(matrix['actual'], name='Actual'),
                         pd.Series(matrix['predict'], name=' Predicted'))
print (" ")
print(data_frame)
我们如何为 Python pandas 交叉表添加混淆矩阵统计(例如:accuracy)?如果手动计算,accuracy 将是 (4+24+21)/(4+24+4+1+1+21),但我想自动生成这些统计数据。
# Accuracy = sum of the diagonal (matching labels) / total number of samples.
# `df` is the square crosstab and `np` is numpy; the identity mask is sized
# from the table itself instead of being hard-coded to three classes.
(df * np.eye(len(df))).values.sum() / df.values.sum()
输出:
0.89090909090909087
看看这个sklearn.metrics.classification_report
# Per-class report from sklearn.metrics.classification_report.
# The labels are already strings, so they serve as the row names directly.
# A fixed three-entry target_names list would stop matching as soon as an
# extra label (e.g. "None") shows up in either list.
print(classification_report(matrix['actual'],
                            matrix['predict']))