如何在 kdeplot 中突出显示平均值所在的点?
How to highlight the mean points in a kdeplot?
我需要根据各算法输出的 5 个 CSV 文件绘制图形,并比较哪个算法更好。
每个 CSV 文件仅包含 100 行 × 4 列的浮点数。
我绘制了 kdeplot 来比较这 5 个 CSV 文件的第一列。
所以我把问题编码成这样:
from cProfile import label
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Compare the first column of the five algorithms' result files as filled
# KDE curves drawn on one shared set of axes.
plt.style.use("fivethirtyeight")
sns.set_theme()
# set_palette() installs the palette for subsequent plots; a bare
# color_palette() call only returns the colours and changes nothing.
sns.set_palette("bright")

# One result file per algorithm; only the algorithm number differs in the path.
csv_template = "D:/C++/Programs/Python/Input/appendicitis/alg{}/AverageIter1000.csv"

for alg_number in range(1, 6):
    data = pd.read_csv(csv_template.format(alg_number), on_bad_lines='skip',
                       nrows=100, usecols=[0, 1, 2, 3], header=None)
    # header=None gives integer column labels, so data[0] is the first column.
    # fill= replaces the deprecated shade= keyword.
    sns.kdeplot(np.array(data[0]), fill=True, linewidth=2, label=f'arg{alg_number}')

plt.xlabel("Accuracy")
plt.ylabel("Accuracy-Density")
plt.title("Accuracy graph visualisation")
plt.legend()
plt.show()
这段代码可以绘制出图形,但我主要需要的是在每条曲线上突出显示平均值所在的点。请问该如何实现?
您可以对 5 条曲线中的每一条应用如下方法:先绘制不填充的 kde 曲线,再从曲线中取出 x、y 数据,在平均值处插值得到曲线高度并画出竖线,最后自行填充曲线下方的区域:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.colors import to_rgba
# Draw one KDE curve per algorithm and mark each distribution's mean with a
# dotted vertical line that stops at the curve.
plt.style.use("fivethirtyeight")
sns.set_theme()
sns.set_palette("bright")

# One sub-directory per algorithm; the names double as legend labels.
directories = [f'alg{i}' for i in range(1, 6)]

# To plot the real results, load the CSV files instead of the test data below:
# all_data = [pd.read_csv(f"D:/C++/Programs/Python/Input/appendicitis/{alg_dir}/AverageIter1000.csv",
#                         on_bad_lines='skip', nrows=100, usecols=[0, 1, 2, 3], header=None)
#             for alg_dir in directories]

# creates some test data, a bit similar to what could be in the files
all_data = [pd.DataFrame(np.random.normal(0.1, 1, (np.random.randint(100, 300), 1)).cumsum()) for _ in directories]

fig, ax = plt.subplots(figsize=(12, 5))

# First pass: draw outlines only (fill=False) so that every curve ends up in
# ax.lines, from where its x/y data can be read back.
for data_i in all_data:
    sns.kdeplot(np.array(data_i[0]), fill=False, linewidth=2, ax=ax)

# Second pass: pair each dataset with the line that was drawn for it.
for data_i, kdeline_i, alg_dir in zip(all_data, ax.lines, directories):
    mean = data_i[0].mean()
    xs = kdeline_i.get_xdata()
    ys = kdeline_i.get_ydata()
    # Height of the KDE curve at the mean, linearly interpolated between the
    # sampled curve points.
    height = np.interp(mean, xs, ys)
    color = kdeline_i.get_color()
    ax.vlines(mean, 0, height, color=color, ls=':', lw=3)
    # Shade the area under the curve manually; this artist carries the legend label.
    ax.fill_between(xs, ys, facecolor=to_rgba(color, alpha=0.2), edgecolor=color, lw=2, label=alg_dir)

ax.set_xlabel("Accuracy")
ax.set_ylabel("Accuracy-Density")
ax.set_title("Accuracy graph visualisation")
ax.legend()
plt.tight_layout()
plt.show()
我需要根据各算法输出的 5 个 CSV 文件绘制图形,并比较哪个算法更好。
每个 CSV 文件仅包含 100 行 × 4 列的浮点数。我绘制了 kdeplot 来比较这 5 个 CSV 文件的第一列。
所以我把问题编码成这样:
from cProfile import label
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Compare the first column of the five algorithms' result files as filled
# KDE curves drawn on one shared set of axes.
plt.style.use("fivethirtyeight")
sns.set_theme()
# set_palette() installs the palette for subsequent plots; a bare
# color_palette() call only returns the colours and changes nothing.
sns.set_palette("bright")

# One result file per algorithm; only the algorithm number differs in the path.
csv_template = "D:/C++/Programs/Python/Input/appendicitis/alg{}/AverageIter1000.csv"

for alg_number in range(1, 6):
    data = pd.read_csv(csv_template.format(alg_number), on_bad_lines='skip',
                       nrows=100, usecols=[0, 1, 2, 3], header=None)
    # header=None gives integer column labels, so data[0] is the first column.
    # fill= replaces the deprecated shade= keyword.
    sns.kdeplot(np.array(data[0]), fill=True, linewidth=2, label=f'arg{alg_number}')

plt.xlabel("Accuracy")
plt.ylabel("Accuracy-Density")
plt.title("Accuracy graph visualisation")
plt.legend()
plt.show()
这段代码可以绘制出图形,但我主要需要的是在每条曲线上突出显示平均值所在的点。请问该如何实现?
您可以对 5 条曲线中的每一条应用如下方法:先绘制不填充的 kde 曲线,再从曲线中取出 x、y 数据,在平均值处插值得到曲线高度并画出竖线,最后自行填充曲线下方的区域:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.colors import to_rgba
# Draw one KDE curve per algorithm and mark each distribution's mean with a
# dotted vertical line that stops at the curve.
plt.style.use("fivethirtyeight")
sns.set_theme()
sns.set_palette("bright")

# One sub-directory per algorithm; the names double as legend labels.
directories = [f'alg{i}' for i in range(1, 6)]

# To plot the real results, load the CSV files instead of the test data below:
# all_data = [pd.read_csv(f"D:/C++/Programs/Python/Input/appendicitis/{alg_dir}/AverageIter1000.csv",
#                         on_bad_lines='skip', nrows=100, usecols=[0, 1, 2, 3], header=None)
#             for alg_dir in directories]

# creates some test data, a bit similar to what could be in the files
all_data = [pd.DataFrame(np.random.normal(0.1, 1, (np.random.randint(100, 300), 1)).cumsum()) for _ in directories]

fig, ax = plt.subplots(figsize=(12, 5))

# First pass: draw outlines only (fill=False) so that every curve ends up in
# ax.lines, from where its x/y data can be read back.
for data_i in all_data:
    sns.kdeplot(np.array(data_i[0]), fill=False, linewidth=2, ax=ax)

# Second pass: pair each dataset with the line that was drawn for it.
for data_i, kdeline_i, alg_dir in zip(all_data, ax.lines, directories):
    mean = data_i[0].mean()
    xs = kdeline_i.get_xdata()
    ys = kdeline_i.get_ydata()
    # Height of the KDE curve at the mean, linearly interpolated between the
    # sampled curve points.
    height = np.interp(mean, xs, ys)
    color = kdeline_i.get_color()
    ax.vlines(mean, 0, height, color=color, ls=':', lw=3)
    # Shade the area under the curve manually; this artist carries the legend label.
    ax.fill_between(xs, ys, facecolor=to_rgba(color, alpha=0.2), edgecolor=color, lw=2, label=alg_dir)

ax.set_xlabel("Accuracy")
ax.set_ylabel("Accuracy-Density")
ax.set_title("Accuracy graph visualisation")
ax.legend()
plt.tight_layout()
plt.show()