如何绘制多个样本的密度图并显示总体趋势?
Density plot for many samples showing overall trend - how?
我想显示许多样本的密度图。每个样本都属于一个特定的分组变量。我可以像这样绘制每个单独的密度图:
import matplotlib.pyplot as plt
import seaborn as sns

# Keep the first row for each (subject, timepoint, region) combination.
fmri = sns.load_dataset("fmri")[['subject', 'timepoint', 'region', 'signal']].drop_duplicates(
    ['subject', 'timepoint', 'region'])
region2col = {'parietal': 'red', 'frontal': 'blue'}

fig, ax = plt.subplots(figsize=(22, 10))
# Draw one KDE curve per (subject, region) pair on the shared axes,
# coloured by region.
for subject in fmri.subject.unique():
    temp = fmri.loc[fmri.subject == subject]
    for region in temp['region'].unique():
        temp2 = temp.loc[temp.region == region]
        # kdeplot replaces the deprecated distplot(kde=True, hist=False).
        sns.kdeplot(
            temp2['signal'],
            label=region,
            color=region2col[region],
            ax=ax,
        )
但是,我想绘制每个区域分布的总体密度(坐标轴与上面相同,即 `signal` 和 `density`),并用阴影区域表示极值范围(每个信号点处的最大值和最小值),再用一条总体拟合线描述总体趋势。类似这样:
# Illustration only: this shows the visual format being asked for.
# In the desired figure the x axis should show "signal"
# and the y axis should show density.
g = sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    hue="region",
    kind="line",
)
plt.show()
这可能吗?
可以用 SciPy 的 `gaussian_kde` 计算每个样本各自的 KDE。当同一个 x 值对应多个 y 值时,`sns.lineplot`(即 `sns.relplot` 在绘制线图时调用的函数)会自动绘制置信区间。因此,只需把同一个 x 值数组重复多次(每个 KDE 值数组对应一次),就能得到想要的图。
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde

# Keep the first row for each (subject, timepoint, region) combination.
fmri = sns.load_dataset("fmri")[['subject', 'timepoint', 'region', 'signal']].drop_duplicates(
    ['subject', 'timepoint', 'region'])
region2col = {'parietal': 'red', 'frontal': 'blue'}

# Common evaluation grid: the observed signal range padded by 10% on each
# side, so every per-subject KDE is sampled at the same x positions.
x_min = fmri['signal'].min()
x_max = fmri['signal'].max()
dx = 0.1 * (x_max - x_min)
x_min -= dx
x_max += dx
xs = np.linspace(x_min, x_max, 100)

fig, ax = plt.subplots(figsize=(12, 5))
for region in fmri['region'].unique():
    df_region = fmri.loc[fmri.region == region]
    # Only visit subjects that actually have rows in this region:
    # gaussian_kde cannot be fitted to an empty sample.
    kdes = [
        gaussian_kde(df_region.loc[df_region.subject == subject, 'signal'])(xs)
        for subject in df_region['subject'].unique()
    ]
    num_subjects = len(kdes)
    kdes = np.concatenate(kdes)
    # Repeating xs once per subject gives lineplot several y values for each
    # x, which it aggregates into a single line with a shaded band.
    sns.lineplot(x=np.tile(xs, num_subjects), y=kdes, label=region, color=region2col[region], ax=ax)
ax.set_xlabel('signal')
ax.set_ylabel('density')
ax.legend(title='Region')
plt.show()
这可能不是最快的方法,但您可以先在同一取值范围内计算每个 subject/region 组合的 KDE,然后让 `lineplot` 完成剩下的工作:
import pandas as pd
from scipy.stats import gaussian_kde

# `fmri`, `np`, `sns` and `plt` are expected to exist already; the preceding
# snippet imports/defines them.
x = np.linspace(fmri['signal'].min(), fmri['signal'].max(), 100)


def _kde_on_grid(signal):
    """Return the Gaussian KDE of *signal* evaluated at every point of `x`."""
    return pd.Series(gaussian_kde(signal).evaluate(x), index=pd.Index(x, name='x'))


# One KDE curve per (subject, region) group; reset_index then exposes the
# group keys and the grid positions as ordinary columns next to 'kde'.
temp = fmri.groupby(['subject', 'region'])['signal'].apply(_kde_on_grid)
temp = temp.reset_index(name='kde')
plt.figure()
sns.lineplot(data=temp, x='x', y='kde', hue='region')
我想显示许多样本的密度图。每个样本都属于一个特定的分组变量。我可以像这样绘制每个单独的密度图:
import matplotlib.pyplot as plt
import seaborn as sns

# Keep the first row for each (subject, timepoint, region) combination.
fmri = sns.load_dataset("fmri")[['subject', 'timepoint', 'region', 'signal']].drop_duplicates(
    ['subject', 'timepoint', 'region'])
region2col = {'parietal': 'red', 'frontal': 'blue'}

fig, ax = plt.subplots(figsize=(22, 10))
# Draw one KDE curve per (subject, region) pair on the shared axes,
# coloured by region.
for subject in fmri.subject.unique():
    temp = fmri.loc[fmri.subject == subject]
    for region in temp['region'].unique():
        temp2 = temp.loc[temp.region == region]
        # kdeplot replaces the deprecated distplot(kde=True, hist=False).
        sns.kdeplot(
            temp2['signal'],
            label=region,
            color=region2col[region],
            ax=ax,
        )
但是,我想绘制每个区域分布的总体密度(坐标轴与上面相同,即 `signal` 和 `density`),并用阴影区域表示极值范围(每个信号点处的最大值和最小值),再用一条总体拟合线描述总体趋势。类似这样:
# Illustration only: this shows the visual format being asked for.
# In the desired figure the x axis should show "signal"
# and the y axis should show density.
g = sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    hue="region",
    kind="line",
)
plt.show()
这可能吗?
可以用 SciPy 的 `gaussian_kde` 计算每个样本各自的 KDE。当同一个 x 值对应多个 y 值时,`sns.lineplot`(即 `sns.relplot` 在绘制线图时调用的函数)会自动绘制置信区间。因此,只需把同一个 x 值数组重复多次(每个 KDE 值数组对应一次),就能得到想要的图。
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde

# Keep the first row for each (subject, timepoint, region) combination.
fmri = sns.load_dataset("fmri")[['subject', 'timepoint', 'region', 'signal']].drop_duplicates(
    ['subject', 'timepoint', 'region'])
region2col = {'parietal': 'red', 'frontal': 'blue'}

# Common evaluation grid: the observed signal range padded by 10% on each
# side, so every per-subject KDE is sampled at the same x positions.
x_min = fmri['signal'].min()
x_max = fmri['signal'].max()
dx = 0.1 * (x_max - x_min)
x_min -= dx
x_max += dx
xs = np.linspace(x_min, x_max, 100)

fig, ax = plt.subplots(figsize=(12, 5))
for region in fmri['region'].unique():
    df_region = fmri.loc[fmri.region == region]
    # Only visit subjects that actually have rows in this region:
    # gaussian_kde cannot be fitted to an empty sample.
    kdes = [
        gaussian_kde(df_region.loc[df_region.subject == subject, 'signal'])(xs)
        for subject in df_region['subject'].unique()
    ]
    num_subjects = len(kdes)
    kdes = np.concatenate(kdes)
    # Repeating xs once per subject gives lineplot several y values for each
    # x, which it aggregates into a single line with a shaded band.
    sns.lineplot(x=np.tile(xs, num_subjects), y=kdes, label=region, color=region2col[region], ax=ax)
ax.set_xlabel('signal')
ax.set_ylabel('density')
ax.legend(title='Region')
plt.show()
这可能不是最快的方法,但您可以先在同一取值范围内计算每个 subject/region 组合的 KDE,然后让 `lineplot` 完成剩下的工作:
import pandas as pd
from scipy.stats import gaussian_kde

# `fmri`, `np`, `sns` and `plt` are expected to exist already; the preceding
# snippet imports/defines them.
x = np.linspace(fmri['signal'].min(), fmri['signal'].max(), 100)


def _kde_on_grid(signal):
    """Return the Gaussian KDE of *signal* evaluated at every point of `x`."""
    return pd.Series(gaussian_kde(signal).evaluate(x), index=pd.Index(x, name='x'))


# One KDE curve per (subject, region) group; reset_index then exposes the
# group keys and the grid positions as ordinary columns next to 'kde'.
temp = fmri.groupby(['subject', 'region'])['signal'].apply(_kde_on_grid)
temp = temp.reset_index(name='kde')
plt.figure()
sns.lineplot(data=temp, x='x', y='kde', hue='region')