Pandas 箱线图将列中的所有数据与同一列的过滤版本进行比较

Pandas boxplot compare all data from column with filtered version of same column

这看起来应该很容易,但我似乎无法让它正常工作。

# Sample data: every name appears twice, each time with its own 'Times' value.
data = {
    'Name': ['Tom', 'nick', 'krish', 'jack', 'Tom', 'nick', 'krish', 'jack'],
    'Age': [31, 46, 21, 37, 31, 46, 21, 37],
    'Times': [20, 21, 19, 18, 19, 20, 20, 19],
}

df = pd.DataFrame(data)
df

# Box plot of the complete 'Times' column.
df['Times'].plot.box()

# Box plot of 'Times' restricted to the rows whose Name is 'Tom'.
filt = df['Name'].eq('Tom')
df.loc[filt, 'Times'].plot.box()

# Putting two different columns side by side is straightforward; the open
# question is how to do the same for one column under different row filters.
df[['Times', 'Age']].plot.box()

那么如何并排比较同一列的这两个版本?非常感谢。

您只需将列表传递给 plt.boxplot():

# Passing a list of samples draws one box per sample on the same axes:
# first the complete 'Times' column, then only the rows whose Name is 'Tom'.
# NOTE(review): Matplotlib 3.9 renamed `labels` to `tick_labels` - confirm
# which spelling the target Matplotlib version expects.
box = plt.boxplot(
    [df['Times'], df.loc[df['Name'] == 'Tom', 'Times']],
    labels=['all', 'Toms'],
)

我比较了 Tom、其他人和所有人

# Compare 'Times' for Tom, for everyone else, and for everyone by reshaping the
# frame so that each name becomes its own column and then grouping on those.
# NOTE(review): this snippet uses `pd` and `plt` without importing them here -
# presumably `import pandas as pd` / `import matplotlib.pyplot as plt` ran earlier.
data = {'Name':['Tom', 'nick', 'krish', 'jack', 'Tom', 'nick', 'krish', 'jack'],
        'Age':[31, 46, 21, 37, 31, 46, 21, 37],
        'Times':[20, 21, 19, 18, 19, 20, 20, 19]}

df = pd.DataFrame(data)

print(df)
# Box plot of 'Times' with one box per distinct 'Age' value.
df.boxplot(column='Times', by='Age')


# Reduce each (Name, Times) pair with any(), pivot the 'Times' index level into
# columns (unstack), turn 'Name' back into a regular column, then transpose so
# that the row holding the names ends up first.
grouped=df.groupby(['Name','Times']).any().unstack().reset_index().transpose()
df2=pd.DataFrame(grouped)
# Promote the first row (the names) to column headers and drop it from the data.
new_header = df2.iloc[0]
df2 = df2[1:]
df2.columns = new_header
# Move the index back into regular columns; the 'Times' column used by the
# box plots below presumably comes from here - TODO confirm.
df2.reset_index(inplace=True)
# Columns to combine for "everyone except Tom" and for "everyone".
# NOTE(review): any other column created by reset_index() is not excluded here
# and would take part in the any() reductions below - verify this is intended.
others=[x for x in df2.columns if x not in(['Tom','Times'])]
# NOTE(review): `all` shadows the Python builtin for the rest of the script.
all=[x for x in df2.columns if x not in(['Times'])]
# Row-wise flags: True when any of the selected columns is truthy in that row.
df2['Others']=df2[others].any(axis=1)
df2['All']=df2[all].any(axis=1)
print(df2.columns)
print(df2)
# One figure per grouping flag: 'Times' split by Others, by Tom, and by All.
df2.boxplot(column='Times',by=['Others'])
df2.boxplot(column='Times',by=['Tom'])
df2.boxplot(column='Times',by=['All'])
plt.show()

与已接受的答案类似的方法,无需对名称进行硬编码

import pandas as pd
import matplotlib.pyplot as plt


data = {
    'Name': ['Tom', 'nick', 'krish', 'jack', 'Tom', 'nick', 'krish', 'jack'],
    'Age': [31, 46, 21, 37, 31, 46, 21, 37],
    'Times': [20, 21, 19, 18, 19, 20, 20, 19],
}

df = pd.DataFrame(data)

# Start with the unfiltered column so that "all" is the first box.
# To plot only the per-name boxes, start from two empty lists instead.
df_list = [df["Times"]]
labels_list = ["all"]

# Add one box per distinct name, discovered from the data rather than hard-coded.
grouped_df = df.groupby("Name")
for name, group in grouped_df:
    labels_list.append(name)
    df_list.append(group["Times"])

# Build the lists and draw exactly once: running the loop a second time would
# append every group again and produce a figure with each name duplicated.
# NOTE(review): Matplotlib 3.9 renamed `labels` to `tick_labels` - confirm
# which spelling the target Matplotlib version expects.
plt.boxplot(df_list, labels=labels_list)
plt.show()

这是结果