Seaborn barplot 中的误差线(error bar)问题 - Python
Issue in Error bars in Seaborn barplot - Python
(Have already looked at similar questions but they don't answer this
query)
我有一个结构如下的数据框 df1
{'token': {0: '180816_031', 1: '180816_031', 2: '180816_031', 3: '180816_031', 4: '180816_031', 5: '180816_031', 6: '180816_031', 7: '180816_031', 8: '180816_031', 9: '180816_031'}, 'variable': {0: 'Unnamed: 0', 1: 'adj_active_polymerase', 2: 'adj_functional_sequencing_pores', 3: 'adj_high_quality_reads', 4: 'adj_single_pores', 5: 'cell_mask_bilayers_sum', 6: 'num_align_high_quality_reads', 7: 'num_total_cells', 8: 'potential_pore', 9: 'short_pass'}, 'value': {0: 21.0, 1: 615850.51515151514, 2: 615850.51515151514, 3: 486008.39393939392, 4: 803784.06060606055, 5: 1665347.5757575757, 6: 468638.03030303027, 7: 2097152.0, 8: 1158527.0, 9: 2067189.2424242424}}
我正在使用以下代码重新创建我的数据,然后显示条形图
# Aggregate the long-format frame: mean and standard deviation of 'value'
# for every (token, variable) pair.
df1 = df.groupby(['token', 'variable']).agg({'value': ['mean', 'std']})
df1.reset_index(inplace=True)
df1.sort_values('value', inplace=True, ascending=False)
fig, ax = plt.subplots()
fig.set_size_inches(16, 8)
# To get a different color for each variable, assign the variable to hue.
# NOTE(review): this is the call the question is about -- as reported in the
# text that follows, passing yerr='std' here does not yield the desired
# error bars. It is intentionally left unchanged.
g = sns.factorplot(x='token', y='value', data=df1, hue='variable', ax=ax, yerr='std')
#g.map_dataframe(plt.errorbar, x="followup (months)", y="probability", yerr='sd')
# Put the legend outside the plot: first shrink the axes to 80% of their width
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Add the respective value to the top of each bar
for p in ax.patches:
    ax.annotate("%d" % p.get_height(), (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center', va='center', fontsize=11, color='black', xytext=(0, 10),
                textcoords='offset points', fontweight='bold')
plt.show()
但我无法在输出中得到所需的误差线(error bar)
之前我在数据框中只使用 mean 时已经生成过一个输出,但我还需要误差线,所以我想加入 std 并把它传给 yerr(这是我查阅了大量资料之后的做法)
请帮忙
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Sample observations, written one per line:
# row label -> (token, station, cycle_number, variable, value).
_columns = ('token', 'station', 'cycle_number', 'variable', 'value')
_rows = {
    361: ('180816_031', 'deneb', 'cycle09', 'adj_high_quality_reads', 99704.0),
    119: ('180816_031', 'callisto', 'cycle06', 'short_pass', 2072785.0),
    101: ('180816_031', 'callisto', 'cycle04', 'short_pass', 2061059.0),
    135: ('180816_031', 'callisto', 'cycle01', 'cell_mask_bilayers_sum', 1682208.0),
    292: ('180816_031', 'callisto', 'cycle04', 'adj_active_polymerase', 675306.0),
    133: ('180816_031', 'deneb', 'cycle05', 'cell_mask_bilayers_sum', 1714292.0),
    99: ('180816_031', 'callisto', 'cycle06', 'short_pass', 2072785.0),
    270: ('180816_031', 'callisto', 'cycle07', 'adj_active_polymerase', 687988.0),
    19: ('180816_031', 'deneb', 'cycle04', 'Unnamed: 0', 19.0),
    382: ('180816_031', 'callisto', 'cycle08', 'adj_high_quality_reads', np.nan),
    414: ('180816_031', 'deneb', 'cycle04', 'num_align_high_quality_reads', 285176.0),
    267: ('180816_031', 'callisto', 'cycle10', 'adj_active_polymerase', 86914.0),
    218: ('180816_031', 'deneb', 'cycle07', 'adj_single_pores', 948971.0),
    398: ('180816_031', 'callisto', 'cycle08', 'num_align_high_quality_reads', 405196.0),
    287: ('180816_031', 'deneb', 'cycle09', 'adj_active_polymerase', 137926.0),
    155: ('180816_031', 'deneb', 'cycle08', 'cell_mask_bilayers_sum', 1830032.0),
    392: ('180816_031', 'deneb', 'cycle06', 'num_align_high_quality_reads', 480081.0),
    265: ('180816_031', 'callisto', 'cycle02', 'adj_active_polymerase', 951689.0),
    239: ('180816_031', 'callisto', 'cycle09', 'adj_single_pores', 681452.0),
    237: ('180816_031', 'callisto', 'cycle07', 'adj_single_pores', 882671.0),
}
# Pivot the rows into the column -> {row label -> value} mapping that is
# handed to the DataFrame constructor.
d = {
    name: {label: row[pos] for label, row in _rows.items()}
    for pos, name in enumerate(_columns)
}
df = pd.DataFrame(d)
# Draw one bar per 'variable' (hue) within each 'token'. barplot aggregates the
# raw observations in df itself and draws capped error bars (capsize) from the
# same data, so no pre-computed mean/std columns are passed in.
# x and y are given by keyword: recent seaborn releases do not accept them as
# positional arguments.
g = sns.barplot(x='token', y='value', data=df, hue='variable', capsize=0.1)
# Label every bar with its own height. Reading the number from the patch keeps
# label and bar in sync whatever order the hue levels are drawn in; looking the
# value up by position in a separately grouped frame (which is sorted by
# variable name) can attach a label to the wrong bar.
for p in g.patches:
    height = p.get_height()
    if np.isnan(height) or p.get_width() == 0:
        # Defensive: skip patches that do not represent a drawn bar.
        continue
    g.text(p.get_x() + p.get_width() / 2.,
           height + 3,
           "%.3f" % height,
           ha="center")
plt.show()
(Have already looked at similar questions but they don't answer this query)
我有一个结构如下的数据框 df1
{'token': {0: '180816_031', 1: '180816_031', 2: '180816_031', 3: '180816_031', 4: '180816_031', 5: '180816_031', 6: '180816_031', 7: '180816_031', 8: '180816_031', 9: '180816_031'}, 'variable': {0: 'Unnamed: 0', 1: 'adj_active_polymerase', 2: 'adj_functional_sequencing_pores', 3: 'adj_high_quality_reads', 4: 'adj_single_pores', 5: 'cell_mask_bilayers_sum', 6: 'num_align_high_quality_reads', 7: 'num_total_cells', 8: 'potential_pore', 9: 'short_pass'}, 'value': {0: 21.0, 1: 615850.51515151514, 2: 615850.51515151514, 3: 486008.39393939392, 4: 803784.06060606055, 5: 1665347.5757575757, 6: 468638.03030303027, 7: 2097152.0, 8: 1158527.0, 9: 2067189.2424242424}}
我正在使用以下代码重新创建我的数据,然后显示条形图
# Aggregate the long-format frame: mean and standard deviation of 'value'
# for every (token, variable) pair.
df1 = df.groupby(['token', 'variable']).agg({'value': ['mean', 'std']})
df1.reset_index(inplace=True)
df1.sort_values('value', inplace=True, ascending=False)
fig, ax = plt.subplots()
fig.set_size_inches(16, 8)
# To get a different color for each variable, assign the variable to hue.
# NOTE(review): this is the call the question is about -- as reported in the
# text that follows, passing yerr='std' here does not yield the desired
# error bars. It is intentionally left unchanged.
g = sns.factorplot(x='token', y='value', data=df1, hue='variable', ax=ax, yerr='std')
#g.map_dataframe(plt.errorbar, x="followup (months)", y="probability", yerr='sd')
# Put the legend outside the plot: first shrink the axes to 80% of their width
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# Put a legend to the right of the current axis
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Add the respective value to the top of each bar
for p in ax.patches:
    ax.annotate("%d" % p.get_height(), (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center', va='center', fontsize=11, color='black', xytext=(0, 10),
                textcoords='offset points', fontweight='bold')
plt.show()
但我无法在输出中得到所需的误差线(error bar)
之前我在数据框中只使用 mean 时已经生成过一个输出,但我还需要误差线,所以我想加入 std 并把它传给 yerr(这是我查阅了大量资料之后的做法)
请帮忙
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Sample observations, written one per line:
# row label -> (token, station, cycle_number, variable, value).
_columns = ('token', 'station', 'cycle_number', 'variable', 'value')
_rows = {
    361: ('180816_031', 'deneb', 'cycle09', 'adj_high_quality_reads', 99704.0),
    119: ('180816_031', 'callisto', 'cycle06', 'short_pass', 2072785.0),
    101: ('180816_031', 'callisto', 'cycle04', 'short_pass', 2061059.0),
    135: ('180816_031', 'callisto', 'cycle01', 'cell_mask_bilayers_sum', 1682208.0),
    292: ('180816_031', 'callisto', 'cycle04', 'adj_active_polymerase', 675306.0),
    133: ('180816_031', 'deneb', 'cycle05', 'cell_mask_bilayers_sum', 1714292.0),
    99: ('180816_031', 'callisto', 'cycle06', 'short_pass', 2072785.0),
    270: ('180816_031', 'callisto', 'cycle07', 'adj_active_polymerase', 687988.0),
    19: ('180816_031', 'deneb', 'cycle04', 'Unnamed: 0', 19.0),
    382: ('180816_031', 'callisto', 'cycle08', 'adj_high_quality_reads', np.nan),
    414: ('180816_031', 'deneb', 'cycle04', 'num_align_high_quality_reads', 285176.0),
    267: ('180816_031', 'callisto', 'cycle10', 'adj_active_polymerase', 86914.0),
    218: ('180816_031', 'deneb', 'cycle07', 'adj_single_pores', 948971.0),
    398: ('180816_031', 'callisto', 'cycle08', 'num_align_high_quality_reads', 405196.0),
    287: ('180816_031', 'deneb', 'cycle09', 'adj_active_polymerase', 137926.0),
    155: ('180816_031', 'deneb', 'cycle08', 'cell_mask_bilayers_sum', 1830032.0),
    392: ('180816_031', 'deneb', 'cycle06', 'num_align_high_quality_reads', 480081.0),
    265: ('180816_031', 'callisto', 'cycle02', 'adj_active_polymerase', 951689.0),
    239: ('180816_031', 'callisto', 'cycle09', 'adj_single_pores', 681452.0),
    237: ('180816_031', 'callisto', 'cycle07', 'adj_single_pores', 882671.0),
}
# Pivot the rows into the column -> {row label -> value} mapping that is
# handed to the DataFrame constructor.
d = {
    name: {label: row[pos] for label, row in _rows.items()}
    for pos, name in enumerate(_columns)
}
df = pd.DataFrame(d)
# Draw one bar per 'variable' (hue) within each 'token'. barplot aggregates the
# raw observations in df itself and draws capped error bars (capsize) from the
# same data, so no pre-computed mean/std columns are passed in.
# x and y are given by keyword: recent seaborn releases do not accept them as
# positional arguments.
g = sns.barplot(x='token', y='value', data=df, hue='variable', capsize=0.1)
# Label every bar with its own height. Reading the number from the patch keeps
# label and bar in sync whatever order the hue levels are drawn in; looking the
# value up by position in a separately grouped frame (which is sorted by
# variable name) can attach a label to the wrong bar.
for p in g.patches:
    height = p.get_height()
    if np.isnan(height) or p.get_width() == 0:
        # Defensive: skip patches that do not represent a drawn bar.
        continue
    g.text(p.get_x() + p.get_width() / 2.,
           height + 3,
           "%.3f" % height,
           ha="center")
plt.show()