如何在基于 groupby 的绘图中,让 matplotlib 的 xticks 无论其数值大小都均匀分布?
How to make xticks in matplotlib evenly spaced despite their value, based on groupby?
我正在尝试绘制按公司年龄分组的平均回报率(return)。我希望 Age 变量在 x 轴上均匀分布,即使它的步长逐渐增大、数值间隔并不均匀。我的目标是这样的:
# Plot the mean first-day return ("IR") per firm age for four IPO periods.
# `plt` (matplotlib.pyplot) and the `ipo_cut` DataFrame must already exist;
# `ipo_cut` is read through its "IR", "Age" and "Timespan" columns.
fig, ax = plt.subplots(figsize=(10,6), sharex=True, sharey=True)
# Each further period gets its own twin axis layered over `ax`.
ax2 = ax.twinx()
ax3 = ax.twinx()
ax4 = ax.twinx()
# (axis, period label, extra plot kwargs); the last period keeps the default colour.
period_styles = [
    (ax, "1980-1989", {"color": "r"}),
    (ax2, "1990-1998", {"color": "g"}),
    (ax3, "1999-2000", {"color": "grey"}),
    (ax4, "2001-2003", {}),
]
for axis, timespan, style in period_styles:
    selected = ipo_cut[(ipo_cut["Timespan"] == timespan) & (ipo_cut["Age"] >= 2)]
    # Average only "IR": mean() over the whole group would also try to average
    # the string-valued "Timespan" column, which raises on pandas >= 2.0.
    axis.plot(selected.groupby("Age")["IR"].mean(), **style)
ax.set(title ="Average First-day Returns by Age of Firm at Time of IPO",
       xlabel = "Age",
       ylabel = "Average First-Day Return in %")
# Give every layer identical limits so the four lines are directly comparable.
for axis in (ax, ax2, ax3, ax4):
    axis.set_ylim([0,1])
    axis.set_xlim([0,70])
# Only the base axis shows a y scale; hide the duplicates on the twins.
for axis in (ax2, ax3, ax4):
    axis.get_yaxis().set_visible(False)
# Ticks are placed at their numeric positions, so they are NOT evenly spaced.
plt.xticks([0,1,2,3,4,5,6,7,8,9,10, 11,12,13,15,17,20,30,40,50,60,70])
plt.setp(ax.get_xticklabels(), rotation = 90)
fig.show()
输出如下所示:
根据其他帖子,我只能显示所需的“年龄”年份,但遗憾的是轴分布不均匀。我觉得我真的很接近解决方案,但我自己找不到答案。
seaborn 中的解决方案对我也很有帮助。
编辑,补充一些信息:x 向量的形状与数据不匹配。我只按相关的年龄筛选了数据框,因此部分年龄的数据缺失。下面是一个最小可复现示例:
# Minimal example from the question: random "IR" values indexed by "Age",
# with two ages removed, plotted against a hand-written x vector.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
import pandas as pd  # needed for pd.DataFrame below
#create dataframe
x = np.arange(0,11)
df = pd.DataFrame(data = np.random.rand(len(x)), index = x, columns=["IR"])
df.index.name = "Age"
# Blank out ages 0 and 9 and drop them, leaving gaps in the index.
df.loc[[0, 9], "IR"] = np.nan
df.dropna(inplace = True)
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values
x = [0,1,2,5,7,9,10]
# plot
# NOTE(review): x has 7 entries while df keeps 9 rows after dropna(); this
# length mismatch is the problem the surrounding question text describes, so
# it is left as written.
ax.plot(x, df, color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,10])
ax.tick_params(axis='x', labelrotation=90)
fig.show()
我的目标是不留间隙地绘制整个数据框。也就是说,我的想法是:折线图作为背景照常绘制,而 x 轴上只显示所需的 x 值。
希望这些信息对您有所帮助。
解决方案:
# Solution: plot against evenly spaced positions 0..n-1 and relabel the ticks
# with the real (unevenly spaced) age values.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
import pandas as pd  # needed for pd.DataFrame below
#create dataframe
x = np.arange(0,10)
df = pd.DataFrame(data = np.random.rand(len(x)), index = x, columns=["IR"])
df.index.name = "Age"
#create nan in sample
df.loc[[0, 9], "IR"] = np.nan
# boolean mask of missing rows, used to draw an unbroken line (solution by max)
lg = df["IR"].isna()
# create ticks for even distribution: one integer position per data row
n = len(df)
a = np.arange(n)
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values for tick labels (must contain exactly n entries)
x = (0,5,10,13,15,24,30,40,55,70)
# plot only the rows that have data, at their evenly spaced positions
ax.plot(a[~lg], df[~lg], color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,10])
ax.xaxis.set_ticks(a) #set the ticks to be a
ax.xaxis.set_ticklabels(x) # change the ticks' names to x
ax.tick_params(axis='x', labelrotation=90)
fig.show()
感谢 max 和另一位回答者的建议,我找到了解决问题的方法。如果原始数据框的 x 值步长不是 1,我建议直接调用 reset_index()。
我建议一开始就创建所需的 x 向量,并以它为横坐标绘制数据,剩下的工作交给 matplotlib 即可。否则,你只是人为地得到一个均匀分布的图(如果不提供 x 值,matplotlib 会假定数据是步长为 1 的点序列):这样改变的只是 x 轴的外观,而不是图表本身。
# Answer example: plot random y-values against the real, unevenly spaced ages
# and let matplotlib place the ticks itself.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values
x = [0,1,2,3,4,5,6,7,8,9,10, 11,12,13,15,17,20,30,40,50,60,70]
# create (random) y-values
y = np.random.rand(len(x))
# plot
ax.plot(x, y, color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,70])
ax.tick_params(axis='x', labelrotation=90)
fig.show()
我精简了你的代码,并创建了一些人工的虚拟数据。请参考:How to create a minimal reproducible example。
我正在尝试绘制按公司年龄分组的平均回报率(return)。我希望 Age 变量在 x 轴上均匀分布,即使它的步长逐渐增大、数值间隔并不均匀。我的目标是这样的:
# Plot the mean first-day return ("IR") per firm age for four IPO periods.
# `plt` (matplotlib.pyplot) and the `ipo_cut` DataFrame must already exist;
# `ipo_cut` is read through its "IR", "Age" and "Timespan" columns.
fig, ax = plt.subplots(figsize=(10,6), sharex=True, sharey=True)
# Each further period gets its own twin axis layered over `ax`.
ax2 = ax.twinx()
ax3 = ax.twinx()
ax4 = ax.twinx()
# (axis, period label, extra plot kwargs); the last period keeps the default colour.
period_styles = [
    (ax, "1980-1989", {"color": "r"}),
    (ax2, "1990-1998", {"color": "g"}),
    (ax3, "1999-2000", {"color": "grey"}),
    (ax4, "2001-2003", {}),
]
for axis, timespan, style in period_styles:
    selected = ipo_cut[(ipo_cut["Timespan"] == timespan) & (ipo_cut["Age"] >= 2)]
    # Average only "IR": mean() over the whole group would also try to average
    # the string-valued "Timespan" column, which raises on pandas >= 2.0.
    axis.plot(selected.groupby("Age")["IR"].mean(), **style)
ax.set(title ="Average First-day Returns by Age of Firm at Time of IPO",
       xlabel = "Age",
       ylabel = "Average First-Day Return in %")
# Give every layer identical limits so the four lines are directly comparable.
for axis in (ax, ax2, ax3, ax4):
    axis.set_ylim([0,1])
    axis.set_xlim([0,70])
# Only the base axis shows a y scale; hide the duplicates on the twins.
for axis in (ax2, ax3, ax4):
    axis.get_yaxis().set_visible(False)
# Ticks are placed at their numeric positions, so they are NOT evenly spaced.
plt.xticks([0,1,2,3,4,5,6,7,8,9,10, 11,12,13,15,17,20,30,40,50,60,70])
plt.setp(ax.get_xticklabels(), rotation = 90)
fig.show()
输出如下所示:
根据其他帖子,我只能显示所需的“年龄”年份,但遗憾的是轴分布不均匀。我觉得我真的很接近解决方案,但我自己找不到答案。
seaborn 中的解决方案对我也很有帮助。
编辑,补充一些信息:x 向量的形状与数据不匹配。我只按相关的年龄筛选了数据框,因此部分年龄的数据缺失。下面是一个最小可复现示例:
# Minimal example from the question: random "IR" values indexed by "Age",
# with two ages removed, plotted against a hand-written x vector.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
import pandas as pd  # needed for pd.DataFrame below
#create dataframe
x = np.arange(0,11)
df = pd.DataFrame(data = np.random.rand(len(x)), index = x, columns=["IR"])
df.index.name = "Age"
# Blank out ages 0 and 9 and drop them, leaving gaps in the index.
df.loc[[0, 9], "IR"] = np.nan
df.dropna(inplace = True)
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values
x = [0,1,2,5,7,9,10]
# plot
# NOTE(review): x has 7 entries while df keeps 9 rows after dropna(); this
# length mismatch is the problem the surrounding question text describes, so
# it is left as written.
ax.plot(x, df, color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,10])
ax.tick_params(axis='x', labelrotation=90)
fig.show()
我的目标是不留间隙地绘制整个数据框。也就是说,我的想法是:折线图作为背景照常绘制,而 x 轴上只显示所需的 x 值。希望这些信息对您有所帮助。
解决方案:
# Solution: plot against evenly spaced positions 0..n-1 and relabel the ticks
# with the real (unevenly spaced) age values.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
import pandas as pd  # needed for pd.DataFrame below
#create dataframe
x = np.arange(0,10)
df = pd.DataFrame(data = np.random.rand(len(x)), index = x, columns=["IR"])
df.index.name = "Age"
#create nan in sample
df.loc[[0, 9], "IR"] = np.nan
# boolean mask of missing rows, used to draw an unbroken line (solution by max)
lg = df["IR"].isna()
# create ticks for even distribution: one integer position per data row
n = len(df)
a = np.arange(n)
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values for tick labels (must contain exactly n entries)
x = (0,5,10,13,15,24,30,40,55,70)
# plot only the rows that have data, at their evenly spaced positions
ax.plot(a[~lg], df[~lg], color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,10])
ax.xaxis.set_ticks(a) #set the ticks to be a
ax.xaxis.set_ticklabels(x) # change the ticks' names to x
ax.tick_params(axis='x', labelrotation=90)
fig.show()
感谢 max 和另一位回答者的建议,我找到了解决问题的方法。如果原始数据框的 x 值步长不是 1,我建议直接调用 reset_index()。
我建议一开始就创建所需的 x 向量,并以它为横坐标绘制数据,剩下的工作交给 matplotlib 即可。否则,你只是人为地得到一个均匀分布的图(如果不提供 x 值,matplotlib 会假定数据是步长为 1 的点序列):这样改变的只是 x 轴的外观,而不是图表本身。
# Answer example: plot random y-values against the real, unevenly spaced ages
# and let matplotlib place the ticks itself.
import matplotlib.pyplot as plt  # alias must be `plt`; the script calls plt.subplots
import numpy as np
# open figure
fig, ax = plt.subplots(figsize=(10,6))
# create x-values
x = [0,1,2,3,4,5,6,7,8,9,10, 11,12,13,15,17,20,30,40,50,60,70]
# create (random) y-values
y = np.random.rand(len(x))
# plot
ax.plot(x, y, color="r")
ax.set(
    title ="Average First-day Returns by Age of Firm at Time of IPO",
    xlabel = "Age",
    ylabel = "Average First-Day Return in %",
    ylim = [0,1],
    xlim = [0,70])
ax.tick_params(axis='x', labelrotation=90)
fig.show()
我精简了你的代码,并创建了一些人工的虚拟数据。请参考:How to create a minimal reproducible example。