ggplot2: add line for average per group (Error: No stat called StatHline.)
ggplot2: add line for average per group (Error: No stat called StatHline.)
我最近更新了 ggplot2 包,之后在使用 facets 为每个分组的平均值绘制水平线时遇到了严重的问题。
我相信 this post 不再有效?
我正在使用以下代码创建时间序列图:
# Time-series plot of Prod_DL against Date, one facet per Patch.
# This is the version that fails after the ggplot2 update with
# "Error: No stat called StatHline" (raised by the stat = "hline" layer).
ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  geom_line(aes(colour = "red"), lwd = 1.3) +
  geom_smooth() +
  # Meant to draw the mean of y as a horizontal line in every facet.
  geom_line(stat = "hline", yintercept = "mean") +
  scale_x_date(labels = date_format("%b-%y"), breaks = "2 month") +
  # Vertical markers: each date is looked up in the Date column itself.
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-09-18"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-02"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-23"]
    )
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2015") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
第一个问题是:geom_line(stat = "hline", yintercept = "mean") 中的 stat = "hline" 已经不能再使用,它会给出以下错误:Error: No stat called StatHline。
因此我将其更改为:
# Same faceted time-series plot, with the stat = "hline" layer replaced by
# geom_hline(). The intercept is the mean of the whole Prod_DL column, so
# every facet shows the same overall-mean line, not a per-Patch mean.
ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  geom_line(aes(colour = "red"), lwd = 1.3) +
  geom_smooth() +
  geom_hline(yintercept = mean(p2p_dt_SKILL_A$Prod_DL)) +
  scale_x_date(labels = date_format("%b-%y"), date_breaks = "2 month") +
  # Vertical markers: each date is looked up in the Date column itself.
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-09-18"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-02"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-23"]
    )
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2015") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
但这并没有为每个 Patch 分别绘制平均值的水平线,它只使用了 Prod_DL 的总体平均值。
见下文:
现在有没有新的方法来计算每组的平均值并绘制水平线?
谢谢
更新
这是我所做的:
# First create a data frame which holds Patch and the mean of Prod_DL for
# each Patch. It is passed as `data` to geom_hline() below so that every
# facet gets its own mean line.
mean_Prod_DL <- p2p_dt_SKILL_A %>%
  group_by(Patch) %>%
  summarise(mean_Prod_DL_per_patch = mean(Prod_DL))

ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  scale_x_date(labels = date_format("%b-%y"), date_breaks = "2 months") +
  # The colour is a fixed setting, so it goes outside aes(). Inside aes()
  # the string "red" is treated as a one-level variable and coloured by the
  # colour scale rather than being used as the literal colour.
  geom_line(colour = "red", lwd = 1.3) +
  geom_smooth() +
  # Per-Patch mean: one row per Patch, so one dashed line per facet.
  geom_hline(
    data = mean_Prod_DL,
    aes(yintercept = mean_Prod_DL_per_patch),
    linetype = 2
  ) +
  # A single layer for all marker dates. As before, a marker is drawn only
  # for dates present in the data. %in% never yields NA (unlike ==), and
  # unique() keeps one line per date even when a date occurs in many rows.
  geom_vline(
    xintercept = as.numeric(unique(
      p2p_dt_SKILL_A$Date[
        p2p_dt_SKILL_A$Date %in%
          as.Date(c("2015-09-18", "2015-10-02", "2015-10-23", "2015-12-04"))
      ]
    ))
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2016") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
我同意 @MLavoie 的观点:预先计算出所需的统计量是最简单的解决方案。不太确定您所说的“更好”(better)具体是指哪方面。
示例:
library(data.table)
library(ggplot2)

# sample data: four categories (a-d) of 100 points each.
# The seed makes the random walk, and therefore the plot, reproducible.
set.seed(1)
my_df <- data.frame(
  x = rep(1:100, 4),
  y = cumsum(rnorm(400)),
  category = rep(letters[1:4], each = 100)
)

# calculate the hline data in one line with data.table:
# setDT() converts my_df by reference and := adds the per-category mean
# of y as a new column.
setDT(my_df)[, cat_mean := mean(y), by = category]

# plot
ggplot(my_df, aes(x = x, y = y, group = category)) +
  geom_line(color = "red") +
  geom_smooth(color = "blue") +
  # cat_mean is repeated on every row, so hand geom_hline() one row per
  # category; each facet then draws a single line instead of 100
  # identical, overplotted ones.
  geom_hline(
    data = unique(my_df[, .(category, cat_mean)]),
    aes(yintercept = cat_mean)
  ) +
  facet_wrap(~category)
结果:
我最近更新了 ggplot2 包,之后在使用 facets 为每个分组的平均值绘制水平线时遇到了严重的问题。
我相信 this post 不再有效?
我正在使用以下代码创建时间序列图:
# Time-series plot of Prod_DL against Date, one facet per Patch.
# This is the version that fails after the ggplot2 update with
# "Error: No stat called StatHline" (raised by the stat = "hline" layer).
ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  geom_line(aes(colour = "red"), lwd = 1.3) +
  geom_smooth() +
  # Meant to draw the mean of y as a horizontal line in every facet.
  geom_line(stat = "hline", yintercept = "mean") +
  scale_x_date(labels = date_format("%b-%y"), breaks = "2 month") +
  # Vertical markers: each date is looked up in the Date column itself.
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-09-18"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-02"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-23"]
    )
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2015") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
第一个问题是:geom_line(stat = "hline", yintercept = "mean") 中的 stat = "hline" 已经不能再使用,它会给出以下错误:Error: No stat called StatHline。
因此我将其更改为:
# Same faceted time-series plot, with the stat = "hline" layer replaced by
# geom_hline(). The intercept is the mean of the whole Prod_DL column, so
# every facet shows the same overall-mean line, not a per-Patch mean.
ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  geom_line(aes(colour = "red"), lwd = 1.3) +
  geom_smooth() +
  geom_hline(yintercept = mean(p2p_dt_SKILL_A$Prod_DL)) +
  scale_x_date(labels = date_format("%b-%y"), date_breaks = "2 month") +
  # Vertical markers: each date is looked up in the Date column itself.
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-09-18"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-02"]
    )
  ) +
  geom_vline(
    xintercept = as.numeric(
      p2p_dt_SKILL_A$Date[p2p_dt_SKILL_A$Date == "2015-10-23"]
    )
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2015") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
但这并没有为每个 Patch 分别绘制平均值的水平线,它只使用了 Prod_DL 的总体平均值。
见下文:
现在有没有新的方法来计算每组的平均值并绘制水平线?
谢谢
更新
这是我所做的:
# First create a data frame which holds Patch and the mean of Prod_DL for
# each Patch. It is passed as `data` to geom_hline() below so that every
# facet gets its own mean line.
mean_Prod_DL <- p2p_dt_SKILL_A %>%
  group_by(Patch) %>%
  summarise(mean_Prod_DL_per_patch = mean(Prod_DL))

ggplot(p2p_dt_SKILL_A, aes(x = Date, y = Prod_DL)) +
  scale_x_date(labels = date_format("%b-%y"), date_breaks = "2 months") +
  # The colour is a fixed setting, so it goes outside aes(). Inside aes()
  # the string "red" is treated as a one-level variable and coloured by the
  # colour scale rather than being used as the literal colour.
  geom_line(colour = "red", lwd = 1.3) +
  geom_smooth() +
  # Per-Patch mean: one row per Patch, so one dashed line per facet.
  geom_hline(
    data = mean_Prod_DL,
    aes(yintercept = mean_Prod_DL_per_patch),
    linetype = 2
  ) +
  # A single layer for all marker dates. As before, a marker is drawn only
  # for dates present in the data. %in% never yields NA (unlike ==), and
  # unique() keeps one line per date even when a date occurs in many rows.
  geom_vline(
    xintercept = as.numeric(unique(
      p2p_dt_SKILL_A$Date[
        p2p_dt_SKILL_A$Date %in%
          as.Date(c("2015-09-18", "2015-10-02", "2015-10-23", "2015-12-04"))
      ]
    ))
  ) +
  ylab("DL Prod for All Skills") +
  ggtitle("BVG1 DL Prod for All Skills 2014-2016") +
  theme(
    axis.title.y = element_text(size = 15, face = "bold", color = "red"),
    plot.title = element_text(
      size = 15, lineheight = .8, face = "bold", color = "red"
    ),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  facet_wrap(~Patch)
我同意 @MLavoie 的观点:预先计算出所需的统计量是最简单的解决方案。不太确定您所说的“更好”(better)具体是指哪方面。
示例:
library(data.table)
library(ggplot2)

# sample data: four categories (a-d) of 100 points each.
# The seed makes the random walk, and therefore the plot, reproducible.
set.seed(1)
my_df <- data.frame(
  x = rep(1:100, 4),
  y = cumsum(rnorm(400)),
  category = rep(letters[1:4], each = 100)
)

# calculate the hline data in one line with data.table:
# setDT() converts my_df by reference and := adds the per-category mean
# of y as a new column.
setDT(my_df)[, cat_mean := mean(y), by = category]

# plot
ggplot(my_df, aes(x = x, y = y, group = category)) +
  geom_line(color = "red") +
  geom_smooth(color = "blue") +
  # cat_mean is repeated on every row, so hand geom_hline() one row per
  # category; each facet then draws a single line instead of 100
  # identical, overplotted ones.
  geom_hline(
    data = unique(my_df[, .(category, cat_mean)]),
    aes(yintercept = cat_mean)
  ) +
  facet_wrap(~category)
结果: