使用 ggplot2 和 purrr 创建密度图;基于组的密度线颜色
Creating density plots using ggplot2 and purrr; colour of density line based on group
我在 R Studio 中结合使用 ggplot2 和 purrr 来遍历数据帧并生成密度图。这是一个模拟数据框,类似于我正在使用的结构:-
#load relevant libraries
library(ggplot2)
library(dplyr)
library(purrr)
library(gridExtra)
#mock dataframe
set.seed(123)
Duration<-floor(rnorm(1000, mean=200, sd=50))
DateTime<-seq.POSIXt(from = as.POSIXct("2020-08-01 01:00:00", tz = Sys.timezone()), length.out = 1000, by = "hours")
df<-cbind(Duration,DateTime)
df<-as.data.frame(df)
df$Duration<-as.integer(df$Duration)
df$DateTime<-seq.POSIXt(from = as.POSIXct("2020-08-01 01:00:00", tz = Sys.timezone()),
length.out = 1000, by = "hours")#re-doing this to stop the annoying change back to numeric
df$WeekNumber<-isoweek(df$DateTime)
#create a "period" column
setDT(df)[WeekNumber>=31 & WeekNumber <=32, Period:="Period 1"]
df[WeekNumber>=33 & WeekNumber <=35, Period:="Period 2"]
df[WeekNumber>=36 & WeekNumber <=37, Period:="Period 3"]
df$Period<-factor(df$Period, levels = c("Period 1", "Period 2", "Period 3"))
下面是使用 purrr 循环遍历数据帧以生成每周密度图的代码:-
densplot<-df %>%
group_by(WeekNumber) %>%
summarise() %>%
pull() %>%
# run map() instead of for()
map(~{
df %>%
# filter for each value
filter(WeekNumber == .x) %>%
# run unique density plot
ggplot(aes(group=WeekNumber)) +
geom_density(aes(Duration))+
ggtitle(paste0("Week ",.x," duration"), subtitle = "Log10")+
scale_x_log10()
})
#call grid.arrange to create a faceted version of the plot
do.call(grid.arrange,densplot)
这给出了这个:-
我想做的是用“句点”为密度线着色以帮助解释。单独使用 ggplot2 会很容易,但我想在我的 purrr 管道中使用它。但是,如果我指定 ggplot(aes(group=WeekNumber, colour=Period))
或 geom_density(aes(Duration))
,我会得到:-
此外,每个情节都有一个图例,看起来确实不整洁。我希望能够为每个单独的 Period 和一个显示所有三个 Period 颜色的图例着色(可能放在右侧)。有办法吗?
最好使用facet_wrap()
以避免颜色问题。这里是您的选项代码:
library(ggplot2)
library(dplyr)
#Code
df %>% mutate(WeekNumber=paste0("Week ",WeekNumber," duration")) %>%
ggplot(aes(x=Duration,group=WeekNumber,color=Period)) +
geom_density()+
scale_x_log10()+
facet_wrap(.~WeekNumber,scales='free')
输出:
更新: 如果您想迭代,可以通过按句点拆分 df
来调整列表策略。然后使用绘图函数和 patchwork
包,您可以获得预期的绘图。作为补充说明,如果您想要不同的颜色,您可以通过在拆分之前在数据框中定义颜色来破解管道。我以一种实用的方式做了,但如果存在更多的周期,你可以使用调色板。这里的代码:
library(patchwork)
#Add Colors to df
dfcol <- data.frame(Period=unique(df$Period),color=c('blue','red','green'),stringsAsFactors = F)
#Add to df
df$Colors <- dfcol[match(df$Period,dfcol$Period),"color"]
#Approach 2
#Create a list
List <- split(df,df$WeekNumber)
#Plot function
myplot <- function(x)
{
#Extract color
mycol <- unique(x$Colors)
#Plots
p1 <- ggplot(x,aes(x=Duration,group=WeekNumber,color=Period)) +
geom_density()+
scale_x_log10()+
scale_color_manual(values = mycol)+
ggtitle(paste0("Week ",unique(x$WeekNumber)," duration"), subtitle = "Log10")+
theme(legend.title = element_blank())
return(p1)
}
#Apply
L1 <- lapply(List,myplot)
#Wrap plots
combined <- wrap_plots(L1,ncol = 3)
combined + plot_layout(guides = "collect")
输出:
我在 R Studio 中结合使用 ggplot2 和 purrr 来遍历数据帧并生成密度图。这是一个模拟数据框,类似于我正在使用的结构:-
#load relevant libraries
library(ggplot2)
library(dplyr)
library(purrr)
library(gridExtra)
#mock dataframe
set.seed(123)
Duration<-floor(rnorm(1000, mean=200, sd=50))
DateTime<-seq.POSIXt(from = as.POSIXct("2020-08-01 01:00:00", tz = Sys.timezone()), length.out = 1000, by = "hours")
df<-cbind(Duration,DateTime)
df<-as.data.frame(df)
df$Duration<-as.integer(df$Duration)
df$DateTime<-seq.POSIXt(from = as.POSIXct("2020-08-01 01:00:00", tz = Sys.timezone()),
length.out = 1000, by = "hours")#re-doing this to stop the annoying change back to numeric
df$WeekNumber<-isoweek(df$DateTime)
#create a "period" column
setDT(df)[WeekNumber>=31 & WeekNumber <=32, Period:="Period 1"]
df[WeekNumber>=33 & WeekNumber <=35, Period:="Period 2"]
df[WeekNumber>=36 & WeekNumber <=37, Period:="Period 3"]
df$Period<-factor(df$Period, levels = c("Period 1", "Period 2", "Period 3"))
下面是使用 purrr 循环遍历数据帧以生成每周密度图的代码:-
densplot<-df %>%
group_by(WeekNumber) %>%
summarise() %>%
pull() %>%
# run map() instead of for()
map(~{
df %>%
# filter for each value
filter(WeekNumber == .x) %>%
# run unique density plot
ggplot(aes(group=WeekNumber)) +
geom_density(aes(Duration))+
ggtitle(paste0("Week ",.x," duration"), subtitle = "Log10")+
scale_x_log10()
})
#call grid.arrange to create a faceted version of the plot
do.call(grid.arrange,densplot)
这给出了这个:-
我想做的是用“句点”为密度线着色以帮助解释。单独使用 ggplot2 会很容易,但我想在我的 purrr 管道中使用它。但是,如果我指定 ggplot(aes(group=WeekNumber, colour=Period))
或 geom_density(aes(Duration))
,我会得到:-
此外,每个情节都有一个图例,看起来确实不整洁。我希望能够为每个单独的 Period 和一个显示所有三个 Period 颜色的图例着色(可能放在右侧)。有办法吗?
最好使用facet_wrap()
以避免颜色问题。这里是您的选项代码:
library(ggplot2)
library(dplyr)
#Code
df %>% mutate(WeekNumber=paste0("Week ",WeekNumber," duration")) %>%
ggplot(aes(x=Duration,group=WeekNumber,color=Period)) +
geom_density()+
scale_x_log10()+
facet_wrap(.~WeekNumber,scales='free')
输出:
更新: 如果您想迭代,可以通过按句点拆分 df
来调整列表策略。然后使用绘图函数和 patchwork
包,您可以获得预期的绘图。作为补充说明,如果您想要不同的颜色,您可以通过在拆分之前在数据框中定义颜色来破解管道。我以一种实用的方式做了,但如果存在更多的周期,你可以使用调色板。这里的代码:
library(patchwork)
#Add Colors to df
dfcol <- data.frame(Period=unique(df$Period),color=c('blue','red','green'),stringsAsFactors = F)
#Add to df
df$Colors <- dfcol[match(df$Period,dfcol$Period),"color"]
#Approach 2
#Create a list
List <- split(df,df$WeekNumber)
#Plot function
myplot <- function(x)
{
#Extract color
mycol <- unique(x$Colors)
#Plots
p1 <- ggplot(x,aes(x=Duration,group=WeekNumber,color=Period)) +
geom_density()+
scale_x_log10()+
scale_color_manual(values = mycol)+
ggtitle(paste0("Week ",unique(x$WeekNumber)," duration"), subtitle = "Log10")+
theme(legend.title = element_blank())
return(p1)
}
#Apply
L1 <- lapply(List,myplot)
#Wrap plots
combined <- wrap_plots(L1,ncol = 3)
combined + plot_layout(guides = "collect")
输出: