如何将 stat_density 用于时间序列(x 轴为 POSIXct)?
How can I use stat_density with a time series (POSIXct on the x axis)?
基于这个例子:
# Density heat-map example adapted from the ggplot2 geom_tile reference:
# https://ggplot2.tidyverse.org/reference/geom_tile.html
# One horizontal density strip of mpg is drawn per cylinder count.
cars <- ggplot(data = mtcars, mapping = aes(x = mpg, y = factor(cyl)))
cars +
  stat_density(
    mapping = aes(fill = after_stat(density)),
    geom = "raster",
    position = "identity"
  )
我想创建一个图表,其中每小时垂直绘制我的数据集的密度。原始数据集很长。我还想将单个数据点和平均值显示为一条线。
这里是代码的简化基本版本:
# Reproducible example: per-hour density strips drawn on a discrete x axis
library(reshape2)
library(ggplot2)
library(scales)

# First and last hourly timestamp (UTC) of the example period
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# Wide data: one row per hour, one column per measurement series
df <- data.frame(
  x = seq(startdate, enddate, by = "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)

# Row-wise mean over the three series (every column except x)
df$mean <- rowMeans(df[, c("y1", "y2", "y3")])

# Long format: one row per (hour, series) pair; the mean column is left out
df_melt <- melt(df, id.vars = "x", measure.vars = c("y1", "y2", "y3"))

# Vertical density strip per hour (x treated as a factor) plus the raw points
g1 <- ggplot(data = df_melt, mapping = aes(x = factor(x), y = value)) +
  stat_density(
    mapping = aes(fill = after_stat(ndensity)),
    geom = "raster",
    position = "identity",
    orientation = "y"
  ) +
  geom_point()
g1
这样可以运行,但原始数据集包含的小时数太多,导致 x 轴的刻度标签难以辨认。我还想自行指定标签的日期格式以及绘图的范围(limits)。在使用 stat_density 之前,我一直用 scale_x_datetime 来做这件事。但是对于密度图,我必须使用 factor(x) 而不是原来的 x(即 POSIXct)。因此下面的刻度设置显然会报错,因为 x 是因子而不是日期时间:
# Scale x as datetime (does not work): the plot maps x to factor(x), a
# discrete variable, so a datetime scale cannot be applied to it. This
# snippet is kept only to reproduce that failure.
# NOTE(review): g1 is overwritten here, so any later use of g1 builds on the
# plot object that carries this scale.
g1 <- g1 + scale_x_datetime(labels = date_format("%b/%d", tz="UTC"),
limits = c(startdate, enddate),
# The break function ignores its argument and always returns the fixed
# sequence from startdate to enddate in 2-day steps.
breaks = function(x)
seq.POSIXt(from = startdate, to = enddate, by = "2 days"),
date_minor_breaks = "12 hours",
expand = c(0,0))
# Printing the plot is presumably where the error surfaces - TODO confirm
g1
我设法改用了 scale_x_discrete,但对于更大的数据集,这样很难设置标签格式和范围:
# Discrete x scale: keep every hour on the axis, but label only the 2nd and
# 4th timestamp
g1 <- g1 +
  scale_x_discrete(
    limits = as.character(df$x),
    breaks = as.character(df$x)[c(2, 4)]
  )
g1
使用因子的另一个问题是:我无法用 geom_line 把每小时的平均值画成一条线,因为每个因子水平只包含 1 个观测值(ggplot2 会把每个水平当作单独的分组)。
# Overlay the hourly mean on the factor-based plot: first as red points,
# then as a red line
g1 +
  geom_point(
    data = df,
    mapping = aes(x = factor(x), y = mean),
    colour = "red"
  )
g1 +
  geom_line(
    data = df,
    mapping = aes(x = factor(x), y = mean),
    colour = "red"
  )
那么,有没有办法生成我想要的图:每小时的密度,再叠加数据点和平均值线?我希望能尽可能方便地设置 x 轴的标签和范围。也许有办法直接使用 x 而不是 factor(x)……
我认为解决方案可能很简单:去掉 factor(),并在密度图层中显式设置分组(group)。下面的代码是否适用于您的实际情况?
library(reshape2)
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3

# First and last hourly timestamp (UTC) of the example period
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# Wide data: one row per hour, three measurement series
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)

# Row-wise mean over the three series (every column except x)
df$mean <- rowMeans(df[, c("y1", "y2", "y3")])

# Long format: one row per (hour, series) pair; the mean column is left out
df_melt <- melt(df, id.vars = "x", measure.vars = c("y1", "y2", "y3"))

# Keep x as POSIXct instead of factor(x). The explicit `group = x` makes
# stat_density compute one density per timestamp, while the axis stays
# continuous, so a datetime scale and a connected mean line both work.
ggplot(data = df_melt, aes(x, value)) +
  stat_density(
    aes(fill = after_stat(ndensity), group = x),
    geom = "raster", position = "identity", orientation = "y"
  ) +
  geom_point() +
  # Hourly mean as one connected line; x is inherited from the plot mapping
  geom_line(data = df, aes(y = mean), colour = "red") +
  # Label format is now freely configurable because x is a real datetime
  scale_x_datetime(date_labels = "%b/%d %H:%M")
由 reprex package (v0.3.0)
于 2021-01-29 创建
基于这个例子:
# Density heat-map example adapted from the ggplot2 geom_tile reference:
# https://ggplot2.tidyverse.org/reference/geom_tile.html
# One horizontal density strip of mpg is drawn per cylinder count.
cars <- ggplot(data = mtcars, mapping = aes(x = mpg, y = factor(cyl)))
cars +
  stat_density(
    mapping = aes(fill = after_stat(density)),
    geom = "raster",
    position = "identity"
  )
我想创建一个图表,其中每小时垂直绘制我的数据集的密度。原始数据集很长。我还想将单个数据点和平均值显示为一条线。
这里是代码的简化基本版本:
# Reproducible example: per-hour density strips drawn on a discrete x axis
library(reshape2)
library(ggplot2)
library(scales)

# First and last hourly timestamp (UTC) of the example period
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# Wide data: one row per hour, one column per measurement series
df <- data.frame(
  x = seq(startdate, enddate, by = "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)

# Row-wise mean over the three series (every column except x)
df$mean <- rowMeans(df[, c("y1", "y2", "y3")])

# Long format: one row per (hour, series) pair; the mean column is left out
df_melt <- melt(df, id.vars = "x", measure.vars = c("y1", "y2", "y3"))

# Vertical density strip per hour (x treated as a factor) plus the raw points
g1 <- ggplot(data = df_melt, mapping = aes(x = factor(x), y = value)) +
  stat_density(
    mapping = aes(fill = after_stat(ndensity)),
    geom = "raster",
    position = "identity",
    orientation = "y"
  ) +
  geom_point()
g1
这样可以运行,但原始数据集包含的小时数太多,导致 x 轴的刻度标签难以辨认。我还想自行指定标签的日期格式以及绘图的范围(limits)。在使用 stat_density 之前,我一直用 scale_x_datetime 来做这件事。但是对于密度图,我必须使用 factor(x) 而不是原来的 x(即 POSIXct)。因此下面的刻度设置显然会报错,因为 x 是因子而不是日期时间:
# Scale x as datetime (does not work): the plot maps x to factor(x), a
# discrete variable, so a datetime scale cannot be applied to it. This
# snippet is kept only to reproduce that failure.
# NOTE(review): g1 is overwritten here, so any later use of g1 builds on the
# plot object that carries this scale.
g1 <- g1 + scale_x_datetime(labels = date_format("%b/%d", tz="UTC"),
limits = c(startdate, enddate),
# The break function ignores its argument and always returns the fixed
# sequence from startdate to enddate in 2-day steps.
breaks = function(x)
seq.POSIXt(from = startdate, to = enddate, by = "2 days"),
date_minor_breaks = "12 hours",
expand = c(0,0))
# Printing the plot is presumably where the error surfaces - TODO confirm
g1
我设法改用了 scale_x_discrete,但对于更大的数据集,这样很难设置标签格式和范围:
# Discrete x scale: keep every hour on the axis, but label only the 2nd and
# 4th timestamp
g1 <- g1 +
  scale_x_discrete(
    limits = as.character(df$x),
    breaks = as.character(df$x)[c(2, 4)]
  )
g1
使用因子的另一个问题是:我无法用 geom_line 把每小时的平均值画成一条线,因为每个因子水平只包含 1 个观测值(ggplot2 会把每个水平当作单独的分组)。
# Overlay the hourly mean on the factor-based plot: first as red points,
# then as a red line
g1 +
  geom_point(
    data = df,
    mapping = aes(x = factor(x), y = mean),
    colour = "red"
  )
g1 +
  geom_line(
    data = df,
    mapping = aes(x = factor(x), y = mean),
    colour = "red"
  )
那么,有没有办法生成我想要的图:每小时的密度,再叠加数据点和平均值线?我希望能尽可能方便地设置 x 轴的标签和范围。也许有办法直接使用 x 而不是 factor(x)……
我认为解决方案可能很简单:去掉 factor(),并在密度图层中显式设置分组(group)。下面的代码是否适用于您的实际情况?
library(reshape2)
library(ggplot2)
library(scales)
#> Warning: package 'scales' was built under R version 4.0.3

# First and last hourly timestamp (UTC) of the example period
startdate <- as.POSIXct("2020-01-01 01:00", tz = "UTC")
enddate <- as.POSIXct("2020-01-01 05:00", tz = "UTC")

# Wide data: one row per hour, three measurement series
df <- data.frame(
  x = seq.POSIXt(startdate, enddate, "hour"),
  y1 = c(1, 2, 3, 4, 5),
  y2 = c(2, 4, 6, 8, 10),
  y3 = c(3, 6, 9, 12, 15)
)

# Row-wise mean over the three series (every column except x)
df$mean <- rowMeans(df[, c("y1", "y2", "y3")])

# Long format: one row per (hour, series) pair; the mean column is left out
df_melt <- melt(df, id.vars = "x", measure.vars = c("y1", "y2", "y3"))

# Keep x as POSIXct instead of factor(x). The explicit `group = x` makes
# stat_density compute one density per timestamp, while the axis stays
# continuous, so a datetime scale and a connected mean line both work.
ggplot(data = df_melt, aes(x, value)) +
  stat_density(
    aes(fill = after_stat(ndensity), group = x),
    geom = "raster", position = "identity", orientation = "y"
  ) +
  geom_point() +
  # Hourly mean as one connected line; x is inherited from the plot mapping
  geom_line(data = df, aes(y = mean), colour = "red") +
  # Label format is now freely configurable because x is a real datetime
  scale_x_datetime(date_labels = "%b/%d %H:%M")
由 reprex package (v0.3.0)
于 2021-01-29 创建