在 ggplot 图表上突出显示数据差距
Highlight data gaps on ggplot graph
我有时间序列(日期时间、实例、值),在值中有一些 NA。如果所有实例的值 - 相同日期时间的 NA,则意味着数据收集存在差距。我需要强调那个时期。
我的示例脚本和数据:
library(tidyr)
library(ggplot2)
example.data1 <- data.frame( Instance = rep("A",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(0,1,2,3,4,5,6,NA,NA,9,10)
)
example.data2 <- data.frame( Instance = rep("B",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(1,2,NA,4,5,6,7,NA,NA,10,11)
)
example.data3 <- data.frame( Instance = rep("C",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(2,3,4,5,NA,7,8,NA,NA,11,12)
)
example.data <- bind_rows(example.data1, example.data2, example.data3)
ggplot (data = example.data, aes(x=datetime,y=Value, color = Instance)) +
geom_line(size = 1.2) +
theme_bw()
我的结果图片:
我真正需要的是:
如何实现?
UPD.
下面的代码是答案不能正常工作。看看那个:
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value))) %>%
# Start and End
mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
filter(is_gap)
结果是 2 个重叠间隔而不是 1 个:
# A tibble: 2 x 4
datetime is_gap xmin xmax
<dttm> <lgl> <dttm> <dttm>
1 2020-12-26 10:01:45 TRUE 2020-12-26 10:01:30 2020-12-26 10:02:00
2 2020-12-26 10:02:00 TRUE 2020-12-26 10:01:45 2020-12-26 10:02:15
图片 - 如果我们使用 alpha,我们可以看到重叠:
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
theme_bw()
一个选项是创建一个仅包含间隙以及间隙的开始和结束的数据框,并使用 geom_rect
来“突出显示”间隙:
library(dplyr)
library(ggplot2)
example.data <- bind_rows(example.data1, example.data2, example.data3)
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value))) %>%
# Start and End
mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
filter(is_gap)
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", inherit.aes = FALSE) +
theme_bw()
轻微修改:
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value)), .groups = "drop") %>%
mutate(
grp = data.table::rleid(is_gap),
prevtime = lag(datetime),
nexttime = lead(datetime)
) %>%
filter(is_gap) %>%
group_by(grp) %>%
summarize(xmin = min(prevtime), xmax = max(nexttime), .groups = "drop")
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
theme_bw()
如果您没有安装 data.table
,rleid
的替代品(只有一个向量,不像 data.table::rleid
那样可扩展)是:
my_rleid <- function(x) { r <- rle(x)$lengths; rep(seq_along(r), times = r); }
基于 Stefan 的想法,但改为使用 ggforce::geom_mark_rect
。需要更少的数据准备。
你可以调整宽度,但我有点喜欢它不会填满整个空白
example.data.gap <- example.data %>%
group_by(datetime) %>%
filter(all(is.na(Value)))
ylims<- range(example.data$Value, na.rm = TRUE)
ggplot(data = example.data, aes(x = datetime, y = Value)) +
geom_line(size = 1.2, aes(color = Instance)) +
ggforce::geom_mark_rect(data = example.data.gap, aes(x = datetime, fill = is.na(Value),
y = seq(ylims[1], ylims[2], length = nrow(example.data.gap))))
我有时间序列(日期时间、实例、值),在值中有一些 NA。如果所有实例的值 - 相同日期时间的 NA,则意味着数据收集存在差距。我需要强调那个时期。
我的示例脚本和数据:
library(tidyr)
library(ggplot2)
example.data1 <- data.frame( Instance = rep("A",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(0,1,2,3,4,5,6,NA,NA,9,10)
)
example.data2 <- data.frame( Instance = rep("B",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(1,2,NA,4,5,6,7,NA,NA,10,11)
)
example.data3 <- data.frame( Instance = rep("C",11),
datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
Value = c(2,3,4,5,NA,7,8,NA,NA,11,12)
)
example.data <- bind_rows(example.data1, example.data2, example.data3)
ggplot (data = example.data, aes(x=datetime,y=Value, color = Instance)) +
geom_line(size = 1.2) +
theme_bw()
我的结果图片:
我真正需要的是:
如何实现?
UPD.
下面的代码是答案不能正常工作。看看那个:
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value))) %>%
# Start and End
mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
filter(is_gap)
结果是 2 个重叠间隔而不是 1 个:
# A tibble: 2 x 4
datetime is_gap xmin xmax
<dttm> <lgl> <dttm> <dttm>
1 2020-12-26 10:01:45 TRUE 2020-12-26 10:01:30 2020-12-26 10:02:00
2 2020-12-26 10:02:00 TRUE 2020-12-26 10:01:45 2020-12-26 10:02:15
图片 - 如果我们使用 alpha,我们可以看到重叠:
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
theme_bw()
一个选项是创建一个仅包含间隙以及间隙的开始和结束的数据框,并使用 geom_rect
来“突出显示”间隙:
library(dplyr)
library(ggplot2)
example.data <- bind_rows(example.data1, example.data2, example.data3)
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value))) %>%
# Start and End
mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
filter(is_gap)
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", inherit.aes = FALSE) +
theme_bw()
轻微修改:
example.data.gap <- example.data %>%
group_by(datetime) %>%
summarise(is_gap = all(is.na(Value)), .groups = "drop") %>%
mutate(
grp = data.table::rleid(is_gap),
prevtime = lag(datetime),
nexttime = lead(datetime)
) %>%
filter(is_gap) %>%
group_by(grp) %>%
summarize(xmin = min(prevtime), xmax = max(nexttime), .groups = "drop")
ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
geom_line(size = 1.2) +
geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
theme_bw()
如果您没有安装 data.table
,rleid
的替代品(只有一个向量,不像 data.table::rleid
那样可扩展)是:
my_rleid <- function(x) { r <- rle(x)$lengths; rep(seq_along(r), times = r); }
基于 Stefan 的想法,但改为使用 ggforce::geom_mark_rect
。需要更少的数据准备。
你可以调整宽度,但我有点喜欢它不会填满整个空白
example.data.gap <- example.data %>%
group_by(datetime) %>%
filter(all(is.na(Value)))
ylims<- range(example.data$Value, na.rm = TRUE)
ggplot(data = example.data, aes(x = datetime, y = Value)) +
geom_line(size = 1.2, aes(color = Instance)) +
ggforce::geom_mark_rect(data = example.data.gap, aes(x = datetime, fill = is.na(Value),
y = seq(ylims[1], ylims[2], length = nrow(example.data.gap))))