在 ggplot 图表上突出显示数据差距

Highlight data gaps on ggplot graph

我有时间序列(日期时间、实例、值),在值中有一些 NA。如果所有实例的值 - 相同日期时间的 NA,则意味着数据收集存在差距。我需要强调那个时期。

我的示例脚本和数据:

library(tidyr)
library(ggplot2)


example.data1 <- data.frame( Instance = rep("A",11),
                            datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                            Value = c(0,1,2,3,4,5,6,NA,NA,9,10)
)   

example.data2 <- data.frame( Instance = rep("B",11),
                             datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                             Value = c(1,2,NA,4,5,6,7,NA,NA,10,11)
)   

example.data3 <- data.frame( Instance = rep("C",11),
                             datetime = seq.POSIXt(as.POSIXct("2020-12-26 10:00:00"), as.POSIXct("2020-12-26 10:00:00") + 15*10, "15 sec"),
                             Value = c(2,3,4,5,NA,7,8,NA,NA,11,12)
)   

example.data <- bind_rows(example.data1, example.data2, example.data3)

ggplot (data = example.data, aes(x=datetime,y=Value, color = Instance)) + 
    geom_line(size = 1.2) +
    theme_bw()

我的结果图片:

我真正需要的是:

如何实现?

UPD.

下面的代码是答案不能正常工作。看看那个:

example.data.gap <- example.data %>%
    group_by(datetime) %>%
    summarise(is_gap = all(is.na(Value))) %>%
    # Start and End 
    mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
    filter(is_gap)

结果是 2 个重叠间隔而不是 1 个:

# A tibble: 2 x 4
  datetime            is_gap xmin                xmax               
  <dttm>              <lgl>  <dttm>              <dttm>             
1 2020-12-26 10:01:45 TRUE   2020-12-26 10:01:30 2020-12-26 10:02:00
2 2020-12-26 10:02:00 TRUE   2020-12-26 10:01:45 2020-12-26 10:02:15

图片 - 如果我们使用 alpha,我们可以看到重叠:

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
    geom_line(size = 1.2) +
    geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
    theme_bw()

一个选项是创建一个仅包含间隙以及间隙的开始和结束的数据框,并使用 geom_rect 来“突出显示”间隙:

library(dplyr)
library(ggplot2)

example.data <- bind_rows(example.data1, example.data2, example.data3)

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  summarise(is_gap = all(is.na(Value))) %>%
  # Start and End 
  mutate(xmin = lag(datetime), xmax = lead(datetime)) %>%
  filter(is_gap)

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
  geom_line(size = 1.2) +
  geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", inherit.aes = FALSE) +
  theme_bw()

轻微修改:

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  summarise(is_gap = all(is.na(Value)), .groups = "drop") %>%
  mutate(
    grp = data.table::rleid(is_gap),
    prevtime = lag(datetime),
    nexttime = lead(datetime)
  ) %>%
  filter(is_gap) %>%
  group_by(grp) %>%
  summarize(xmin = min(prevtime), xmax = max(nexttime), .groups = "drop")

ggplot(data = example.data, aes(x = datetime, y = Value, color = Instance)) +
  geom_line(size = 1.2) +
  geom_rect(data = example.data.gap, aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf), fill = "grey95", alpha = 0.5, inherit.aes = FALSE) +
  theme_bw()

如果您没有安装 data.tablerleid 的替代品(只有一个向量,不像 data.table::rleid 那样可扩展)是:

my_rleid <- function(x) { r <- rle(x)$lengths; rep(seq_along(r), times = r); }

基于 Stefan 的想法,但改为使用 ggforce::geom_mark_rect。需要更少的数据准备。

你可以调整宽度,但我有点喜欢它不会填满整个空白

example.data.gap <- example.data %>%
  group_by(datetime) %>%
  filter(all(is.na(Value)))

ylims<- range(example.data$Value, na.rm = TRUE)

ggplot(data = example.data, aes(x = datetime, y = Value)) +
  geom_line(size = 1.2, aes(color = Instance)) +
  ggforce::geom_mark_rect(data = example.data.gap, aes(x = datetime, fill = is.na(Value), 
                          y = seq(ylims[1], ylims[2], length = nrow(example.data.gap))))