Geom_tile 绘制时间轴数据中不存在的不连续性

Geom_tile plots non-existent discontinuities in data for time axis

如果我有以下名为 data 的数据框:
year    month   id  group   returns
2016    2   asset_a group1  0.11592118
2016    3   asset_a group1  0.104526128
2016    4   asset_a group1  0.244925532
2016    5   asset_a group1  0.252377372
2016    6   asset_a group1  0.282602889
2016    7   asset_a group1  0.607148925
2016    8   asset_a group1  0.257815581
2016    9   asset_a group1  0.202712468
2016    10  asset_a group1  0.177455704
2016    11  asset_a group1  0.208526305
2016    12  asset_a group1  0.179808043
2017    1   asset_a group1  0.204425208
2017    2   asset_a group1  0.167787787
2017    3   asset_a group1  0.122357671
2017    4   asset_a group1  0.095889965
2017    5   asset_a group1  0.180117687
2017    6   asset_a group1  0.146912234
2017    7   asset_a group1  0.286743829
2017    8   asset_a group1  0.201531197
2017    9   asset_a group1  0.166819132
2017    10  asset_a group1  0.136262625
2017    11  asset_a group1  0.128844762
2017    12  asset_a group1  0.147595906
2018    1   asset_a group1  0.099843877
2018    2   asset_a group1  0.1928918
2018    3   asset_a group1  0.188344307
2018    4   asset_a group1  0.155801889
2018    5   asset_a group1  0.185813076
2018    6   asset_a group1  0.217531263
2018    7   asset_a group1  0.269840901
2018    8   asset_a group1  0.267351364
2018    9   asset_a group1  0.183753448
2018    10  asset_a group1  0.195182592
2018    11  asset_a group1  0.228886115
2018    12  asset_a group1  0.166964407

为了在热图中绘制它,我用以下代码创建了一个日期向量:
# Add a POSIXct `date` column marking the first day of each year/month pair.
data <- mutate(data, date = make_datetime(year, month))

我得到了如下的数据框结构:
 $ year     : int [1:564] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
 $ month    : int [1:564] 2 2 2 2 2 2 2 2 3 3 ...
 $ id       : chr [1:564] "asset_a" "asset_b" "asset_c" "asset_d" ...
 $ group    : chr [1:564] "group1" "group2" "group3" "group4" ...
 $ returns  : num [1:564] 0.115 0.3 0.105 0.245 0.28 ...
 $ date     : POSIXct[1:564], format: "2016-02-01" "2016-02-01" "2016-02-01" "2016-02-01" ...

并将其输入到 ggplot 热图中

# Heatmap of monthly returns with one row of tiles per asset.
# The asset identifier column in `data` is called `id` (there is no `asset`
# column), so `id` is what must be mapped to the y aesthetic.
data %>%
  ggplot(aes(x = date, y = id)) +
  geom_tile(aes(fill = returns)) +
  theme_classic() +
  # Seven colours placed at seven evenly spaced positions of the fill range.
  scale_fill_gradientn(
    colours = c(
      "#66bf7b", "#a1d07e", "#dce182",
      "#ffeb84",
      "#fedb81", "#faa075", "#faa075"
    ),
    values = rescale(c(-3, -2, -1, 0, 1, 2, 3)),
    guide = "colorbar"
  ) +
  labs(x = "", y = "")

我得到的图在某些日期之间出现了白色间隙。

既然我的数据框中的数据没有任何月度不连续,为什么 ggplot 会莫名其妙地画出缺失数据?我该如何修复,使日期之间不再出现白色间隙?这是否与日期格式中的小时和秒有关?

如果我将日期绘制为字符,我会得到想要的结果,但是,在这种情况下,我如何才能减少日期轴上的刻度数以使其可读?

更新: 根据 stefan 的建议输出并没有解决这个问题,因为每个资产 ID 都应该有自己的热图行。现在,它们被绘制在彼此之上。

更新 2

对我来说这没用

# One label per distinct id, in factor-level order; the k-th break sits at
# k - 0.5, i.e. half a unit below the k-th integer level code.
labels <- levels(factor(data$id))
breaks <- seq_along(labels) - .5

手动输入:

  # Hand-written vertical band per asset: asset_a spans 0..1, asset_b 1..2,
  # ..., asset_i 8..9. Ids that are not listed get NA for both ymin and ymax.
  mutate(xmin = date,
         xmax = date + months(1),
         ymin = unname(c(asset_a = 0, asset_b = 1, asset_c = 2,
                         asset_d = 3, asset_e = 4, asset_f = 5,
                         asset_g = 6, asset_h = 7,
                         asset_i = 8)[as.character(id)]),
         ymax = ymin + 1
  )

解决了问题,每个资产 id 都堆叠在一起。

不能 100% 确定问题出在哪里,但我的猜测是 geom_tile 为每个图块选择了相同的宽度和高度。但是,由于月份的天数不同,因此会出现不连续性。

在仍然使用日期或日期时间的同时实现所需结果的一个选项是切换到 geom_rect,但是需要一些额外的步骤来计算四个角的坐标:

EDIT:为了使示例更接近您的真实数据,我又添加了两个资产——只是简单地复制了您的示例数据,并在 returns 上加了一些随机噪声。我还修复了原始代码中的一个错误:计算坐标轴刻度位置(breaks)时没有对值进行排序,导致轴标签错误。

library(ggplot2)
library(dplyr)
library(lubridate)
library(scales)

set.seed(123)

# Create two extra assets by copying the example rows and adding uniform
# noise in [0, 0.2] to `returns`. asset_b's noise is drawn before asset_c's,
# so the seeded random stream is consumed in the same order as before.
data2 <- mutate(data, id = "asset_b", returns = returns + runif(n(), 0, .2))
data3 <- mutate(data, id = "asset_c", returns = returns + runif(n(), 0, .2))

# Stack the three assets; the row order is asset_b, asset_a, asset_c.
data <- bind_rows(data2, data, data3)

# Corner coordinates for geom_rect: each rectangle runs from the first of
# its month to the first of the next month, and occupies the unit-high band
# just below the integer level code of its id.
data <- mutate(
  data,
  date = make_datetime(year, month),
  xmin = date,
  xmax = date + months(1),
  ymin = as.numeric(factor(id)) - 1,
  ymax = ymin + 1
)

# Axis labels are the id levels; each break is placed at the vertical centre
# of its band (level k spans k - 1 .. k, so its centre is k - 0.5).
labels <- levels(factor(data$id))
breaks <- seq_along(labels) - .5

# Draw every observation as an explicit rectangle built from the precomputed
# corner columns, then label the numeric y axis with the asset ids.
ggplot(data, aes(x = date)) +
  geom_rect(
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = returns)
  ) +
  scale_y_continuous(breaks = breaks, labels = labels) +
  theme_classic() +
  scale_fill_gradientn(
    colours = c(
      "#66bf7b", "#a1d07e", "#dce182",
      "#ffeb84",
      "#fedb81", "#faa075", "#faa075"
    ),
    values = rescale(c(-3, -2, -1, 0, 1, 2, 3)),
    guide = "colorbar"
  ) +
  labs(x = "", y = "")

另一种获得所需结果的方法是,像您在帖子中提到的那样把日期列转换为字符。这时需要做一些数据整理,手动设置坐标轴的刻度位置(breaks)、标签(labels)和范围(limits),以模仿日期轴:

# (Re)compute the month-start timestamp column from year and month.
data$date <- with(data, make_datetime(year, month))

# Every month start from 2016-01 through 2018-12, followed by 2019-01, in
# chronological order. These become the full set of discrete x positions.
limits <- make_datetime(
  year = c(rep(2016:2018, each = 12), 2019),
  month = c(rep(1:12, times = 3), 1)
)

# Only the January of each year gets a tick.
breaks <- make_datetime(year = 2016:2019, month = 1)

# Heatmap on a discrete x axis: dates are mapped as character strings so all
# tiles share one width, while breaks/labels/limits imitate a date axis.
ggplot(data, aes(x = as.character(date), y = id)) +
  geom_tile(aes(fill = returns)) +
  scale_x_discrete(
    breaks = as.character(breaks),
    labels = year(breaks),
    limits = as.character(limits)
  ) +
  theme_classic() +
  scale_fill_gradientn(
    colours = c(
      "#66bf7b", "#a1d07e", "#dce182",
      "#ffeb84",
      "#fedb81", "#faa075", "#faa075"
    ),
    values = rescale(c(-3, -2, -1, 0, 1, 2, 3)),
    guide = "colorbar"
  ) +
  labs(x = "", y = "")

数据

# dput()-style literal of the example data frame: 35 monthly rows
# (2016-02 through 2018-12) for asset_a / group1 with columns
# year, month, id, group and returns. The expression is not assigned here;
# the snippets in this document refer to the data frame as `data`.
structure(list(year = c(2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2017L, 2017L, 
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 
2018L, 2018L), month = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), id = c("asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a"), group = c("group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1"), 
    returns = c(0.11592118, 0.104526128, 0.244925532, 0.252377372, 
    0.282602889, 0.607148925, 0.257815581, 0.202712468, 0.177455704, 
    0.208526305, 0.179808043, 0.204425208, 0.167787787, 0.122357671, 
    0.095889965, 0.180117687, 0.146912234, 0.286743829, 0.201531197, 
    0.166819132, 0.136262625, 0.128844762, 0.147595906, 0.099843877, 
    0.1928918, 0.188344307, 0.155801889, 0.185813076, 0.217531263, 
    0.269840901, 0.267351364, 0.183753448, 0.195182592, 0.228886115, 
    0.166964407)), class = "data.frame", row.names = c(NA, -35L
))