为原始数据日期范围之外的周进行填充

padding for weeks outside of the dates in the original data

如何按周填充(pad)数据,使结果覆盖原始数据日期范围之外的那些周?

library(tidyverse)
# Sample observations: `x` holds the event date as text and `y` a 0/1 value.
# Several rows can share a date; 2019-01-03 has no rows.
df <- data.frame(
  x = rep(
    c("2019-01-02", "2019-01-04", "2019-01-09", "2019-01-19"),
    times = c(2, 3, 1, 1)
  ),
  y = c(1, 0, 1, 1, 0, 1, 0)
)

# Aggregate by week: parse `x` into a Date, label every row with the first day
# of its week, and group the rows on that label.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 week")
  ) %>%
  group_by(date)

# # A tibble: 7 x 3
# # Groups:   date [3]
#     x              y date      
#     <date>     <dbl> <date>    
#   1 2019-01-02     1 2018-12-30
#   2 2019-01-02     0 2018-12-30
#   3 2019-01-04     1 2018-12-30
#   4 2019-01-04     1 2018-12-30
#   5 2019-01-04     0 2018-12-30
#   6 2019-01-09     1 2019-01-06
#   7 2019-01-19     0 2019-01-13

# Count rows per week, then pad the weekly series over a fixed window and turn
# the NAs introduced by padding into 0. The week labels come from floor_date()
# while the window is given as literal dates.
df %>%
  group_by(
    date = lubridate::floor_date(lubridate::ymd(as.character(x)), "1 week")
  ) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "week",
    start_val = lubridate::ymd("2019-01-01"),
    end_val = lubridate::ymd("2019-02-20")
  ) %>%
  replace(is.na(.), 0)

# Error: The specified interval is invalid for the datetime variable.

如果改用按天的间隔,则可以正常运行:

# Same pipeline at daily resolution: count rows per day, pad the daily series
# over the fixed window, and replace the NAs introduced by padding with 0.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 day")
  ) %>%
  group_by(date) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "day",
    start_val = lubridate::ymd("2019-01-01"),
    end_val = lubridate::ymd("2019-02-20")
  ) %>%
  replace(is.na(.), 0)

问题在于 floor_date() 给出的日期(例如 2018/12/30)与您传给 padr::pad() 的时间段(例如 2019/01/01 到 2019/02/20)不匹配。

只要把二者调整为一致(即让每周的起始日期与填充区间的起点对齐),它就可以正常工作。

例如:


    library(lubridate)

    # Window over which the weekly series should be padded.
    Start_val <- ymd("2019-01-01")
    End_val <- ymd("2019-02-20")

    # Distance between Start_val and the first day of its floor_date() week.
    # The weekly bins below are shifted by this amount so that their labels
    # line up with Start_val when they are handed to padr::pad().
    Adjust_days <- Start_val - floor_date(Start_val, "1 week")

    # Example: Changing floor_date dates

    df %>%
      mutate(x = ymd(as.character(x))) %>%
      # Shift back *before* flooring and forward again afterwards, so every
      # date lands in the 7-day bin (anchored on Start_val) that contains it.
      # Flooring first and shifting afterwards would give a date that falls
      # between the default week start and Start_val's weekday a bin label
      # later than the date itself.
      group_by(date = floor_date(x - Adjust_days, "1 week") + Adjust_days) %>%
      # NOTE(review): `name = "count"` relies on count() having a `name`
      # argument; on a dplyr without it the string would be taken as an extra
      # grouping column -- confirm against the installed version.
      count(name = "count") %>%
      ungroup() %>%
      padr::pad(interval = "1 week",
                start_val = Start_val,
                end_val = End_val) %>%
      # Weeks added by pad() carry NA; report them as 0.
      replace(is.na(.), 0)

    #> # A tibble: 8 x 3
    #>   date       name      n
    #>   <date>     <chr> <dbl>
    #> 1 2019-01-01 count     5
    #> 2 2019-01-08 count     1
    #> 3 2019-01-15 count     1
    #> 4 2019-01-22 0         0
    #> 5 2019-01-29 0         0
    #> 6 2019-02-05 0         0
    #> 7 2019-02-12 0         0
    #> 8 2019-02-19 0         0


    # Example: Changing padr dates

    # Alternative: keep the floor_date() week labels as they are and floor the
    # padding bounds to the same weekly grid instead. NAs introduced by
    # padding are replaced with 0.
    df %>%
      transmute(date = floor_date(ymd(as.character(x)), "1 week")) %>%
      group_by(date) %>%
      count(name = "count") %>%
      ungroup() %>%
      padr::pad(
        interval = "1 week",
        start_val = floor_date(Start_val, "1 week"),
        end_val = floor_date(End_val, "1 week")
      ) %>%
      replace(is.na(.), 0)

    #> # A tibble: 8 x 3
    #>   date       name      n
    #>   <date>     <chr> <dbl>
    #> 1 2018-12-30 count     5
    #> 2 2019-01-06 count     1
    #> 3 2019-01-13 count     1
    #> 4 2019-01-20 0         0
    #> 5 2019-01-27 0         0
    #> 6 2019-02-03 0         0
    #> 7 2019-02-10 0         0
    #> 8 2019-02-17 0         0