在原始数据的日期范围之外按周进行填充
padding for weeks outside of the dates in the original data
如何按周进行填充,并覆盖原始数据日期范围之外的那些周?
library(tidyverse)
# Example observations: `x` holds ISO-formatted dates as text (some days
# repeat; 2019-01-03 is commented out) and `y` holds one 0/1 value per row.
df <- data.frame(
  x = c(
    "2019-01-02",
    "2019-01-02",
    # "2019-01-03",
    "2019-01-04",
    "2019-01-04",
    "2019-01-04",
    "2019-01-09",
    "2019-01-19"
  ),
  y = c(1, 0, 1, 1, 0, 1, 0)
)
# Aggregate by week: parse `x` into a Date, label every row with the first
# day of its week as returned by floor_date(), and group on that label.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 week")
  ) %>%
  group_by(date)
# # A tibble: 7 x 3
# # Groups: date [3]
# x y date
# <date> <dbl> <date>
# 1 2019-01-02 1 2018-12-30
# 2 2019-01-02 0 2018-12-30
# 3 2019-01-04 1 2018-12-30
# 4 2019-01-04 1 2018-12-30
# 5 2019-01-04 0 2018-12-30
# 6 2019-01-09 1 2019-01-06
# 7 2019-01-19 0 2019-01-13
# Weekly counts, padded with padr::pad() over a window from 2019-01-01 to
# 2019-02-20; padded rows get 0 instead of NA.
# This call is kept as a reproduction of the failure quoted on the last line
# of this block. The weekly dates produced by floor_date() (2018-12-30,
# 2019-01-06, ... in the output recorded above) do not coincide with
# start_val, which is presumably what padr rejects -- TODO confirm.
df %>%
mutate(x = lubridate::ymd(as.character(x))) %>%
group_by(date = lubridate::floor_date(x, "1 week")) %>%
count(name = "count") %>%
ungroup() %>%
padr::pad(interval = "week",
start_val = lubridate::ymd("2019-01-01"),
end_val = lubridate::ymd("2019-02-20")) %>%
replace(is.na(.), 0)
# Error: The specified interval is invalid for the datetime variable.
改用按天的间隔时则可以正常运行:
# Daily variant: count rows per calendar day, then pad day by day across the
# 2019-01-01 .. 2019-02-20 window and turn the NA counts of padded days
# into 0.
df %>%
  group_by(
    date = lubridate::floor_date(lubridate::ymd(as.character(x)), "1 day")
  ) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "day",
    start_val = lubridate::ymd("2019-01-01"),
    end_val = lubridate::ymd("2019-02-20")
  ) %>%
  replace(is.na(.), 0)
问题在于 floor_date()
给出的日期(例如 2018/12/30)与您传给 padr::pad()
的时间段(例如 2019/01/01 到 2019/02/20)不一致。
只要把两者调整为相互对齐,就可以正常运行。
例如:
library(lubridate)
# Padding window handed to padr::pad().
Start_val <- ymd("2019-01-01")
End_val <- ymd("2019-02-20")
# Offset between Start_val and the first day of its floor_date() week.
# Shifting the weekly bins by this amount makes them line up with Start_val.
Adjust_days <- Start_val - floor_date(Start_val, "1 week")
# Example: Changing floor_date dates
# Each date is shifted back by Adjust_days before flooring and forward again
# afterwards, so it always lands in the 7-day bin that contains it. Flooring
# first and adding the offset afterwards would put dates that fall in the
# first Adjust_days days of a floor_date() week into a bin that starts after
# them.
# NOTE(review): the output recorded below shows `name`/`n` columns, which
# looks like it came from a dplyr release whose count() had no `name`
# argument -- confirm against the installed version.
df %>%
  mutate(x = ymd(as.character(x))) %>%
  group_by(date = floor_date(x - Adjust_days, "1 week") + Adjust_days) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "1 week",
    start_val = Start_val,
    end_val = End_val
  ) %>%
  replace(is.na(.), 0)
#> # A tibble: 8 x 3
#> date name n
#> <date> <chr> <dbl>
#> 1 2019-01-01 count 5
#> 2 2019-01-08 count 1
#> 3 2019-01-15 count 1
#> 4 2019-01-22 0 0
#> 5 2019-01-29 0 0
#> 6 2019-02-05 0 0
#> 7 2019-02-12 0 0
#> 8 2019-02-19 0 0
# Example: Changing padr dates
# Leave the weekly bins where floor_date() puts them and instead floor both
# ends of the padding window to the week as well.
df %>%
  group_by(date = floor_date(ymd(as.character(x)), "1 week")) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "1 week",
    start_val = floor_date(Start_val, "1 week"),
    end_val = floor_date(End_val, "1 week")
  ) %>%
  replace(is.na(.), 0)
#> # A tibble: 8 x 3
#> date name n
#> <date> <chr> <dbl>
#> 1 2018-12-30 count 5
#> 2 2019-01-06 count 1
#> 3 2019-01-13 count 1
#> 4 2019-01-20 0 0
#> 5 2019-01-27 0 0
#> 6 2019-02-03 0 0
#> 7 2019-02-10 0 0
#> 8 2019-02-17 0 0
如何按周进行填充,并覆盖原始数据日期范围之外的那些周?
library(tidyverse)
# Example observations: `x` holds ISO-formatted dates as text (some days
# repeat; 2019-01-03 is commented out) and `y` holds one 0/1 value per row.
df <- data.frame(
  x = c(
    "2019-01-02",
    "2019-01-02",
    # "2019-01-03",
    "2019-01-04",
    "2019-01-04",
    "2019-01-04",
    "2019-01-09",
    "2019-01-19"
  ),
  y = c(1, 0, 1, 1, 0, 1, 0)
)
# Aggregate by week: parse `x` into a Date, label every row with the first
# day of its week as returned by floor_date(), and group on that label.
df %>%
  mutate(
    x = lubridate::ymd(as.character(x)),
    date = lubridate::floor_date(x, "1 week")
  ) %>%
  group_by(date)
# # A tibble: 7 x 3
# # Groups: date [3]
# x y date
# <date> <dbl> <date>
# 1 2019-01-02 1 2018-12-30
# 2 2019-01-02 0 2018-12-30
# 3 2019-01-04 1 2018-12-30
# 4 2019-01-04 1 2018-12-30
# 5 2019-01-04 0 2018-12-30
# 6 2019-01-09 1 2019-01-06
# 7 2019-01-19 0 2019-01-13
# Weekly counts, padded with padr::pad() over a window from 2019-01-01 to
# 2019-02-20; padded rows get 0 instead of NA.
# This call is kept as a reproduction of the failure quoted on the last line
# of this block. The weekly dates produced by floor_date() (2018-12-30,
# 2019-01-06, ... in the output recorded above) do not coincide with
# start_val, which is presumably what padr rejects -- TODO confirm.
df %>%
mutate(x = lubridate::ymd(as.character(x))) %>%
group_by(date = lubridate::floor_date(x, "1 week")) %>%
count(name = "count") %>%
ungroup() %>%
padr::pad(interval = "week",
start_val = lubridate::ymd("2019-01-01"),
end_val = lubridate::ymd("2019-02-20")) %>%
replace(is.na(.), 0)
# Error: The specified interval is invalid for the datetime variable.
改用按天的间隔时则可以正常运行:
# Daily variant: count rows per calendar day, then pad day by day across the
# 2019-01-01 .. 2019-02-20 window and turn the NA counts of padded days
# into 0.
df %>%
  group_by(
    date = lubridate::floor_date(lubridate::ymd(as.character(x)), "1 day")
  ) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "day",
    start_val = lubridate::ymd("2019-01-01"),
    end_val = lubridate::ymd("2019-02-20")
  ) %>%
  replace(is.na(.), 0)
问题在于 floor_date()
给出的日期(例如 2018/12/30)与您传给 padr::pad()
的时间段(例如 2019/01/01 到 2019/02/20)不一致。
只要把两者调整为相互对齐,就可以正常运行。
例如:
library(lubridate)
# Padding window handed to padr::pad().
Start_val <- ymd("2019-01-01")
End_val <- ymd("2019-02-20")
# Offset between Start_val and the first day of its floor_date() week.
# Shifting the weekly bins by this amount makes them line up with Start_val.
Adjust_days <- Start_val - floor_date(Start_val, "1 week")
# Example: Changing floor_date dates
# Each date is shifted back by Adjust_days before flooring and forward again
# afterwards, so it always lands in the 7-day bin that contains it. Flooring
# first and adding the offset afterwards would put dates that fall in the
# first Adjust_days days of a floor_date() week into a bin that starts after
# them.
# NOTE(review): the output recorded below shows `name`/`n` columns, which
# looks like it came from a dplyr release whose count() had no `name`
# argument -- confirm against the installed version.
df %>%
  mutate(x = ymd(as.character(x))) %>%
  group_by(date = floor_date(x - Adjust_days, "1 week") + Adjust_days) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "1 week",
    start_val = Start_val,
    end_val = End_val
  ) %>%
  replace(is.na(.), 0)
#> # A tibble: 8 x 3
#> date name n
#> <date> <chr> <dbl>
#> 1 2019-01-01 count 5
#> 2 2019-01-08 count 1
#> 3 2019-01-15 count 1
#> 4 2019-01-22 0 0
#> 5 2019-01-29 0 0
#> 6 2019-02-05 0 0
#> 7 2019-02-12 0 0
#> 8 2019-02-19 0 0
# Example: Changing padr dates
# Leave the weekly bins where floor_date() puts them and instead floor both
# ends of the padding window to the week as well.
df %>%
  group_by(date = floor_date(ymd(as.character(x)), "1 week")) %>%
  count(name = "count") %>%
  ungroup() %>%
  padr::pad(
    interval = "1 week",
    start_val = floor_date(Start_val, "1 week"),
    end_val = floor_date(End_val, "1 week")
  ) %>%
  replace(is.na(.), 0)
#> # A tibble: 8 x 3
#> date name n
#> <date> <chr> <dbl>
#> 1 2018-12-30 count 5
#> 2 2019-01-06 count 1
#> 3 2019-01-13 count 1
#> 4 2019-01-20 0 0
#> 5 2019-01-27 0 0
#> 6 2019-02-03 0 0
#> 7 2019-02-10 0 0
#> 8 2019-02-17 0 0