基于DATE的R中多列的差异
the difference of multiple columns in R based in DATE
我有这个数据:
structure(list(new_col = c(1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5,
6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = structure(c(1608249600, 1608249600,
1608249600, 1608249600, 1608249600, 1608249600, 1608249600, 1608336000,
1608336000, 1608336000, 1608336000, 1608336000, 1608336000, 1608336000,
1608422400, 1608422400, 1608422400, 1608422400, 1608422400, 1608422400,
1608422400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 588,
932, 1477, 2186, 2810, 3051, 2330, 590, 932, 1479, 2188,
2817, 3060, 2335)), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame"))
HOPS_COUNT是一个累积变量,而我需要每天测量。像这样:
structure(list(X1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21), new_col = c(1, 2, 3, 4,
5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = c("12/18/2020",
"12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020",
"12/18/2020", "12/19/2020", "12/19/2020", "12/19/2020", "12/19/2020",
"12/19/2020", "12/19/2020", "12/19/2020", "12/20/2020", "12/20/2020",
"12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020"
), HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 6,
1, 5, 11, 19, 27, 20, 2, 1, 2, 2, 7, 9, 15)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame"))
所以我需要一个新列,公式是
(HOPS_CASE of new_col(1) 在第二个日期 (12/19/2020)= (CASE_HOSP of new_col(1) 在第一天 12/18/ 2020)-(第一天 new_col(1) 的当前 CASE_HOSP 12/19/2020)
这应该适合你:
library(dplyr)
df %>%
group_by(new_col) %>%
mutate(new_count = HOSP_COUNT - lag(HOSP_COUNT),
new_count = ifelse(is.na(new_count), HOSP_COUNT, new_count))
其中 new_count
是您更正后的计数:
# A tibble: 20 x 4
# Groups: new_col [7]
new_col DATE HOSP_COUNT new_count
<dbl> <dttm> <dbl> <dbl>
1 1 2020-12-18 00:00:00 582 582
2 2 2020-12-18 00:00:00 931 931
3 3 2020-12-18 00:00:00 1472 1472
4 4 2020-12-18 00:00:00 2175 2175
5 5 2020-12-18 00:00:00 2791 2791
6 6 2020-12-18 00:00:00 3024 3024
7 7 2020-12-18 00:00:00 2310 2310
8 1 2020-12-19 00:00:00 588 6
9 2 2020-12-19 00:00:00 932 1
10 3 2020-12-19 00:00:00 1477 5
11 4 2020-12-19 00:00:00 2186 11
12 5 2020-12-19 00:00:00 2810 19
13 6 2020-12-19 00:00:00 3051 27
14 7 2020-12-19 00:00:00 2330 20
15 1 2020-12-20 00:00:00 590 2
16 2 2020-12-20 00:00:00 932 0
17 3 2020-12-20 00:00:00 1479 2
18 4 2020-12-20 00:00:00 2188 2
19 5 2020-12-20 00:00:00 2817 7
20 6 2020-12-20 00:00:00 3060 9
我有这个数据:
structure(list(new_col = c(1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5,
6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = structure(c(1608249600, 1608249600,
1608249600, 1608249600, 1608249600, 1608249600, 1608249600, 1608336000,
1608336000, 1608336000, 1608336000, 1608336000, 1608336000, 1608336000,
1608422400, 1608422400, 1608422400, 1608422400, 1608422400, 1608422400,
1608422400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 588,
932, 1477, 2186, 2810, 3051, 2330, 590, 932, 1479, 2188,
2817, 3060, 2335)), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame"))
HOPS_COUNT是一个累积变量,而我需要每天测量。像这样:
structure(list(X1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21), new_col = c(1, 2, 3, 4,
5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7), DATE = c("12/18/2020",
"12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020", "12/18/2020",
"12/18/2020", "12/19/2020", "12/19/2020", "12/19/2020", "12/19/2020",
"12/19/2020", "12/19/2020", "12/19/2020", "12/20/2020", "12/20/2020",
"12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020", "12/20/2020"
), HOSP_COUNT = c(582, 931, 1472, 2175, 2791, 3024, 2310, 6,
1, 5, 11, 19, 27, 20, 2, 1, 2, 2, 7, 9, 15)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame"))
所以我需要一个新列,公式是 (HOPS_CASE of new_col(1) 在第二个日期 (12/19/2020)= (CASE_HOSP of new_col(1) 在第一天 12/18/ 2020)-(第一天 new_col(1) 的当前 CASE_HOSP 12/19/2020)
这应该适合你:
library(dplyr)
df %>%
group_by(new_col) %>%
mutate(new_count = HOSP_COUNT - lag(HOSP_COUNT),
new_count = ifelse(is.na(new_count), HOSP_COUNT, new_count))
其中 new_count
是您更正后的计数:
# A tibble: 20 x 4
# Groups: new_col [7]
new_col DATE HOSP_COUNT new_count
<dbl> <dttm> <dbl> <dbl>
1 1 2020-12-18 00:00:00 582 582
2 2 2020-12-18 00:00:00 931 931
3 3 2020-12-18 00:00:00 1472 1472
4 4 2020-12-18 00:00:00 2175 2175
5 5 2020-12-18 00:00:00 2791 2791
6 6 2020-12-18 00:00:00 3024 3024
7 7 2020-12-18 00:00:00 2310 2310
8 1 2020-12-19 00:00:00 588 6
9 2 2020-12-19 00:00:00 932 1
10 3 2020-12-19 00:00:00 1477 5
11 4 2020-12-19 00:00:00 2186 11
12 5 2020-12-19 00:00:00 2810 19
13 6 2020-12-19 00:00:00 3051 27
14 7 2020-12-19 00:00:00 2330 20
15 1 2020-12-20 00:00:00 590 2
16 2 2020-12-20 00:00:00 932 0
17 3 2020-12-20 00:00:00 1479 2
18 4 2020-12-20 00:00:00 2188 2
19 5 2020-12-20 00:00:00 2817 7
20 6 2020-12-20 00:00:00 3060 9