计算自上次条件满足以来的时间,不满足时重置为 0
Count time-since last condition met and reset to 0 when not
我想计算一个条件满足后的时间,但是每次条件不满足,时间又要归零。用 dplyr
实现这一目标会很棒,但我愿意接受任何建议。
有了代码更容易看懂:
library(dplyr)
d <- structure(list(date = structure(c(17105, 17182, 17275, 17359,
17437, 17472, 17500, 17539,
17624, 17658, 17693, 17742,
17828, 17877, 18004, 18053,
18087, 18130, 18186, 18214,
18298, 18415, 18527, 18583,
18610),
class = "Date"),
condition = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
TRUE, FALSE, TRUE, TRUE)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,-25L))
# I can easily calculate the time since the last observation when the
# condition has been met:
dd <- d %>%
mutate(time_condition = case_when(
condition == FALSE ~ date - date, #So it is 0
condition == TRUE & lag(condition) == FALSE ~ date - date, # again, we want 0
condition == TRUE & lag(condition) == TRUE ~ date - lag(date)),
time_condition = as.numeric(time_condition))
# This is how it looks now
dd %>%
print(n = 25)
#> # A tibble: 25 × 3
#> date condition time_condition
#> <date> <lgl> <dbl>
#> 1 2016-10-31 FALSE 0
#> 2 2017-01-16 FALSE 0
#> 3 2017-04-19 FALSE 0
#> 4 2017-07-12 FALSE 0
#> 5 2017-09-28 TRUE 0
#> 6 2017-11-02 TRUE 35
#> 7 2017-11-30 TRUE 28
#> 8 2018-01-08 TRUE 39
#> 9 2018-04-03 TRUE 85
#> 10 2018-05-07 FALSE 0
#> 11 2018-06-11 FALSE 0
#> 12 2018-07-30 FALSE 0
#> 13 2018-10-24 TRUE 0
#> 14 2018-12-12 FALSE 0
#> 15 2019-04-18 TRUE 0
#> 16 2019-06-06 TRUE 49
#> 17 2019-07-10 TRUE 34
#> 18 2019-08-22 TRUE 43
#> 19 2019-10-17 TRUE 56
#> 20 2019-11-14 TRUE 28
#> 21 2020-02-06 FALSE 0
#> 22 2020-06-02 TRUE 0
#> 23 2020-09-22 FALSE 0
#> 24 2020-11-17 TRUE 0
#> 25 2020-12-14 TRUE 27
我想要的是一种 cumsum()
当不再满足条件时重置为 0 的类型。数据应如下所示:
should_be <- c(0, 0, 0, 0, 0, 35, 35 + 28, 35 + 28 + 39, 35 + 28 + 39 + 85,
0, 0, 0, 0, 0, 0, 49, 49 + 34, 49 + 34 + 43, 49 + 34 + 43 + 56,
49 + 34 + 43 + 56 + 28, 0, 0, 0, 0, 27)
dd %>%
mutate(time_condition_wanted = should_be) %>%
print(n = 25)
#> # A tibble: 25 × 4
#> date condition time_condition time_condition_wanted
#> <date> <lgl> <dbl> <dbl>
#> 1 2016-10-31 FALSE 0 0
#> 2 2017-01-16 FALSE 0 0
#> 3 2017-04-19 FALSE 0 0
#> 4 2017-07-12 FALSE 0 0
#> 5 2017-09-28 TRUE 0 0
#> 6 2017-11-02 TRUE 35 35
#> 7 2017-11-30 TRUE 28 63
#> 8 2018-01-08 TRUE 39 102
#> 9 2018-04-03 TRUE 85 187
#> 10 2018-05-07 FALSE 0 0
#> 11 2018-06-11 FALSE 0 0
#> 12 2018-07-30 FALSE 0 0
#> 13 2018-10-24 TRUE 0 0
#> 14 2018-12-12 FALSE 0 0
#> 15 2019-04-18 TRUE 0 0
#> 16 2019-06-06 TRUE 49 49
#> 17 2019-07-10 TRUE 34 83
#> 18 2019-08-22 TRUE 43 126
#> 19 2019-10-17 TRUE 56 182
#> 20 2019-11-14 TRUE 28 210
#> 21 2020-02-06 FALSE 0 0
#> 22 2020-06-02 TRUE 0 0
#> 23 2020-09-22 FALSE 0 0
#> 24 2020-11-17 TRUE 0 0
#> 25 2020-12-14 TRUE 27 27
由 reprex package (v2.0.1)
于 2021-11-12 创建
我刚刚想出了(有点复杂的)问题解决方案,post 在这里 post 求教。如果有更简单的解决方案就太好了。
library(dplyr)
d <- structure(list(date = structure(c(17105, 17182, 17275, 17359,
17437, 17472, 17500, 17539,
17624, 17658, 17693, 17742,
17828, 17877, 18004, 18053,
18087, 18130, 18186, 18214,
18298, 18415, 18527, 18583,
18610),
class = "Date"),
condition = c(FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE,
TRUE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, FALSE,
TRUE, FALSE, TRUE, TRUE)),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA,-25L))
dd <- d %>%
mutate(time_condition = case_when(
condition == FALSE ~ date - date, #So it is 0
condition == TRUE & lag(condition) == FALSE ~ date - date, # again, we want 0
condition == TRUE & lag(condition) == TRUE ~ date - lag(date)),
time_condition = as.numeric(time_condition))
should_be <- c(0, 0, 0, 0, 0, 35, 35 + 28, 35 + 28 + 39, 35 + 28 + 39 + 85,
0, 0, 0, 0, 0, 0, 49, 49 + 34, 49 + 34 + 43, 49 + 34 + 43 + 56,
49 + 34 + 43 + 56 + 28, 0, 0, 0, 0, 27)
我可以创建一个 change
变量,它会在每次状态变化时增加。之后,cumsum()
可以在数据集 group_by
上使用 change
变量。
dd %>%
mutate(time_condition_wanted = should_be) %>%
# create that increases at each status change
mutate(change = if_else(condition != lag(condition), 1, 0),
change = if_else(is.na(change), 0, change),
change = cumsum(change)) %>%
group_by(change) %>%
mutate(y = cumsum(time_condition)) %>%
print(n = 25)
#> # A tibble: 25 × 6
#> # Groups: change [10]
#> date condition time_condition time_condition_wanted change y
#> <date> <lgl> <dbl> <dbl> <dbl> <dbl>
#> 1 2016-10-31 FALSE 0 0 0 0
#> 2 2017-01-16 FALSE 0 0 0 0
#> 3 2017-04-19 FALSE 0 0 0 0
#> 4 2017-07-12 FALSE 0 0 0 0
#> 5 2017-09-28 TRUE 0 0 1 0
#> 6 2017-11-02 TRUE 35 35 1 35
#> 7 2017-11-30 TRUE 28 63 1 63
#> 8 2018-01-08 TRUE 39 102 1 102
#> 9 2018-04-03 TRUE 85 187 1 187
#> 10 2018-05-07 FALSE 0 0 2 0
#> 11 2018-06-11 FALSE 0 0 2 0
#> 12 2018-07-30 FALSE 0 0 2 0
#> 13 2018-10-24 TRUE 0 0 3 0
#> 14 2018-12-12 FALSE 0 0 4 0
#> 15 2019-04-18 TRUE 0 0 5 0
#> 16 2019-06-06 TRUE 49 49 5 49
#> 17 2019-07-10 TRUE 34 83 5 83
#> 18 2019-08-22 TRUE 43 126 5 126
#> 19 2019-10-17 TRUE 56 182 5 182
#> 20 2019-11-14 TRUE 28 210 5 210
#> 21 2020-02-06 FALSE 0 0 6 0
#> 22 2020-06-02 TRUE 0 0 7 0
#> 23 2020-09-22 FALSE 0 0 8 0
#> 24 2020-11-17 TRUE 0 0 9 0
#> 25 2020-12-14 TRUE 27 27 9 27
由 reprex package (v2.0.1)
于 2021-11-12 创建
这是另一个版本-
library(dplyr)
dd %>%
group_by(grp = cumsum(time_condition == 0)) %>%
mutate(result = cumsum(time_condition)) %>%
ungroup %>%
select(-grp)
这个returns-
# date condition time_condition result
#1 2016-10-31 FALSE 0 0
#2 2017-01-16 FALSE 0 0
#3 2017-04-19 FALSE 0 0
#4 2017-07-12 FALSE 0 0
#5 2017-09-28 TRUE 0 0
#6 2017-11-02 TRUE 35 35
#7 2017-11-30 TRUE 28 63
#8 2018-01-08 TRUE 39 102
#9 2018-04-03 TRUE 85 187
#10 2018-05-07 FALSE 0 0
#11 2018-06-11 FALSE 0 0
#12 2018-07-30 FALSE 0 0
#13 2018-10-24 TRUE 0 0
#14 2018-12-12 FALSE 0 0
#15 2019-04-18 TRUE 0 0
#16 2019-06-06 TRUE 49 49
#17 2019-07-10 TRUE 34 83
#18 2019-08-22 TRUE 43 126
#19 2019-10-17 TRUE 56 182
#20 2019-11-14 TRUE 28 210
#21 2020-02-06 FALSE 0 0
#22 2020-06-02 TRUE 0 0
#23 2020-09-22 FALSE 0 0
#24 2020-11-17 TRUE 0 0
#25 2020-12-14 TRUE 27 27
我想计算一个条件满足后的时间,但是每次条件不满足,时间又要归零。用 dplyr
实现这一目标会很棒,但我愿意接受任何建议。
有了代码更容易看懂:
library(dplyr)
d <- structure(list(date = structure(c(17105, 17182, 17275, 17359,
17437, 17472, 17500, 17539,
17624, 17658, 17693, 17742,
17828, 17877, 18004, 18053,
18087, 18130, 18186, 18214,
18298, 18415, 18527, 18583,
18610),
class = "Date"),
condition = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
TRUE, FALSE, TRUE, TRUE)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,-25L))
# I can easily calculate the time since the last observation when the
# condition has been met:
dd <- d %>%
mutate(time_condition = case_when(
condition == FALSE ~ date - date, #So it is 0
condition == TRUE & lag(condition) == FALSE ~ date - date, # again, we want 0
condition == TRUE & lag(condition) == TRUE ~ date - lag(date)),
time_condition = as.numeric(time_condition))
# This is how it looks now
dd %>%
print(n = 25)
#> # A tibble: 25 × 3
#> date condition time_condition
#> <date> <lgl> <dbl>
#> 1 2016-10-31 FALSE 0
#> 2 2017-01-16 FALSE 0
#> 3 2017-04-19 FALSE 0
#> 4 2017-07-12 FALSE 0
#> 5 2017-09-28 TRUE 0
#> 6 2017-11-02 TRUE 35
#> 7 2017-11-30 TRUE 28
#> 8 2018-01-08 TRUE 39
#> 9 2018-04-03 TRUE 85
#> 10 2018-05-07 FALSE 0
#> 11 2018-06-11 FALSE 0
#> 12 2018-07-30 FALSE 0
#> 13 2018-10-24 TRUE 0
#> 14 2018-12-12 FALSE 0
#> 15 2019-04-18 TRUE 0
#> 16 2019-06-06 TRUE 49
#> 17 2019-07-10 TRUE 34
#> 18 2019-08-22 TRUE 43
#> 19 2019-10-17 TRUE 56
#> 20 2019-11-14 TRUE 28
#> 21 2020-02-06 FALSE 0
#> 22 2020-06-02 TRUE 0
#> 23 2020-09-22 FALSE 0
#> 24 2020-11-17 TRUE 0
#> 25 2020-12-14 TRUE 27
我想要的是一种 cumsum()
当不再满足条件时重置为 0 的类型。数据应如下所示:
should_be <- c(0, 0, 0, 0, 0, 35, 35 + 28, 35 + 28 + 39, 35 + 28 + 39 + 85,
0, 0, 0, 0, 0, 0, 49, 49 + 34, 49 + 34 + 43, 49 + 34 + 43 + 56,
49 + 34 + 43 + 56 + 28, 0, 0, 0, 0, 27)
dd %>%
mutate(time_condition_wanted = should_be) %>%
print(n = 25)
#> # A tibble: 25 × 4
#> date condition time_condition time_condition_wanted
#> <date> <lgl> <dbl> <dbl>
#> 1 2016-10-31 FALSE 0 0
#> 2 2017-01-16 FALSE 0 0
#> 3 2017-04-19 FALSE 0 0
#> 4 2017-07-12 FALSE 0 0
#> 5 2017-09-28 TRUE 0 0
#> 6 2017-11-02 TRUE 35 35
#> 7 2017-11-30 TRUE 28 63
#> 8 2018-01-08 TRUE 39 102
#> 9 2018-04-03 TRUE 85 187
#> 10 2018-05-07 FALSE 0 0
#> 11 2018-06-11 FALSE 0 0
#> 12 2018-07-30 FALSE 0 0
#> 13 2018-10-24 TRUE 0 0
#> 14 2018-12-12 FALSE 0 0
#> 15 2019-04-18 TRUE 0 0
#> 16 2019-06-06 TRUE 49 49
#> 17 2019-07-10 TRUE 34 83
#> 18 2019-08-22 TRUE 43 126
#> 19 2019-10-17 TRUE 56 182
#> 20 2019-11-14 TRUE 28 210
#> 21 2020-02-06 FALSE 0 0
#> 22 2020-06-02 TRUE 0 0
#> 23 2020-09-22 FALSE 0 0
#> 24 2020-11-17 TRUE 0 0
#> 25 2020-12-14 TRUE 27 27
由 reprex package (v2.0.1)
于 2021-11-12 创建我刚刚想出了(有点复杂的)问题解决方案,post 在这里 post 求教。如果有更简单的解决方案就太好了。
library(dplyr)
d <- structure(list(date = structure(c(17105, 17182, 17275, 17359,
17437, 17472, 17500, 17539,
17624, 17658, 17693, 17742,
17828, 17877, 18004, 18053,
18087, 18130, 18186, 18214,
18298, 18415, 18527, 18583,
18610),
class = "Date"),
condition = c(FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE,
TRUE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, FALSE,
TRUE, FALSE, TRUE, TRUE)),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA,-25L))
dd <- d %>%
mutate(time_condition = case_when(
condition == FALSE ~ date - date, #So it is 0
condition == TRUE & lag(condition) == FALSE ~ date - date, # again, we want 0
condition == TRUE & lag(condition) == TRUE ~ date - lag(date)),
time_condition = as.numeric(time_condition))
should_be <- c(0, 0, 0, 0, 0, 35, 35 + 28, 35 + 28 + 39, 35 + 28 + 39 + 85,
0, 0, 0, 0, 0, 0, 49, 49 + 34, 49 + 34 + 43, 49 + 34 + 43 + 56,
49 + 34 + 43 + 56 + 28, 0, 0, 0, 0, 27)
我可以创建一个 change
变量,它会在每次状态变化时增加。之后,cumsum()
可以在数据集 group_by
上使用 change
变量。
dd %>%
mutate(time_condition_wanted = should_be) %>%
# create that increases at each status change
mutate(change = if_else(condition != lag(condition), 1, 0),
change = if_else(is.na(change), 0, change),
change = cumsum(change)) %>%
group_by(change) %>%
mutate(y = cumsum(time_condition)) %>%
print(n = 25)
#> # A tibble: 25 × 6
#> # Groups: change [10]
#> date condition time_condition time_condition_wanted change y
#> <date> <lgl> <dbl> <dbl> <dbl> <dbl>
#> 1 2016-10-31 FALSE 0 0 0 0
#> 2 2017-01-16 FALSE 0 0 0 0
#> 3 2017-04-19 FALSE 0 0 0 0
#> 4 2017-07-12 FALSE 0 0 0 0
#> 5 2017-09-28 TRUE 0 0 1 0
#> 6 2017-11-02 TRUE 35 35 1 35
#> 7 2017-11-30 TRUE 28 63 1 63
#> 8 2018-01-08 TRUE 39 102 1 102
#> 9 2018-04-03 TRUE 85 187 1 187
#> 10 2018-05-07 FALSE 0 0 2 0
#> 11 2018-06-11 FALSE 0 0 2 0
#> 12 2018-07-30 FALSE 0 0 2 0
#> 13 2018-10-24 TRUE 0 0 3 0
#> 14 2018-12-12 FALSE 0 0 4 0
#> 15 2019-04-18 TRUE 0 0 5 0
#> 16 2019-06-06 TRUE 49 49 5 49
#> 17 2019-07-10 TRUE 34 83 5 83
#> 18 2019-08-22 TRUE 43 126 5 126
#> 19 2019-10-17 TRUE 56 182 5 182
#> 20 2019-11-14 TRUE 28 210 5 210
#> 21 2020-02-06 FALSE 0 0 6 0
#> 22 2020-06-02 TRUE 0 0 7 0
#> 23 2020-09-22 FALSE 0 0 8 0
#> 24 2020-11-17 TRUE 0 0 9 0
#> 25 2020-12-14 TRUE 27 27 9 27
由 reprex package (v2.0.1)
于 2021-11-12 创建这是另一个版本-
library(dplyr)
dd %>%
group_by(grp = cumsum(time_condition == 0)) %>%
mutate(result = cumsum(time_condition)) %>%
ungroup %>%
select(-grp)
这个returns-
# date condition time_condition result
#1 2016-10-31 FALSE 0 0
#2 2017-01-16 FALSE 0 0
#3 2017-04-19 FALSE 0 0
#4 2017-07-12 FALSE 0 0
#5 2017-09-28 TRUE 0 0
#6 2017-11-02 TRUE 35 35
#7 2017-11-30 TRUE 28 63
#8 2018-01-08 TRUE 39 102
#9 2018-04-03 TRUE 85 187
#10 2018-05-07 FALSE 0 0
#11 2018-06-11 FALSE 0 0
#12 2018-07-30 FALSE 0 0
#13 2018-10-24 TRUE 0 0
#14 2018-12-12 FALSE 0 0
#15 2019-04-18 TRUE 0 0
#16 2019-06-06 TRUE 49 49
#17 2019-07-10 TRUE 34 83
#18 2019-08-22 TRUE 43 126
#19 2019-10-17 TRUE 56 182
#20 2019-11-14 TRUE 28 210
#21 2020-02-06 FALSE 0 0
#22 2020-06-02 TRUE 0 0
#23 2020-09-22 FALSE 0 0
#24 2020-11-17 TRUE 0 0
#25 2020-12-14 TRUE 27 27