根据另一列的值重新编码时间变量
Recode time variable based on the value of another column
我有一个 time
变量,我想根据另一列中事件的变化对其进行重新编码。具体来说,如果 y
中的值从 1 变为 0,则 time
将从 0 重新开始。我还需要在同一个人内完成此刷新(即按 id 排序)。总时间长度可能因人而异。这是原始数据集:
# Example data: three individuals (id 1-3) with 5, 6 and 7 rows respectively.
# Within each id, time runs from 0 in steps of 500; y is a 0/1 indicator.
df <- data.frame(
  id = rep(1:3, times = c(5, 6, 7)),
  time = c(
    seq(0, 2000, by = 500),
    seq(0, 2500, by = 500),
    seq(0, 3000, by = 500)
  ),
  y = c(
    0, 0, 1, 0, 0,
    0, 0, 1, 1, 0, 1,
    0, 1, 1, 0, 1, 1, 0
  )
)
df
id time y
1 1 0 0
2 1 500 0
3 1 1000 1
4 1 1500 0
5 1 2000 0
6 2 0 0
7 2 500 0
8 2 1000 1
9 2 1500 1
10 2 2000 0
11 2 2500 1
12 3 0 0
13 3 500 1
14 3 1000 1
15 3 1500 0
16 3 2000 1
17 3 2500 1
18 3 3000 0
最终结果应该是这样的(注意第4、10、15、18行刷新的时间。三个人的时间长度也不一样):
id time y
1 1 0 0
2 1 500 0
3 1 1000 1
4 1 0 0
5 1 500 0
6 2 0 0
7 2 500 0
8 2 1000 1
9 2 1500 1
10 2 0 0
11 2 500 1
12 3 0 0
13 3 500 1
14 3 1000 1
15 3 0 0
16 3 500 1
17 3 1000 1
18 3 0 0
如何实现?谢谢!
dplyr
library(dplyr)
# Restart `time` at 0 within each id every time y drops from 1 to 0.
df %>%
  group_by(id) %>%
  # grp is the running count of 1 -> 0 drops of y within this id, so all rows
  # between two consecutive drops share the same grp value. The first row of
  # an id has no predecessor; default = 0 keeps it from counting as a drop.
  mutate(grp = cumsum(lag(y, default = 0) == 1 & y == 0)) %>%
  group_by(id, grp) %>%
  # Subtracting the first time of each (id, grp) run makes the run start at 0.
  mutate(time = time - time[1]) %>%
  ungroup()
# # A tibble: 18 x 4
# id time y grp
# <int> <dbl> <dbl> <int>
# 1 1 0 0 0
# 2 1 500 0 0
# 3 1 1000 1 0
# 4 1 0 0 1
# 5 1 500 0 1
# 6 2 0 0 0
# 7 2 500 0 0
# 8 2 1000 1 0
# 9 2 1500 1 0
# 10 2 0 0 1
# 11 2 500 1 1
# 12 3 0 0 0
# 13 3 500 1 0
# 14 3 1000 1 0
# 15 3 0 0 1
# 16 3 500 1 1
# 17 3 1000 1 1
# 18 3 0 0 2
基础 R
# grp: within each id, the running count of 1 -> 0 drops in y. The leading
# FALSE stands for the first row, which has no previous value to compare to.
df[["grp"]] <- ave(df[["y"]], df[["id"]], FUN = function(v) {
  prev <- head(v, -1)
  curr <- tail(v, -1)
  cumsum(c(FALSE, prev == 1 & curr == 0))
})
# Restart time at 0 at the beginning of every (id, grp) run.
df[["time"]] <- ave(
  df[["time"]], df[["id"]], df[["grp"]],
  FUN = function(v) v - v[1]
)
data.table
library(data.table)
# Work on a data.table copy of df: first tag, per id, each run of rows between
# 1 -> 0 drops of y (grp), then shift time so that every (id, grp) run starts
# at 0. The trailing [] makes the result print after the := assignments.
as.data.table(df)[
  , grp := cumsum(shift(y, n = 1L, fill = 0) == 1 & y == 0), by = id
][
  , time := time - time[1L], by = .(id, grp)
][]
另一个不涉及 time
变量的解决方案是:
# Alternative that does not read the original time values: time is rebuilt
# from the row position inside each run, using a fixed step of 500.
df %>%
  group_by(id) %>%
  # count is the running number of 1 -> 0 drops of y within this id; the
  # first row of an id is compared against default = 0, so it never counts.
  mutate(count = cumsum(y == 0 & lag(y, default = 0) == 1)) %>%
  group_by(id, count) %>%
  # First row of each (id, count) run gets 0, the next 500, and so on.
  mutate(time = 500 * (row_number() - 1)) %>%
  # Drop the grouping so later operations on the result are not silently
  # carried out per (id, count) group.
  ungroup()
id time y count
<int> <dbl> <dbl> <int>
1 1 0 0 0
2 1 500 0 0
3 1 1000 1 0
4 1 0 0 1
5 1 500 0 1
6 2 0 0 0
7 2 500 0 0
8 2 1000 1 0
9 2 1500 1 0
10 2 0 0 1
11 2 500 1 1
12 3 0 0 0
13 3 500 1 0
14 3 1000 1 0
15 3 0 0 1
16 3 500 1 1
17 3 1000 1 1
18 3 0 0 2
我有一个 time
变量,我想根据另一列中事件的变化对其进行重新编码。具体来说,如果 y
中的值从 1 变为 0,则 time
将从 0 重新开始。我还需要在同一个人内完成此刷新(即按 id 排序)。总时间长度可能因人而异。这是原始数据集:
# Example data: three individuals (id 1-3) with 5, 6 and 7 rows respectively.
# Within each id, time runs from 0 in steps of 500; y is a 0/1 indicator.
df <- data.frame(
  id = rep(1:3, times = c(5, 6, 7)),
  time = c(
    seq(0, 2000, by = 500),
    seq(0, 2500, by = 500),
    seq(0, 3000, by = 500)
  ),
  y = c(
    0, 0, 1, 0, 0,
    0, 0, 1, 1, 0, 1,
    0, 1, 1, 0, 1, 1, 0
  )
)
df
id time y
1 1 0 0
2 1 500 0
3 1 1000 1
4 1 1500 0
5 1 2000 0
6 2 0 0
7 2 500 0
8 2 1000 1
9 2 1500 1
10 2 2000 0
11 2 2500 1
12 3 0 0
13 3 500 1
14 3 1000 1
15 3 1500 0
16 3 2000 1
17 3 2500 1
18 3 3000 0
最终结果应该是这样的(注意第4、10、15、18行刷新的时间。三个人的时间长度也不一样):
id time y
1 1 0 0
2 1 500 0
3 1 1000 1
4 1 0 0
5 1 500 0
6 2 0 0
7 2 500 0
8 2 1000 1
9 2 1500 1
10 2 0 0
11 2 500 1
12 3 0 0
13 3 500 1
14 3 1000 1
15 3 0 0
16 3 500 1
17 3 1000 1
18 3 0 0
如何实现?谢谢!
dplyr
library(dplyr)
# Restart `time` at 0 within each id every time y drops from 1 to 0.
df %>%
  group_by(id) %>%
  # grp is the running count of 1 -> 0 drops of y within this id, so all rows
  # between two consecutive drops share the same grp value. The first row of
  # an id has no predecessor; default = 0 keeps it from counting as a drop.
  mutate(grp = cumsum(lag(y, default = 0) == 1 & y == 0)) %>%
  group_by(id, grp) %>%
  # Subtracting the first time of each (id, grp) run makes the run start at 0.
  mutate(time = time - time[1]) %>%
  ungroup()
# # A tibble: 18 x 4
# id time y grp
# <int> <dbl> <dbl> <int>
# 1 1 0 0 0
# 2 1 500 0 0
# 3 1 1000 1 0
# 4 1 0 0 1
# 5 1 500 0 1
# 6 2 0 0 0
# 7 2 500 0 0
# 8 2 1000 1 0
# 9 2 1500 1 0
# 10 2 0 0 1
# 11 2 500 1 1
# 12 3 0 0 0
# 13 3 500 1 0
# 14 3 1000 1 0
# 15 3 0 0 1
# 16 3 500 1 1
# 17 3 1000 1 1
# 18 3 0 0 2
基础 R
# grp: within each id, the running count of 1 -> 0 drops in y. The leading
# FALSE stands for the first row, which has no previous value to compare to.
df[["grp"]] <- ave(df[["y"]], df[["id"]], FUN = function(v) {
  prev <- head(v, -1)
  curr <- tail(v, -1)
  cumsum(c(FALSE, prev == 1 & curr == 0))
})
# Restart time at 0 at the beginning of every (id, grp) run.
df[["time"]] <- ave(
  df[["time"]], df[["id"]], df[["grp"]],
  FUN = function(v) v - v[1]
)
data.table
library(data.table)
# Work on a data.table copy of df: first tag, per id, each run of rows between
# 1 -> 0 drops of y (grp), then shift time so that every (id, grp) run starts
# at 0. The trailing [] makes the result print after the := assignments.
as.data.table(df)[
  , grp := cumsum(shift(y, n = 1L, fill = 0) == 1 & y == 0), by = id
][
  , time := time - time[1L], by = .(id, grp)
][]
另一个不涉及 time
变量的解决方案是:
# Alternative that does not read the original time values: time is rebuilt
# from the row position inside each run, using a fixed step of 500.
df %>%
  group_by(id) %>%
  # count is the running number of 1 -> 0 drops of y within this id; the
  # first row of an id is compared against default = 0, so it never counts.
  mutate(count = cumsum(y == 0 & lag(y, default = 0) == 1)) %>%
  group_by(id, count) %>%
  # First row of each (id, count) run gets 0, the next 500, and so on.
  mutate(time = 500 * (row_number() - 1)) %>%
  # Drop the grouping so later operations on the result are not silently
  # carried out per (id, count) group.
  ungroup()
id time y count
<int> <dbl> <dbl> <int>
1 1 0 0 0
2 1 500 0 0
3 1 1000 1 0
4 1 0 0 1
5 1 500 0 1
6 2 0 0 0
7 2 500 0 0
8 2 1000 1 0
9 2 1500 1 0
10 2 0 0 1
11 2 500 1 1
12 3 0 0 0
13 3 500 1 0
14 3 1000 1 0
15 3 0 0 1
16 3 500 1 1
17 3 1000 1 1
18 3 0 0 2