根据匹配 ID 的值填充值
fill values based on value for matched IDs
我正在寻找 data.table
解决一个简单问题的方法。我有这样的数据
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) )
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
我想根据 exp==1 的 var
的值,为每个 exp==0 的唯一 match_id 填充 var
。数据结束:
data_want <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) ),
var = c(rep(data[1]$var,5), rep(data[6]$var,5), rep(data[11]$var,5), rep(data[16]$var,5))
)
使用 na_if
(从 dplyr
)将 0 转换为 NA
,然后使用 nafill
library(data.table)
library(dplyr)
data[, var := nafill(na_if(var, 0), type = "locf")]
-输出
> data
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 42.81752
2: 2 1 0 42.81752
3: 3 1 0 42.81752
4: 4 1 0 42.81752
5: 5 1 0 42.81752
6: 6 2 1 28.05760
7: 7 2 0 28.05760
8: 8 2 0 28.05760
9: 9 2 0 28.05760
10: 10 2 0 28.05760
11: 11 3 1 76.53963
12: 12 3 0 76.53963
13: 13 3 0 76.53963
14: 14 3 0 76.53963
15: 15 3 0 76.53963
16: 16 4 1 50.95047
17: 17 4 0 50.95047
18: 18 4 0 50.95047
19: 19 4 0 50.95047
20: 20 4 0 50.95047
id match_id exp var
或者另一种选择是
data[, var := var[var!=0][cumsum(exp)]]
data[, var:=var[which(exp==1)], by=match_id]
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 75.342705
2: 2 1 0 75.342705
3: 3 1 0 75.342705
4: 4 1 0 75.342705
5: 5 1 0 75.342705
6: 6 2 1 81.822968
7: 7 2 0 81.822968
8: 8 2 0 81.822968
9: 9 2 0 81.822968
10: 10 2 0 81.822968
11: 11 3 1 3.309884
12: 12 3 0 3.309884
13: 13 3 0 3.309884
14: 14 3 0 3.309884
15: 15 3 0 3.309884
16: 16 4 1 2.047301
17: 17 4 0 2.047301
18: 18 4 0 2.047301
19: 19 4 0 2.047301
20: 20 4 0 2.047301
id match_id exp var
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c(rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5)),
exp = c(1, rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4))
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
data[, var := max(var), by = list(match_id)][]
#> id match_id exp var
#> 1: 1 1 1 40.474630
#> 2: 2 1 0 40.474630
#> 3: 3 1 0 40.474630
#> 4: 4 1 0 40.474630
#> 5: 5 1 0 40.474630
#> 6: 6 2 1 91.434384
#> 7: 7 2 0 91.434384
#> 8: 8 2 0 91.434384
#> 9: 9 2 0 91.434384
#> 10: 10 2 0 91.434384
#> 11: 11 3 1 61.503224
#> 12: 12 3 0 61.503224
#> 13: 13 3 0 61.503224
#> 14: 14 3 0 61.503224
#> 15: 15 3 0 61.503224
#> 16: 16 4 1 8.480375
#> 17: 17 4 0 8.480375
#> 18: 18 4 0 8.480375
#> 19: 19 4 0 8.480375
#> 20: 20 4 0 8.480375
由 reprex package (v2.0.1)
于 2022-03-23 创建
我正在寻找 data.table
解决一个简单问题的方法。我有这样的数据
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) )
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
我想根据 exp==1 的 var
的值,为每个 exp==0 的唯一 match_id 填充 var
。数据结束:
data_want <- data.table(
id = seq(1:20),
match_id = c( rep(1,5), rep(2,5), rep(3,5), rep(4,5) ),
exp = c( 1, rep(0,4), 1 , rep(0,4), 1 , rep(0,4), 1 , rep(0,4) ),
var = c(rep(data[1]$var,5), rep(data[6]$var,5), rep(data[11]$var,5), rep(data[16]$var,5))
)
使用 na_if
(从 dplyr
)将 0 转换为 NA
,然后使用 nafill
library(data.table)
library(dplyr)
data[, var := nafill(na_if(var, 0), type = "locf")]
-输出
> data
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 42.81752
2: 2 1 0 42.81752
3: 3 1 0 42.81752
4: 4 1 0 42.81752
5: 5 1 0 42.81752
6: 6 2 1 28.05760
7: 7 2 0 28.05760
8: 8 2 0 28.05760
9: 9 2 0 28.05760
10: 10 2 0 28.05760
11: 11 3 1 76.53963
12: 12 3 0 76.53963
13: 13 3 0 76.53963
14: 14 3 0 76.53963
15: 15 3 0 76.53963
16: 16 4 1 50.95047
17: 17 4 0 50.95047
18: 18 4 0 50.95047
19: 19 4 0 50.95047
20: 20 4 0 50.95047
id match_id exp var
或者另一种选择是
data[, var := var[var!=0][cumsum(exp)]]
data[, var:=var[which(exp==1)], by=match_id]
id match_id exp var
<int> <num> <num> <num>
1: 1 1 1 75.342705
2: 2 1 0 75.342705
3: 3 1 0 75.342705
4: 4 1 0 75.342705
5: 5 1 0 75.342705
6: 6 2 1 81.822968
7: 7 2 0 81.822968
8: 8 2 0 81.822968
9: 9 2 0 81.822968
10: 10 2 0 81.822968
11: 11 3 1 3.309884
12: 12 3 0 3.309884
13: 13 3 0 3.309884
14: 14 3 0 3.309884
15: 15 3 0 3.309884
16: 16 4 1 2.047301
17: 17 4 0 2.047301
18: 18 4 0 2.047301
19: 19 4 0 2.047301
20: 20 4 0 2.047301
id match_id exp var
library(data.table)
data <- data.table(
id = seq(1:20),
match_id = c(rep(1, 5), rep(2, 5), rep(3, 5), rep(4, 5)),
exp = c(1, rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4), 1 , rep(0, 4))
)
data[, var := ifelse(exp==1, runif(1,min=0,max=100), 0), by = id]
data[, var := max(var), by = list(match_id)][]
#> id match_id exp var
#> 1: 1 1 1 40.474630
#> 2: 2 1 0 40.474630
#> 3: 3 1 0 40.474630
#> 4: 4 1 0 40.474630
#> 5: 5 1 0 40.474630
#> 6: 6 2 1 91.434384
#> 7: 7 2 0 91.434384
#> 8: 8 2 0 91.434384
#> 9: 9 2 0 91.434384
#> 10: 10 2 0 91.434384
#> 11: 11 3 1 61.503224
#> 12: 12 3 0 61.503224
#> 13: 13 3 0 61.503224
#> 14: 14 3 0 61.503224
#> 15: 15 3 0 61.503224
#> 16: 16 4 1 8.480375
#> 17: 17 4 0 8.480375
#> 18: 18 4 0 8.480375
#> 19: 19 4 0 8.480375
#> 20: 20 4 0 8.480375
由 reprex package (v2.0.1)
于 2022-03-23 创建