根据 data.table 中的条件查找索引
find index based on condition in data.table
我有这样的数据:
library(data.table)
dt1 <- data.table(
id = 1,
week = c( seq(1:260))
)
dt1[0:100, status := "A"][101:260, status := "B"]
dt2 <- data.table(
id = 2,
week = c( seq(1:260))
)
dt2[0:34, status := "A"][35:70, status := "B"][71:260, status := "A"]
dt3 <- data.table(
id = 3,
week = c( seq(1:260))
)
dt3[0:80, status := "A"][81:90, status := "B"][91:100, status := "A"][101:260, status := "B"]
data <- rbind(dt1,dt2,dt3)
我想为每个唯一 ID 找到索引,在此之前 52 周的时间段至少有 75%(39 周)的状态为“B”。我想要以下最终产品:
data_want <- data.table(
id = c(1,3),
week = c(153, 133)
)
对于 id 1,101 之后的 52 周(第一行状态==B)为 153,并且至少有 75% 的时间段状态==B。等等。
有什么建议吗?
如果我理解您的需求(并且忽略您的 data_want
),那么我认为这可行:
data[, fr := frollmean(status == "B", 52, align = "right", fill = 0), by = .(id)]
data[, .SD[shift(fr >= 0.75, type = "lead"),][1,], by = id]
# id week status fr
# <num> <int> <char> <num>
# 1: 1 138 B 0.7307692
# 2: 2 NA <NA> NA
# 3: 3 128 B 0.7307692
验证:
data[id == 1 & between(week, 130, 140),]
# id week status fr
# <num> <int> <char> <num>
# 1: 1 130 B 0.5769231
# 2: 1 131 B 0.5961538
# 3: 1 132 B 0.6153846
# 4: 1 133 B 0.6346154
# 5: 1 134 B 0.6538462
# 6: 1 135 B 0.6730769
# 7: 1 136 B 0.6923077
# 8: 1 137 B 0.7115385
# 9: 1 138 B 0.7307692
# 10: 1 139 B 0.7500000 <-- 52-week window had 75% "B"
# 11: 1 140 B 0.7692308
我有这样的数据:
library(data.table)
dt1 <- data.table(
id = 1,
week = c( seq(1:260))
)
dt1[0:100, status := "A"][101:260, status := "B"]
dt2 <- data.table(
id = 2,
week = c( seq(1:260))
)
dt2[0:34, status := "A"][35:70, status := "B"][71:260, status := "A"]
dt3 <- data.table(
id = 3,
week = c( seq(1:260))
)
dt3[0:80, status := "A"][81:90, status := "B"][91:100, status := "A"][101:260, status := "B"]
data <- rbind(dt1,dt2,dt3)
我想为每个唯一 ID 找到索引,在此之前 52 周的时间段至少有 75%(39 周)的状态为“B”。我想要以下最终产品:
data_want <- data.table(
id = c(1,3),
week = c(153, 133)
)
对于 id 1,101 之后的 52 周(第一行状态==B)为 153,并且至少有 75% 的时间段状态==B。等等。
有什么建议吗?
如果我理解您的需求(并且忽略您的 data_want
),那么我认为这可行:
data[, fr := frollmean(status == "B", 52, align = "right", fill = 0), by = .(id)]
data[, .SD[shift(fr >= 0.75, type = "lead"),][1,], by = id]
# id week status fr
# <num> <int> <char> <num>
# 1: 1 138 B 0.7307692
# 2: 2 NA <NA> NA
# 3: 3 128 B 0.7307692
验证:
data[id == 1 & between(week, 130, 140),]
# id week status fr
# <num> <int> <char> <num>
# 1: 1 130 B 0.5769231
# 2: 1 131 B 0.5961538
# 3: 1 132 B 0.6153846
# 4: 1 133 B 0.6346154
# 5: 1 134 B 0.6538462
# 6: 1 135 B 0.6730769
# 7: 1 136 B 0.6923077
# 8: 1 137 B 0.7115385
# 9: 1 138 B 0.7307692
# 10: 1 139 B 0.7500000 <-- 52-week window had 75% "B"
# 11: 1 140 B 0.7692308