当唯一行 ID 和列名都匹配时,从一个 data.table 中减去另一个 data.table
Subtract data.table from another data.table when unique row ID and column names match
我想从 dt1 中减去 dt2,以生成 dt3 中所示的输出。然后我想对 dt3 取子集,只保留包含负值的行(如 dt4 所示)。
# data.table() is used below, so the package must be attached first.
library(data.table)

# Minuend: one row per UID with four numeric columns.
dt1 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
  Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
  Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Subtrahend: covers only some of dt1's UIDs and has no Var4 column.
dt2 <- data.table(
  UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
  Var1 = c(10, 20, 30, 40, 50, 950),
  Var2 = c(100, 2500, 300, 400, 500, 600),
  Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
)

# Expected result of dt1 - dt2, matched on UID and column name. Cells with no
# counterpart in dt2 (UIDs A002/B002/C002 and all of Var4) keep dt1's value.
dt3 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(90, 200, 280, 370, 500, 560, 650, 800, -50),
  # A003: 3000 - 2500 = 500 (positive, not -500).
  Var2 = c(900, 2000, 500, 3700, 5000, 5600, 6500, 8000, 8400),
  Var3 = c(9000, 20000, 28000, 37000, 50000, 56000, 65000, 80000, 84000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Expected subset: rows of dt3 containing at least one negative value.
# Only C003 (row 9, Var1 = -50) qualifies.
dt4 <- dt3[9, ]
我发现先将其转换为长格式更容易。
library(data.table)

# Example inputs: dt1 is the minuend; dt2 is the subtrahend and covers only
# some of dt1's UIDs and columns.
dt1 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
  Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
  Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)
dt2 <- data.table(
  UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
  Var1 = c(10, 20, 30, 40, 50, 950),
  Var2 = c(100, 2500, 300, 400, 500, 600),
  Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
)

# Melt both tables to long format and join on (UID, variable) so that every
# dt1 cell (i.value) is paired with its dt2 counterpart (value). Cells with no
# match in dt2 get NA, which na.rm = TRUE treats as "subtract nothing".
# The result goes into its own object so the input dt2 is not overwritten.
long_diff <- melt(dt2, "UID")[melt(dt1, "UID"), on = c("UID", "variable")][
  , .(value = sum(i.value, -1 * value, na.rm = TRUE)), by = .(UID, variable)]

# Back to wide format; value.var is given explicitly so dcast does not guess.
dt3 <- dcast(long_diff, UID ~ variable, value.var = "value")

# Keep only the UIDs that have at least one negative difference.
dt4 <- dt3[UID %in% long_diff[value < 0, UID]]
另一种可能的方法:
# Work on a copy: := below updates by reference and would otherwise alter dt1.
DT <- copy(dt1)

# Subtracting: update join on UID. For rows of DT that match a row of dt2,
# replace each shared column with DT's value minus dt2's value (the "i."
# prefix refers to dt2's columns). Unmatched rows and columns stay unchanged.
# Assumes dt2 is the wide table (UID plus value columns also present in dt1).
cols <- setdiff(names(dt2), "UID")
DT[dt2, on = .(UID), (cols) := .SD - mget(paste0("i.", cols)), .SDcols = cols]

# Subsetting: keep rows with a negative value in any numeric column. The
# character UID column is excluded, because `"A001" < 0` would be a string
# comparison against "0" whose result depends on the collation order.
num_cols <- names(DT)[vapply(DT, is.numeric, logical(1))]
has_negative <- Reduce(`|`, lapply(DT[, num_cols, with = FALSE], `<`, 0))
DT[has_negative]
输出:
UID Var1 Var2 Var3 Var4
1: C003 -50 8400 84000 95000
我想从 dt1 中减去 dt2,以生成 dt3 中所示的输出。然后我想对 dt3 取子集,只保留包含负值的行(如 dt4 所示)。
# data.table() is used below, so the package must be attached first.
library(data.table)

# Minuend: one row per UID with four numeric columns.
dt1 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
  Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
  Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Subtrahend: covers only some of dt1's UIDs and has no Var4 column.
dt2 <- data.table(
  UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
  Var1 = c(10, 20, 30, 40, 50, 950),
  Var2 = c(100, 2500, 300, 400, 500, 600),
  Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
)

# Expected result of dt1 - dt2, matched on UID and column name. Cells with no
# counterpart in dt2 (UIDs A002/B002/C002 and all of Var4) keep dt1's value.
dt3 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(90, 200, 280, 370, 500, 560, 650, 800, -50),
  # A003: 3000 - 2500 = 500 (positive, not -500).
  Var2 = c(900, 2000, 500, 3700, 5000, 5600, 6500, 8000, 8400),
  Var3 = c(9000, 20000, 28000, 37000, 50000, 56000, 65000, 80000, 84000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Expected subset: rows of dt3 containing at least one negative value.
# Only C003 (row 9, Var1 = -50) qualifies.
dt4 <- dt3[9, ]
我发现先将其转换为长格式更容易。
library(data.table)

# Example inputs: dt1 is the minuend; dt2 is the subtrahend and covers only
# some of dt1's UIDs and columns.
dt1 <- data.table(
  UID = c("A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"),
  Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
  Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
  Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)
dt2 <- data.table(
  UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
  Var1 = c(10, 20, 30, 40, 50, 950),
  Var2 = c(100, 2500, 300, 400, 500, 600),
  Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
)

# Melt both tables to long format and join on (UID, variable) so that every
# dt1 cell (i.value) is paired with its dt2 counterpart (value). Cells with no
# match in dt2 get NA, which na.rm = TRUE treats as "subtract nothing".
# The result goes into its own object so the input dt2 is not overwritten.
long_diff <- melt(dt2, "UID")[melt(dt1, "UID"), on = c("UID", "variable")][
  , .(value = sum(i.value, -1 * value, na.rm = TRUE)), by = .(UID, variable)]

# Back to wide format; value.var is given explicitly so dcast does not guess.
dt3 <- dcast(long_diff, UID ~ variable, value.var = "value")

# Keep only the UIDs that have at least one negative difference.
dt4 <- dt3[UID %in% long_diff[value < 0, UID]]
另一种可能的方法:
# Work on a copy: := below updates by reference and would otherwise alter dt1.
DT <- copy(dt1)

# Subtracting: update join on UID. For rows of DT that match a row of dt2,
# replace each shared column with DT's value minus dt2's value (the "i."
# prefix refers to dt2's columns). Unmatched rows and columns stay unchanged.
# Assumes dt2 is the wide table (UID plus value columns also present in dt1).
cols <- setdiff(names(dt2), "UID")
DT[dt2, on = .(UID), (cols) := .SD - mget(paste0("i.", cols)), .SDcols = cols]

# Subsetting: keep rows with a negative value in any numeric column. The
# character UID column is excluded, because `"A001" < 0` would be a string
# comparison against "0" whose result depends on the collation order.
num_cols <- names(DT)[vapply(DT, is.numeric, logical(1))]
has_negative <- Reduce(`|`, lapply(DT[, num_cols, with = FALSE], `<`, 0))
DT[has_negative]
输出:
UID Var1 Var2 Var3 Var4
1: C003 -50 8400 84000 95000