如何按匹配变量对两个长度不同的数据框做减法

How to subtract a value using two different data frames of different lengths with matching variables

我有两个数据集。我想通过变量 A 匹配数据集,然后用数据集 2 的值减去数据集 1 中对应的值。

# Lookup table: one value of B per key A.
df1 <- data.frame(
  A = c("1", "2", "3"),
  B = c(10, 20, 30)
)

# Detail table: every key A appears three times.
df2 <- data.frame(
  A = rep(c("1", "2", "3"), each = 3),
  B = rep(c(1, 2, 3), times = 3),
  C = c(100, 125, 150, 100, 150, 200, 100, 200, 300)
)

我希望 df2 增加一列 "D",其值为按 A 列匹配后的 df2$C - df1$B。例如:100-10、125-10、150-10、100-20、150-20……

# Desired result: df2 plus a column D holding C minus the df1 value of B
# that belongs to the same key A.
df2 <- data.frame(
  A = rep(c("1", "2", "3"), each = 3),
  B = rep(c(1, 2, 3), times = 3),
  C = c(100, 125, 150, 100, 150, 200, 100, 200, 300),
  D = c(90, 115, 140, 80, 130, 180, 70, 170, 270)
)

我应该如何创建 df2$D?

使用 base R:

# Lookup table: one value of B per key A.
df1 <- data.frame(
  A = c("1", "2", "3"),
  B = c(10, 20, 30)
)

# Detail table: every key A appears three times.
df2 <- data.frame(
  A = rep(c("1", "2", "3"), each = 3),
  B = rep(c(1, 2, 3), times = 3),
  C = c(100, 125, 150, 100, 150, 200, 100, 200, 300)
)

# Join on A. Both inputs have a column B, so merge() renames them to
# B.x (from df1) and B.y (from df2).
df <- merge(df1, df2, by = "A")

# D is df2's C minus the matching df1 B; B restores df2's own B.
df <- transform(df, D = C - B.x, B = B.y)

# Keep only the final columns, dropping the suffixed helpers.
df <- df[, c("A", "C", "D", "B")]

> df
  A   C   D B
1 1 100  90 1
2 1 125 115 2
3 1 150 140 3
4 2 100  80 1
5 2 150 130 2
6 2 200 180 3
7 3 100  70 1
8 3 200 170 2
9 3 300 270 3

使用 data.table 您可以直接更新联接:

library(data.table)

# setDT() converts its argument to a data.table by reference, so the
# result does not need to be assigned back.
setDT(df1)
setDT(df2)

# Update join: for each df2 row, find the df1 row with the same A and add
# D in place. The i. prefix picks B from df1 (the table in the i slot).
df2[df1, on = .(A), D := C - i.B]

> df2
   A B   C   D
1: 1 1 100  90
2: 1 2 125 115
3: 1 3 150 140
4: 2 1 100  80
5: 2 2 150 130
6: 2 3 200 180
7: 3 1 100  70
8: 3 2 200 170
9: 3 3 300 270

dplyr:

library(dplyr)

# Join with dplyr's inner_join() rather than base merge(). Only df1's B
# gets a suffix, so df2's B keeps its name and needs no renaming afterwards.
# The result has columns A, B, C, D, as asked for in the question.
df2 %>%
  inner_join(df1, by = "A", suffix = c("", ".df1")) %>%
  mutate(D = C - B.df1) %>%
  select(-B.df1)

使用 base R 的 match:

# match() gives, for every df2 row, the position of its key A in df1;
# indexing df1$B with that aligns the lookup values to df2's rows.
df2$D <- df2$C - df1$B[match(df2$A, df1$A)]