通过具有唯一值的一列合并两个数据框

Merge two data frames by one column with unique values

这个问题应该很容易回答,但我没有找到已有的相关提问。假设我们有两个数据框 df1 和 df2。

> df1 <- data.frame(a=c(1,2,3,4,5),
+                   b=c(11,22,33,44,55))
> df1
  a  b
1 1 11
2 2 22
3 3 33
4 4 44
5 5 55

> df2 <- data.frame(a=c(1,1,1,2,2,3,4,4,4,5,5),
+                   c=c("abc","abc","abc","def","def","ghi","jkl","jkl","jkl","mno","mno"),
+                   d=c("x","y","z","x","y","x","x","y","z","x","y"))
> df2
   a   c d
1  1 abc x
2  1 abc y
3  1 abc z
4  2 def x
5  2 def y
6  3 ghi x
7  4 jkl x
8  4 jkl y
9  4 jkl z
10 5 mno x
11 5 mno y

我想合并两个数据框,以便只将列 'c' 添加到 df1。 像这样:

> df3 <- data.frame(a=c(1,2,3,4,5),
+                   b=c(11,22,33,44,55),
+                   c=c("abc","def","ghi","jkl","mno"))
> df3
  a  b   c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno

我已经尝试用 merge 函数做左连接。但在这种情况下,我得到的是下面的结果(df1 的行被重复了):

> df5 <- merge(x = df1, y = df2[ , c("a", "c")], by = "a", all.x = TRUE)
> df5
   a  b   c
1  1 11 abc
2  1 11 abc
3  1 11 abc
4  2 22 def
5  2 22 def
6  3 33 ghi
7  4 44 jkl
8  4 44 jkl
9  4 44 jkl
10 5 55 mno
11 5 55 mno

非常感谢您的帮助!

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Lookup table: every key in `a` appears exactly once.
df1 <- data.frame(
  a = c(1, 2, 3, 4, 5),
  b = c(11, 22, 33, 44, 55)
)

# Detail table: each key in `a` repeats (3, 2, 1, 3, 2 times) and always
# carries the same `c` label, while `d` differs from row to row.
df2 <- data.frame(
  a = rep(c(1, 2, 3, 4, 5), times = c(3, 2, 1, 3, 2)),
  c = rep(c("abc", "def", "ghi", "jkl", "mno"), times = c(3, 2, 1, 3, 2)),
  d = c("x", "y", "z", "x", "y", "x", "x", "y", "z", "x", "y")
)

# Reduce df2 to its unique (a, c) pairs *before* joining, so every key in df1
# matches at most one row. Joining first would fan df1 out to one row per
# df2 match and then depend on distinct() to collapse the copies again, which
# would also collapse any genuinely duplicated rows of df1.
df1 |>
  inner_join(distinct(df2, a, c), by = "a")
#>   a  b   c
#> 1 1 11 abc
#> 2 2 22 def
#> 3 3 33 ghi
#> 4 4 44 jkl
#> 5 5 55 mno

由 reprex 包 (v2.0.1) 于 2022-04-20 创建

获得此结果的一种方法是(使用上面提供的相同 df1 和 df2):

# library() stops with an error when the package is missing; require() only
# returns FALSE with a warning, so the failure would surface later at the
# first tidyverse call instead of here.
library(tidyverse)

# Keep only the join key and the wanted column from df2, join onto df1, then
# drop the duplicate rows produced by keys that repeat in df2.
df3 <- unique(inner_join(df1, select(df2, a, c), by = "a"))

我最初使用的是内连接(inner_join),但 left_join 同样可行。

另一种方法是先创建 df2 的去重子集,再进行连接:
# Unique (a, c) pairs of df2: one row per key, so the join below cannot
# multiply the rows of df1.
df2b <- unique(select(df2, a, c))

# Attach the `c` label to every row of df1 by key `a`.
df3b <- left_join(df1, df2b, by = "a")

我想你可以像下面这样使用 merge

> merge(df1,unique(df2[c("a","c")]))
  a  b   c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno