通过具有唯一值的一列合并两个数据框
Merge two data frames by one column with unique values
这个问题应该很容易回答,但我没有找到已有的相同提问。
假设我们有两个数据框 df1 和 df2。
> df1 <- data.frame(a=c(1,2,3,4,5),
+ b=c(11,22,33,44,55))
> df1
a b
1 1 11
2 2 22
3 3 33
4 4 44
5 5 55
和
> df2 <- data.frame(a=c(1,1,1,2,2,3,4,4,4,5,5),
+ c=c("abc","abc","abc","def","def","ghi","jkl","jkl","jkl","mno","mno"),
+ d=c("x","y","z","x","y","x","x","y","z","x","y"))
> df2
a c d
1 1 abc x
2 1 abc y
3 1 abc z
4 2 def x
5 2 def y
6 3 ghi x
7 4 jkl x
8 4 jkl y
9 4 jkl z
10 5 mno x
11 5 mno y
我想合并这两个数据框,但只把列 'c' 添加到 df1 中。
期望的结果像这样:
> df3 <- data.frame(a=c(1,2,3,4,5),
+ b=c(11,22,33,44,55),
+ c=c("abc","def","ghi","jkl","mno"))
> df3
a b c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno
我已经尝试过使用 merge 函数做左连接。
但这样得到的是下面的结果(df1 的行被重复了):
> df5 <- merge(x = df1, y = df2[ , c("a", "c")], by = "a", all.x = TRUE)
> df5
a b c
1 1 11 abc
2 1 11 abc
3 1 11 abc
4 2 22 def
5 2 22 def
6 3 33 ghi
7 4 44 jkl
8 4 44 jkl
9 4 44 jkl
10 5 55 mno
11 5 55 mno
非常感谢您的帮助!
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example inputs from the question.
# df1 has exactly one row per key `a`.
df1 <- data.frame(
  a = c(1, 2, 3, 4, 5),
  b = c(11, 22, 33, 44, 55)
)

# df2 repeats each key 3, 2, 1, 3 and 2 times; `c` is constant within a key
# and `d` enumerates "x", "y", "z" inside each key group.
df2 <- data.frame(
  a = rep(c(1, 2, 3, 4, 5), times = c(3, 2, 1, 3, 2)),
  c = rep(c("abc", "def", "ghi", "jkl", "mno"), times = c(3, 2, 1, 3, 2)),
  d = c("x", "y", "z")[sequence(c(3, 2, 1, 3, 2))]
)
# Reduce df2 to one row per (a, c) pair *before* joining. De-duplicating the
# lookup instead of the joined result means the join cannot fan out, and
# genuine duplicate rows in df1 are not collapsed by accident.
lookup <- df2 |>
  distinct(a, c)

# left_join keeps every row of df1; a key with no match in the lookup gets
# c = NA instead of being dropped (the same intent as merge(all.x = TRUE)).
df1 |>
  left_join(lookup, by = "a")
#> a b c
#> 1 1 11 abc
#> 2 2 22 def
#> 3 3 33 ghi
#> 4 4 44 jkl
#> 5 5 55 mno
由 reprex 包 (v2.0.1) 于 2022-04-20 创建
获得该结果的一种方法是(使用问题中提供的 df1 和 df2):
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(tidyverse)

# Keep only the join key and the wanted column from df2, join onto df1, then
# drop the repeated rows caused by the repeated keys in df2.
df3 <- df1 %>%
  inner_join(select(df2, a, c), by = "a") %>%
  unique()
我最初使用的是内连接(inner_join),但 left_join 同样可行。
另一种方法是先创建 df2 的去重子集,再与 df1 连接:
# Lookup table: keep only the join key and the wanted column from df2, then
# drop the repeated rows.
df2b <- unique(select(df2, a, c))

# Attach column `c` to df1 by key `a`, keeping every row of df1.
df3b <- df1 %>%
  left_join(df2b, by = "a")
我想你可以像下面这样使用 merge
> merge(df1,unique(df2[c("a","c")]))
a b c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno
这个问题应该很容易回答,但我没有找到已有的相同提问。假设我们有两个数据框 df1 和 df2。
> df1 <- data.frame(a=c(1,2,3,4,5),
+ b=c(11,22,33,44,55))
> df1
a b
1 1 11
2 2 22
3 3 33
4 4 44
5 5 55
和
> df2 <- data.frame(a=c(1,1,1,2,2,3,4,4,4,5,5),
+ c=c("abc","abc","abc","def","def","ghi","jkl","jkl","jkl","mno","mno"),
+ d=c("x","y","z","x","y","x","x","y","z","x","y"))
> df2
a c d
1 1 abc x
2 1 abc y
3 1 abc z
4 2 def x
5 2 def y
6 3 ghi x
7 4 jkl x
8 4 jkl y
9 4 jkl z
10 5 mno x
11 5 mno y
我想合并这两个数据框,但只把列 'c' 添加到 df1 中。期望的结果像这样:
> df3 <- data.frame(a=c(1,2,3,4,5),
+ b=c(11,22,33,44,55),
+ c=c("abc","def","ghi","jkl","mno"))
> df3
a b c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno
我已经尝试过使用 merge 函数做左连接。但这样得到的是下面的结果(df1 的行被重复了):
> df5 <- merge(x = df1, y = df2[ , c("a", "c")], by = "a", all.x = TRUE)
> df5
a b c
1 1 11 abc
2 1 11 abc
3 1 11 abc
4 2 22 def
5 2 22 def
6 3 33 ghi
7 4 44 jkl
8 4 44 jkl
9 4 44 jkl
10 5 55 mno
11 5 55 mno
非常感谢您的帮助!
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example inputs from the question.
# df1 has exactly one row per key `a`.
df1 <- data.frame(
  a = c(1, 2, 3, 4, 5),
  b = c(11, 22, 33, 44, 55)
)

# df2 repeats each key 3, 2, 1, 3 and 2 times; `c` is constant within a key
# and `d` enumerates "x", "y", "z" inside each key group.
df2 <- data.frame(
  a = rep(c(1, 2, 3, 4, 5), times = c(3, 2, 1, 3, 2)),
  c = rep(c("abc", "def", "ghi", "jkl", "mno"), times = c(3, 2, 1, 3, 2)),
  d = c("x", "y", "z")[sequence(c(3, 2, 1, 3, 2))]
)
# Reduce df2 to one row per (a, c) pair *before* joining. De-duplicating the
# lookup instead of the joined result means the join cannot fan out, and
# genuine duplicate rows in df1 are not collapsed by accident.
lookup <- df2 |>
  distinct(a, c)

# left_join keeps every row of df1; a key with no match in the lookup gets
# c = NA instead of being dropped (the same intent as merge(all.x = TRUE)).
df1 |>
  left_join(lookup, by = "a")
#> a b c
#> 1 1 11 abc
#> 2 2 22 def
#> 3 3 33 ghi
#> 4 4 44 jkl
#> 5 5 55 mno
由 reprex 包 (v2.0.1) 于 2022-04-20 创建
获得该结果的一种方法是(使用问题中提供的 df1 和 df2):
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later with a confusing message.
library(tidyverse)

# Keep only the join key and the wanted column from df2, join onto df1, then
# drop the repeated rows caused by the repeated keys in df2.
df3 <- df1 %>%
  inner_join(select(df2, a, c), by = "a") %>%
  unique()
我最初使用的是内连接(inner_join),但 left_join 同样可行。
另一种方法是先创建 df2 的去重子集,再与 df1 连接:
# Lookup table: keep only the join key and the wanted column from df2, then
# drop the repeated rows.
df2b <- unique(select(df2, a, c))

# Attach column `c` to df1 by key `a`, keeping every row of df1.
df3b <- df1 %>%
  left_join(df2b, by = "a")
我想你可以像下面这样使用 merge
> merge(df1,unique(df2[c("a","c")]))
a b c
1 1 11 abc
2 2 22 def
3 3 33 ghi
4 4 44 jkl
5 5 55 mno