基于多列连接 DataFrames
Join DataFrames based on multiple columns
我想连接两个表。一个是 DataTable DT_1
,另一个是 DataFrame DF_2
。 DT_1
中的 name
和 value
列与 DF_2
的行和列相关。输出将是带有新列 expr
的 DT_1
,该列包含来自 DF_2
.
的相应值
View(DT_1)
CHR POS ID name value
<dbl> <chr> <chr> <chr> <dbl>
1 1 31439_T_A ENSG00000232325 indv1 1
2 1 31439_T_A ENSG00000232325 indv2 1
3 1 31439_T_A ENSG00000232325 indv3 2
4 1 31504_G_A ENSG00000242474 indv1 2
5 1 31504_G_A ENSG00000242474 indv2 1
6 1 31504_G_A ENSG00000242474 indv3 1
7 1 34713_A_C ENSG00000242474 indv1 1
8 1 34713_A_C ENSG00000242474 indv2 0
9 1 34713_A_C ENSG00000242474 indv3 1
10 1 34918_C_T ENSG00000242474 indv1 1
11 1 34918_C_T ENSG00000242474 indv2 0
12 1 34918_C_T ENSG00000242474 indv3 1
13 1 35119_G_A ENSG00000239715 indv1 0
14 1 35119_G_A ENSG00000239715 indv2 0
15 1 35119_G_A ENSG00000239715 indv3 1
view(DF_2)
name ENSG00000232325 ENSG00000242474 ENSG00000239715
indv1 0.1 0.4 0.22
indv2 0.3 -0.003 0.0001
indv3 -0.02 0.1 0.1
期望的输出:
> OUTPUT
CHR POS ID name value expr
<dbl> <chr> <chr> <chr> <dbl> <dbl>
1 1 31439_T_A ENSG00000232325 indv1 1 0.1
2 1 31439_T_A ENSG00000232325 indv2 1 0.3
3 1 31439_T_A ENSG00000232325 indv3 2 -0.02
4 1 31504_G_A ENSG00000242474 indv1 2 0.4
5 1 31504_G_A ENSG00000242474 indv2 1 -0.003
6 1 31504_G_A ENSG00000242474 indv3 1 0.1
7 1 34713_A_C ENSG00000242474 indv1 1 0.4
8 1 34713_A_C ENSG00000242474 indv2 0 -0.003
9 1 34713_A_C ENSG00000242474 indv3 1 0.1
10 1 34918_C_T ENSG00000242474 indv1 1 0.4
11 1 34918_C_T ENSG00000242474 indv2 0 -0.003
12 1 34918_C_T ENSG00000242474 indv3 1 0.1
13 1 35119_G_A ENSG00000239715 indv1 0 0.22
14 1 35119_G_A ENSG00000239715 indv2 0 0.0001
15 1 35119_G_A ENSG00000239715 indv3 1 0.1
谢谢!
library(tidyverse)
# create example data
DF_1 <- tribble(
~CHR, ~POS, ~ID, ~name, ~value,
1, "1 31439_T_A", "ENSG00000232325", "indv1", 1,
2, "1 31439_T_A", "ENSG00000232325", "indv2", 1,
3, "1 31439_T_A", "ENSG00000232325", "indv3", 2,
)
DF_2 <- tribble(
~name, ~ENSG00000232325, ~ENSG00000242474, ~ENSG00000239715,
"indv1", 0.1, 0.4, 0.22,
"indv2", 0.3, -0.003, 0.0001,
"indv3", -0.02, 0.1, 0.1
)
DF_2 %>%
pivot_longer(-name, names_to = "ID", values_to = "expr") %>%
inner_join(DF_1)
#> Joining, by = c("name", "ID")
#> # A tibble: 3 x 6
#> name ID expr CHR POS value
#> <chr> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 indv1 ENSG00000232325 0.1 1 1 31439_T_A 1
#> 2 indv2 ENSG00000232325 0.3 2 1 31439_T_A 1
#> 3 indv3 ENSG00000232325 -0.02 3 1 31439_T_A 2
由 reprex package (v2.0.1)
创建于 2022-02-08
data.table接近
library(data.table)
setDT(df1)[melt(setDT(df2), id="name",value.name = "expr"), on=.(name, ID=variable)]
输出
CHR POS ID name value expr
1: 1 1 31439_T_A ENSG00000232325 indv1 1 0.1000
2: 2 1 31439_T_A ENSG00000232325 indv2 1 0.3000
3: 3 1 31439_T_A ENSG00000232325 indv3 2 -0.0200
4: 4 1 31504_G_A ENSG00000242474 indv1 2 0.4000
5: 7 1 34713_A_C ENSG00000242474 indv1 1 0.4000
6: 10 1 34918_C_T ENSG00000242474 indv1 1 0.4000
7: 5 1 31504_G_A ENSG00000242474 indv2 1 -0.0030
8: 8 1 34713_A_C ENSG00000242474 indv2 0 -0.0030
9: 11 1 34918_C_T ENSG00000242474 indv2 0 -0.0030
10: 6 1 31504_G_A ENSG00000242474 indv3 1 0.1000
11: 9 1 34713_A_C ENSG00000242474 indv3 1 0.1000
12: 12 1 34918_C_T ENSG00000242474 indv3 1 0.1000
13: 13 1 35119_G_A ENSG00000239715 indv1 0 0.2200
14: 14 1 35119_G_A ENSG00000239715 indv2 0 0.0001
15: 15 1 35119_G_A ENSG00000239715 indv3 1 0.1000
我想连接两个表。一个是 DataTable DT_1
,另一个是 DataFrame DF_2
。 DT_1
中的 name
和 value
列与 DF_2
的行和列相关。输出将是带有新列 expr
的 DT_1
,该列包含来自 DF_2
.
View(DT_1)
CHR POS ID name value
<dbl> <chr> <chr> <chr> <dbl>
1 1 31439_T_A ENSG00000232325 indv1 1
2 1 31439_T_A ENSG00000232325 indv2 1
3 1 31439_T_A ENSG00000232325 indv3 2
4 1 31504_G_A ENSG00000242474 indv1 2
5 1 31504_G_A ENSG00000242474 indv2 1
6 1 31504_G_A ENSG00000242474 indv3 1
7 1 34713_A_C ENSG00000242474 indv1 1
8 1 34713_A_C ENSG00000242474 indv2 0
9 1 34713_A_C ENSG00000242474 indv3 1
10 1 34918_C_T ENSG00000242474 indv1 1
11 1 34918_C_T ENSG00000242474 indv2 0
12 1 34918_C_T ENSG00000242474 indv3 1
13 1 35119_G_A ENSG00000239715 indv1 0
14 1 35119_G_A ENSG00000239715 indv2 0
15 1 35119_G_A ENSG00000239715 indv3 1
view(DF_2)
name ENSG00000232325 ENSG00000242474 ENSG00000239715
indv1 0.1 0.4 0.22
indv2 0.3 -0.003 0.0001
indv3 -0.02 0.1 0.1
期望的输出:
> OUTPUT
CHR POS ID name value expr
<dbl> <chr> <chr> <chr> <dbl> <dbl>
1 1 31439_T_A ENSG00000232325 indv1 1 0.1
2 1 31439_T_A ENSG00000232325 indv2 1 0.3
3 1 31439_T_A ENSG00000232325 indv3 2 -0.02
4 1 31504_G_A ENSG00000242474 indv1 2 0.4
5 1 31504_G_A ENSG00000242474 indv2 1 -0.003
6 1 31504_G_A ENSG00000242474 indv3 1 0.1
7 1 34713_A_C ENSG00000242474 indv1 1 0.4
8 1 34713_A_C ENSG00000242474 indv2 0 -0.003
9 1 34713_A_C ENSG00000242474 indv3 1 0.1
10 1 34918_C_T ENSG00000242474 indv1 1 0.4
11 1 34918_C_T ENSG00000242474 indv2 0 -0.003
12 1 34918_C_T ENSG00000242474 indv3 1 0.1
13 1 35119_G_A ENSG00000239715 indv1 0 0.22
14 1 35119_G_A ENSG00000239715 indv2 0 0.0001
15 1 35119_G_A ENSG00000239715 indv3 1 0.1
谢谢!
library(tidyverse)
# create example data
DF_1 <- tribble(
~CHR, ~POS, ~ID, ~name, ~value,
1, "1 31439_T_A", "ENSG00000232325", "indv1", 1,
2, "1 31439_T_A", "ENSG00000232325", "indv2", 1,
3, "1 31439_T_A", "ENSG00000232325", "indv3", 2,
)
DF_2 <- tribble(
~name, ~ENSG00000232325, ~ENSG00000242474, ~ENSG00000239715,
"indv1", 0.1, 0.4, 0.22,
"indv2", 0.3, -0.003, 0.0001,
"indv3", -0.02, 0.1, 0.1
)
DF_2 %>%
pivot_longer(-name, names_to = "ID", values_to = "expr") %>%
inner_join(DF_1)
#> Joining, by = c("name", "ID")
#> # A tibble: 3 x 6
#> name ID expr CHR POS value
#> <chr> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 indv1 ENSG00000232325 0.1 1 1 31439_T_A 1
#> 2 indv2 ENSG00000232325 0.3 2 1 31439_T_A 1
#> 3 indv3 ENSG00000232325 -0.02 3 1 31439_T_A 2
由 reprex package (v2.0.1)
创建于 2022-02-08data.table接近
library(data.table)
setDT(df1)[melt(setDT(df2), id="name",value.name = "expr"), on=.(name, ID=variable)]
输出
CHR POS ID name value expr
1: 1 1 31439_T_A ENSG00000232325 indv1 1 0.1000
2: 2 1 31439_T_A ENSG00000232325 indv2 1 0.3000
3: 3 1 31439_T_A ENSG00000232325 indv3 2 -0.0200
4: 4 1 31504_G_A ENSG00000242474 indv1 2 0.4000
5: 7 1 34713_A_C ENSG00000242474 indv1 1 0.4000
6: 10 1 34918_C_T ENSG00000242474 indv1 1 0.4000
7: 5 1 31504_G_A ENSG00000242474 indv2 1 -0.0030
8: 8 1 34713_A_C ENSG00000242474 indv2 0 -0.0030
9: 11 1 34918_C_T ENSG00000242474 indv2 0 -0.0030
10: 6 1 31504_G_A ENSG00000242474 indv3 1 0.1000
11: 9 1 34713_A_C ENSG00000242474 indv3 1 0.1000
12: 12 1 34918_C_T ENSG00000242474 indv3 1 0.1000
13: 13 1 35119_G_A ENSG00000239715 indv1 0 0.2200
14: 14 1 35119_G_A ENSG00000239715 indv2 0 0.0001
15: 15 1 35119_G_A ENSG00000239715 indv3 1 0.1000