按行号和行名匹配 2 个数据帧,并在匹配时从第一个 df 中提取值
matching 2 data frames by row number and row name and extracting values from the first df when a match occurs
如果标题不够清楚,或者我解释得不够好,先说声抱歉。
我有一个评分矩阵作为数据框,如下所示:
1 2 3 4 5 6 7 8 9 10
L 40.220674 17.3635308 17.3635308 17.3635308 9.867452 0 0.0000000 0.000000 0.0000000 0.0000000
M 29.589501 19.1056911 19.1056911 19.1056911 14.285714 0 10.0000000 6.842105 1.4736842 0.1052632
I 13.761672 10.1045296 10.1045296 10.1045296 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
Y 25.085714 21.4285714 21.4285714 21.4285714 12.223859 0 0.0000000 0.000000 0.0000000 0.0000000
W 3.555865 0.8130081 0.8130081 0.8130081 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
K 2.700859 0.2322880 0.2322880 0.2322880 1.325479 0 2.6315789 3.684211 2.6315789 2.1052632
S 8.739141 6.9105691 6.9105691 6.9105691 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
V 1.969431 0.2322880 0.2322880 0.2322880 0.000000 0 3.4736842 3.684211 2.5263158 0.1052632
每行对应一个不同的氨基酸,每列是该氨基酸在肽中的位置。
我还有一个包含多条肽的数据框(df),每一列是一条肽,每一行给出该肽在对应位置上的氨基酸。
pep_1 pep_2 pep_3
1 M A C
2 A C L
3 C L W
4 L W S
5 W S F
6 S F S
7 F S W
8 S W P
9 W P S
10 P S C
11 S C F
12 C F L
13 F L S
14 L S L
我想把每条肽与评分矩阵逐位置匹配:对肽的每个位置,如果该位置上的氨基酸(对应评分矩阵的行名)和位置编号(对应评分矩阵的列名)在评分矩阵中存在,就取出对应的分值;最后把每条肽取到的所有分值加起来。
我尝试使用 plyr::match_df 没有成功。
是否有更高阶的函数或包可以完成此任务?欢迎提出任何建议。
谢谢!
我们可以先用 pivot_longer 将两个数据集重塑为长('long')格式,再用 left_join 按匹配列(位置和氨基酸)进行连接,最后用 pivot_wider 将结果重塑回宽('wide')格式。
评分矩阵中没有对应位置或氨基酸的单元格结果为 NA;如需每条肽的总分,可再对各肽列求和并设置 na.rm = TRUE。
library(dplyr)
library(tidyr)
library(tibble)
# Look up, for every peptide in `df2`, the score that `df1` holds for the
# residue found at each position. Both tables are reshaped to long format,
# joined on position (`rn`) and residue (`pep`), and the scores are spread
# back to one column per peptide. Cells with no matching position/residue
# in `df1` are left as NA by the left join.
df2 %>%
  # `rn` is the position of each residue within its peptide
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn, names_to = "name", values_to = "pep") %>%
  left_join(
    df1 %>%
      rownames_to_column("pep") %>%
      pivot_longer(cols = -pep, names_to = "rn", values_to = "value") %>%
      # column names are position labels; convert them to match integer `rn`
      mutate(rn = as.integer(rn)),
    # Explicit keys: avoids the implicit natural join and its message
    by = c("rn", "pep")
  ) %>%
  select(-pep) %>%
  pivot_wider(names_from = name, values_from = value)
数据
# Scoring matrix as a data frame: row names are amino-acid letters, column
# names are the position labels "1".."10". `check.names = FALSE` keeps the
# numeric-looking column names as they are; column `6` stays integer.
df1 <- data.frame(
  `1` = c(
    40.220674, 29.589501, 13.761672, 25.085714,
    3.555865, 2.700859, 8.739141, 1.969431
  ),
  `2` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `3` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `4` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `5` = c(9.867452, 14.285714, 0, 12.223859, 0, 1.325479, 0, 0),
  `6` = integer(8),
  `7` = c(0, 10, 0, 0, 0, 2.6315789, 0, 3.4736842),
  `8` = c(0, 6.842105, 0, 0, 0, 3.684211, 0, 3.684211),
  `9` = c(0, 1.4736842, 0, 0, 0, 2.6315789, 0, 2.5263158),
  `10` = c(0, 0.1052632, 0, 0, 0, 2.1052632, 0, 0.1052632),
  check.names = FALSE,
  row.names = c("L", "M", "I", "Y", "W", "K", "S", "V")
)
# Peptide table: one column per peptide, one row per position (row names
# "1".."14"), each cell a single-letter residue. Each column is spelled as
# one string and split into individual characters.
df2 <- data.frame(
  pep_1 = strsplit("MACLWSFSWPSCFL", "")[[1]],
  pep_2 = strsplit("ACLWSFSWPSCFLS", "")[[1]],
  pep_3 = strsplit("CLWSFSWPSCFLSL", "")[[1]],
  row.names = as.character(1:14),
  stringsAsFactors = FALSE
)
如果标题不够清楚,或者我解释得不够好,先说声抱歉。
我有一个评分矩阵作为数据框,如下所示:
1 2 3 4 5 6 7 8 9 10
L 40.220674 17.3635308 17.3635308 17.3635308 9.867452 0 0.0000000 0.000000 0.0000000 0.0000000
M 29.589501 19.1056911 19.1056911 19.1056911 14.285714 0 10.0000000 6.842105 1.4736842 0.1052632
I 13.761672 10.1045296 10.1045296 10.1045296 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
Y 25.085714 21.4285714 21.4285714 21.4285714 12.223859 0 0.0000000 0.000000 0.0000000 0.0000000
W 3.555865 0.8130081 0.8130081 0.8130081 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
K 2.700859 0.2322880 0.2322880 0.2322880 1.325479 0 2.6315789 3.684211 2.6315789 2.1052632
S 8.739141 6.9105691 6.9105691 6.9105691 0.000000 0 0.0000000 0.000000 0.0000000 0.0000000
V 1.969431 0.2322880 0.2322880 0.2322880 0.000000 0 3.4736842 3.684211 2.5263158 0.1052632
每行对应一个不同的氨基酸,每列是该氨基酸在肽中的位置。
我还有一个包含多条肽的数据框(df),每一列是一条肽,每一行给出该肽在对应位置上的氨基酸。
pep_1 pep_2 pep_3
1 M A C
2 A C L
3 C L W
4 L W S
5 W S F
6 S F S
7 F S W
8 S W P
9 W P S
10 P S C
11 S C F
12 C F L
13 F L S
14 L S L
我想把每条肽与评分矩阵逐位置匹配:对肽的每个位置,如果该位置上的氨基酸(对应评分矩阵的行名)和位置编号(对应评分矩阵的列名)在评分矩阵中存在,就取出对应的分值;最后把每条肽取到的所有分值加起来。
我尝试使用 plyr::match_df 没有成功。
是否有更高阶的函数或包可以完成此任务?欢迎提出任何建议。
谢谢!
我们可以先用 pivot_longer 将两个数据集重塑为长('long')格式,再用 left_join 按匹配列(位置和氨基酸)进行连接,最后用 pivot_wider 将结果重塑回宽('wide')格式。
评分矩阵中没有对应位置或氨基酸的单元格结果为 NA;如需每条肽的总分,可再对各肽列求和并设置 na.rm = TRUE。
library(dplyr)
library(tidyr)
library(tibble)
# Look up, for every peptide in `df2`, the score that `df1` holds for the
# residue found at each position. Both tables are reshaped to long format,
# joined on position (`rn`) and residue (`pep`), and the scores are spread
# back to one column per peptide. Cells with no matching position/residue
# in `df1` are left as NA by the left join.
df2 %>%
  # `rn` is the position of each residue within its peptide
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn, names_to = "name", values_to = "pep") %>%
  left_join(
    df1 %>%
      rownames_to_column("pep") %>%
      pivot_longer(cols = -pep, names_to = "rn", values_to = "value") %>%
      # column names are position labels; convert them to match integer `rn`
      mutate(rn = as.integer(rn)),
    # Explicit keys: avoids the implicit natural join and its message
    by = c("rn", "pep")
  ) %>%
  select(-pep) %>%
  pivot_wider(names_from = name, values_from = value)
数据
# Scoring matrix as a data frame: row names are amino-acid letters, column
# names are the position labels "1".."10". `check.names = FALSE` keeps the
# numeric-looking column names as they are; column `6` stays integer.
df1 <- data.frame(
  `1` = c(
    40.220674, 29.589501, 13.761672, 25.085714,
    3.555865, 2.700859, 8.739141, 1.969431
  ),
  `2` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `3` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `4` = c(
    17.3635308, 19.1056911, 10.1045296, 21.4285714,
    0.8130081, 0.232288, 6.9105691, 0.232288
  ),
  `5` = c(9.867452, 14.285714, 0, 12.223859, 0, 1.325479, 0, 0),
  `6` = integer(8),
  `7` = c(0, 10, 0, 0, 0, 2.6315789, 0, 3.4736842),
  `8` = c(0, 6.842105, 0, 0, 0, 3.684211, 0, 3.684211),
  `9` = c(0, 1.4736842, 0, 0, 0, 2.6315789, 0, 2.5263158),
  `10` = c(0, 0.1052632, 0, 0, 0, 2.1052632, 0, 0.1052632),
  check.names = FALSE,
  row.names = c("L", "M", "I", "Y", "W", "K", "S", "V")
)
# Peptide table: one column per peptide, one row per position (row names
# "1".."14"), each cell a single-letter residue. Each column is spelled as
# one string and split into individual characters.
df2 <- data.frame(
  pep_1 = strsplit("MACLWSFSWPSCFL", "")[[1]],
  pep_2 = strsplit("ACLWSFSWPSCFLS", "")[[1]],
  pep_3 = strsplit("CLWSFSWPSCFLSL", "")[[1]],
  row.names = as.character(1:14),
  stringsAsFactors = FALSE
)