R:根据另一个数据框过滤数据框

R: Filter a dataframe based on another dataframe

我想根据数据框 pf 对数据框 e 取子集。这两个数据框具有相同的行名。但我的代码会多出一个“row_names”列,这不是我想要的。

library(dplyr)

# Attempt to subset `e` using the row names it shares with `pf`.
# NOTE(review): the logical mask is computed from row names but sits in the
# column position of `[` (after the comma), and the `!` negates the match, so
# as written this selects columns of `e`, not rows — confirm the intent.
e <- e[, !row.names(e) %in% row.names(pf)]


> dput(e[1:10, 1:10])
structure(list(row_names = c("2315374", "2315376", "2315587", 
"2315588", "2315591", "2315595", "2315598", "2315603", "2315604", 
"2315640"), JHU_113_2.CEL = c(6.28274, 5.81678, 8.88557, 6.2865, 
6.97515, 5.94179, 4.9942, 7.67845, 6.20952, 5.85307), JHU_144.CEL = c(6.79161, 
5.71165, 8.95699, 6.6675, 6.61705, 5.39178, 5.56888, 7.90005, 
6.59687, 6.07303), JHU_173.CEL = c(6.11265, 6.02794, 8.36898, 
6.07503, 6.51994, 5.09497, 5.57912, 7.47594, 6.14608, 6.41875
), JHU_176R.CEL = c(6.13997, 5.37082, 8.28993, 6.76625, 6.74982, 
4.96199, 5.4396, 6.75087, 5.70518, 6.07282), JHU_182.CEL = c(6.68056, 
5.95527, 8.41361, 6.19819, 6.60917, 2.96431, 5.19249, 7.62805, 
6.49572, 6.28283), JHU_186.CEL = c(6.48156, 5.75999, 8.6498, 
6.8426, 6.55182, 4.95204, 5.87991, 8.00069, 6.12622, 6.13699), 
    JHU_187.CEL = c(6.45415, 5.87863, 8.74305, 6.13916, 6.6224, 
    5.00979, 5.6054, 7.34296, 6.2369, 6.16377), JHU_188.CEL = c(6.04542, 
    5.5483, 8.31915, 6.40219, 6.44394, 4.06493, 5.09513, 6.81338, 
    6.39569, 6.48616), JHU_203.CEL = c(5.99176, 6.35571, 8.43548, 
    6.45059, 5.76592, 5.38048, 5.43618, 7.52014, 6.70869, 6.34162
    )), row.names = c(NA, 10L), class = "data.frame")

> dput(pf[1:10, 1:10])
structure(list(JHU_113_2.CEL = c(0.0073, 0.04079, 0.02234, 0.08252, 
0.00127, 0.00256, 0.04265, 0.02244, 0.02615, 0.04563), JHU_144.CEL = c(0.00293, 
0.01525, 0.00914, 0.03644, 0.00162, 0.01274, 0.00798, 0.00955, 
0.00732, 0.02698), JHU_173.CEL = c(0.03034, 0.0309, 0.05024, 
0.09374, 0.00548, 0.04754, 0.03491, 0.03399, 0.02661, 0.01546
), JHU_176R.CEL = c(0.02571, 0.08493, 0.06918, 0.0337, 0.00945, 
0.03185, 0.03182, 0.15281, 0.04117, 0.03758), JHU_182.CEL = c(0.00436, 
0.01303, 0.0521, 0.04909, 0.00341, 0.45759, 0.02033, 0.01911, 
0.01037, 0.01974), JHU_186.CEL = c(0.00788, 0.03559, 0.02822, 
0.02419, 0.00383, 0.02401, 0.00727, 0.0126, 0.01663, 0.02291), 
    JHU_187.CEL = c(0.0052, 0.02163, 0.02401, 0.07845, 0.0028, 
    0.01582, 0.008, 0.0432, 0.01312, 0.03307), JHU_188.CEL = c(0.01704, 
    0.01586, 0.06648, 0.09027, 0.00882, 0.21313, 0.03859, 0.1424, 
    0.01815, 0.01586), JHU_203.CEL = c(0.03273, 0.04264, 0.06823, 
    0.09449, 0.06553, 0.05349, 0.06481, 0.07518, 0.02777, 0.07511
    ), JHU_205.CEL = c(0.0672, 0.05689, 0.1278, 0.12526, 0.03327, 
    0.10091, 0.42112, 0.18431, 0.08595, 0.01157)), row.names = c(2315374L, 
2315376L, 2315587L, 2315588L, 2315591L, 2315595L, 2315598L, 2315603L, 
2315604L, 2315640L), class = "data.frame")

如果您只想保留 e 中行名也出现在 pf 中的那些行(若要保留未出现的行,则在条件前加上 `!`,即 `!rownames(e) %in% rownames(pf)`),可以直接按行名使用 filter():

library(tidyverse)

# Keep the rows of `e` whose row name also appears among the row names of
# `pf`. The condition is a logical vector with one element per row of `e`.
filter(e, rownames(e) %in% rownames(pf))

另一种做法是先把两个数据框的行名各自转换成一列(即 rn)。然后,以 rn 列为键执行 semi_join,只保留 e 中在 pf 里有匹配的行。最后,再将 rn 列转换回行名。

library(tidyverse)

# Keep only the rows of `e` whose row name also occurs in `pf`.
# Both data frames first get their row names copied into a key column `rn`
# so that the rows can be matched with a join.
list(e, pf) %>%
  map(~ .x %>%
        as.data.frame() %>%
        rownames_to_column("rn")) %>%
  # semi_join() is a filtering join: it returns the rows of the first data
  # frame (`e`) that have a match in the second (`pf`) and adds no columns.
  # full_join() would instead keep the rows of both inputs and append the
  # columns of `pf`, which does not match the intended result.
  reduce(semi_join, by = "rn") %>%
  # Turn the key column back into row names.
  column_to_rownames("rn")

输出

        JHU_113_2.CEL JHU_144.CEL JHU_173.CEL JHU_176R.CEL JHU_182.CEL JHU_186.CEL JHU_187.CEL JHU_188.CEL JHU_203.CEL
2315374       6.28274     6.79161     6.11265      6.13997     6.68056     6.48156     6.45415     6.04542     5.99176
2315376       5.81678     5.71165     6.02794      5.37082     5.95527     5.75999     5.87863     5.54830     6.35571
2315587       8.88557     8.95699     8.36898      8.28993     8.41361     8.64980     8.74305     8.31915     8.43548
2315588       6.28650     6.66750     6.07503      6.76625     6.19819     6.84260     6.13916     6.40219     6.45059
2315591       6.97515     6.61705     6.51994      6.74982     6.60917     6.55182     6.62240     6.44394     5.76592
2315595       5.94179     5.39178     5.09497      4.96199     2.96431     4.95204     5.00979     4.06493     5.38048
2315598       4.99420     5.56888     5.57912      5.43960     5.19249     5.87991     5.60540     5.09513     5.43618
2315603       7.67845     7.90005     7.47594      6.75087     7.62805     8.00069     7.34296     6.81338     7.52014
2315604       6.20952     6.59687     6.14608      5.70518     6.49572     6.12622     6.23690     6.39569     6.70869
2315640       5.85307     6.07303     6.41875      6.07282     6.28283     6.13699     6.16377     6.48616     6.34162