使用 R 过滤来自一个 table 的信息以与另一个 table 的信息组合

Filtering information from one table to be combined with information of another table using R

有人可以帮我解决这个问题吗? 我有两个表,如下所示(data1 和 data2)。我想输出 (data3) 结合两者。 这个想法是select只有来自data1的每个"mir"(mir.1,mir2,...,mir25)的负值,结合来自data2的各自"mir"的信息来生成数据3。

示例:

data1 <-"P_G,mir-1,mir-2,mir-3,mir-4,mir-5,mir-6,mir-7,mir-8,mir-9,mir-10,mir-11,mir-12,mir-13,mir-14,mir-15,mir-16,mir-17,mir-18,mir-19,mir-20,mir-21,mir-22,mir-23,mir-24,mir-25
P111179_ENSBTAG00000005989,NA,0.27,NA,NA,NA,NA,NA,-0.19,0.3580,NA,0.5234,-0.58,-0.24,-0.50,NA,NA,-0.11,-0.37,NA,NA,0.41,0.20,NA,-0.32,NA
P137378_ENSBTAG00000038920,-0.09231,-0.30,NA,-0.02,0.2158,-0.16,-0.08,-0.41,-0.21,-0.21,-0.49,0.6939,-0.08,0.6030,0.6030,-0.15,0.5506,0.0810,-0.38,NA,0.36,-0.44,0.26,0.05,-0.10
P136511_8_ENSBTAG00000043999,0.40317,0.38,NA,0.6185,-0.15,0.6674,-0.47,0.0065,0.2911,0.2911,0.3999,-0.62,-0.24,-0.53,-0.53,-0.23,-0.23,-0.25,-0.07,NA,0.11,0.29,-0.54,-0.33,-0.21
P136181_1_ENSBTAG00000022991,-0.27507,-0.12,NA,-0.42,-0.01,-0.15,0.5465,-0.54,0.1782,0.1782,-0.42,0.6711,-0.23,0.4807,0.4807,-0.17,0.2977,0.3164,-0.41,0.09,0.29,0.16,-0.07,0.04,0.02
P134814_ENSBTAG00000006541,-0.4182,NA,NA,NA,-0.61,0.1221,0.2858,NA,NA,NA,NA,0.0309,NA,0.3838,0.3838,NA,NA,-0.02,NA,NA,0.14,NA,-0.13,0.19,-0.04
P133768_ENSBTAG00000021120,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
P138444_ENSBTAG00000000894,NA,NA,NA,0.0455,-0.25,NA,NA,-0.31,NA,0.5985,0.4818,NA,NA,NA,NA,-0.23,NA,-0.14,NA,NA,0.25,NA,NA,-0.30,NA
P138481_ENSBTAG00000005534,NA,NA,NA,0.1475,NA,NA,NA,0.1111,NA,0.2673,NA,NA,NA,NA,NA,NA,NA,-0.50,NA,NA,NA,NA,NA,0.10,NA
P111219_ENSBTAG00000004910,NA,NA,NA,NA,NA,0.5612,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,-0.09,-0.19,NA,NA,NA,NA,NA,NA,NA"
data1 <-read.table(text=data1,sep=",",header=T)

data2 <-"mirna,expression
mir-1,1506.554
mir-2,64.385
mir-3,790.762
mir-4,13.574
mir-5,59.737
mir-6,29832.972
mir-7,10085.201
mir-8,5.81
mir-9,53.09
mir-10,53.09
mir-11,13096.997
mir-12,561.459
mir-13,0.63
mir-14,57.918
mir-15,57.918
mir-16,0.695
mir-17,75.411
mir-18,258.793
mir-19,1.16
mir-20,3.95
mir-21,24.966
mir-22,31.509
mir-23,4.391111111
mir-24,16.659
mir-25,141.242"
data2 <-read.table(text=data2,sep=",",header=T)

预期结果:

data3 <- "P_G,mirna,expression
P111179_ENSBTAG00000005989,mir-8,5.81
P111179_ENSBTAG00000005989,mir-12,561.46
P111179_ENSBTAG00000005989,mir-13,0.63
P111179_ENSBTAG00000005989,mir-14,57.92
P111179_ENSBTAG00000005989,mir-17,75.41
P111179_ENSBTAG00000005989,mir-18,258.79
P111179_ENSBTAG00000005989,mir-24,16.66
P137378_ENSBTAG00000038920,mir-1,1506.55
P137378_ENSBTAG00000038920,mir-2,64.39
P137378_ENSBTAG00000038920,mir-4,13.57
P137378_ENSBTAG00000038920,mir-6,29832.97
P137378_ENSBTAG00000038920,mir-7,10085.20
P137378_ENSBTAG00000038920,mir-8,5.81
P137378_ENSBTAG00000038920,mir-9,53.09
P137378_ENSBTAG00000038920,mir-10,53.09
P137378_ENSBTAG00000038920,mir-11,13097.00
P137378_ENSBTAG00000038920,mir-13,0.63
P137378_ENSBTAG00000038920,mir-16,0.70
P137378_ENSBTAG00000038920,mir-19,1.16
P137378_ENSBTAG00000038920,mir-22,31.51
P137378_ENSBTAG00000038920,mir-25,141.24"
data3 <-read.table(text=data3,sep=",",header=T)

我们可以 melt 第一个数据集并加入 on 第二个数据集

library(data.table)
unique(setDT(data2)[melt(setDT(data1), id.var="P_G", na.rm = TRUE, variable.name = "mirna", 
   value.name = "expression")[,
   mirna:= sub("[.]", "-", mirna)
   ][expression>0], on = "mirna"][], by = c("mirna", "expression"))