如何循环遍历 R 中的大列表?
How to loop over a large list in R?
我有一个包含 300 个名称的大列表(Large list)`data`,以及一个包含数值的数据框 `data6`。
大列表 `data` 如下所示:
dput(data$name1)
c("55024", "29126", "3732", "1960", "79368", "115352", "10875",
"2530", "348654", "3070", "29969", "9124", "10125", "143686",
"6504", "25992", "26137")
dput(data$name2)
c("323", "836", "9976", "1407", "1840", "2289", "2317", "2739",
"8337", "10964", "3572", "3693", "4023", "4058", "124540", "4638",
"5214", "6238", "8115", "7049", "8459", "10791", "55884", "7494",
"7535")
而数据框 `data6` 如下所示:
dput(data6)
structure(list(sample1 = c(1.797420114, 2.441058007, 2.072991117,
1.976459487, 2.153422414, 3.19334551, 1.293754109, 2.42458892,
1.617599756, 1.706955406, 1.333884316, 2.734852229, 2.727135936,
1.246081115, 1.729863031, 1.39318281, 1.540910675, 2.159654566,
3.12446713, 2.942906309, 1.459526471, 3.080694772, 2.659254927,
2.654413352, 2.539851295, 3.021233948), sample2 = c(1.893457121,
2.515794451, 2.110657811, 1.960663021, 2.00443825, 3.117625404,
1.157262746, 2.409548272, 1.458719384, 1.855561189, 1.263697526,
2.756331664, 2.621355832, 1.21049778, 1.848646261, 1.247975834,
1.725660164, 2.294596029, 3.183559871, 3.000828084, 1.638529898,
2.965147948, 2.69031941, 2.604888992, 2.695617226, 3.024335517
), sample3 = c(2.064216543, 2.590740583, 1.739272353, 2.026633201,
2.11544319, 3.208407354, 1.275654593, 2.360166877, 1.784837002,
1.795149114, 1.289316392, 2.747176531, 2.7794964, 1.142444503,
1.569541472, 1.298843972, 1.770285058, 2.443448795, 3.089447481,
2.910612462, 1.236543553, 3.166973944, 2.66008316, 2.734142725,
2.626122207, 2.987731358), sample4 = c(1.888713323, 2.398032239,
2.142526217, 1.950883861, 2.023021668, 3.287045461, 1.256008953,
2.394323766, 1.546403414, 1.800866081, 1.430312275, 2.754314046,
2.687826087, 1.218387918, 1.696783819, 1.223212096, 1.583623673,
2.211686144, 3.118032952, 2.956183327, 1.398410126, 3.114571708,
2.725648336, 2.743968119, 2.612508141, 3.012197931), sample5 = c(1.900294551,
2.44526815, 1.907689891, 2.001029682, 2.114359656, 3.204535394,
1.254469716, 2.306233714, 1.780838868, 1.830477155, 1.359595123,
2.730898706, 2.619532527, 1.151552352, 1.643335806, 1.19766925,
1.747261813, 2.318571867, 3.135500762, 2.914971673, 1.460149594,
3.000380417, 2.689031735, 2.612427905, 2.714124436, 3.045352789
)), row.names = c(18723L, 55024L, 29126L, 323L, 836L, 2739L,
3732L, 7494L, 7535L, 2289L, 10875L, 11033L, 1960L, 4023L, 8115L,
8337L, 1407L, 133443L, 143686L, 25992L, 6504L, 25992L, 23679L,
35467L, 11023L, 26137L), class = "data.frame")
以及我使用的代码:
library(GSVA)

# Score the gene set stored in data$name1 across every sample of data6 using
# ssGSEA. `data` (list of gene-ID vectors) and `data6` (expression data frame)
# are the objects shown above.

# Name the column explicitly: as.data.frame(data$name1) labels its only column
# "data$name1", so df$name1 would be NULL and the gene set would be empty.
df <- data.frame(name1 = data$name1, stringsAsFactors = FALSE)
# selected
Sig <- unique(df$name1)

# Seed kept exactly as in the original analysis.
set.seed(8, sample.kind = "Rounding")
core <- gsva(expr = as.matrix(data6),
             gset.idx.list = list(Sig), method = "ssgsea")

# Transpose so that samples become rows, then label the single score column
# with the name of the gene set.
core2 <- as.data.frame(t(core))
colnames(core2)[1] <- "name1"
最后得到的 `core2` 如下所示:
name1
sample1 2.57
sample2 2.32
sample3 2.15
sample4 2.56
sample5 1.67
我想对列表 `data` 中其余 299 个名称重复上述代码。例如,对于 name2,我需要再次执行以下操作:
library(GSVA)

# Score the gene set stored in data$name2 across every sample of data6 using
# ssGSEA. `data` (list of gene-ID vectors) and `data6` (expression data frame)
# are the objects shown above.

# Name the column explicitly: as.data.frame(data$name2) labels its only column
# "data$name2", so df$name2 would be NULL and the gene set would be empty.
df <- data.frame(name2 = data$name2, stringsAsFactors = FALSE)
# selected
Sig <- unique(df$name2)

# Seed kept exactly as in the original analysis.
set.seed(8, sample.kind = "Rounding")
core <- gsva(expr = as.matrix(data6),
             gset.idx.list = list(Sig), method = "ssgsea")

# Transpose so that samples become rows, then label the single score column
# with the name of the gene set.
core2 <- as.data.frame(t(core))
colnames(core2)[1] <- "name2"
现在 `name2` 对应的 `core2` 如下所示:
name2
sample1 3.57
sample2 2.42
sample3 2.87
sample4 2.16
sample5 4.67
我不想对每个列表元素逐一执行此操作并分别得到结果,而是希望通过循环为列表中的全部 300 个名称生成 `core2`,并将所有结果合并为单个数据框。
如何在 R 中执行此操作?
我们可以使用 `lapply`/`Map` 遍历 `data` 的各个元素,对每个元素应用该函数,然后用 `cbind` 合并所得 `list` 的各个元素:
library(GSVA)

# Convert the expression data frame once, outside the iteration.
m1 <- as.matrix(data6)

# For every element of `data`, compute the ssGSEA scores of that gene set and
# return them as a one-column data frame named after the element; the
# per-element data frames are then bound column-wise into `out`.
out <- do.call(
  cbind,
  Map(
    function(genes, set_name) {
      gene_set <- unique(genes)
      # Same seed before every call, as in the single-set code.
      set.seed(8, sample.kind = "Rounding")
      scores <- gsva(
        expr = m1,
        gset.idx.list = list(gene_set),
        method = "ssgsea"
      )
      # Samples as rows, one score column labelled with the set name.
      scores_df <- as.data.frame(t(scores))
      colnames(scores_df)[1] <- set_name
      scores_df
    },
    data,
    names(data)
  )
)
|=======================================================================================================================================| 100%
|=======================================================================================================================================| 100%
[1] "Normalizing..."
输出:
> out
name1 name2
sample1 1.960636 -4.549292
sample2 2.380813 -5.152423
sample3 1.380813 -4.152423
sample4 1.881693 -4.900299
sample5 1.797915 -4.733601
我有一个包含 300 个名称的大列表(Large list)`data`,以及一个包含数值的数据框 `data6`。
大列表 `data` 如下所示:
dput(data$name1)
c("55024", "29126", "3732", "1960", "79368", "115352", "10875",
"2530", "348654", "3070", "29969", "9124", "10125", "143686",
"6504", "25992", "26137")
dput(data$name2)
c("323", "836", "9976", "1407", "1840", "2289", "2317", "2739",
"8337", "10964", "3572", "3693", "4023", "4058", "124540", "4638",
"5214", "6238", "8115", "7049", "8459", "10791", "55884", "7494",
"7535")
而数据框 `data6` 如下所示:
dput(data6)
structure(list(sample1 = c(1.797420114, 2.441058007, 2.072991117,
1.976459487, 2.153422414, 3.19334551, 1.293754109, 2.42458892,
1.617599756, 1.706955406, 1.333884316, 2.734852229, 2.727135936,
1.246081115, 1.729863031, 1.39318281, 1.540910675, 2.159654566,
3.12446713, 2.942906309, 1.459526471, 3.080694772, 2.659254927,
2.654413352, 2.539851295, 3.021233948), sample2 = c(1.893457121,
2.515794451, 2.110657811, 1.960663021, 2.00443825, 3.117625404,
1.157262746, 2.409548272, 1.458719384, 1.855561189, 1.263697526,
2.756331664, 2.621355832, 1.21049778, 1.848646261, 1.247975834,
1.725660164, 2.294596029, 3.183559871, 3.000828084, 1.638529898,
2.965147948, 2.69031941, 2.604888992, 2.695617226, 3.024335517
), sample3 = c(2.064216543, 2.590740583, 1.739272353, 2.026633201,
2.11544319, 3.208407354, 1.275654593, 2.360166877, 1.784837002,
1.795149114, 1.289316392, 2.747176531, 2.7794964, 1.142444503,
1.569541472, 1.298843972, 1.770285058, 2.443448795, 3.089447481,
2.910612462, 1.236543553, 3.166973944, 2.66008316, 2.734142725,
2.626122207, 2.987731358), sample4 = c(1.888713323, 2.398032239,
2.142526217, 1.950883861, 2.023021668, 3.287045461, 1.256008953,
2.394323766, 1.546403414, 1.800866081, 1.430312275, 2.754314046,
2.687826087, 1.218387918, 1.696783819, 1.223212096, 1.583623673,
2.211686144, 3.118032952, 2.956183327, 1.398410126, 3.114571708,
2.725648336, 2.743968119, 2.612508141, 3.012197931), sample5 = c(1.900294551,
2.44526815, 1.907689891, 2.001029682, 2.114359656, 3.204535394,
1.254469716, 2.306233714, 1.780838868, 1.830477155, 1.359595123,
2.730898706, 2.619532527, 1.151552352, 1.643335806, 1.19766925,
1.747261813, 2.318571867, 3.135500762, 2.914971673, 1.460149594,
3.000380417, 2.689031735, 2.612427905, 2.714124436, 3.045352789
)), row.names = c(18723L, 55024L, 29126L, 323L, 836L, 2739L,
3732L, 7494L, 7535L, 2289L, 10875L, 11033L, 1960L, 4023L, 8115L,
8337L, 1407L, 133443L, 143686L, 25992L, 6504L, 25992L, 23679L,
35467L, 11023L, 26137L), class = "data.frame")
以及我使用的代码:
library(GSVA)

# Score the gene set stored in data$name1 across every sample of data6 using
# ssGSEA. `data` (list of gene-ID vectors) and `data6` (expression data frame)
# are the objects shown above.

# Name the column explicitly: as.data.frame(data$name1) labels its only column
# "data$name1", so df$name1 would be NULL and the gene set would be empty.
df <- data.frame(name1 = data$name1, stringsAsFactors = FALSE)
# selected
Sig <- unique(df$name1)

# Seed kept exactly as in the original analysis.
set.seed(8, sample.kind = "Rounding")
core <- gsva(expr = as.matrix(data6),
             gset.idx.list = list(Sig), method = "ssgsea")

# Transpose so that samples become rows, then label the single score column
# with the name of the gene set.
core2 <- as.data.frame(t(core))
colnames(core2)[1] <- "name1"
最后得到的 `core2` 如下所示:
name1
sample1 2.57
sample2 2.32
sample3 2.15
sample4 2.56
sample5 1.67
我想对列表 `data` 中其余 299 个名称重复上述代码。例如,对于 name2,我需要再次执行以下操作:
library(GSVA)

# Score the gene set stored in data$name2 across every sample of data6 using
# ssGSEA. `data` (list of gene-ID vectors) and `data6` (expression data frame)
# are the objects shown above.

# Name the column explicitly: as.data.frame(data$name2) labels its only column
# "data$name2", so df$name2 would be NULL and the gene set would be empty.
df <- data.frame(name2 = data$name2, stringsAsFactors = FALSE)
# selected
Sig <- unique(df$name2)

# Seed kept exactly as in the original analysis.
set.seed(8, sample.kind = "Rounding")
core <- gsva(expr = as.matrix(data6),
             gset.idx.list = list(Sig), method = "ssgsea")

# Transpose so that samples become rows, then label the single score column
# with the name of the gene set.
core2 <- as.data.frame(t(core))
colnames(core2)[1] <- "name2"
现在 `name2` 对应的 `core2` 如下所示:
name2
sample1 3.57
sample2 2.42
sample3 2.87
sample4 2.16
sample5 4.67
我不想对每个列表元素逐一执行此操作并分别得到结果,而是希望通过循环为列表中的全部 300 个名称生成 `core2`,并将所有结果合并为单个数据框。
如何在 R 中执行此操作?
我们可以使用 `lapply`/`Map` 遍历 `data` 的各个元素,对每个元素应用该函数,然后用 `cbind` 合并所得 `list` 的各个元素:
library(GSVA)

# Convert the expression data frame once, outside the iteration.
m1 <- as.matrix(data6)

# For every element of `data`, compute the ssGSEA scores of that gene set and
# return them as a one-column data frame named after the element; the
# per-element data frames are then bound column-wise into `out`.
out <- do.call(
  cbind,
  Map(
    function(genes, set_name) {
      gene_set <- unique(genes)
      # Same seed before every call, as in the single-set code.
      set.seed(8, sample.kind = "Rounding")
      scores <- gsva(
        expr = m1,
        gset.idx.list = list(gene_set),
        method = "ssgsea"
      )
      # Samples as rows, one score column labelled with the set name.
      scores_df <- as.data.frame(t(scores))
      colnames(scores_df)[1] <- set_name
      scores_df
    },
    data,
    names(data)
  )
)
|=======================================================================================================================================| 100%
|=======================================================================================================================================| 100%
[1] "Normalizing..."
输出:
> out
name1 name2
sample1 1.960636 -4.549292
sample2 2.380813 -5.152423
sample3 1.380813 -4.152423
sample4 1.881693 -4.900299
sample5 1.797915 -4.733601