合并嵌套列表中的数据框
Combine data frames from a nested list
我在使用简洁的语法组合嵌套列表中包含的数据框时遇到问题。我有以下形式的嵌套列表:
# Build a two-level list: 3 outer elements, each a list of 8 data frames.
# Every data frame has 10 rows and 2 columns of standard-normal draws.
nestedList <- lapply(seq_len(3), function(outer_idx) {
  lapply(seq_len(8), function(inner_idx) {
    data.frame(matrix(rnorm(20), nrow = 10))
  })
})
因此 nestedList
包含 3 个列表,每个列表又各包含 8 个数据框。我想按如下方式合并其中的数据框:
# Target result: the first data frame of each of the three sublists,
# stacked row-wise in order. `[[` with a vector index descends recursively,
# so nestedList[[c(i, 1)]] is the same element as nestedList[[i]][[1]].
tmp1 <- nestedList[[c(1, 1)]]
tmp2 <- nestedList[[c(2, 1)]]
tmp3 <- nestedList[[c(3, 1)]]
expectedResult <- do.call(rbind, list(tmp1, tmp2, tmp3))
我原以为以下语法是有效的,但显然不是:
# `[[` with a vector index is applied recursively, so nestedList[[1:3]] means
# nestedList[[1]][[2]][[3]] rather than "elements 1 to 3" of the outer list.
unexpectedResult <- rbind(nestedList[[1:3]][[1]])
试试这个。
# Take the first data frame from every sublist, then stack them row-wise.
foo <- lapply(nestedList, `[[`, 1)
this <- do.call(rbind, foo)
do.call(rbind, lapply(nestedList[1:3], '[[', 1))
会成功的:
# Reproducible example data: fix the RNG seed, then build 5 sublists of
# 8 data frames each (10 rows x 2 columns of standard-normal draws).
set.seed(123)
nestedList <- lapply(seq_len(5), function(outer_idx) {
  lapply(seq_len(8), function(inner_idx) {
    data.frame(matrix(rnorm(20), nrow = 10))
  })
})
> do.call(rbind, lapply(nestedList[1:3], '[[', 1))
X1 X2
1 -0.56047565 1.22408180
2 -0.23017749 0.35981383
3 1.55870831 0.40077145
4 0.07050839 0.11068272
5 0.12928774 -0.55584113
6 1.71506499 1.78691314
7 0.46091621 0.49785048
8 -1.26506123 -1.96661716
9 -0.68685285 0.70135590
10 -0.44566197 -0.47279141
11 1.05271147 -0.21538051
12 -1.04917701 0.06529303
13 -1.26015524 -0.03406725
14 3.24103993 2.12845190
15 -0.41685759 -0.74133610
16 0.29822759 -1.09599627
17 0.63656967 0.03778840
18 -0.48378063 0.31048075
19 0.51686204 0.43652348
20 0.36896453 -0.45836533
21 0.23743027 1.01755864
22 1.21810861 -1.18843404
23 -1.33877429 -0.72160444
24 0.66082030 1.51921771
25 -0.52291238 0.37738797
26 0.68374552 -2.05222282
27 -0.06082195 -1.36403745
28 0.63296071 -0.20078102
29 1.33551762 0.86577940
30 0.00729009 -0.10188326
我使用 purrr
提出了以下解决方案
# map() pulls the first data frame out of each sublist; bind_rows() then
# stacks the resulting list of data frames into one.
my_result <- bind_rows(map(nestedList, `[[`, 1))
并测试结果是否正确
identical(my_result, expectedResult)
[1] TRUE
我想指出 data.table
中的 rbindlist
函数。这个函数通常比 base R 的 rbind
更高效:
library(data.table)
# Flatten nestedList by one level into a plain list of data frames, then bind
# them in a single call. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned, whereas FALSE is a reserved word.
# NOTE: this stacks every data frame in nestedList (all 8 per sublist), not
# only the first data frame of each sublist.
rbindlist(unlist(nestedList, recursive = FALSE))
# Performance comparison of the three approaches, 1000 runs each.
# NOTE: datatable_rbind binds every data frame in nestedList, whereas the two
# base variants bind only the first data frame of each sublist, so the timed
# expressions do not produce the same result.
library(microbenchmark)
microbenchmark(
  times = 1000,
  datatable_rbind = rbindlist(unlist(nestedList, recursive = FALSE)),
  base_rbind = do.call("rbind", lapply(nestedList, function(x) x[[1]])),
  base_rbind2 = do.call(rbind, lapply(nestedList[1:3], "[[", 1))
)
# Unit: microseconds
# expr min lq mean median uq max neval
# datatable_rbind 85.530 109.397 124.5534 124.3035 141.1110 216.816 1000
# base_rbind 135.037 152.035 190.5976 184.8475 201.0455 5912.946 1000
# base_rbind2 136.196 151.783 179.9393 186.4245 200.4225 347.564 1000
我在使用简洁的语法组合嵌套列表中包含的数据框时遇到问题。我有以下形式的嵌套列表:
# Build a two-level list: 3 outer elements, each a list of 8 data frames.
# Every data frame has 10 rows and 2 columns of standard-normal draws.
nestedList <- lapply(seq_len(3), function(outer_idx) {
  lapply(seq_len(8), function(inner_idx) {
    data.frame(matrix(rnorm(20), nrow = 10))
  })
})
因此 nestedList
包含 3 个列表,每个列表又各包含 8 个数据框。我想按如下方式合并其中的数据框:
# Target result: the first data frame of each of the three sublists,
# stacked row-wise in order. `[[` with a vector index descends recursively,
# so nestedList[[c(i, 1)]] is the same element as nestedList[[i]][[1]].
tmp1 <- nestedList[[c(1, 1)]]
tmp2 <- nestedList[[c(2, 1)]]
tmp3 <- nestedList[[c(3, 1)]]
expectedResult <- do.call(rbind, list(tmp1, tmp2, tmp3))
我原以为以下语法是有效的,但显然不是:
# `[[` with a vector index is applied recursively, so nestedList[[1:3]] means
# nestedList[[1]][[2]][[3]] rather than "elements 1 to 3" of the outer list.
unexpectedResult <- rbind(nestedList[[1:3]][[1]])
试试这个。
# Take the first data frame from every sublist, then stack them row-wise.
foo <- lapply(nestedList, `[[`, 1)
this <- do.call(rbind, foo)
do.call(rbind, lapply(nestedList[1:3], '[[', 1))
会成功的:
# Reproducible example data: fix the RNG seed, then build 5 sublists of
# 8 data frames each (10 rows x 2 columns of standard-normal draws).
set.seed(123)
nestedList <- lapply(seq_len(5), function(outer_idx) {
  lapply(seq_len(8), function(inner_idx) {
    data.frame(matrix(rnorm(20), nrow = 10))
  })
})
> do.call(rbind, lapply(nestedList[1:3], '[[', 1))
X1 X2
1 -0.56047565 1.22408180
2 -0.23017749 0.35981383
3 1.55870831 0.40077145
4 0.07050839 0.11068272
5 0.12928774 -0.55584113
6 1.71506499 1.78691314
7 0.46091621 0.49785048
8 -1.26506123 -1.96661716
9 -0.68685285 0.70135590
10 -0.44566197 -0.47279141
11 1.05271147 -0.21538051
12 -1.04917701 0.06529303
13 -1.26015524 -0.03406725
14 3.24103993 2.12845190
15 -0.41685759 -0.74133610
16 0.29822759 -1.09599627
17 0.63656967 0.03778840
18 -0.48378063 0.31048075
19 0.51686204 0.43652348
20 0.36896453 -0.45836533
21 0.23743027 1.01755864
22 1.21810861 -1.18843404
23 -1.33877429 -0.72160444
24 0.66082030 1.51921771
25 -0.52291238 0.37738797
26 0.68374552 -2.05222282
27 -0.06082195 -1.36403745
28 0.63296071 -0.20078102
29 1.33551762 0.86577940
30 0.00729009 -0.10188326
我使用 purrr 提出了以下解决方案:
# map() pulls the first data frame out of each sublist; bind_rows() then
# stacks the resulting list of data frames into one.
my_result <- bind_rows(map(nestedList, `[[`, 1))
并测试结果是否正确
identical(my_result, expectedResult)
[1] TRUE
我想指出 data.table
中的 rbindlist
函数。这个函数通常比 base R 的 rbind 更高效:
library(data.table)
# Flatten nestedList by one level into a plain list of data frames, then bind
# them in a single call. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned, whereas FALSE is a reserved word.
# NOTE: this stacks every data frame in nestedList (all 8 per sublist), not
# only the first data frame of each sublist.
rbindlist(unlist(nestedList, recursive = FALSE))
# Performance comparison of the three approaches, 1000 runs each.
# NOTE: datatable_rbind binds every data frame in nestedList, whereas the two
# base variants bind only the first data frame of each sublist, so the timed
# expressions do not produce the same result.
library(microbenchmark)
microbenchmark(
  times = 1000,
  datatable_rbind = rbindlist(unlist(nestedList, recursive = FALSE)),
  base_rbind = do.call("rbind", lapply(nestedList, function(x) x[[1]])),
  base_rbind2 = do.call(rbind, lapply(nestedList[1:3], "[[", 1))
)
# Unit: microseconds
# expr min lq mean median uq max neval
# datatable_rbind 85.530 109.397 124.5534 124.3035 141.1110 216.816 1000
# base_rbind 135.037 152.035 190.5976 184.8475 201.0455 5912.946 1000
# base_rbind2 136.196 151.783 179.9393 186.4245 200.4225 347.564 1000