将 rbindlists 用于 R 中的嵌套列表
using rbindlists for nested lists in R
我有一个嵌套列表,如下所示。这是一个简化的示例,我的原始数据有数百个项目。
# data.table supplies as.data.table(); load it so this snippet runs on its own.
library(data.table)

# Print each built-in data set, then keep a small slice of it as a data.table
# (first 15 rows of mtcars, first 10 rows of iris).
mtcars
mtcars <- as.data.table(mtcars[1:15, ])
iris
iris <- as.data.table(iris[1:10, ])

# Leftovers from an earlier attempt, kept so the workspace ends up with the
# same objects as before. `names` shadows base::names() as a variable only;
# calls such as names(x) still resolve to the function.
names <- c("Item 1", "Item 2", "Item 3")
testList1 <- list(mtcars, iris)

# Nested list: one entry per item, each holding a `cars` and a `flowers` table.
result <- list(
  Item1 = list(cars = mtcars, flowers = iris),
  Item2 = list(cars = mtcars, flowers = iris),
  Item3 = list(cars = mtcars, flowers = iris)
)
我想汇总结果,从而得到两个 data.table:一个用于 flowers
,另一个用于 cars
。
我尝试使用 result <- rbindlist(result, use.names = TRUE, fill = TRUE)
,但结果出错。
期望的结果是:
# Expected output, built by hand: three stacked copies of each table.
testList1 <- rep(list(iris), 3)
flowers <- rbindlist(testList1, use.names = TRUE, fill = TRUE)
flowers

testList1 <- rep(list(mtcars), 3)
cars <- rbindlist(testList1, use.names = TRUE, fill = TRUE)
cars
考虑到原始 result
比示例大得多,我如何绑定嵌套列表?
您可以使用 purrr
的 transpose
和 dplyr
的 bind_rows
。
library(purrr)
# transpose() flips the nesting from item -> table to table -> item, so each
# top-level element (cars, flowers) holds one table per item; bind_rows() then
# stacks those tables.
map(transpose(result), dplyr::bind_rows)
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
# 4: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
# 5: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#...
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1: 5.1 3.5 1.4 0.2 setosa
# 2: 4.9 3.0 1.4 0.2 setosa
# 3: 4.7 3.2 1.3 0.2 setosa
# 4: 4.6 3.1 1.5 0.2 setosa
# 5: 5.0 3.6 1.4 0.2 setosa
#...
我们可以添加一个 'id'
列:
# Same as the plain version, but `.id = "id"` is forwarded to bind_rows() so
# every row records the name of the item it came from.
map(transpose(result), dplyr::bind_rows, .id = "id")
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb id
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Item1
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Item1
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Item1
#...
#...
#43: 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Item3
#44: 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Item3
#45: 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Item3
# mpg cyl disp hp drat wt qsec vs am gear carb id
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species id
# 1: 5.1 3.5 1.4 0.2 setosa Item1
# 2: 4.9 3.0 1.4 0.2 setosa Item1
# 3: 4.7 3.2 1.3 0.2 setosa Item1
#...
#29: 4.4 2.9 1.4 0.2 setosa Item3
#30: 4.9 3.1 1.5 0.1 setosa Item3
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species id
如果我们遍历内部列表的 names
,就可以使用 rbindlist
library(data.table)
# Loop over the inner names (taken from the first item; assumes every item
# uses the same names). setNames() makes the output list carry those names.
# For each name, pull that table out of every item and stack the pieces.
# use.names/fill match the arguments used for the expected result, so items
# whose columns differ in order or presence are bound by name and NA-filled.
lapply(
  setNames(names(result[[1]]), names(result[[1]])),
  function(nm) {
    rbindlist(lapply(result, `[[`, nm), use.names = TRUE, fill = TRUE)
  }
)
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
# 4: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
# 5: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
# 6: 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
# 7: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
# 8: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
# 9: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# ...
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1: 5.1 3.5 1.4 0.2 setosa
# 2: 4.9 3.0 1.4 0.2 setosa
# 3: 4.7 3.2 1.3 0.2 setosa
# 4: 4.6 3.1 1.5 0.2 setosa
# 5: 5.0 3.6 1.4 0.2 setosa
# 6: 5.4 3.9 1.7 0.4 setosa
# 7: 4.6 3.4 1.4 0.3 setosa
# 8: 5.0 3.4 1.5 0.2 setosa
# ..
如果我们需要 'id',则在 rbindlist
中指定 idcol
选项
# For each inner name, pull that table out of every item and stack the pieces.
# idcol = "id" adds a column identifying the source item of each row; the
# list passed to rbindlist() keeps the names of `result`.
# use.names/fill match the arguments used for the expected result, so items
# whose columns differ in order or presence are bound by name and NA-filled.
lapply(
  setNames(names(result[[1]]), names(result[[1]])),
  function(nm) {
    rbindlist(
      lapply(result, `[[`, nm),
      use.names = TRUE,
      fill = TRUE,
      idcol = "id"
    )
  }
)
我有一个嵌套列表,如下所示。这是一个简化的示例,我的原始数据有数百个项目。
# data.table supplies as.data.table(); load it so this snippet runs on its own.
library(data.table)

# Print each built-in data set, then keep a small slice of it as a data.table
# (first 15 rows of mtcars, first 10 rows of iris).
mtcars
mtcars <- as.data.table(mtcars[1:15, ])
iris
iris <- as.data.table(iris[1:10, ])

# Leftovers from an earlier attempt, kept so the workspace ends up with the
# same objects as before. `names` shadows base::names() as a variable only;
# calls such as names(x) still resolve to the function.
names <- c("Item 1", "Item 2", "Item 3")
testList1 <- list(mtcars, iris)

# Nested list: one entry per item, each holding a `cars` and a `flowers` table.
result <- list(
  Item1 = list(cars = mtcars, flowers = iris),
  Item2 = list(cars = mtcars, flowers = iris),
  Item3 = list(cars = mtcars, flowers = iris)
)
我想汇总结果,从而得到两个 data.table:一个用于 flowers
,另一个用于 cars
。
我尝试使用 result <- rbindlist(result, use.names = TRUE, fill = TRUE)
,但结果出错。
期望的结果是:
# Expected output, built by hand: three stacked copies of each table.
testList1 <- rep(list(iris), 3)
flowers <- rbindlist(testList1, use.names = TRUE, fill = TRUE)
flowers

testList1 <- rep(list(mtcars), 3)
cars <- rbindlist(testList1, use.names = TRUE, fill = TRUE)
cars
考虑到原始 result
比示例大得多,我如何绑定嵌套列表?
您可以使用 purrr
的 transpose
和 dplyr
的 bind_rows
。
library(purrr)
# transpose() flips the nesting from item -> table to table -> item, so each
# top-level element (cars, flowers) holds one table per item; bind_rows() then
# stacks those tables.
map(transpose(result), dplyr::bind_rows)
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
# 4: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
# 5: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
#...
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1: 5.1 3.5 1.4 0.2 setosa
# 2: 4.9 3.0 1.4 0.2 setosa
# 3: 4.7 3.2 1.3 0.2 setosa
# 4: 4.6 3.1 1.5 0.2 setosa
# 5: 5.0 3.6 1.4 0.2 setosa
#...
我们可以添加一个 'id'
列:
# Same as the plain version, but `.id = "id"` is forwarded to bind_rows() so
# every row records the name of the item it came from.
map(transpose(result), dplyr::bind_rows, .id = "id")
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb id
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Item1
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Item1
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Item1
#...
#...
#43: 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Item3
#44: 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Item3
#45: 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Item3
# mpg cyl disp hp drat wt qsec vs am gear carb id
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species id
# 1: 5.1 3.5 1.4 0.2 setosa Item1
# 2: 4.9 3.0 1.4 0.2 setosa Item1
# 3: 4.7 3.2 1.3 0.2 setosa Item1
#...
#29: 4.4 2.9 1.4 0.2 setosa Item3
#30: 4.9 3.1 1.5 0.1 setosa Item3
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species id
如果我们遍历内部列表的 names
,就可以使用 rbindlist
library(data.table)
# Loop over the inner names (taken from the first item; assumes every item
# uses the same names). setNames() makes the output list carry those names.
# For each name, pull that table out of every item and stack the pieces.
# use.names/fill match the arguments used for the expected result, so items
# whose columns differ in order or presence are bound by name and NA-filled.
lapply(
  setNames(names(result[[1]]), names(result[[1]])),
  function(nm) {
    rbindlist(lapply(result, `[[`, nm), use.names = TRUE, fill = TRUE)
  }
)
#$cars
# mpg cyl disp hp drat wt qsec vs am gear carb
# 1: 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
# 2: 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
# 3: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
# 4: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
# 5: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
# 6: 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
# 7: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
# 8: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
# 9: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# ...
#$flowers
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1: 5.1 3.5 1.4 0.2 setosa
# 2: 4.9 3.0 1.4 0.2 setosa
# 3: 4.7 3.2 1.3 0.2 setosa
# 4: 4.6 3.1 1.5 0.2 setosa
# 5: 5.0 3.6 1.4 0.2 setosa
# 6: 5.4 3.9 1.7 0.4 setosa
# 7: 4.6 3.4 1.4 0.3 setosa
# 8: 5.0 3.4 1.5 0.2 setosa
# ..
如果我们需要 'id',则在 rbindlist
中指定 idcol
选项
# For each inner name, pull that table out of every item and stack the pieces.
# idcol = "id" adds a column identifying the source item of each row; the
# list passed to rbindlist() keeps the names of `result`.
# use.names/fill match the arguments used for the expected result, so items
# whose columns differ in order or presence are bound by name and NA-filled.
lapply(
  setNames(names(result[[1]]), names(result[[1]])),
  function(nm) {
    rbindlist(
      lapply(result, `[[`, nm),
      use.names = TRUE,
      fill = TRUE,
      idcol = "id"
    )
  }
)