如何使用名称连接列表中的 data.frame?
How to concatenate data.frame inside lists by using names?
我必须导入 1,000 多个 Excel 文件,每个 Excel 文件包含多个 sheet(有些 sheet 名称相同,有些 sheet 名称不同)。
下面举个小例子
# Example input: three named lists of data frames, standing in for three
# workbooks whose sheets only partly share names.

# First list: "games", "weather" and "cars".
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy")
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw")
)
list1 <- list(games = games, weather = weather, cars = cars)

# Second list: "games" and "weather" again, plus "sport" (absent from list1).
games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot")
)
sport <- data.frame(
  index = c(1, 2, 3),
  interest = c("swim", "soccer", "rugby")
)
list2 <- list(games = games, weather = weather, sport = sport)

# Third list: the same "games" and "weather" data frames as list2.
list3 <- list2[c("games", "weather")]

# Remove the temporaries so only list1, list2 and list3 remain.
rm(list = c("games", "sport", "weather", "cars"))
我正在寻找按列表元素名称合并这些列表的方法。我曾尝试使用 merge() 和 mapply(),但它们没有返回我想要的结果。
我想要的返回结果如下:
$`games`
# A tibble: 6 x 2
index player
<dbl> <chr>
1 1 John
2 2 Sam
3 3 Mary
4 1 AA
5 2 BB
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
<dbl> <chr>
1 1 hot
2 2 cold
3 3 rainy
4 1 cold
5 2 rainy
6 3 hot
$cars
# A tibble: 3 x 2
index car
<dbl> <chr>
1 1 honda
2 2 toyota
3 3 bmw
$sport
index interest
1 1 swim
2 2 soccer
3 3 rugby
编辑:我遇到过 list2 中包含 data.frame sport(而 list1 中没有)的情况。
您可以使用 purrr 来帮助操作列表。我添加了 stringsAsFactors = FALSE,只是为了能够绑定 data.frame。如果您已经在使用 tibble,则不会有此问题。
- 我创建了一个由列表组成的列表。transpose 会按名称重新组合其中的元素。基本上,x[[1]][[2]] 等价于 transpose(x)[[2]][[1]]。
- 我使用 map 遍历列表,并使用 dplyr::bind_rows 合并得到结果。
library(purrr)

# Text columns are kept as character per data frame through the
# `stringsAsFactors = FALSE` argument rather than by changing the
# session-wide `stringsAsFactors` option, so nothing outside this example
# is affected.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list1 <- list(games = games, weather = weather, cars = cars)

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
list2 <- list(games = games, weather = weather)

list(list1, list2) %>%
  # regroup the elements that share a name
  transpose() %>%
  # bind the data frames of each group row-wise
  map(dplyr::bind_rows)
#> $games
#> index player
#> 1 1 John
#> 2 2 Sam
#> 3 3 Mary
#> 4 1 AA
#> 5 2 BB
#> 6 3 CC
#>
#> $weather
#> index temperature
#> 1 1 hot
#> 2 2 cold
#> 3 3 rainy
#> 4 1 cold
#> 5 2 rainy
#> 6 3 hot
#>
#> $cars
#> index car
#> 1 1 honda
#> 2 2 toyota
#> 3 3 bmw
由 reprex package (v0.2.1)
创建于 2018-11-04
如果第一个列表没有包含您想要的所有元素,则需要在 transpose 中提供 .names 参数。参见 help("transpose", package = "purrr")。
我为此建立了一个例子。
library(purrr)

# Text columns are kept as character per data frame through the
# `stringsAsFactors = FALSE` argument rather than by changing the
# session-wide `stringsAsFactors` option.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
list1 <- list(games = games, weather = weather)

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list2 <- list(games = games, weather = weather, cars = cars)

all_list <- list(list1, list2)

# Union of the element names over every list; here the first list lacks
# "cars", so the names are passed to transpose() explicitly.
all_names <- all_list %>%
  map(names) %>%
  reduce(union)

# Reuse `all_list` instead of rebuilding list(list1, list2).
all_list %>%
  # regroup the elements that share a name
  transpose(.names = all_names) %>%
  # bind the data frames of each group row-wise
  map(dplyr::bind_rows)
#> $games
#> index player
#> 1 1 John
#> 2 2 Sam
#> 3 3 Mary
#> 4 1 AA
#> 5 2 BB
#> 6 3 CC
#>
#> $weather
#> index temperature
#> 1 1 hot
#> 2 2 cold
#> 3 3 rainy
#> 4 1 cold
#> 5 2 rainy
#> 6 3 hot
#>
#> $cars
#> index car
#> 1 1 honda
#> 2 2 toyota
#> 3 3 bmw
由 reprex package (v0.2.1)
创建于 2018-11-04
使用 lapply() 有一个简单的方法。
# For every element name used by list1, list2, ... stack the matching data
# frames and drop duplicated rows. The result is an unnamed list.
lapply(
  # The pattern is anchored so that only objects named list1, list2, ...
  # are inspected, not every object whose name merely contains "list".
  unique(unlist(lapply(mget(ls(pattern = "^list[0-9]+$")), names))),
  # `[[` gives NULL for a name a list lacks; rbind() ignores those NULLs.
  function(x) unique(rbind(list1[[x]], list2[[x]], list3[[x]]))
)
使用 setNames() 和 dplyr::as_tibble 可以得到列表元素名称和 tibble 标题,像这样:
# Gather the workspace lists named list1, list2, ... into one list of lists;
# `Lol` has to exist before its element names can be collected.
Lol <- mget(ls(pattern = "^list[0-9]+$"))
nms <- unique(unlist(lapply(Lol, names)))

# One tibble per element name, named accordingly.
setNames(
  lapply(nms, function(x) {
    # Take element `x` from every list (NULL where a list lacks it), stack
    # the rows and drop duplicates. unname() stops rbind() from deriving
    # row names from the list names.
    stacked <- do.call(rbind, unname(lapply(Lol, `[[`, x)))
    dplyr::as_tibble(unique(stacked))
  }),
  nms
)
结果如下:
$`games`
# A tibble: 6 x 2
index player
* <dbl> <fct>
1 1 John
2 2 Sam
3 3 Mary
4 1 AA
5 2 BB
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
* <dbl> <fct>
1 1 hot
2 2 cold
3 3 rainy
4 1 cold
5 2 rainy
6 3 hot
$cars
# A tibble: 3 x 2
index car
* <dbl> <fct>
1 1 honda
2 2 toyota
3 3 bmw
$sport
# A tibble: 3 x 2
index interest
* <dbl> <fct>
1 1 swim
2 2 soccer
3 3 rugby
但是,如果列表的数量未知,并且假设全局环境中所有列表的名称都符合模式 "list",您可以采用以下方法。
# List of lists: every workspace object whose name starts with "list".
# ("^list" selects the same names as the former "^list+".)
Lol <- mget(ls(pattern = "^list"))

# Merge data frames that carry the same name across several named lists.
#
# z: a list of named lists of data frames.
#
# Returns a named list holding one data frame per distinct element name.
# Rows come from a full outer merge(), so their order is the one merge()
# produces rather than the input order, and rows equal in every shared
# column are matched into a single row. An input without any data frame
# gives an empty list.
mergeFun <- function(z) {
  # Tag every data frame with the name it has in its parent list.
  # seq_along() keeps an empty sub-list from being indexed at all.
  l1 <- lapply(z, function(y) {
    lapply(seq_along(y), function(x) cbind(y[[x]], list = names(y)[x]))
  })
  l2 <- unlist(l1, recursive = FALSE)  # unnest lists
  if (length(l2) == 0L) {
    return(list())
  }
  l3 <- Reduce(function(...) merge(..., all = TRUE), l2)  # merge list
  l4 <- split(l3, l3$list)  # one data frame per element name
  l5 <- lapply(l4, function(w) {
    # Drop the tag column by name: its position depends on which columns
    # the merged data frames had in common, so it is not always second.
    w <- w[, setdiff(names(w), "list"), drop = FALSE]
    Filter(function(v) !all(is.na(v)), w)  # delete all-NA columns
  })
  lapply(l5, function(u) `rownames<-`(u, NULL))  # reset row names
}
如果需要 tibble,请执行 lapply(mergeFun(Lol), dplyr::as_tibble);否则只需执行 mergeFun(Lol)。
结果如下:
> lapply(mergeFun(Lol), dplyr::as_tibble)
$`games`
# A tibble: 6 x 2
index player
<dbl> <fct>
1 1 John
2 1 AA
3 2 Sam
4 2 BB
5 3 Mary
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
<dbl> <fct>
1 1 cold
2 1 hot
3 2 cold
4 2 rainy
5 3 hot
6 3 rainy
$cars
# A tibble: 3 x 2
index car
<dbl> <fct>
1 1 honda
2 2 toyota
3 3 bmw
$sport
# A tibble: 3 x 2
index interest
<dbl> <fct>
1 1 swim
2 2 soccer
3 3 rugby
数据
# Example data: three named lists of 3-row data frames. Text columns are
# factors whose levels are listed explicitly in alphabetical order.
list1 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(
      c("John", "Sam", "Mary"),
      levels = c("John", "Mary", "Sam")
    )
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("hot", "cold", "rainy"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  cars = data.frame(
    index = c(1, 2, 3),
    car = factor(
      c("honda", "toyota", "bmw"),
      levels = c("bmw", "honda", "toyota")
    )
  )
)

list2 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(c("AA", "BB", "CC"), levels = c("AA", "BB", "CC"))
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("cold", "rainy", "hot"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  sport = data.frame(
    index = c(1, 2, 3),
    interest = factor(
      c("swim", "soccer", "rugby"),
      levels = c("rugby", "soccer", "swim")
    )
  )
)

# list3 holds the same "games" and "weather" data frames as list2.
list3 <- list2[c("games", "weather")]
我必须导入 1,000 多个 Excel 文件,每个 Excel 文件包含多个 sheet(有些 sheet 名称相同,有些 sheet 名称不同)。
下面举个小例子
# Example input: three named lists of data frames, standing in for three
# workbooks whose sheets only partly share names.

# First list: "games", "weather" and "cars".
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy")
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw")
)
list1 <- list(games = games, weather = weather, cars = cars)

# Second list: "games" and "weather" again, plus "sport" (absent from list1).
games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot")
)
sport <- data.frame(
  index = c(1, 2, 3),
  interest = c("swim", "soccer", "rugby")
)
list2 <- list(games = games, weather = weather, sport = sport)

# Third list: the same "games" and "weather" data frames as list2.
list3 <- list2[c("games", "weather")]

# Remove the temporaries so only list1, list2 and list3 remain.
rm(list = c("games", "sport", "weather", "cars"))
我正在寻找按列表元素名称合并这些列表的方法。我曾尝试使用 merge() 和 mapply(),但它们没有返回我想要的结果。
我想要的返回结果如下:
$`games`
# A tibble: 6 x 2
index player
<dbl> <chr>
1 1 John
2 2 Sam
3 3 Mary
4 1 AA
5 2 BB
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
<dbl> <chr>
1 1 hot
2 2 cold
3 3 rainy
4 1 cold
5 2 rainy
6 3 hot
$cars
# A tibble: 3 x 2
index car
<dbl> <chr>
1 1 honda
2 2 toyota
3 3 bmw
$sport
index interest
1 1 swim
2 2 soccer
3 3 rugby
编辑:我遇到过 list2 中包含 data.frame sport(而 list1 中没有)的情况。
您可以使用 purrr 来帮助操作列表。我添加了 stringsAsFactors = FALSE,只是为了能够绑定 data.frame。如果您已经在使用 tibble,则不会有此问题。
- 我创建了一个由列表组成的列表。transpose 会按名称重新组合其中的元素。基本上,x[[1]][[2]] 等价于 transpose(x)[[2]][[1]]。
- 我使用 map 遍历列表,并使用 dplyr::bind_rows 合并得到结果。
library(purrr)

# Text columns are kept as character per data frame through the
# `stringsAsFactors = FALSE` argument rather than by changing the
# session-wide `stringsAsFactors` option, so nothing outside this example
# is affected.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list1 <- list(games = games, weather = weather, cars = cars)

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
list2 <- list(games = games, weather = weather)

list(list1, list2) %>%
  # regroup the elements that share a name
  transpose() %>%
  # bind the data frames of each group row-wise
  map(dplyr::bind_rows)
#> $games
#> index player
#> 1 1 John
#> 2 2 Sam
#> 3 3 Mary
#> 4 1 AA
#> 5 2 BB
#> 6 3 CC
#>
#> $weather
#> index temperature
#> 1 1 hot
#> 2 2 cold
#> 3 3 rainy
#> 4 1 cold
#> 5 2 rainy
#> 6 3 hot
#>
#> $cars
#> index car
#> 1 1 honda
#> 2 2 toyota
#> 3 3 bmw
由 reprex package (v0.2.1) 创建于 2018-11-04
如果第一个列表没有包含您想要的所有元素,则需要在 transpose 中提供 .names 参数。参见 help("transpose", package = "purrr")。
我为此建立了一个例子。
library(purrr)

# Text columns are kept as character per data frame through the
# `stringsAsFactors = FALSE` argument rather than by changing the
# session-wide `stringsAsFactors` option.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy"),
  stringsAsFactors = FALSE
)
list1 <- list(games = games, weather = weather)

games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC"),
  stringsAsFactors = FALSE
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot"),
  stringsAsFactors = FALSE
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw"),
  stringsAsFactors = FALSE
)
list2 <- list(games = games, weather = weather, cars = cars)

all_list <- list(list1, list2)

# Union of the element names over every list; here the first list lacks
# "cars", so the names are passed to transpose() explicitly.
all_names <- all_list %>%
  map(names) %>%
  reduce(union)

# Reuse `all_list` instead of rebuilding list(list1, list2).
all_list %>%
  # regroup the elements that share a name
  transpose(.names = all_names) %>%
  # bind the data frames of each group row-wise
  map(dplyr::bind_rows)
#> $games
#> index player
#> 1 1 John
#> 2 2 Sam
#> 3 3 Mary
#> 4 1 AA
#> 5 2 BB
#> 6 3 CC
#>
#> $weather
#> index temperature
#> 1 1 hot
#> 2 2 cold
#> 3 3 rainy
#> 4 1 cold
#> 5 2 rainy
#> 6 3 hot
#>
#> $cars
#> index car
#> 1 1 honda
#> 2 2 toyota
#> 3 3 bmw
由 reprex package (v0.2.1) 创建于 2018-11-04
使用 lapply() 有一个简单的方法。
# For every element name used by list1, list2, ... stack the matching data
# frames and drop duplicated rows. The result is an unnamed list.
lapply(
  # The pattern is anchored so that only objects named list1, list2, ...
  # are inspected, not every object whose name merely contains "list".
  unique(unlist(lapply(mget(ls(pattern = "^list[0-9]+$")), names))),
  # `[[` gives NULL for a name a list lacks; rbind() ignores those NULLs.
  function(x) unique(rbind(list1[[x]], list2[[x]], list3[[x]]))
)
使用 setNames() 和 dplyr::as_tibble 可以得到列表元素名称和 tibble 标题,像这样:
# Gather the workspace lists named list1, list2, ... into one list of lists;
# `Lol` has to exist before its element names can be collected.
Lol <- mget(ls(pattern = "^list[0-9]+$"))
nms <- unique(unlist(lapply(Lol, names)))

# One tibble per element name, named accordingly.
setNames(
  lapply(nms, function(x) {
    # Take element `x` from every list (NULL where a list lacks it), stack
    # the rows and drop duplicates. unname() stops rbind() from deriving
    # row names from the list names.
    stacked <- do.call(rbind, unname(lapply(Lol, `[[`, x)))
    dplyr::as_tibble(unique(stacked))
  }),
  nms
)
结果如下:
$`games`
# A tibble: 6 x 2
index player
* <dbl> <fct>
1 1 John
2 2 Sam
3 3 Mary
4 1 AA
5 2 BB
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
* <dbl> <fct>
1 1 hot
2 2 cold
3 3 rainy
4 1 cold
5 2 rainy
6 3 hot
$cars
# A tibble: 3 x 2
index car
* <dbl> <fct>
1 1 honda
2 2 toyota
3 3 bmw
$sport
# A tibble: 3 x 2
index interest
* <dbl> <fct>
1 1 swim
2 2 soccer
3 3 rugby
但是,如果列表的数量未知,并且假设全局环境中所有列表的名称都符合模式 "list",您可以采用以下方法。
# List of lists: every workspace object whose name starts with "list".
# ("^list" selects the same names as the former "^list+".)
Lol <- mget(ls(pattern = "^list"))

# Merge data frames that carry the same name across several named lists.
#
# z: a list of named lists of data frames.
#
# Returns a named list holding one data frame per distinct element name.
# Rows come from a full outer merge(), so their order is the one merge()
# produces rather than the input order, and rows equal in every shared
# column are matched into a single row. An input without any data frame
# gives an empty list.
mergeFun <- function(z) {
  # Tag every data frame with the name it has in its parent list.
  # seq_along() keeps an empty sub-list from being indexed at all.
  l1 <- lapply(z, function(y) {
    lapply(seq_along(y), function(x) cbind(y[[x]], list = names(y)[x]))
  })
  l2 <- unlist(l1, recursive = FALSE)  # unnest lists
  if (length(l2) == 0L) {
    return(list())
  }
  l3 <- Reduce(function(...) merge(..., all = TRUE), l2)  # merge list
  l4 <- split(l3, l3$list)  # one data frame per element name
  l5 <- lapply(l4, function(w) {
    # Drop the tag column by name: its position depends on which columns
    # the merged data frames had in common, so it is not always second.
    w <- w[, setdiff(names(w), "list"), drop = FALSE]
    Filter(function(v) !all(is.na(v)), w)  # delete all-NA columns
  })
  lapply(l5, function(u) `rownames<-`(u, NULL))  # reset row names
}
如果需要 tibble,请执行 lapply(mergeFun(Lol), dplyr::as_tibble);否则只需执行 mergeFun(Lol)。
结果如下:
> lapply(mergeFun(Lol), dplyr::as_tibble)
$`games`
# A tibble: 6 x 2
index player
<dbl> <fct>
1 1 John
2 1 AA
3 2 Sam
4 2 BB
5 3 Mary
6 3 CC
$weather
# A tibble: 6 x 2
index temperature
<dbl> <fct>
1 1 cold
2 1 hot
3 2 cold
4 2 rainy
5 3 hot
6 3 rainy
$cars
# A tibble: 3 x 2
index car
<dbl> <fct>
1 1 honda
2 2 toyota
3 3 bmw
$sport
# A tibble: 3 x 2
index interest
<dbl> <fct>
1 1 swim
2 2 soccer
3 3 rugby
数据
# Example data: three named lists of 3-row data frames. Text columns are
# factors whose levels are listed explicitly in alphabetical order.
list1 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(
      c("John", "Sam", "Mary"),
      levels = c("John", "Mary", "Sam")
    )
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("hot", "cold", "rainy"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  cars = data.frame(
    index = c(1, 2, 3),
    car = factor(
      c("honda", "toyota", "bmw"),
      levels = c("bmw", "honda", "toyota")
    )
  )
)

list2 <- list(
  games = data.frame(
    index = c(1, 2, 3),
    player = factor(c("AA", "BB", "CC"), levels = c("AA", "BB", "CC"))
  ),
  weather = data.frame(
    index = c(1, 2, 3),
    temperature = factor(
      c("cold", "rainy", "hot"),
      levels = c("cold", "hot", "rainy")
    )
  ),
  sport = data.frame(
    index = c(1, 2, 3),
    interest = factor(
      c("swim", "soccer", "rugby"),
      levels = c("rugby", "soccer", "swim")
    )
  )
)

# list3 holds the same "games" and "weather" data frames as list2.
list3 <- list2[c("games", "weather")]