将嵌套列表转换为数据框

Transform a nested list to dataframe

I have a list of items; each item has two elements: a numeric vector (`sales`) and a character string (`model`). We generate the list like this:

# Build the nested input: one entry per product, each holding its numeric
# sales vector and the name of the model used for that product.
My_list <- list(
  product1 = list(sales = c(1, 2, 3), model = "arima"),
  product2 = list(sales = c(4, 5, 6), model = "prophet")
)

This is the desired output shape

# Expected result: one row per product, one column per sales value.
df1 <- data.frame(product = "product1", sales1 = 1, sales2 = 2, sales3 = 3)
df2 <- data.frame(product = "product2", sales1 = 4, sales2 = 5, sales3 = 6)
# Stack the two single-row data frames into the target table.
solution <- do.call(rbind, list(df1, df2))

I have tried something like this

# First attempt: convert every element of each product to a data frame and
# row-bind them per product. NOTE: this value is overwritten by the next line.
solution <- lapply(My_list, function(x) do.call(rbind, lapply(x, as.data.frame)))
# Second attempt: cbind each product name onto its list entry, then row-bind
# the pieces; this replaces the value assigned above.
solution <- do.call(rbind, Map(cbind, product = names(My_list), My_list))
---

这是一个data.table解决方案。我在下面的代码中添加了解释和中间结果作为注释...

library(data.table)
# Bind the per-product lists into one long table; the list names become the
# "product" id column and each sales vector is spread over several rows.
DT <- rbindlist(My_list, idcol = "product")
#     product sales   model
# 1: product1     1   arima
# 2: product1     2   arima
# 3: product1     3   arima
# 4: product2     4 prophet
# 5: product2     5 prophet
# 6: product2     6 prophet

# Number the rows within each product; this index is the column key used
# when casting to wide format below.
DT[, row_id := rowid(product)]
#     product sales   model row_id
# 1: product1     1   arima      1
# 2: product1     2   arima      2
# 3: product1     3   arima      3
# 4: product2     4 prophet      1
# 5: product2     5 prophet      2
# 6: product2     6 prophet      3

# Cast to wide format on the integer row_id so the columns stay in numeric
# order. Casting on paste0("sales", row_id) orders the columns as text, which
# puts sales10 before sales2 once a product has ten or more values.
wide <- dcast(DT, product ~ row_id, value.var = "sales")
# Prefix the generated column names ("1", "2", ...) with "sales".
sales_cols <- setdiff(names(wide), "product")
setnames(wide, sales_cols, paste0("sales", sales_cols))
wide
#     product sales1 sales2 sales3
# 1: product1      1      2      3
# 2: product2      4      5      6

这里是基本的 R 解决方案:

# Pull the "sales" element of every product into a one-column data frame,
# bind those columns side by side, then transpose so that each product
# becomes one row of the resulting matrix.
sales <- t(do.call(
  cbind,
  lapply(My_list, function(x) data.frame(x[names(x) == "sales"]))
))

# Label the rows with the product names.
rownames(sales) <- names(My_list)

# Label the columns sales1, sales2, ...; seq_len() is used instead of
# 1:ncol(sales), which would yield c(1, 0) for a zero-column result.
colnames(sales) <- paste0("sales", seq_len(ncol(sales)))
sales

         sales1 sales2 sales3
product1      1      2      3
product2      4      5      6

如果您需要一个带有 product 列的 data.frame:

# Convert `sales` to a data frame, copy the product names from the row names
# into a regular column, then reset the row names to 1..n.
sales <- data.frame(sales)
sales$product <- rownames(sales)
# seq_len() instead of 1:nrow(sales), which would yield c(1, 0) for zero rows.
rownames(sales) <- seq_len(nrow(sales))
sales
  sales1 sales2 sales3  product
1      1      2      3 product1
2      4      5      6 product2

基本 R 选项

# Stack each product's sales vector as one row of a matrix. The element is
# selected by name rather than by dropping position 2, and lapply() is used
# because its return type does not depend on the input shape.
sales_matrix <- do.call(rbind, lapply(My_list, `[[`, "sales"))

# Put the product names in front; row.names = NULL keeps the default 1..n
# row names instead of inheriting the matrix row names.
solution <- data.frame(Product = names(My_list), sales_matrix, row.names = NULL)

# Name the sales columns Sale1, Sale2, ...
names(solution)[-1] <- paste0("Sale", seq_len(ncol(sales_matrix)))

这给出了

> solution
   Product Sale1 Sale2 Sale3
1 product1     1     2     3
2 product2     4     5     6

这是一个基于 R 的简单版本,

# Flatten every value into a single vector, then refill it row by row so
# that each product occupies one row.
flat_values <- unlist(My_list)
as.data.frame(matrix(flat_values, nrow = length(My_list), byrow = TRUE))
#  V1 V2 V3      V4
#1  1  2  3   arima
#2  4  5  6 prophet

您可以轻松地对其进行修改以得到预期的输出(更改列名,并将 V4 列替换为 product1、product2),即

# Save the data frame. unlist() mixes the numeric sales with the model
# strings, so every column starts out as text.
d1 <- as.data.frame(matrix(unlist(My_list), nrow = length(My_list), byrow = TRUE))
# Set the column names: sales1..salesN, with the last column as Product.
d1 <- setNames(d1, c(paste0("sales", seq_len(ncol(d1) - 1)), "Product"))
# Convert the sales columns back to numeric; as.character() first so the
# conversion is also correct if the columns were created as factors.
sales_cols <- names(d1) != "Product"
d1[sales_cols] <- lapply(
  d1[sales_cols],
  function(column) as.numeric(as.character(column))
)
# Replace the model names under `Product` with the real product names taken
# from the list, instead of fabricating labels from the row number.
d1$Product <- names(My_list)

d1
#  sales1 sales2 sales3  Product
#1      1      2      3 product1
#2      4      5      6 product2

在我看来一个非常直观且易于维护的方法:

# Flatten each product's "sales" entry into a named vector (sales1, sales2,
# ...), stack the vectors as rows, and put the product names in front.
sales_by_product <- lapply(My_list, function(entry) unlist(entry["sales"]))
data.frame(
  product = names(My_list),
  do.call(rbind, sales_by_product),
  row.names = NULL
)

   product sales1 sales2 sales3
1 product1      1      2      3
2 product2      4      5      6

它使用 lapply 遍历这个嵌套列表,并对每个 sales 条目调用 unlist(会自动为各元素命名)。然后通过 do.call 调用 rbind,将这些向量按行合并在一起。


将模型名称添加到表中的一种快速方法是使用 rapply,它默认会对结果执行 unlist(参见 ?rapply 及其参数 how):

# Collect the character leaves of the nested list (the model names); rapply()
# unlists its result by default.
model_names <- rapply(My_list, f = paste, classes = "character")
# One named sales vector per product, stacked as rows below.
sales_by_product <- lapply(My_list, function(entry) unlist(entry["sales"]))
data.frame(
  model = model_names,
  product = names(My_list),
  do.call(rbind, sales_by_product),
  row.names = NULL
)

    model  product sales1 sales2 sales3
1   arima product1      1      2      3
2 prophet product2      4      5      6

您可以在 lapply 中使用 `[[` 从 My_list 的每个元素中取出第一项 sales,然后通过 do.call 调用 rbind 将它们合并。最后再为结果设置 colnames:

# Take the first element of every product entry and stack the resulting
# vectors as the rows of a matrix.
first_items <- lapply(My_list, function(entry) entry[[1]])
tt <- do.call(rbind, first_items)
#tt <- do.call(rbind, lapply(My_list, "[[", "sales")) #Alternative
# Name the columns sales1, sales2, ...
colnames(tt) <- sprintf("sales%d", seq_len(ncol(tt)))
tt
#         sales1 sales2 sales3
#product1      1      2      3
#product2      4      5      6