R:数据以两个列表的形式存储在数据框的同一列中,想将其拆分为单独的列

R data stored in two lists on one dataframe column, want to create individual columns

数据是从 JSON 文件读入 R 的,其中 'rounds' 列是一个列表列:每个元素都是一个包含 `title` 和 `strokes` 两列的数据框。

> head(leaderboard[,20:22])
  round                         rounds strokes
1    -5 r1, r2, r3, r4, 67, 68, 67, 65     267
2    -7 r1, r2, r3, r4, 70, 70, 66, 63     269
3    -5 r1, r2, r3, r4, 72, 66, 66, 65     269
4    -7 r1, r2, r3, r4, 68, 69, 71, 63     271
5    -5 r1, r2, r3, r4, 72, 70, 65, 65     272
6    -1 r1, r2, r3, r4, 68, 69, 66, 69     272
> leaderboard$rounds[[1]]
  title strokes
1    r1      67
2    r2      68
3    r3      67
4    r4      65

我想把上面的数据变成这样:

round r1 r2 r3 r4 strokes
-5    67 68 67 65 267
-7    70 70 66 63 269

dput 函数的输出:

> dput(head(leaderboard[,20:22]))
structure(list(round = c("-5", "-7", "-5", "-7", "-5", "-1"), 
    rounds = list(structure(list(title = c("r1", "r2", "r3", 
    "r4"), strokes = c("67", "68", "67", "65")), class = "data.frame", row.names = c(NA, 
    4L)), structure(list(title = c("r1", "r2", "r3", "r4"), strokes = c("70", 
    "70", "66", "63")), class = "data.frame", row.names = c(NA, 
    4L)), structure(list(title = c("r1", "r2", "r3", "r4"), strokes = c("72", 
    "66", "66", "65")), class = "data.frame", row.names = c(NA, 
    4L)), structure(list(title = c("r1", "r2", "r3", "r4"), strokes = c("68", 
    "69", "71", "63")), class = "data.frame", row.names = c(NA, 
    4L)), structure(list(title = c("r1", "r2", "r3", "r4"), strokes = c("72", 
    "70", "65", "65")), class = "data.frame", row.names = c(NA, 
    4L)), structure(list(title = c("r1", "r2", "r3", "r4"), strokes = c("68", 
    "69", "66", "69")), class = "data.frame", row.names = c(NA, 
    4L))), strokes = c("267", "269", "269", "271", "272", "272"
    )), row.names = c(NA, 6L), class = "data.frame")

下面是一种 tidyverse 方法:先对 `rounds` 列中的每个数据框分别调用 `pivot_wider` 转成宽格式,再用 `unnest` 展开为普通列。

library(dplyr)
library(tidyr)

# Widen each nested per-round data frame on its own (one column per round
# title, holding that round's strokes), then spread the resulting one-row
# frames out into regular columns of the leaderboard.
leaderboard %>%
  mutate(
    rounds = lapply(
      rounds,
      function(per_round) {
        pivot_wider(per_round, names_from = "title", values_from = "strokes")
      }
    )
  ) %>%
  unnest(rounds)

输出

# A tibble: 6 x 6
  round r1    r2    r3    r4    strokes
  <chr> <chr> <chr> <chr> <chr> <chr>  
1 -5    67    68    67    65    267    
2 -7    70    70    66    63    269    
3 -5    72    66    66    65    269    
4 -7    68    69    71    63    271    
5 -5    72    70    65    65    272    
6 -1    68    69    66    69    272    

您也可以先展开 `rounds` 再整体转为宽格式(需要先重命名外层的 `strokes` 列,以免与嵌套数据框中的 `strokes` 列重名):

library(dplyr)
library(tidyr)

# Unnest the per-round scores to long form, then widen them into one column
# per round title (r1, r2, ...).
leaderboard %>%
  # The outer total is renamed so it does not clash with the `strokes`
  # column inside each nested data frame once `rounds` is unnested.
  rename(new_strokes = strokes) %>%
  # Tag every original row with a unique key. Without it, pivot_wider() keys
  # rows on the remaining columns, so two rows sharing the same `round` and
  # total would be merged into list-columns with a
  # "values are not uniquely identified" warning.
  mutate(.row_id = row_number()) %>%
  unnest(rounds) %>%
  pivot_wider(names_from = title, values_from = strokes) %>%
  # The helper key is only needed for the reshape; drop it from the result.
  select(-.row_id)

#  round new_strokes r1    r2    r3    r4   
#  <chr> <chr>       <chr> <chr> <chr> <chr>
#1 -5    267         67    68    67    65   
#2 -7    269         70    70    66    63   
#3 -5    269         72    66    66    65   
#4 -7    271         68    69    71    63   
#5 -5    272         72    70    65    65   
#6 -1    272         68    69    66    69