如何通过将行组合成列表来折叠小标题?

How to collapse a tibble by combining rows into a list?

作为 dplyr/tidyr 争论管道的一部分,我得到了一个 tibble,看起来像这样:

trb <-
  tibble::tribble(~person,   ~data_name,       ~data_obj,    ~some_string,
                  "chris",   "df_mtcars",      mtcars,       "abc",
                  "rachel",  "df_trees",       trees,        "efg",
                  "john",    "df_iris",        iris,         "hij",
                  "nicole",  "df_plantgrowth", PlantGrowth,  "klm",
                  "ron",     "df_women",       women,        "nop", # | notice that both ron's rows have same values
                  "ron",     "df_cars",        cars,         "nop", # | except for data_name and data_obj
                  "jillian", "df_sleep",       sleep,        "tuv")

这个table描述了6个人和每个人喜欢的数据object。令人讨厌的是,"ron" 给出了 2 个数据偏好,所以我需要以某种方式将 ron 的信息折叠成一行 trb

我的“折叠策略”是将 ron 的两个数据首选项组合在一个命名列表中,这样最终输出将是

trb_output <-
  tribble(~person,   ~.dat,                     ~some_string,
          "chris",   mtcars,                    "abc",
          "rachel",  trees,                     "efg",
          "john",    iris,                      "hij",
          "nicole",  PlantGrowth,               "klm",
          "ron",     list("df_women" = women,
                          "df_cars"  = cars),   "nop",
          "jillian", sleep,                     "tuv")

一个重要的注意事项是我必须在管道上完成这件事。即:

# demo to desired solution
trb_output  <- 
  trb %>%
  wrangle_this(...) %>%
  wrangle_that(...)

trb_output  
## # A tibble: 6 x 3
##   person  .dat             some_string
##   <chr>   <list>           <chr>      
## 1 chris   <df [32 x 11]>   abc        
## 2 rachel  <df [31 x 3]>    efg        
## 3 john    <df [150 x 5]>   hij        
## 4 nicole  <df [30 x 2]>    klm        
## 5 ron     <named list [2]> nop        
## 6 jillian <df [20 x 3]>    tuv    

仅使用管道可以做到这一点吗?

这些对你有用吗?

trb2 = trb %>%
  nest_by(
    person, some_string
  ) 


trb3 <- trb %>%
  group_by(
    person, some_string
  ) %>%
  summarise(
    dta = list(data_name = data_obj)
  )

编辑,这个?

trb4 <- trb %>%
  group_by(
    person, some_string
  ) %>%
  summarise(
    dta = ifelse(length(data_obj) > 1, list(as.list(setNames(data_obj,data_name ))), data_obj))
  )

这可能并不完美,但这似乎有效:

trb %>% 
  group_by(across(c(-data_obj, -data_name))) %>% 
  summarise(data_obj = ifelse(length(data_obj) > 1, lst(setNames(data_obj,data_name)), data_obj))

  person  some_string data_obj        
  <chr>   <chr>       <list>          
1 chris   abc         <df [32 x 11]>  
2 jillian tuv         <df [20 x 3]>   
3 john    hij         <df [150 x 5]>  
4 nicole  klm         <df [30 x 2]>   
5 rachel  efg         <df [31 x 3]>   
6 ron     nop         <named list [2]>

这里有一种使用 purrr 的方法。

library(tidyverse)

trb %>%
  group_split(person) %>%
  map_dfr(function(df) {
    if (nrow(df) > 1) {
      tibble(person = unique(df$person), .dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name)), some_string = unique(df$some_string))
    } else {
      transmute(df, person, ".dat" := data_obj, some_string)
    }
  })
#> # A tibble: 6 × 3
#>   person  .dat             some_string
#>   <chr>   <list>           <chr>      
#> 1 chris   <df [32 × 11]>   abc        
#> 2 jillian <df [20 × 3]>    tuv        
#> 3 john    <df [150 × 5]>   hij        
#> 4 nicole  <df [30 × 2]>    klm        
#> 5 rachel  <df [31 × 3]>    efg        
#> 6 ron     <named list [2]> nop

reprex package (v2.0.1)

创建于 2022-01-22

编辑:更通用的方法是使用 across 而不是在每一列中手动调用 unique

trb %>%
  group_split(person) %>%
  map_dfr(function(df) {
    if (nrow(df) > 1) {
      other_cols <- summarise(df, across(-c(data_obj, data_name), ~ unique(.)))
      bind_cols(other_cols, tibble(.dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name))))
    } else {
      select(df, -c(data_obj, data_name, some_string), ".dat" := data_obj, some_string)
    }
  })
#> # A tibble: 6 × 3
#>   person  .dat             some_string
#>   <chr>   <list>           <chr>      
#> 1 chris   <df [32 × 11]>   abc        
#> 2 jillian <df [20 × 3]>    tuv        
#> 3 john    <df [150 × 5]>   hij        
#> 4 nicole  <df [30 × 2]>    klm        
#> 5 rachel  <df [31 × 3]>    efg        
#> 6 ron     <named list [2]> nop

reprex package (v2.0.1)

创建于 2022-01-22

数据:

trb <-
    tibble::tribble(~person,   ~data_name,       ~data_obj,    ~some_string,
                    "chris",   "df_mtcars",      mtcars,       "abc",
                    "rachel",  "df_trees",       trees,        "efg",
                    "john",    "df_iris",        iris,         "hij",
                    "nicole",  "df_plantgrowth", PlantGrowth,  "klm",
                    "ron",     "df_women",       women,        "nop", # | notice that both ron's rows have same values
                    "ron",     "df_cars",        cars,         "nop", # | except for data_name and data_obj
                    "jillian", "df_sleep",       sleep,        "tuv")