如何使用 tidyverse 在列表列上 set_names:tibble、purrr、dplyr

How to set_names on a list column using the tidyverse: tibble, purrr, dplyr

简短版本:我希望能够在 summarise() 函数返回的“列表列”(list-column)上使用 set_names()。例如,如果我有一个由 range() 函数生成的列表列,我希望能够将其中元素的名称设置为 "min" 和 "max"。

下面是详细信息和可重现的示例。

library(tidyverse)

# Baseline: collapse each `vore` group's sleep_total range into a list-column
# holding one length-2 numeric vector per group.
summarise(
  group_by(msleep, vore),
  sleep_total_range = list(range(sleep_total))
)
#> # A tibble: 5 x 2
#>   vore    sleep_total_range
#>   <chr>   <list>           
#> 1 carni   <dbl [2]>        
#> 2 herbi   <dbl [2]>        
#> 3 insecti <dbl [2]>        
#> 4 omni    <dbl [2]>        
#> 5 <NA>    <dbl [2]>

# I would like to be able to identify and label (i.e., set_names()) for the 
# min and max columns

# Fail 1: No Column, No Labels
# Unnesting the bare numeric vectors yields two rows per group, but nothing
# identifies which row is the minimum and which is the maximum.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 2
#>    vore    sleep_total_range
#>    <chr>               <dbl>
#>  1 carni                 2.7
#>  2 carni                19.4
#>  3 herbi                 1.9
#>  4 herbi                16.6
#>  5 insecti               8.4
#>  6 insecti              19.9
#>  7 omni                  8  
#>  8 omni                 18  
#>  9 <NA>                  5.4
#> 10 <NA>                 13.7

# Fail 2: Column, but labels are not correct
# enframe() on an unnamed vector uses the positions 1 and 2 as the name
# column, so `range_col` holds integers rather than "min"/"max".
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#>    vore    range_col value
#>    <chr>       <int> <dbl>
#>  1 carni           1   2.7
#>  2 carni           2  19.4
#>  3 herbi           1   1.9
#>  4 herbi           2  16.6
#>  5 insecti         1   8.4
#>  6 insecti         2  19.9
#>  7 omni            1   8  
#>  8 omni            2  18  
#>  9 <NA>            1   5.4
#> 10 <NA>            2  13.7

期望的结果

# Success: This is my desired result/output, but it feels verbose,
# and not very "tidyverse / purrr"
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range) %>%
  mutate(
    # range() returns c(min, max), so position 1 is the minimum and
    # position 2 is the maximum.
    range_col = ifelse(range_col == 1, "min", "max")
  )
#> # A tibble: 10 x 3
#>    vore    range_col value
#>    <chr>   <chr>     <dbl>
#>  1 carni   min         2.7
#>  2 carni   max        19.4
#>  3 herbi   min         1.9
#>  4 herbi   max        16.6
#>  5 insecti min         8.4
#>  6 insecti max        19.9
#>  7 omni    min         8  
#>  8 omni    max        18  
#>  9 <NA>    min         5.4
#> 10 <NA>    max        13.7

接近了,但还不行……

# Near miss: labelling works when applied to a single extracted element.
temp <- summarise(
  group_by(msleep, vore),
  sleep_total_range = list(range(sleep_total))
)

# Take the first group's range vector, name its two values, and turn the
# named vector into a name/value tibble.
enframe(set_names(temp$sleep_total_range[[1]], c("min", "max")))
#> # A tibble: 2 x 2
#>   name  value
#>   <chr> <dbl>
#> 1 min     2.7
#> 2 max    19.4

# But this obviously does not work...
# Inside summarise(), set_names() receives the one-element list built by
# list(range(sleep_total)), not the length-2 range vector inside it, so the
# two names c("min", "max") do not match its length (see the error below).
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total)) %>% 
        set_names(c("min", "max")) %>% 
        enframe()
  )
#> `nm` must be `NULL` or a character vector the same length as `x`

由 reprex 包 (v0.3.0) 于 2019-07-18 创建

最简单的选项是按 vore 进行 group_by,然后分别计算每个组的 min 和 max。

但是,如果您想继续使用 range,一个选择是先 unnest,然后为每个 vore 重复 c("min", "max"):
library(tidyverse)

# Unnest the range vectors, then label the two rows of every `vore` group.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total))) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range) %>%
  # Regroup so that c("min", "max") is assigned once per group (each group
  # has exactly two rows after unnesting a range()).
  group_by(vore) %>%
  mutate(column = c("min", "max")) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()


#   vore    sleep_total_range column
#   <chr>               <dbl> <chr> 
# 1 NA                    5.4 min   
# 2 NA                   13.7 max   
# 3 carni                 2.7 min   
# 4 carni                19.4 max   
# 5 herbi                 1.9 min   
# 6 herbi                16.6 max   
# 7 insecti               8.4 min   
# 8 insecti              19.9 max   
# 9 omni                  8   min   
#10 omni                 18   max   

或者在unnest之前添加第二个列表:

# Add a parallel list-column of labels and unnest both columns together so
# each range value is paired with its "min"/"max" label.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  mutate(column = list(c("min", "max"))) %>%
  # Both list-columns are listed in `cols` so they are unnested in parallel;
  # calling unnest() without `cols` is deprecated since tidyr 1.0.0.
  unnest(cols = c(sleep_total_range, column))

虽然在这种情况下用 mutate 添加 c("min", "max") 是更好的选择,但如果您想避免这样做,也可以这样写:

library(tidyverse)

# Build a named c(min = , max = ) vector per group, expand each vector into a
# two-column data frame (value + its name), then unnest those data frames.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(c(min = min(sleep_total),
                                       max = max(sleep_total)))) %>%
  mutate(sleep_total_range = map(sleep_total_range,
                                 ~data.frame(sleep_total_range = .x, dcol = names(.x)))) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range)

#> # A tibble: 10 x 3
#>    vore    sleep_total_range dcol 
#>    <chr>               <dbl> <fct>
#>  1 <NA>                  5.4 min  
#>  2 <NA>                 13.7 max  
#>  3 carni                 2.7 min  
#>  4 carni                19.4 max  
#>  5 herbi                 1.9 min  
#>  6 herbi                16.6 max  
#>  7 insecti               8.4 min  
#>  8 insecti              19.9 max  
#>  9 omni                  8   min  
#> 10 omni                 18   max

如果我们为每个组创建一个 tibble,就可以把结果展开成两列:
library(tidyverse)
# Store a one-row tibble with `min` and `max` columns per group; unnesting it
# spreads the range across two columns instead of two rows.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      setNames(as.list(range(sleep_total)), c("min", "max")) %>%
        as_tibble()
    )
  ) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range)

使用 packageVersion('dplyr')(# [1] ‘0.8.99.9000’)测试上述代码:
# Same approach with the list-column named explicitly in unnest().
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as_tibble(setNames(as.list(range(sleep_total)), c("min", "max")))
    )
  ) %>%
  unnest(c(sleep_total_range)) # changed behavior or else a warning
# A tibble: 5 x 3
#  vore      min   max
#  <chr>   <dbl> <dbl>
#1 carni     2.7  19.4
#2 herbi     1.9  16.6
#3 insecti   8.4  19.9
#4 omni      8    18  
#5 <NA>      5.4  13.7

受@akrun 的启发,您也可以在此处采取一些非常规的 double-unnest 方法:

# Double unnest: enframe() on a named *list* yields a `name` column plus a
# list-column `value`, so a second unnest is needed to flatten `value`.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as.list(range(sleep_total)) %>%
        set_names(c("min", "max")) %>%
        enframe()
    )
  ) %>%
  # Each unnest() names its column explicitly: calling unnest() without
  # `cols` is deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = sleep_total_range) %>%
  unnest(cols = value)

# A tibble: 10 x 3
#    vore    name  value
#    <chr>   <chr> <dbl>
#  1 carni   min     2.7
#  2 carni   max    19.4
#  3 herbi   min     1.9
#  4 herbi   max    16.6
#  5 insecti min     8.4
#  6 insecti max    19.9
#  7 omni    min     8  
#  8 omni    max    18  
#  9 NA      min     5.4
# 10 NA      max    13.7