如何使用 tidyverse 在列表列上 set_names:tibble、purrr、dplyr
How to set_names on a list column using the tidyverse: tibble, purrr, dplyr
简短版本,我希望能够 set_names()
从 summarise()
函数返回的 "list-column" 上。因此,如果我有一个使用 range()
函数的列表列,我希望能够将名称设置为 "min" 和 "max"。
下面是详细信息和可重现的示例。
library(tidyverse)
# Consider the following:
# Collapse each `vore` group to a single row. `range()` yields c(min, max),
# and wrapping it in `list()` stores that length-2 vector in a list-column.
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total))
)
#> # A tibble: 5 x 2
#> vore sleep_total_range
#> <chr> <list>
#> 1 carni <dbl [2]>
#> 2 herbi <dbl [2]>
#> 3 insecti <dbl [2]>
#> 4 omni <dbl [2]>
#> 5 <NA> <dbl [2]>
# I would like to be able to identify and label (i.e., set_names()) for the
# min and max columns
# Fail 1: No Column, No Labels
# Attempt 1: unnesting the raw list-column gives two rows per `vore`, but
# nothing marks which row is the minimum and which is the maximum.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 2
#> vore sleep_total_range
#> <chr> <dbl>
#> 1 carni 2.7
#> 2 carni 19.4
#> 3 herbi 1.9
#> 4 herbi 16.6
#> 5 insecti 8.4
#> 6 insecti 19.9
#> 7 omni 8
#> 8 omni 18
#> 9 <NA> 5.4
#> 10 <NA> 13.7
# Fail 2: Column, but labels are not correct
# Attempt 2: `enframe()` adds a label column, but because the range vector is
# unnamed the labels are just the positions 1 and 2.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>% enframe(name = "range_col")
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <int> <dbl>
#> 1 carni 1 2.7
#> 2 carni 2 19.4
#> 3 herbi 1 1.9
#> 4 herbi 2 16.6
#> 5 insecti 1 8.4
#> 6 insecti 2 19.9
#> 7 omni 1 8
#> 8 omni 2 18
#> 9 <NA> 1 5.4
#> 10 <NA> 2 13.7
期望的结果
# Success: This is my desired result/output, but it feels verbose,
# and not very "tidyverse / purrr"
# Working but verbose version: enframe the range, unnest it, then translate
# the positional labels (1 / 2) into "min" / "max".
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>% enframe(name = "range_col")
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range) %>%
  mutate(
    # Position 1 of `range()` is the minimum; the only other position is the
    # maximum.
    range_col = ifelse(range_col == 1, "min", "max")
  )
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <chr> <dbl>
#> 1 carni min 2.7
#> 2 carni max 19.4
#> 3 herbi min 1.9
#> 4 herbi max 16.6
#> 5 insecti min 8.4
#> 6 insecti max 19.9
#> 7 omni min 8
#> 8 omni max 18
#> 9 <NA> min 5.4
#> 10 <NA> max 13.7
关闭但尚未...
# I thought I was close with this
# Keep the summarised tibble so that one list-column element can be inspected.
temp <-
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total))
)
# On a single element (a length-2 numeric vector) naming works: the two names
# line up with the two values, and `enframe()` turns them into a `name` column.
temp$sleep_total_range[[1]] %>% set_names(c("min", "max")) %>% enframe()
#> # A tibble: 2 x 2
#> name value
#> <chr> <dbl>
#> 1 min 2.7
#> 2 max 19.4
# But this obviously does not work...
# Intentionally failing attempt: here `set_names()` is applied to the result
# of `list(...)`, a length-1 list, rather than to the length-2 vector inside
# it, so the two names cannot be matched (see the error printed below).
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total)) %>%
set_names(c("min", "max")) %>%
enframe()
)
#> `nm` must be `NULL` or a character vector the same length as `x`
由 reprex package (v0.3.0)
于 2019-07-18 创建
最简单的选项是 group_by
vore
并计算每个组的 min
和 max
。
但是,如果您想继续使用 range
,一个选择是 unnest
并为每个 vore
.
重复 c("min", "max")
library(tidyverse)
# Expand the range list-column to two rows per `vore`, then label those rows
# within each group.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total))) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range) %>%
  group_by(vore) %>%
  # `range()` returns c(min, max), so each group's two rows are in this order.
  mutate(column = c("min", "max"))
# vore sleep_total_range column
# <chr> <dbl> <chr>
# 1 NA 5.4 min
# 2 NA 13.7 max
# 3 carni 2.7 min
# 4 carni 19.4 max
# 5 herbi 1.9 min
# 6 herbi 16.6 max
# 7 insecti 8.4 min
# 8 insecti 19.9 max
# 9 omni 8 min
#10 omni 18 max
或者在unnest之前添加第二个列表:
# Add a parallel list-column of labels, then unnest both columns together so
# each value is paired with its label.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # A length-1 list is recycled, so every row gets the same c("min", "max").
  mutate(column = list(c("min", "max"))) %>%
  # Both list-columns hold length-2 vectors, so they are unnested in parallel;
  # tidyr >= 1.0.0 warns when `cols` is omitted.
  unnest(cols = c(sleep_total_range, column))
虽然变异 c("min","max")
是这种情况下更好的选择,但如果您想避免这种情况,您可以这样做:
library(tidyverse)
# Store a *named* vector per group, then turn each vector into a small data
# frame whose second column carries the names.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(c(min = min(sleep_total), max = max(sleep_total)))
  ) %>%
  mutate(
    sleep_total_range = map(
      sleep_total_range,
      ~ data.frame(sleep_total_range = .x, dcol = names(.x))
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore sleep_total_range dcol
#> <chr> <dbl> <fct>
#> 1 <NA> 5.4 min
#> 2 <NA> 13.7 max
#> 3 carni 2.7 min
#> 4 carni 19.4 max
#> 5 herbi 1.9 min
#> 6 herbi 16.6 max
#> 7 insecti 8.4 min
#> 8 insecti 19.9 max
#> 9 omni 8 min
#> 10 omni 18 max
如果我们创建一个tibble
,我们可以得到它作为两列
library(tidyverse)
# Store a one-row tibble with `min` and `max` columns per group; unnesting it
# spreads the range across two columns instead of two rows.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      setNames(as.list(range(sleep_total)), c("min", "max")) %>% as_tibble()
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
-用packageVersion('dplyr')#
[1] ‘0.8.99.9000’
测试代码
# Same idea written as one linear pipe: range -> named list -> one-row tibble,
# stored in a list-column and then unnested into `min` / `max` columns.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>%
        as.list() %>%
        setNames(c("min", "max")) %>%
        as_tibble()
    )
  ) %>%
  # The column is passed explicitly; newer versions warn otherwise.
  unnest(c(sleep_total_range))
# A tibble: 5 x 3
# vore min max
# <chr> <dbl> <dbl>
#1 carni 2.7 19.4
#2 herbi 1.9 16.6
#3 insecti 8.4 19.9
#4 omni 8 18
#5 <NA> 5.4 13.7
受@akrun 的启发,您也可以在此处采取一些非常规的 double-unnest 方法:
# Double-unnest variant: enframing a *named list* gives a tibble whose `value`
# column is itself a list-column, so two unnest steps are needed.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as.list(range(sleep_total)) %>%
        set_names(c("min", "max")) %>%
        enframe()
    )
  ) %>%
  # First unnest: expands the per-group tibble into `name` / `value` rows.
  unnest(cols = sleep_total_range) %>%
  # Second unnest: flattens the `value` list-column into plain doubles.
  # Naming `cols` in both calls avoids the tidyr >= 1.0.0 warning.
  unnest(cols = value)
# A tibble: 10 x 3
vore name value
<chr> <chr> <dbl>
1 carni min 2.7
2 carni max 19.4
3 herbi min 1.9
4 herbi max 16.6
5 insecti min 8.4
6 insecti max 19.9
7 omni min 8
8 omni max 18
9 NA min 5.4
10 NA max 13.7
简短版本,我希望能够 set_names()
从 summarise()
函数返回的 "list-column" 上。因此,如果我有一个使用 range()
函数的列表列,我希望能够将名称设置为 "min" 和 "max"。
下面是详细信息和可重现的示例。
library(tidyverse)
# Consider the following:
# Collapse each `vore` group to a single row. `range()` yields c(min, max),
# and wrapping it in `list()` stores that length-2 vector in a list-column.
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total))
)
#> # A tibble: 5 x 2
#> vore sleep_total_range
#> <chr> <list>
#> 1 carni <dbl [2]>
#> 2 herbi <dbl [2]>
#> 3 insecti <dbl [2]>
#> 4 omni <dbl [2]>
#> 5 <NA> <dbl [2]>
# I would like to be able to identify and label (i.e., set_names()) for the
# min and max columns
# Fail 1: No Column, No Labels
# Attempt 1: unnesting the raw list-column gives two rows per `vore`, but
# nothing marks which row is the minimum and which is the maximum.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 2
#> vore sleep_total_range
#> <chr> <dbl>
#> 1 carni 2.7
#> 2 carni 19.4
#> 3 herbi 1.9
#> 4 herbi 16.6
#> 5 insecti 8.4
#> 6 insecti 19.9
#> 7 omni 8
#> 8 omni 18
#> 9 <NA> 5.4
#> 10 <NA> 13.7
# Fail 2: Column, but labels are not correct
# Attempt 2: `enframe()` adds a label column, but because the range vector is
# unnamed the labels are just the positions 1 and 2.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>% enframe(name = "range_col")
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <int> <dbl>
#> 1 carni 1 2.7
#> 2 carni 2 19.4
#> 3 herbi 1 1.9
#> 4 herbi 2 16.6
#> 5 insecti 1 8.4
#> 6 insecti 2 19.9
#> 7 omni 1 8
#> 8 omni 2 18
#> 9 <NA> 1 5.4
#> 10 <NA> 2 13.7
期望的结果
# Success: This is my desired result/output, but it feels verbose,
# and not very "tidyverse / purrr"
# Working but verbose version: enframe the range, unnest it, then translate
# the positional labels (1 / 2) into "min" / "max".
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>% enframe(name = "range_col")
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range) %>%
  mutate(
    # Position 1 of `range()` is the minimum; the only other position is the
    # maximum.
    range_col = ifelse(range_col == 1, "min", "max")
  )
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <chr> <dbl>
#> 1 carni min 2.7
#> 2 carni max 19.4
#> 3 herbi min 1.9
#> 4 herbi max 16.6
#> 5 insecti min 8.4
#> 6 insecti max 19.9
#> 7 omni min 8
#> 8 omni max 18
#> 9 <NA> min 5.4
#> 10 <NA> max 13.7
关闭但尚未...
# I thought I was close with this
# Keep the summarised tibble so that one list-column element can be inspected.
temp <-
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total))
)
# On a single element (a length-2 numeric vector) naming works: the two names
# line up with the two values, and `enframe()` turns them into a `name` column.
temp$sleep_total_range[[1]] %>% set_names(c("min", "max")) %>% enframe()
#> # A tibble: 2 x 2
#> name value
#> <chr> <dbl>
#> 1 min 2.7
#> 2 max 19.4
# But this obviously does not work...
# Intentionally failing attempt: here `set_names()` is applied to the result
# of `list(...)`, a length-1 list, rather than to the length-2 vector inside
# it, so the two names cannot be matched (see the error printed below).
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total)) %>%
set_names(c("min", "max")) %>%
enframe()
)
#> `nm` must be `NULL` or a character vector the same length as `x`
由 reprex package (v0.3.0)
于 2019-07-18 创建最简单的选项是 group_by
vore
并计算每个组的 min
和 max
。
但是,如果您想继续使用 range
,一个选择是 unnest
并为每个 vore
.
c("min", "max")
library(tidyverse)
# Expand the range list-column to two rows per `vore`, then label those rows
# within each group.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total))) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range) %>%
  group_by(vore) %>%
  # `range()` returns c(min, max), so each group's two rows are in this order.
  mutate(column = c("min", "max"))
# vore sleep_total_range column
# <chr> <dbl> <chr>
# 1 NA 5.4 min
# 2 NA 13.7 max
# 3 carni 2.7 min
# 4 carni 19.4 max
# 5 herbi 1.9 min
# 6 herbi 16.6 max
# 7 insecti 8.4 min
# 8 insecti 19.9 max
# 9 omni 8 min
#10 omni 18 max
或者在unnest之前添加第二个列表:
# Add a parallel list-column of labels, then unnest both columns together so
# each value is paired with its label.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  # A length-1 list is recycled, so every row gets the same c("min", "max").
  mutate(column = list(c("min", "max"))) %>%
  # Both list-columns hold length-2 vectors, so they are unnested in parallel;
  # tidyr >= 1.0.0 warns when `cols` is omitted.
  unnest(cols = c(sleep_total_range, column))
虽然变异 c("min","max")
是这种情况下更好的选择,但如果您想避免这种情况,您可以这样做:
library(tidyverse)
# Store a *named* vector per group, then turn each vector into a small data
# frame whose second column carries the names.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(c(min = min(sleep_total), max = max(sleep_total)))
  ) %>%
  mutate(
    sleep_total_range = map(
      sleep_total_range,
      ~ data.frame(sleep_total_range = .x, dcol = names(.x))
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore sleep_total_range dcol
#> <chr> <dbl> <fct>
#> 1 <NA> 5.4 min
#> 2 <NA> 13.7 max
#> 3 carni 2.7 min
#> 4 carni 19.4 max
#> 5 herbi 1.9 min
#> 6 herbi 16.6 max
#> 7 insecti 8.4 min
#> 8 insecti 19.9 max
#> 9 omni 8 min
#> 10 omni 18 max
如果我们创建一个tibble
library(tidyverse)
# Store a one-row tibble with `min` and `max` columns per group; unnesting it
# spreads the range across two columns instead of two rows.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      setNames(as.list(range(sleep_total)), c("min", "max")) %>% as_tibble()
    )
  ) %>%
  # Name the list-column explicitly: tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = sleep_total_range)
-用packageVersion('dplyr')#
[1] ‘0.8.99.9000’
# Same idea written as one linear pipe: range -> named list -> one-row tibble,
# stored in a list-column and then unnested into `min` / `max` columns.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>%
        as.list() %>%
        setNames(c("min", "max")) %>%
        as_tibble()
    )
  ) %>%
  # The column is passed explicitly; newer versions warn otherwise.
  unnest(c(sleep_total_range))
# A tibble: 5 x 3
# vore min max
# <chr> <dbl> <dbl>
#1 carni 2.7 19.4
#2 herbi 1.9 16.6
#3 insecti 8.4 19.9
#4 omni 8 18
#5 <NA> 5.4 13.7
受@akrun 的启发,您也可以在此处采取一些非常规的 double-unnest 方法:
# Double-unnest variant: enframing a *named list* gives a tibble whose `value`
# column is itself a list-column, so two unnest steps are needed.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as.list(range(sleep_total)) %>%
        set_names(c("min", "max")) %>%
        enframe()
    )
  ) %>%
  # First unnest: expands the per-group tibble into `name` / `value` rows.
  unnest(cols = sleep_total_range) %>%
  # Second unnest: flattens the `value` list-column into plain doubles.
  # Naming `cols` in both calls avoids the tidyr >= 1.0.0 warning.
  unnest(cols = value)
# A tibble: 10 x 3
vore name value
<chr> <chr> <dbl>
1 carni min 2.7
2 carni max 19.4
3 herbi min 1.9
4 herbi max 16.6
5 insecti min 8.4
6 insecti max 19.9
7 omni min 8
8 omni max 18
9 NA min 5.4
10 NA max 13.7