如何通过将行组合成列表来折叠小标题?
How to collapse a tibble by combining rows into a list?
作为 dplyr
/tidyr
争论管道的一部分,我得到了一个 tibble,看起来像这样:
trb <-
tibble::tribble(~person, ~data_name, ~data_obj, ~some_string,
"chris", "df_mtcars", mtcars, "abc",
"rachel", "df_trees", trees, "efg",
"john", "df_iris", iris, "hij",
"nicole", "df_plantgrowth", PlantGrowth, "klm",
"ron", "df_women", women, "nop", # | notice that both ron's rows have same values
"ron", "df_cars", cars, "nop", # | except for data_name and data_obj
"jillian", "df_sleep", sleep, "tuv")
这个table描述了6个人和每个人喜欢的数据object。令人讨厌的是,"ron"
给出了 2 个数据偏好,所以我需要以某种方式将 ron 的信息折叠成一行 trb
。
我的“折叠策略”是将 ron 的两个数据首选项组合在一个命名列表中,这样最终输出将是
trb_output <-
tribble(~person, ~.dat, ~some_string,
"chris", mtcars, "abc",
"rachel", trees, "efg",
"john", iris, "hij",
"nicole", PlantGrowth, "klm",
"ron", list("df_women" = women,
"df_cars" = cars), "nop",
"jillian", sleep, "tuv")
一个重要的注意事项是我必须在管道上完成这件事。即:
# demo to desired solution
trb_output <-
trb %>%
wrangle_this(...) %>%
wrangle_that(...)
trb_output
## # A tibble: 6 x 3
## person .dat some_string
## <chr> <list> <chr>
## 1 chris <df [32 x 11]> abc
## 2 rachel <df [31 x 3]> efg
## 3 john <df [150 x 5]> hij
## 4 nicole <df [30 x 2]> klm
## 5 ron <named list [2]> nop
## 6 jillian <df [20 x 3]> tuv
仅使用管道可以做到这一点吗?
这些对你有用吗?
trb2 = trb %>%
nest_by(
person, some_string
)
trb3 <- trb %>%
group_by(
person, some_string
) %>%
summarise(
dta = list(data_name = data_obj)
)
编辑,这个?
trb4 <- trb %>%
group_by(
person, some_string
) %>%
summarise(
dta = ifelse(length(data_obj) > 1, list(as.list(setNames(data_obj,data_name ))), data_obj))
)
这可能并不完美,但这似乎有效:
trb %>%
group_by(across(c(-data_obj, -data_name))) %>%
summarise(data_obj = ifelse(length(data_obj) > 1, lst(setNames(data_obj,data_name)), data_obj))
person some_string data_obj
<chr> <chr> <list>
1 chris abc <df [32 x 11]>
2 jillian tuv <df [20 x 3]>
3 john hij <df [150 x 5]>
4 nicole klm <df [30 x 2]>
5 rachel efg <df [31 x 3]>
6 ron nop <named list [2]>
这里有一种使用 purrr
的方法。
library(tidyverse)
trb %>%
group_split(person) %>%
map_dfr(function(df) {
if (nrow(df) > 1) {
tibble(person = unique(df$person), .dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name)), some_string = unique(df$some_string))
} else {
transmute(df, person, ".dat" := data_obj, some_string)
}
})
#> # A tibble: 6 × 3
#> person .dat some_string
#> <chr> <list> <chr>
#> 1 chris <df [32 × 11]> abc
#> 2 jillian <df [20 × 3]> tuv
#> 3 john <df [150 × 5]> hij
#> 4 nicole <df [30 × 2]> klm
#> 5 rachel <df [31 × 3]> efg
#> 6 ron <named list [2]> nop
由 reprex package (v2.0.1)
创建于 2022-01-22
编辑:更通用的方法是使用 across
而不是在每一列中手动调用 unique
。
trb %>%
group_split(person) %>%
map_dfr(function(df) {
if (nrow(df) > 1) {
other_cols <- summarise(df, across(-c(data_obj, data_name), ~ unique(.)))
bind_cols(other_cols, tibble(.dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name))))
} else {
select(df, -c(data_obj, data_name, some_string), ".dat" := data_obj, some_string)
}
})
#> # A tibble: 6 × 3
#> person .dat some_string
#> <chr> <list> <chr>
#> 1 chris <df [32 × 11]> abc
#> 2 jillian <df [20 × 3]> tuv
#> 3 john <df [150 × 5]> hij
#> 4 nicole <df [30 × 2]> klm
#> 5 rachel <df [31 × 3]> efg
#> 6 ron <named list [2]> nop
由 reprex package (v2.0.1)
创建于 2022-01-22
数据:
trb <-
tibble::tribble(~person, ~data_name, ~data_obj, ~some_string,
"chris", "df_mtcars", mtcars, "abc",
"rachel", "df_trees", trees, "efg",
"john", "df_iris", iris, "hij",
"nicole", "df_plantgrowth", PlantGrowth, "klm",
"ron", "df_women", women, "nop", # | notice that both ron's rows have same values
"ron", "df_cars", cars, "nop", # | except for data_name and data_obj
"jillian", "df_sleep", sleep, "tuv")
作为 dplyr
/tidyr
争论管道的一部分,我得到了一个 tibble,看起来像这样:
trb <-
tibble::tribble(~person, ~data_name, ~data_obj, ~some_string,
"chris", "df_mtcars", mtcars, "abc",
"rachel", "df_trees", trees, "efg",
"john", "df_iris", iris, "hij",
"nicole", "df_plantgrowth", PlantGrowth, "klm",
"ron", "df_women", women, "nop", # | notice that both ron's rows have same values
"ron", "df_cars", cars, "nop", # | except for data_name and data_obj
"jillian", "df_sleep", sleep, "tuv")
这个table描述了6个人和每个人喜欢的数据object。令人讨厌的是,"ron"
给出了 2 个数据偏好,所以我需要以某种方式将 ron 的信息折叠成一行 trb
。
我的“折叠策略”是将 ron 的两个数据首选项组合在一个命名列表中,这样最终输出将是
trb_output <-
tribble(~person, ~.dat, ~some_string,
"chris", mtcars, "abc",
"rachel", trees, "efg",
"john", iris, "hij",
"nicole", PlantGrowth, "klm",
"ron", list("df_women" = women,
"df_cars" = cars), "nop",
"jillian", sleep, "tuv")
一个重要的注意事项是我必须在管道上完成这件事。即:
# demo to desired solution
trb_output <-
trb %>%
wrangle_this(...) %>%
wrangle_that(...)
trb_output
## # A tibble: 6 x 3
## person .dat some_string
## <chr> <list> <chr>
## 1 chris <df [32 x 11]> abc
## 2 rachel <df [31 x 3]> efg
## 3 john <df [150 x 5]> hij
## 4 nicole <df [30 x 2]> klm
## 5 ron <named list [2]> nop
## 6 jillian <df [20 x 3]> tuv
仅使用管道可以做到这一点吗?
这些对你有用吗?
trb2 = trb %>%
nest_by(
person, some_string
)
trb3 <- trb %>%
group_by(
person, some_string
) %>%
summarise(
dta = list(data_name = data_obj)
)
编辑,这个?
trb4 <- trb %>%
group_by(
person, some_string
) %>%
summarise(
dta = ifelse(length(data_obj) > 1, list(as.list(setNames(data_obj,data_name ))), data_obj))
)
这可能并不完美,但这似乎有效:
trb %>%
group_by(across(c(-data_obj, -data_name))) %>%
summarise(data_obj = ifelse(length(data_obj) > 1, lst(setNames(data_obj,data_name)), data_obj))
person some_string data_obj
<chr> <chr> <list>
1 chris abc <df [32 x 11]>
2 jillian tuv <df [20 x 3]>
3 john hij <df [150 x 5]>
4 nicole klm <df [30 x 2]>
5 rachel efg <df [31 x 3]>
6 ron nop <named list [2]>
这里有一种使用 purrr
的方法。
library(tidyverse)
trb %>%
group_split(person) %>%
map_dfr(function(df) {
if (nrow(df) > 1) {
tibble(person = unique(df$person), .dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name)), some_string = unique(df$some_string))
} else {
transmute(df, person, ".dat" := data_obj, some_string)
}
})
#> # A tibble: 6 × 3
#> person .dat some_string
#> <chr> <list> <chr>
#> 1 chris <df [32 × 11]> abc
#> 2 jillian <df [20 × 3]> tuv
#> 3 john <df [150 × 5]> hij
#> 4 nicole <df [30 × 2]> klm
#> 5 rachel <df [31 × 3]> efg
#> 6 ron <named list [2]> nop
由 reprex package (v2.0.1)
创建于 2022-01-22编辑:更通用的方法是使用 across
而不是在每一列中手动调用 unique
。
trb %>%
group_split(person) %>%
map_dfr(function(df) {
if (nrow(df) > 1) {
other_cols <- summarise(df, across(-c(data_obj, data_name), ~ unique(.)))
bind_cols(other_cols, tibble(.dat = list(map(df$data_obj, ~.x) %>% set_names(df$data_name))))
} else {
select(df, -c(data_obj, data_name, some_string), ".dat" := data_obj, some_string)
}
})
#> # A tibble: 6 × 3
#> person .dat some_string
#> <chr> <list> <chr>
#> 1 chris <df [32 × 11]> abc
#> 2 jillian <df [20 × 3]> tuv
#> 3 john <df [150 × 5]> hij
#> 4 nicole <df [30 × 2]> klm
#> 5 rachel <df [31 × 3]> efg
#> 6 ron <named list [2]> nop
由 reprex package (v2.0.1)
创建于 2022-01-22数据:
trb <-
tibble::tribble(~person, ~data_name, ~data_obj, ~some_string,
"chris", "df_mtcars", mtcars, "abc",
"rachel", "df_trees", trees, "efg",
"john", "df_iris", iris, "hij",
"nicole", "df_plantgrowth", PlantGrowth, "klm",
"ron", "df_women", women, "nop", # | notice that both ron's rows have same values
"ron", "df_cars", cars, "nop", # | except for data_name and data_obj
"jillian", "df_sleep", sleep, "tuv")