在 tidyverse 中过滤列表列
filtering listcolumns in the tidyverse
# Build the two example tibbles used below: `mt_make` (nested per make) and
# `mt_id` (nested per row id). Each carries two list-columns, `l` and `m`.
library(tidyverse)

data(mtcars)
mtcars <- rownames_to_column(mtcars, var = "car")
# Sequential integer row id; seq_len() stays correct even for zero rows.
mtcars$id <- seq_len(nrow(mtcars))
# The make is the first space-separated word of the car name.
mtcars$make <- map_chr(mtcars$car, ~ strsplit(.x, " ")[[1]][1])

# Nested by make: `l` holds car/mpg/cyl/disp/id, `m` holds hp..am plus id.
mt2 <- mtcars %>%
  select(1:4, id, make) %>%
  nest(l = -make)
mt3 <- mtcars %>%
  select(5:10, id, make) %>%
  nest(m = -make)
mt_make <- left_join(mt2, mt3, by = "make")

# Nested by id: `l` holds car/mpg/cyl/disp/make, `m` holds hp..am.
mt2 <- mtcars %>%
  select(1:4, id, make) %>%
  nest(l = -id)
mt3 <- mtcars %>%
  select(5:10, id) %>%
  nest(m = -id)
mt_id <- left_join(mt2, mt3, by = "id")
如何过滤 mt_make,
只保留 6 缸且超过 150 马力的汽车,并将结果展开(unnest)为不含列表列的数据框?
如何过滤 mt_id,
只保留马力小于 200 的 Merc,并将结果展开(unnest)为不含列表列的数据框?
首先,这两种都不是好的数据结构:数据本应像普通 data.frame 那样横向关联在一起,现在却要并行处理两个列表列,非常麻烦。不过,我猜这只是您为构造可复现示例(reprex)而做的安排,您的实际数据这样组织应该有更充分的理由。如果没有,为了省心,请先调整数据结构。
话虽如此,这些结构还是可以处理的:
library(tidyverse)

# Keep, per make, only the cars with 6 cylinders and more than 150 hp, then
# flatten the result into an ordinary tibble without list-columns.
mt_make %>%
  mutate(
    l = map(l, ~ filter(.x, cyl == 6)),
    m = map(m, ~ filter(.x, hp > 150)),
    # `id` is the only column `l` and `m` share; the inner join keeps just the
    # cars that passed both filters.
    n = map2(l, m, inner_join, by = "id")
  ) %>%
  # Drop the filtered list-columns explicitly: since tidyr 1.0.0, unnest()
  # preserves other list-columns instead of discarding them.
  select(make, n) %>%
  unnest(n)
#> # A tibble: 1 × 12
#> make car mpg cyl disp id hp drat wt qsec
#> <chr> <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Ferrari Ferrari Dino 19.7 6 145 30 175 3.62 2.77 15.5
#> # ... with 2 more variables: vs <dbl>, am <dbl>
# Keep only the Merc rows with less than 200 hp and flatten both list-columns
# into an ordinary tibble.
mt_id %>%
  mutate(
    l = map(l, ~ filter(.x, make == "Merc")),
    m = map(m, ~ filter(.x, hp < 200))
  ) %>%
  # Keep ids whose `l` and `m` still have the same number of rows. This assumes
  # the two list-columns are parallel, which suggests there is no point to
  # nesting this data in the first place.
  filter(map_int(l, nrow) == map_int(m, nrow)) %>%
  # Name the columns to unnest: calling unnest() without `cols` is deprecated
  # since tidyr 1.0.0.
  unnest(cols = c(l, m))
#> # A tibble: 7 × 12
#> id car mpg cyl disp make hp drat wt qsec vs
#> <int> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 8 Merc 240D 24.4 4 146.7 Merc 62 3.69 3.19 20.0 1
#> 2 9 Merc 230 22.8 4 140.8 Merc 95 3.92 3.15 22.9 1
#> 3 10 Merc 280 19.2 6 167.6 Merc 123 3.92 3.44 18.3 1
#> 4 11 Merc 280C 17.8 6 167.6 Merc 123 3.92 3.44 18.9 1
#> 5 12 Merc 450SE 16.4 8 275.8 Merc 180 3.07 4.07 17.4 0
#> 6 13 Merc 450SL 17.3 8 275.8 Merc 180 3.07 3.73 17.6 0
#> 7 14 Merc 450SLC 15.2 8 275.8 Merc 180 3.07 3.78 18.0 0
#> # ... with 1 more variables: am <dbl>
# Build the two example tibbles used below: `mt_make` (nested per make) and
# `mt_id` (nested per row id). Each carries two list-columns, `l` and `m`.
library(tidyverse)

data(mtcars)
mtcars <- rownames_to_column(mtcars, var = "car")
# Sequential integer row id; seq_len() stays correct even for zero rows.
mtcars$id <- seq_len(nrow(mtcars))
# The make is the first space-separated word of the car name.
mtcars$make <- map_chr(mtcars$car, ~ strsplit(.x, " ")[[1]][1])

# Nested by make: `l` holds car/mpg/cyl/disp/id, `m` holds hp..am plus id.
mt2 <- mtcars %>%
  select(1:4, id, make) %>%
  nest(l = -make)
mt3 <- mtcars %>%
  select(5:10, id, make) %>%
  nest(m = -make)
mt_make <- left_join(mt2, mt3, by = "make")

# Nested by id: `l` holds car/mpg/cyl/disp/make, `m` holds hp..am.
mt2 <- mtcars %>%
  select(1:4, id, make) %>%
  nest(l = -id)
mt3 <- mtcars %>%
  select(5:10, id) %>%
  nest(m = -id)
mt_id <- left_join(mt2, mt3, by = "id")
如何过滤 mt_make,
只保留 6 缸且超过 150 马力的汽车,并将结果展开(unnest)为不含列表列的数据框?
如何过滤 mt_id,
只保留马力小于 200 的 Merc,并将结果展开(unnest)为不含列表列的数据框?
首先,这两种都不是好的数据结构:数据本应像普通 data.frame 那样横向关联在一起,现在却要并行处理两个列表列,非常麻烦。不过,我猜这只是您为构造可复现示例(reprex)而做的安排,您的实际数据这样组织应该有更充分的理由。如果没有,为了省心,请先调整数据结构。
话虽如此,这些结构还是可以处理的:
library(tidyverse)

# Keep, per make, only the cars with 6 cylinders and more than 150 hp, then
# flatten the result into an ordinary tibble without list-columns.
mt_make %>%
  mutate(
    l = map(l, ~ filter(.x, cyl == 6)),
    m = map(m, ~ filter(.x, hp > 150)),
    # `id` is the only column `l` and `m` share; the inner join keeps just the
    # cars that passed both filters.
    n = map2(l, m, inner_join, by = "id")
  ) %>%
  # Drop the filtered list-columns explicitly: since tidyr 1.0.0, unnest()
  # preserves other list-columns instead of discarding them.
  select(make, n) %>%
  unnest(n)
#> # A tibble: 1 × 12
#> make car mpg cyl disp id hp drat wt qsec
#> <chr> <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Ferrari Ferrari Dino 19.7 6 145 30 175 3.62 2.77 15.5
#> # ... with 2 more variables: vs <dbl>, am <dbl>
# Keep only the Merc rows with less than 200 hp and flatten both list-columns
# into an ordinary tibble.
mt_id %>%
  mutate(
    l = map(l, ~ filter(.x, make == "Merc")),
    m = map(m, ~ filter(.x, hp < 200))
  ) %>%
  # Keep ids whose `l` and `m` still have the same number of rows. This assumes
  # the two list-columns are parallel, which suggests there is no point to
  # nesting this data in the first place.
  filter(map_int(l, nrow) == map_int(m, nrow)) %>%
  # Name the columns to unnest: calling unnest() without `cols` is deprecated
  # since tidyr 1.0.0.
  unnest(cols = c(l, m))
#> # A tibble: 7 × 12
#> id car mpg cyl disp make hp drat wt qsec vs
#> <int> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 8 Merc 240D 24.4 4 146.7 Merc 62 3.69 3.19 20.0 1
#> 2 9 Merc 230 22.8 4 140.8 Merc 95 3.92 3.15 22.9 1
#> 3 10 Merc 280 19.2 6 167.6 Merc 123 3.92 3.44 18.3 1
#> 4 11 Merc 280C 17.8 6 167.6 Merc 123 3.92 3.44 18.9 1
#> 5 12 Merc 450SE 16.4 8 275.8 Merc 180 3.07 4.07 17.4 0
#> 6 13 Merc 450SL 17.3 8 275.8 Merc 180 3.07 3.73 17.6 0
#> 7 14 Merc 450SLC 15.2 8 275.8 Merc 180 3.07 3.78 18.0 0
#> # ... with 1 more variables: am <dbl>