在 nest 和 purrr map 之后重新排序 tidyr spread 的干净方法
Clean way to reorder tidyr spread after nest and purrr map
请看下面的例子:
library(tidyverse)
library(broom)
# Compute the quantiles of mpg separately for each cyl group, then flatten
# the per-group results into one long table (one row per cyl/quantile pair).
tidy.quants <- mtcars %>%
nest(-cyl) %>%
mutate(quantiles = map(data, ~ quantile(.$mpg))) %>%
# tidy() converts each named quantile vector into a data frame; the printed
# result below shows the resulting `names` (label) and `x` (value) columns.
unnest(map(quantiles, tidy))
tidy.quants
#> # A tibble: 15 x 3
#> cyl names x
#> <dbl> <chr> <dbl>
#> 1 6 0% 17.80
#> 2 6 25% 18.65
#> 3 6 50% 19.70
#> 4 6 75% 21.00
#> 5 6 100% 21.40
#> 6 4 0% 21.40
#> 7 4 25% 22.80
#> 8 4 50% 26.00
#> 9 4 75% 30.40
#> 10 4 100% 33.90
#> 11 8 0% 10.40
#> 12 8 25% 14.40
#> 13 8 50% 15.20
#> 14 8 75% 16.25
#> 15 8 100% 19.20
这样的结果很整洁,但是,当尝试 spread(或者把数据传给绘图函数)时,names
列会以(有些)出乎意料的顺序返回:
# `names` is a character column, so the spread columns come out in
# alphabetical order ("100%" before "25%"), as the output below shows.
tidy.quants %>% spread(names, x)
#> # A tibble: 3 x 6
#> cyl `0%` `100%` `25%` `50%` `75%`
#> * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 4 21.4 33.9 22.80 26.0 30.40
#> 2 6 17.8 21.4 18.65 19.7 21.00
#> 3 8 10.4 19.2 14.40 15.2 16.25
# Scatter plot of quantile value against quantile label, colored by cyl.
# The question reports that the x axis shows the same unexpected label order.
ggplot(tidy.quants, aes(x = names, y = x, color = factor(cyl))) +
geom_point()
是否有简洁、惯用(clean/idiomatic)的方法,让 names 恢复预期的顺序?
即 0%, 25%, 50%, 75%, 100%,
而不是 0%, 100%, 25%, 50%, 75%?
你可以试试 gtools::mixedsort,它可以对嵌有数字的字符串进行排序。
用 mixedsort(unique(names)) 得到排好序的标签后,与 color 的处理方式类似,
把 names(x 轴变量)转换成一个以这些排序值为水平(levels)的因子,
这样 ggplot 就能以正确的顺序显示 x 轴标签:
library(gtools)
# Map x to a factor of `names` whose levels are ordered by mixedsort(), so
# the axis follows numeric order; xlab() resets the axis title to 'names'
# instead of the long factor(...) expression.
ggplot(tidy.quants, aes(x = factor(names, levels = mixedsort(unique(names))), y = x, color = factor(cyl))) +
geom_point() + xlab('names')
对 spread 也可以采用类似的思路:
# Same idea for spread(): first turn `names` into a factor with
# mixedsort()-ordered levels, so the spread columns follow the level order
# (see the output below).
tidy.quants %>%
mutate(names = factor(names, mixedsort(unique(names)))) %>%
spread(names, x)
# A tibble: 3 x 6
# cyl `0%` `25%` `50%` `75%` `100%`
#* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 4 21.4 22.80 26.0 30.40 33.9
#2 6 17.8 18.65 19.7 21.00 21.4
#3 8 10.4 14.40 15.2 16.25 19.2
另一种做法是直接用 unique(names) 作为因子的水平(levels)。
这是可行的,因为 names 已经按 quantiles 的顺序排好了:
# Rebuild tidy.quants, this time converting `names` into a factor whose
# levels are taken in order of first appearance via unique().
tidy.quants <- mtcars %>%
nest(-cyl) %>%
mutate(quantiles = map(data, ~ quantile(.$mpg))) %>%
unnest(map(quantiles, tidy)) %>%
mutate(names=factor(names,unique(names)))
# With `names` as a factor, the spread columns follow the level order
# (see the result printed below).
tidy.quants %>% spread(names, x)
结果
# A tibble: 3 x 6
cyl `0%` `25%` `50%` `75%` `100%`
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 21.4 22.80 26.0 30.40 33.9
2 6 17.8 18.65 19.7 21.00 21.4
3 8 10.4 14.40 15.2 16.25 19.2
请看下面的例子:
library(tidyverse)
library(broom)
# Compute the quantiles of mpg separately for each cyl group, then flatten
# the per-group results into one long table (one row per cyl/quantile pair).
tidy.quants <- mtcars %>%
nest(-cyl) %>%
mutate(quantiles = map(data, ~ quantile(.$mpg))) %>%
# tidy() converts each named quantile vector into a data frame; the printed
# result below shows the resulting `names` (label) and `x` (value) columns.
unnest(map(quantiles, tidy))
tidy.quants
#> # A tibble: 15 x 3
#> cyl names x
#> <dbl> <chr> <dbl>
#> 1 6 0% 17.80
#> 2 6 25% 18.65
#> 3 6 50% 19.70
#> 4 6 75% 21.00
#> 5 6 100% 21.40
#> 6 4 0% 21.40
#> 7 4 25% 22.80
#> 8 4 50% 26.00
#> 9 4 75% 30.40
#> 10 4 100% 33.90
#> 11 8 0% 10.40
#> 12 8 25% 14.40
#> 13 8 50% 15.20
#> 14 8 75% 16.25
#> 15 8 100% 19.20
这样的结果很整洁,但是,当尝试 spread(或者把数据传给绘图函数)时,names
列会以(有些)出乎意料的顺序返回:
# `names` is a character column, so the spread columns come out in
# alphabetical order ("100%" before "25%"), as the output below shows.
tidy.quants %>% spread(names, x)
#> # A tibble: 3 x 6
#> cyl `0%` `100%` `25%` `50%` `75%`
#> * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 4 21.4 33.9 22.80 26.0 30.40
#> 2 6 17.8 21.4 18.65 19.7 21.00
#> 3 8 10.4 19.2 14.40 15.2 16.25
# Scatter plot of quantile value against quantile label, colored by cyl.
# The question reports that the x axis shows the same unexpected label order.
ggplot(tidy.quants, aes(x = names, y = x, color = factor(cyl))) +
geom_point()
是否有简洁、惯用(clean/idiomatic)的方法,让 names 恢复预期的顺序?
即 0%, 25%, 50%, 75%, 100%,
而不是 0%, 100%, 25%, 50%, 75%?
你可以试试 gtools::mixedsort,它可以对嵌有数字的字符串进行排序。
用 mixedsort(unique(names)) 得到排好序的标签后,与 color 的处理方式类似,
把 names(x 轴变量)转换成一个以这些排序值为水平(levels)的因子,
这样 ggplot 就能以正确的顺序显示 x 轴标签:
library(gtools)
# Map x to a factor of `names` whose levels are ordered by mixedsort(), so
# the axis follows numeric order; xlab() resets the axis title to 'names'
# instead of the long factor(...) expression.
ggplot(tidy.quants, aes(x = factor(names, levels = mixedsort(unique(names))), y = x, color = factor(cyl))) +
geom_point() + xlab('names')
对 spread 也可以采用类似的思路:
# Same idea for spread(): first turn `names` into a factor with
# mixedsort()-ordered levels, so the spread columns follow the level order
# (see the output below).
tidy.quants %>%
mutate(names = factor(names, mixedsort(unique(names)))) %>%
spread(names, x)
# A tibble: 3 x 6
# cyl `0%` `25%` `50%` `75%` `100%`
#* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 4 21.4 22.80 26.0 30.40 33.9
#2 6 17.8 18.65 19.7 21.00 21.4
#3 8 10.4 14.40 15.2 16.25 19.2
另一种做法是直接用 unique(names) 作为因子的水平(levels)。
这是可行的,因为 names 已经按 quantiles 的顺序排好了:
# Rebuild tidy.quants, this time converting `names` into a factor whose
# levels are taken in order of first appearance via unique().
tidy.quants <- mtcars %>%
nest(-cyl) %>%
mutate(quantiles = map(data, ~ quantile(.$mpg))) %>%
unnest(map(quantiles, tidy)) %>%
mutate(names=factor(names,unique(names)))
# With `names` as a factor, the spread columns follow the level order
# (see the result printed below).
tidy.quants %>% spread(names, x)
结果
# A tibble: 3 x 6
cyl `0%` `25%` `50%` `75%` `100%`
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 21.4 22.80 26.0 30.40 33.9
2 6 17.8 18.65 19.7 21.00 21.4
3 8 10.4 14.40 15.2 16.25 19.2