在 summarise_each() 中对多个变量使用多个函数
multiple functions in summarise_each() with multiple variables
以下代码可以运行,但我相信有更好的解决方案。
library(dplyr)
library(tidyr)
# Summarise-then-reshape: per Species, apply mean and median to every other
# column, then reshape the wide result so each summary gets its own column.
iris %>%
group_by(Species) %>%
summarise_each(funs(mean, median)) %>%
# Stack all summary columns into key/value pairs (key `var`, value `val`).
gather(var, val, -Species) %>%
# Split the key at "_" into a "variable" part and a "summary" part.
separate(var, c("variable", "summary"), sep = "_") %>%
# Spread the "summary" keys back out into columns filled from `val`.
spread(summary, val)
先用 gather() 把相关变量整理成长格式,然后再做汇总计算。
例如:
# Reshape-then-summarise: stack every non-Species column into key/value pairs
# (key `var`, value `val`) first, then apply mean and median within each
# Species/var group.
iris %>%
gather(var, val, -Species) %>%
group_by(Species, var) %>%
summarise_each(funs(mean, median))
不仅代码更简洁,而且因为你要做的事情更少,所以速度也会更快:
# Summarise-then-reshape variant used in the benchmark.
# Applies mean and median to every non-grouping column of iris per Species,
# then gathers the summary columns, splits their names at "_" into
# "variable" and "summary", and spreads the summaries back into columns.
fun1 <- function() {
  per_species <- group_by(iris, Species)
  wide_stats <- summarise_each(per_species, funs(mean, median))
  long_stats <- gather(wide_stats, var, val, -Species)
  labelled_stats <- separate(
    long_stats, var, c("variable", "summary"),
    sep = "_"
  )
  spread(labelled_stats, summary, val)
}
# Reshape-then-summarise variant used in the benchmark.
# Gathers every non-Species column of iris into key/value pairs (`var`,
# `val`), groups by Species and var, then applies mean and median per group.
fun2 <- function() {
  long_iris <- gather(iris, var, val, -Species)
  by_species_var <- group_by(long_iris, Species, var)
  summarise_each(by_species_var, funs(mean, median))
}
library(microbenchmark)
library(compare)
# Time the two implementations against each other. The commented lines that
# follow are the output recorded from one run.
microbenchmark(fun1(), fun2())
# Unit: milliseconds
# expr min lq mean median uq max neval
# fun1() 6.725408 6.950540 7.572307 7.202001 7.648250 12.326271 100
# fun2() 3.346863 3.475828 3.784302 3.535849 3.824349 6.580824 100
# Check that both implementations yield equivalent results. allowAll = TRUE
# lets compare() transform the inputs before comparing; the recorded output
# below lists the transformations it applied.
compare(as.data.frame(fun1()), as.data.frame(fun2()), allowAll = TRUE)
# TRUE
# [variable] coerced from <factor> to <character>
# sorted
# renamed
# renamed rows
# dropped names
# dropped row names
以下代码可以运行,但我相信有更好的解决方案。
library(dplyr)
library(tidyr)
# Summarise-then-reshape: per Species, apply mean and median to every other
# column, then reshape the wide result so each summary gets its own column.
iris %>%
group_by(Species) %>%
summarise_each(funs(mean, median)) %>%
# Stack all summary columns into key/value pairs (key `var`, value `val`).
gather(var, val, -Species) %>%
# Split the key at "_" into a "variable" part and a "summary" part.
separate(var, c("variable", "summary"), sep = "_") %>%
# Spread the "summary" keys back out into columns filled from `val`.
spread(summary, val)
先用 gather() 把相关变量整理成长格式,然后再做汇总计算。
例如:
# Reshape-then-summarise: stack every non-Species column into key/value pairs
# (key `var`, value `val`) first, then apply mean and median within each
# Species/var group.
iris %>%
gather(var, val, -Species) %>%
group_by(Species, var) %>%
summarise_each(funs(mean, median))
不仅代码更简洁,而且因为你要做的事情更少,所以速度也会更快:
# Summarise-then-reshape variant used in the benchmark.
# Applies mean and median to every non-grouping column of iris per Species,
# then gathers the summary columns, splits their names at "_" into
# "variable" and "summary", and spreads the summaries back into columns.
fun1 <- function() {
  per_species <- group_by(iris, Species)
  wide_stats <- summarise_each(per_species, funs(mean, median))
  long_stats <- gather(wide_stats, var, val, -Species)
  labelled_stats <- separate(
    long_stats, var, c("variable", "summary"),
    sep = "_"
  )
  spread(labelled_stats, summary, val)
}
# Reshape-then-summarise variant used in the benchmark.
# Gathers every non-Species column of iris into key/value pairs (`var`,
# `val`), groups by Species and var, then applies mean and median per group.
fun2 <- function() {
  long_iris <- gather(iris, var, val, -Species)
  by_species_var <- group_by(long_iris, Species, var)
  summarise_each(by_species_var, funs(mean, median))
}
library(microbenchmark)
library(compare)
# Time the two implementations against each other. The commented lines that
# follow are the output recorded from one run.
microbenchmark(fun1(), fun2())
# Unit: milliseconds
# expr min lq mean median uq max neval
# fun1() 6.725408 6.950540 7.572307 7.202001 7.648250 12.326271 100
# fun2() 3.346863 3.475828 3.784302 3.535849 3.824349 6.580824 100
# Check that both implementations yield equivalent results. allowAll = TRUE
# lets compare() transform the inputs before comparing; the recorded output
# below lists the transformations it applied.
compare(as.data.frame(fun1()), as.data.frame(fun2()), allowAll = TRUE)
# TRUE
# [variable] coerced from <factor> to <character>
# sorted
# renamed
# renamed rows
# dropped names
# dropped row names