汇总分组数据时计算合计值
Calculate total values when summarising grouped data
我有一个数据框，并对其中的数据进行了分组。我按组计算汇总统计量，但同时也想得到所有组合并在一起后的总体汇总统计量。有没有比像下面这样计算两次再拼接起来更简单的方法？
# Example data: 30 rows cycling through the groups "a", "b", "c", with one
# column of standard-normal draws and one integer sequence 1..30.
dataDF <- data.frame(
  group = rep(c("a", "b", "c"), times = 10),
  value1 = rnorm(n = 30),
  value2 = seq_len(30)
)
# Per-group summary: one row for each level of `group`.
grouped <- summarise(
  group_by(dataDF, group),
  mean1 = mean(value1),
  mean2 = mean(value2),
  sd1 = sd(value1),
  sd2 = sd(value2),
  max1 = max(value1),
  max2 = max(value2)
)

# The same statistics computed over the whole data set (single row, no
# `group` column).
total <- summarise(
  dataDF,
  mean1 = mean(value1),
  mean2 = mean(value2),
  sd1 = sd(value1),
  sd2 = sd(value2),
  max1 = max(value1),
  max2 = max(value2)
)

# Label the overall row "All" and stack it underneath the per-group rows.
combined <- rbind(
  grouped,
  data.frame(group = "All", total)
)
> combined
# A tibble: 4 x 7
group mean1 mean2 sd1 sd2 max1 max2
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 0.336 14.5 1.15 9.08 1.98 28
2 b -0.215 15.5 1.17 9.08 1.30 29
3 c 0.332 16.5 0.874 9.08 2.19 30
4 All 0.151 15.5 1.07 8.80 2.19 30
您是在找 summarise_all 吗？
> dataDF %>% group_by(group) %>% summarise_all(funs(mean,sd,max))
# A tibble: 3 x 7
group value1_mean value2_mean value1_sd value2_sd value1_max value2_max
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a -0.170 14.5 0.938 9.08 1.01 28.
2 b 0.196 15.5 0.771 9.08 1.46 29.
3 c -0.137 16.5 0.922 9.08 2.10 30.
我已经写了一个函数来实现这一点：
#' Summarise grouped data and append an overall "total" row
#'
#' Calls `func` twice -- once on `data` with its grouping intact and once on
#' the ungrouped data -- then stacks the per-group rows on top of a single
#' total row whose grouping column(s) hold `total_label`.
#'
#' @param data A data frame, normally grouped with `dplyr::group_by()`.
#' @param func A summarising function invoked as `func(data, ...)`, for
#'   example `dplyr::summarise_at`.
#' @param ... Arguments forwarded unchanged to both calls of `func`.
#' @param total_label Value written into every grouping column of the total
#'   row. Must be passed by name; defaults to `"All"`.
#' @return A base `data.frame` with the per-group results followed by one
#'   total row. If `data` has no grouping variables, only the overall summary
#'   is returned (previously it was emitted twice).
summarise_with_total <- function(data, func, ..., total_label = "All") {
  # Names of the grouping columns; character(0) when `data` is ungrouped
  cohorts <- dplyr::group_vars(data)

  # Summary over the full data set, ignoring any grouping
  overall <- func(dplyr::ungroup(data), ...)

  # Without groups the per-group result would equal the total, so return the
  # total once instead of stacking two identical rows
  if (length(cohorts) == 0L) {
    return(data.frame(overall))
  }

  # Summary split by cohort, converted to a plain data.frame for rbind()
  by_cohort <- data.frame(func(data, ...))

  # One label per grouping column so the total row has the same columns as
  # the per-group result
  labels <- stats::setNames(rep(list(total_label), length(cohorts)), cohorts)
  total_row <- data.frame(as.data.frame(labels), overall)

  rbind(by_cohort, total_row)
}
# Per-group mean, sd and max of value1/value2, plus the overall total row.
# A named list replaces funs(), which is deprecated since dplyr 0.8.0; the
# list names provide the column suffixes (value1_mean, value1_sd, ...).
dataDF %>%
  group_by(group) %>%
  summarise_with_total(
    summarise_at,
    .vars = c("value1", "value2"),
    .funs = list(mean = mean, sd = sd, max = max)
  )
我有一个数据框，并对其中的数据进行了分组。我按组计算汇总统计量，但同时也想得到所有组合并在一起后的总体汇总统计量。有没有比像下面这样计算两次再拼接起来更简单的方法？
# Example data: 30 rows cycling through the groups "a", "b", "c", with one
# column of standard-normal draws and one integer sequence 1..30.
dataDF <- data.frame(
  group = rep(c("a", "b", "c"), times = 10),
  value1 = rnorm(n = 30),
  value2 = seq_len(30)
)
# Per-group summary: one row for each level of `group`.
grouped <- summarise(
  group_by(dataDF, group),
  mean1 = mean(value1),
  mean2 = mean(value2),
  sd1 = sd(value1),
  sd2 = sd(value2),
  max1 = max(value1),
  max2 = max(value2)
)

# The same statistics computed over the whole data set (single row, no
# `group` column).
total <- summarise(
  dataDF,
  mean1 = mean(value1),
  mean2 = mean(value2),
  sd1 = sd(value1),
  sd2 = sd(value2),
  max1 = max(value1),
  max2 = max(value2)
)

# Label the overall row "All" and stack it underneath the per-group rows.
combined <- rbind(
  grouped,
  data.frame(group = "All", total)
)
> combined
# A tibble: 4 x 7
group mean1 mean2 sd1 sd2 max1 max2
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 0.336 14.5 1.15 9.08 1.98 28
2 b -0.215 15.5 1.17 9.08 1.30 29
3 c 0.332 16.5 0.874 9.08 2.19 30
4 All 0.151 15.5 1.07 8.80 2.19 30
您是在找 summarise_all 吗？
> dataDF %>% group_by(group) %>% summarise_all(funs(mean,sd,max))
# A tibble: 3 x 7
group value1_mean value2_mean value1_sd value2_sd value1_max value2_max
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a -0.170 14.5 0.938 9.08 1.01 28.
2 b 0.196 15.5 0.771 9.08 1.46 29.
3 c -0.137 16.5 0.922 9.08 2.10 30.
我已经写了一个函数来实现这一点：
#' Summarise grouped data and append an overall "total" row
#'
#' Calls `func` twice -- once on `data` with its grouping intact and once on
#' the ungrouped data -- then stacks the per-group rows on top of a single
#' total row whose grouping column(s) hold `total_label`.
#'
#' @param data A data frame, normally grouped with `dplyr::group_by()`.
#' @param func A summarising function invoked as `func(data, ...)`, for
#'   example `dplyr::summarise_at`.
#' @param ... Arguments forwarded unchanged to both calls of `func`.
#' @param total_label Value written into every grouping column of the total
#'   row. Must be passed by name; defaults to `"All"`.
#' @return A base `data.frame` with the per-group results followed by one
#'   total row. If `data` has no grouping variables, only the overall summary
#'   is returned (previously it was emitted twice).
summarise_with_total <- function(data, func, ..., total_label = "All") {
  # Names of the grouping columns; character(0) when `data` is ungrouped
  cohorts <- dplyr::group_vars(data)

  # Summary over the full data set, ignoring any grouping
  overall <- func(dplyr::ungroup(data), ...)

  # Without groups the per-group result would equal the total, so return the
  # total once instead of stacking two identical rows
  if (length(cohorts) == 0L) {
    return(data.frame(overall))
  }

  # Summary split by cohort, converted to a plain data.frame for rbind()
  by_cohort <- data.frame(func(data, ...))

  # One label per grouping column so the total row has the same columns as
  # the per-group result
  labels <- stats::setNames(rep(list(total_label), length(cohorts)), cohorts)
  total_row <- data.frame(as.data.frame(labels), overall)

  rbind(by_cohort, total_row)
}
# Per-group mean, sd and max of value1/value2, plus the overall total row.
# A named list replaces funs(), which is deprecated since dplyr 0.8.0; the
# list names provide the column suffixes (value1_mean, value1_sd, ...).
dataDF %>%
  group_by(group) %>%
  summarise_with_total(
    summarise_at,
    .vars = c("value1", "value2"),
    .funs = list(mean = mean, sd = sd, max = max)
  )