如何添加包含每个数字列的平均值的行
How can I add a row with the mean of each numeric column
我有一个已汇总成数据透视表(pivot table)形式的数据框,想在末尾添加一行,内容为每个数值列的平均值;对于字符列,该行的取值可以是 "mean"。
示例数据框如下
# Sample data: 13 dated sales records, summed per year/month and then spread
# into one row per year with one column per month.
dat <- c(
  "2000-01-15", "2003-01-15", "2000-02-15", "2003-02-15",
  "2000-04-15", "2002-04-15", "2000-12-15", "2002-12-15",
  "2003-12-13", "2003-12-15", "2002-02-21", "2002-01-25",
  "2003-04-24"
)

df <- data.frame(
  date = as.Date(dat),
  id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13),
  sales = c(
    134, 211, 2000, 234, 421, 400, 34,
    1233, 1222, 1034, 8034, 1234, 2331
  )
)

# Total revenue per (year, month). summarise() presumably leaves the result
# grouped by year, which is why later snippets call ungroup().
df <- df %>%
  mutate(
    year = format(date, "%Y"),
    month = format(date, "%b")
  ) %>%
  select(-date) %>%
  group_by(year, month) %>%
  summarise(revenue = sum(sales))

# Wide layout: one row per year, one revenue column per month.
df2 <- pivot_wider(
  df,
  id_cols = year,
  names_from = month,
  values_from = revenue
)
从这里我想做
# Attempt from the question: bind a row of per-column means onto df2.
rbind(df2, summarise_all(df2, mean))
然而,主要的困难在于:如何只对数值列返回平均值,而对非数值列返回一个字符串。
我想要的输出应该是
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
我们可以使用 janitor 包的 adorn_totals
在末尾添加一个求和行,然后将该行除以(总行数 - 1),即原始数据的行数,从而得到平均值
library(dplyr)
library(janitor)
# Append a column-sum row labelled "mean", then overwrite that last row with
# sum / (n() - 1), i.e. the sum divided by the row count before the totals row
# was added.
df2 %>%
  adorn_totals(name = "mean") %>%
  mutate(
    across(
      where(is.numeric),
      function(col) replace(col, n(), col[n()] / (n() - 1))
    )
  ) %>%
  as_tibble()
-输出
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
另一种选择是使用 summarise 和 across,用 c() 把每一列的 mean 值连接到该列的末尾
# Extend every column by one element: the label "mean" for year and the
# column mean for each numeric column, which yields one extra row.
# NOTE(review): recent dplyr releases reportedly warn when summarise() returns
# more than one row per group and point to reframe() - confirm for your version.
df2 %>%
  ungroup() %>%
  summarise(
    year = c(year, "mean"),
    across(where(is.numeric), function(col) c(col, mean(col)))
  )
-输出
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
或者使用 tibble 包中的 add_row
library(tibble)
# Append one row: year is the label "mean", every other value is spliced in
# (!!!) from the named vector of column means.
# The numeric columns are picked by type instead of by dropping column 1, so
# the snippet no longer depends on the label column being first.
df2 %>%
  ungroup() %>%
  add_row(year = "mean", !!!colMeans(select(., where(is.numeric))))
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
基础 R(base R)方案:
# Build a one-row data frame (label "mean" plus the means of every column
# except the first) and append it to df2.
rbind(
  df2,
  data.frame(year = "mean", t(colMeans(df2[-1])))
)
# year Apr Dec Feb Jan
# <chr> <dbl> <dbl> <dbl> <dbl>
#1 2000 421 34 2000 134
#2 2002 400 1233 8034 1234
#3 2003 2331 2256 234 211
#4 mean 1051. 1174. 3423. 526.
如果事先不知道数值列的位置,想要动态识别,可以使用:
# Flag numeric columns by type rather than by position. vapply() pins the
# result to exactly one logical per column, whereas sapply() can silently
# change its return type (for example on a zero-column input).
cols <- vapply(df2, is.numeric, logical(1))
# Build a one-row data frame (label "mean" plus per-column means) and append it.
rbind(df2, data.frame(year = "mean", t(colMeans(df2[cols]))))
我有一个已汇总成数据透视表(pivot table)形式的数据框,想在末尾添加一行,内容为每个数值列的平均值;对于字符列,该行的取值可以是 "mean"。
示例数据框如下
# Sample data: 13 dated sales records, summed per year/month and then spread
# into one row per year with one column per month.
dat <- c(
  "2000-01-15", "2003-01-15", "2000-02-15", "2003-02-15",
  "2000-04-15", "2002-04-15", "2000-12-15", "2002-12-15",
  "2003-12-13", "2003-12-15", "2002-02-21", "2002-01-25",
  "2003-04-24"
)

df <- data.frame(
  date = as.Date(dat),
  id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13),
  sales = c(
    134, 211, 2000, 234, 421, 400, 34,
    1233, 1222, 1034, 8034, 1234, 2331
  )
)

# Total revenue per (year, month). summarise() presumably leaves the result
# grouped by year, which is why later snippets call ungroup().
df <- df %>%
  mutate(
    year = format(date, "%Y"),
    month = format(date, "%b")
  ) %>%
  select(-date) %>%
  group_by(year, month) %>%
  summarise(revenue = sum(sales))

# Wide layout: one row per year, one revenue column per month.
df2 <- pivot_wider(
  df,
  id_cols = year,
  names_from = month,
  values_from = revenue
)
从这里我想做
# Attempt from the question: bind a row of per-column means onto df2.
rbind(df2, summarise_all(df2, mean))
然而,主要的困难在于:如何只对数值列返回平均值,而对非数值列返回一个字符串。
我想要的输出应该是
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
我们可以使用 janitor 包的 adorn_totals
在末尾添加一个求和行,然后将该行除以(总行数 - 1),即原始数据的行数,从而得到平均值
library(dplyr)
library(janitor)
# Append a column-sum row labelled "mean", then overwrite that last row with
# sum / (n() - 1), i.e. the sum divided by the row count before the totals row
# was added.
df2 %>%
  adorn_totals(name = "mean") %>%
  mutate(
    across(
      where(is.numeric),
      function(col) replace(col, n(), col[n()] / (n() - 1))
    )
  ) %>%
  as_tibble()
-输出
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
另一种选择是使用 summarise 和 across,用 c() 把每一列的 mean 值连接到该列的末尾
# Extend every column by one element: the label "mean" for year and the
# column mean for each numeric column, which yields one extra row.
# NOTE(review): recent dplyr releases reportedly warn when summarise() returns
# more than one row per group and point to reframe() - confirm for your version.
df2 %>%
  ungroup() %>%
  summarise(
    year = c(year, "mean"),
    across(where(is.numeric), function(col) c(col, mean(col)))
  )
-输出
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
或者使用 tibble 包中的 add_row
library(tibble)
# Append one row: year is the label "mean", every other value is spliced in
# (!!!) from the named vector of column means.
# The numeric columns are picked by type instead of by dropping column 1, so
# the snippet no longer depends on the label column being first.
df2 %>%
  ungroup() %>%
  add_row(year = "mean", !!!colMeans(select(., where(is.numeric))))
# A tibble: 4 x 5
year Apr Dec Feb Jan
<chr> <dbl> <dbl> <dbl> <dbl>
1 2000 421 34 2000 134
2 2002 400 1233 8034 1234
3 2003 2331 2256 234 211
4 mean 1051. 1174. 3423. 526.
基础 R(base R)方案:
# Build a one-row data frame (label "mean" plus the means of every column
# except the first) and append it to df2.
rbind(
  df2,
  data.frame(year = "mean", t(colMeans(df2[-1])))
)
# year Apr Dec Feb Jan
# <chr> <dbl> <dbl> <dbl> <dbl>
#1 2000 421 34 2000 134
#2 2002 400 1233 8034 1234
#3 2003 2331 2256 234 211
#4 mean 1051. 1174. 3423. 526.
如果事先不知道数值列的位置,想要动态识别,可以使用:
# Flag numeric columns by type rather than by position. vapply() pins the
# result to exactly one logical per column, whereas sapply() can silently
# change its return type (for example on a zero-column input).
cols <- vapply(df2, is.numeric, logical(1))
# Build a one-row data frame (label "mean" plus per-column means) and append it.
rbind(df2, data.frame(year = "mean", t(colMeans(df2[cols]))))