将管道运算符 %>% 与替换函数一起使用,例如 colnames()<-
Use pipe operator %>% with replacement functions like colnames()<-
如何通过管道运算符把数据传入替换函数,例如 colnames()<-
?
这是我正在尝试做的事情:
library(dplyr)
averages_df <-
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp))
colnames(averages_df) <- c("cyl", "disp_mean", "hp_mean")
averages_df
# Source: local data frame [3 x 3]
#
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
但理想情况下应该是这样的:
averages_df <-
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
add_colnames(c("cyl", "disp_mean", "hp_mean"))
有没有办法不用每次都写一个特殊的函数来做到这一点?
这里的答案算是一个起点,但并没有完全解决我的问题:Chaining arithmetic operators in dplyr
您可以使用 colnames<-
或 setNames
(感谢@David Arenburg)
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
`colnames<-`(c("cyl", "disp_mean", "hp_mean"))
# or
# `names<-`(c("cyl", "disp_mean", "hp_mean"))
# setNames(., c("cyl", "disp_mean", "hp_mean"))
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
或者使用 magrittr
提供的别名函数
(set_colnames
):
library(magrittr)
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
set_colnames(c("cyl", "disp_mean", "hp_mean"))
或者,如果您只需要(重新)命名众多列中的少数几列,可以使用 dplyr::rename
(它需要同时写出旧名称和新名称;请参阅 @Richard Scriven 的回答)。
在 dplyr
中,有几种不同的重命名列的方法。
一种是使用 rename()
函数。在此示例中,您需要用反引号括起 summarise()
自动生成的列名,因为这些列名是表达式。
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
rename(disp_mean = `mean(disp)`, hp_mean = `mean(hp)`)
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
您也可以使用 select()
。这样更简单一些,因为我们可以直接使用列号,而无需处理反引号。
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
select(1, disp_mean = 2, hp_mean = 3)
但是对于这个例子,最好的方法是按照 @thelatemail 在评论中提到的做法:退回一步,直接在 summarise()
中为列命名。
group_by(mtcars, cyl) %>%
summarise(disp_mean = mean(disp), hp_mean = mean(hp))
我们可以使用 dplyr 的 summarise_at
并通过其 .funs
参数为汇总后的变量添加后缀,如下面的代码所示。
library(dplyr)
# summarise_at with dplyr
# Average disp and hp within each cyl group. Giving the function a name in
# `.funs` (here "mean") makes dplyr append that name as a suffix, producing
# the columns disp_mean and hp_mean.
# `.vars` is the current argument name; the older `.cols` spelling is
# deprecated in dplyr and only kept for backward compatibility.
mtcars %>%
  group_by(cyl) %>%
  summarise_at(
    .vars = c("disp", "hp"),
    .funs = c(mean = "mean")
  )
# A tibble: 3 × 3
# cyl disp_mean hp_mean
# <dbl> <dbl> <dbl>
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
此外,我们可以通过多种方式设置列名。
# set_names with magrittr
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
magrittr::set_names(c("cyl", "disp_mean", "hp_mean"))
# set_names with purrr
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
purrr::set_names(c("cyl", "disp_mean", "hp_mean"))
# setNames with stats
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
stats::setNames(c("cyl", "disp_mean", "hp_mean"))
# A tibble: 3 × 3
# cyl disp_mean hp_mean
# <dbl> <dbl> <dbl>
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
如何通过管道运算符把数据传入替换函数,例如 colnames()<-
?
这是我正在尝试做的事情:
library(dplyr)
averages_df <-
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp))
colnames(averages_df) <- c("cyl", "disp_mean", "hp_mean")
averages_df
# Source: local data frame [3 x 3]
#
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
但理想情况下应该是这样的:
averages_df <-
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
add_colnames(c("cyl", "disp_mean", "hp_mean"))
有没有办法不用每次都写一个特殊的函数来做到这一点?
这里的答案算是一个起点,但并没有完全解决我的问题:Chaining arithmetic operators in dplyr
您可以使用 colnames<-
或 setNames
(感谢@David Arenburg)
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
`colnames<-`(c("cyl", "disp_mean", "hp_mean"))
# or
# `names<-`(c("cyl", "disp_mean", "hp_mean"))
# setNames(., c("cyl", "disp_mean", "hp_mean"))
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
或者使用 magrittr
提供的别名函数
(set_colnames
):
library(magrittr)
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
set_colnames(c("cyl", "disp_mean", "hp_mean"))
或者,如果您只需要(重新)命名众多列中的少数几列,可以使用 dplyr::rename
(它需要同时写出旧名称和新名称;请参阅 @Richard Scriven 的回答)。
在 dplyr
中,有几种不同的重命名列的方法。
一种是使用 rename()
函数。在此示例中,您需要用反引号括起 summarise()
自动生成的列名,因为这些列名是表达式。
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
rename(disp_mean = `mean(disp)`, hp_mean = `mean(hp)`)
# cyl disp_mean hp_mean
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
您也可以使用 select()
。这样更简单一些,因为我们可以直接使用列号,而无需处理反引号。
group_by(mtcars, cyl) %>%
summarise(mean(disp), mean(hp)) %>%
select(1, disp_mean = 2, hp_mean = 3)
但是对于这个例子,最好的方法是按照 @thelatemail 在评论中提到的做法:退回一步,直接在 summarise()
中为列命名。
group_by(mtcars, cyl) %>%
summarise(disp_mean = mean(disp), hp_mean = mean(hp))
我们可以使用 dplyr 的 summarise_at
并通过其 .funs
参数为汇总后的变量添加后缀,如下面的代码所示。
library(dplyr)
# summarise_at with dplyr
# Average disp and hp within each cyl group. Giving the function a name in
# `.funs` (here "mean") makes dplyr append that name as a suffix, producing
# the columns disp_mean and hp_mean.
# `.vars` is the current argument name; the older `.cols` spelling is
# deprecated in dplyr and only kept for backward compatibility.
mtcars %>%
  group_by(cyl) %>%
  summarise_at(
    .vars = c("disp", "hp"),
    .funs = c(mean = "mean")
  )
# A tibble: 3 × 3
# cyl disp_mean hp_mean
# <dbl> <dbl> <dbl>
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429
此外,我们可以通过多种方式设置列名。
# set_names with magrittr
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
magrittr::set_names(c("cyl", "disp_mean", "hp_mean"))
# set_names with purrr
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
purrr::set_names(c("cyl", "disp_mean", "hp_mean"))
# setNames with stats
mtcars %>%
group_by(cyl) %>%
summarise(mean(disp), mean(hp)) %>%
stats::setNames(c("cyl", "disp_mean", "hp_mean"))
# A tibble: 3 × 3
# cyl disp_mean hp_mean
# <dbl> <dbl> <dbl>
# 1 4 105.1364 82.63636
# 2 6 183.3143 122.28571
# 3 8 353.1000 209.21429