如何在 dplyr 中定义函数? - 添加卡方检验的结果
How to define a function in dplyr? - Adding the results of a chi-squared test
我正在尝试编写一个函数,为两个变量生成数据透视表(pivot table)。作为对之前问题的扩展,我还想加入预测变量与目标变量之间关系的卡方检验 p 值。我该如何修改这个函数?
library(dplyr)
# Overall mean of miles per (US) gallon; used as the split point below.
mean_mpg <- mean(mtcars$mpg)

# Flag each car: 1 when its mpg is above the overall mean, 0 otherwise.
mtcars <- mutate(mtcars, mpg_cat = ifelse(mpg > mean_mpg, 1, 0))

# For every cylinder count: number of above-mean cars, number of cars, and
# the above-mean cars as a percentage of all cars in that group.
mtcars %>%
  group_by(as.factor(cyl)) %>%
  summarise(sum = sum(mpg_cat), total = n()) %>%
  mutate(percentage = sum * 100 / total)
# Note: needs installation of rlang 0.4.0 or later
#' Print a per-group summary of a target column.
#'
#' Groups `data` by `predictor` (converted to a factor), then reports the sum
#' of `target`, the number of rows, and the sum as a percentage of the rows.
#'
#' @param data A data frame.
#' @param predictor Unquoted name of the grouping column.
#' @param target Unquoted name of the column to sum.
#' @return The summary, as returned by `print()` after it has been printed.
get_pivot <- function(data, predictor, target) {
  by_predictor <- group_by(data, as.factor({{ predictor }}))
  totals <- summarise(by_predictor, sum = sum({{ target }}), total = n())
  result <- mutate(totals, percentage = sum * 100 / total)
  print(result)
}
下面是一个可以正常运行的示例:
# Same per-cylinder summary, plus a chi-squared test p-value.
# `.` is the pipe's left-hand side (the whole grouped data set), so the test
# uses the full mpg_cat and cyl columns and every row shows the same p-value.
mtcars %>%
  group_by(as.factor(cyl)) %>%
  summarise(
    sum = sum(mpg_cat),
    total = n(),
    pvalue = chisq.test(as.factor(.$mpg_cat), as.factor(.$cyl))$p.value
  ) %>%
  mutate(percentage = sum * 100 / total)
我尝试了下面这个函数,但它不起作用。
# Attempted extension of get_pivot() that also reports a chi-squared p-value.
# NOTE: this is the version reported as NOT working. `{{ }}` is used inside
# group_by()/summarise(), but `.$target` and `.$predictor` use `$`, which takes
# the name literally: it looks for columns called "target" and "predictor"
# instead of the columns supplied through the function arguments.
#
# data      - data frame to summarise
# predictor - unquoted grouping column
# target    - unquoted column that is summed (named `clicks` in the output)
get_pivot <- function(data, predictor,target) {
result <-
data %>%
group_by( {{ predictor }} ) %>%
summarise(clicks=sum( {{ target }} ),total=n(),
# Defect: `$` does not interpolate the function arguments (see note above).
pvalue= chisq.test(.$target, .$predictor)$p.value) %>%
mutate(percentage=clicks*100/total);
print(result)
}
`{{...}}`(curly-curly)插值运算符是"先 quote 再 unquote"的一种简便写法,但它并非在所有情况下都有效。在 OP 的函数中,用 `$` 提取列的部分(即 `.$target` 或 `.$predictor`)不起作用,因为 `$` 会按字面去查找名为 target 或 predictor 的列,而不是参数所指向的列。相反,我们可以先把参数转换为字符串(`as_name`),然后使用 `[[` 提取列:
library(rlang)
library(dplyr)
#' Summarise a target column per predictor value, with a chi-squared p-value.
#'
#' @param data A data frame.
#' @param predictor Unquoted name of the grouping column.
#' @param target Unquoted name of the column to sum.
#' @return A tibble with one row per `predictor` value and the columns
#'   `clicks` (sum of `target`), `total` (row count), `pvalue` and
#'   `percentage` (`clicks * 100 / total`).
get_pivot <- function(data, predictor, target) {
  # `$` would look for columns literally named "target" / "predictor", so
  # turn the captured arguments into strings and extract with `[[` instead.
  target_name <- as_name(enquo(target))
  predictor_name <- as_name(enquo(predictor))

  # The test uses the whole data set, so run it once here instead of once
  # per group inside summarise(); every output row carries the same value.
  p_value <- chisq.test(data[[target_name]], data[[predictor_name]])$p.value

  data %>%
    group_by({{ predictor }}) %>%
    # `!!` injects the computed value so that a data column that happens to
    # be called `p_value` cannot shadow it.
    summarise(clicks = sum({{ target }}), total = n(), pvalue = !!p_value) %>%
    mutate(percentage = clicks * 100 / total)
}

get_pivot(mtcars, cyl, mpg_cat)
# A tibble: 3 x 5
# cyl clicks total pvalue percentage
# <dbl> <dbl> <int> <dbl> <dbl>
#1 4 11 11 0.00000366 100
#2 6 3 7 0.00000366 42.9
#3 8 0 14 0.00000366 0
我正在尝试编写一个函数,为两个变量生成数据透视表(pivot table)。作为对之前问题的扩展,我还想加入预测变量与目标变量之间关系的卡方检验 p 值。我该如何修改这个函数?
library(dplyr)
# Overall mean of miles per (US) gallon; used as the split point below.
mean_mpg <- mean(mtcars$mpg)

# Flag each car: 1 when its mpg is above the overall mean, 0 otherwise.
mtcars <- mutate(mtcars, mpg_cat = ifelse(mpg > mean_mpg, 1, 0))

# For every cylinder count: number of above-mean cars, number of cars, and
# the above-mean cars as a percentage of all cars in that group.
mtcars %>%
  group_by(as.factor(cyl)) %>%
  summarise(sum = sum(mpg_cat), total = n()) %>%
  mutate(percentage = sum * 100 / total)
# Note: needs installation of rlang 0.4.0 or later
#' Print a per-group summary of a target column.
#'
#' Groups `data` by `predictor` (converted to a factor), then reports the sum
#' of `target`, the number of rows, and the sum as a percentage of the rows.
#'
#' @param data A data frame.
#' @param predictor Unquoted name of the grouping column.
#' @param target Unquoted name of the column to sum.
#' @return The summary, as returned by `print()` after it has been printed.
get_pivot <- function(data, predictor, target) {
  by_predictor <- group_by(data, as.factor({{ predictor }}))
  totals <- summarise(by_predictor, sum = sum({{ target }}), total = n())
  result <- mutate(totals, percentage = sum * 100 / total)
  print(result)
}
下面是一个可以正常运行的示例:
# Same per-cylinder summary, plus a chi-squared test p-value.
# `.` is the pipe's left-hand side (the whole grouped data set), so the test
# uses the full mpg_cat and cyl columns and every row shows the same p-value.
mtcars %>%
  group_by(as.factor(cyl)) %>%
  summarise(
    sum = sum(mpg_cat),
    total = n(),
    pvalue = chisq.test(as.factor(.$mpg_cat), as.factor(.$cyl))$p.value
  ) %>%
  mutate(percentage = sum * 100 / total)
我尝试了下面这个函数,但它不起作用。
# Attempted extension of get_pivot() that also reports a chi-squared p-value.
# NOTE: this is the version reported as NOT working. `{{ }}` is used inside
# group_by()/summarise(), but `.$target` and `.$predictor` use `$`, which takes
# the name literally: it looks for columns called "target" and "predictor"
# instead of the columns supplied through the function arguments.
#
# data      - data frame to summarise
# predictor - unquoted grouping column
# target    - unquoted column that is summed (named `clicks` in the output)
get_pivot <- function(data, predictor,target) {
result <-
data %>%
group_by( {{ predictor }} ) %>%
summarise(clicks=sum( {{ target }} ),total=n(),
# Defect: `$` does not interpolate the function arguments (see note above).
pvalue= chisq.test(.$target, .$predictor)$p.value) %>%
mutate(percentage=clicks*100/total);
print(result)
}
`{{...}}`(curly-curly)插值运算符是"先 quote 再 unquote"的一种简便写法,但它并非在所有情况下都有效。在 OP 的函数中,用 `$` 提取列的部分(即 `.$target` 或 `.$predictor`)不起作用,因为 `$` 会按字面去查找名为 target 或 predictor 的列,而不是参数所指向的列。相反,我们可以先把参数转换为字符串(`as_name`),然后使用 `[[` 提取列:
library(rlang)
library(dplyr)
#' Summarise a target column per predictor value, with a chi-squared p-value.
#'
#' @param data A data frame.
#' @param predictor Unquoted name of the grouping column.
#' @param target Unquoted name of the column to sum.
#' @return A tibble with one row per `predictor` value and the columns
#'   `clicks` (sum of `target`), `total` (row count), `pvalue` and
#'   `percentage` (`clicks * 100 / total`).
get_pivot <- function(data, predictor, target) {
  # `$` would look for columns literally named "target" / "predictor", so
  # turn the captured arguments into strings and extract with `[[` instead.
  target_name <- as_name(enquo(target))
  predictor_name <- as_name(enquo(predictor))

  # The test uses the whole data set, so run it once here instead of once
  # per group inside summarise(); every output row carries the same value.
  p_value <- chisq.test(data[[target_name]], data[[predictor_name]])$p.value

  data %>%
    group_by({{ predictor }}) %>%
    # `!!` injects the computed value so that a data column that happens to
    # be called `p_value` cannot shadow it.
    summarise(clicks = sum({{ target }}), total = n(), pvalue = !!p_value) %>%
    mutate(percentage = clicks * 100 / total)
}

get_pivot(mtcars, cyl, mpg_cat)
# A tibble: 3 x 5
# cyl clicks total pvalue percentage
# <dbl> <dbl> <int> <dbl> <dbl>
#1 4 11 11 0.00000366 100
#2 6 3 7 0.00000366 42.9
#3 8 0 14 0.00000366 0