为什么 purrr map() 函数在尝试运行用户自定义函数进行重复卡方分析时会产生错误
Why is purrr map() function generating errors when attempting to run user function for repeated chi-square analysis
我正在尝试使用 dplyr 和 purrr 以编程方式为许多变量生成多个卡方分析。以前都是简单地复制粘贴代码,但这样既费力又容易出错。我设法创建了一个函数来生成我想要的输出表格。但是,当我尝试使用 map() 函数时,它会产生如下所示的错误。我认为这可能是某个我没有理解的语法问题,可能与 covariates_list 中引号的处理方式有关。如果您能提供任何帮助,我将不胜感激,谢谢。下面是使用虚构数据集的代码,其格式与我的数据相同。
library(tidyverse)
# Made-up example data in the same layout as the real data: one row per id,
# an outcome column and three covariate columns, each coded "y" / "n".
df <- data.frame(
  id = seq_len(20L),
  outcome = c(
    "y", "y", "y", "y", "n", "n", "n", "y", "y", "y",
    "y", "y", "n", "n", "n", "n", "n", "n", "n", "n"
  ),
  covariate1 = c(
    "y", "n", "n", "n", "n", "y", "n", "n", "n", "n",
    "y", "n", "n", "n", "n", "y", "n", "n", "n", "y"
  ),
  covariate2 = c(
    "y", "y", "n", "n", "n", "y", "y", "y", "n", "n",
    "n", "y", "n", "n", "n", "y", "n", "n", "y", "y"
  ),
  covariate3 = c(
    "y", "y", "n", "n", "n", "n", "n", "y", "y", "n",
    "y", "n", "n", "n", "n", "n", "n", "n", "n", "n"
  ),
  # Keep the "y" / "n" columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)
### Frequency table of `b` by `a` with a chi-square p-value attached.
###
### a, b : columns of `dat`, given as bare (unquoted) names.
### dat  : data frame holding both columns.
### Returns the count table for every observed (b, a) combination, with two
### extra columns: `percentage` (share of all counted rows) and `chisq_pvalue`
### (the p-value of chisq.test(a, b), repeated on every row).
univariate_table <- function(a, b, dat) {
  # Capture the column arguments unevaluated so they can be unquoted with !!
  # inside the dplyr verbs below.
  col_a <- enquo(a)
  col_b <- enquo(b)

  # One row per (b, a) combination; count() stores the tally in column `n`.
  cell_counts <- count(dat, !!col_b, !!col_a)
  grand_total <- sum(cell_counts$n)
  with_percent <- mutate(cell_counts, percentage = n / grand_total * 100)

  # summarise() returns a one-cell data frame; as.numeric() reduces it to a
  # plain number.
  chisq_p <- as.numeric(
    summarise(dat, chisq.test(!!col_a, !!col_b)$p.value)
  )

  mutate(with_percent, chisq_pvalue = chisq_p)
}
### The function runs when it is called on one covariate at a time, with the
### columns given as bare (unquoted) names.
univariate_table(covariate1,outcome,df)
### Mapping the function over a list of the covariates with purrr::map(), as
### in the code below, fails with this error:
# Error in summarise_impl(.data, dots) :
#   Evaluation error: 'x' and 'y' must have the same length.
### NOTE(review): unlike the working call above, the list elements here are
### character strings rather than bare column names.
covariates_list <- list("covariate1","covariate2","covariate3")
map(covariates_list,univariate_table,outcome,df)
首先,用 suppressWarnings() 屏蔽 chisq.test() 产生的警告:
### Frequency table of `b` by `a` with a chi-square p-value attached.
### Warnings raised by chisq.test() are suppressed.
###
### a, b : columns of `dat`, given as bare (unquoted) names or unquoted
###        symbols (`!!sym`).
### dat  : data frame holding both columns.
### Returns the count table for every observed (b, a) combination, with two
### extra columns: `percentage` and `chisq_pvalue`.
univariate_table <- function(a, b, dat) {
  # Capture the column arguments unevaluated for use with !! below.
  first_var <- enquo(a)
  second_var <- enquo(b)

  # One row per (b, a) combination; count() stores the tally in column `n`.
  tally <- count(dat, !!second_var, !!first_var)
  n_rows_counted <- sum(tally$n)
  tally_pct <- mutate(tally, percentage = n / n_rows_counted * 100)

  # The test runs inside summarise() so the columns resolve against `dat`;
  # the resulting one-cell data frame is then reduced to a plain number.
  test_p <- dat %>%
    summarise(
      suppressWarnings(chisq.test(!!first_var, !!second_var))$p.value
    ) %>%
    as.numeric()

  mutate(tally_pct, chisq_pvalue = test_p)
}
然后稍微修改调用函数的方式:
# Turn each column-name string into a symbol, then unquote that symbol with !!
# so univariate_table() receives it like a bare column name.
map(
  syms(covariates_list),
  function(covariate) {
    univariate_table(!!covariate, outcome, df)
  }
)
## [[1]]
## # A tibble: 4 x 5
## outcome covariate1 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 8 40. 1.
## 2 n y 3 15. 1.
## 3 y n 7 35. 1.
## 4 y y 2 10. 1.
##
## [[2]]
## # A tibble: 4 x 5
## outcome covariate2 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 6 30. 1.
## 2 n y 5 25. 1.
## 3 y n 5 25. 1.
## 4 y y 4 20. 1.
##
## [[3]]
## # A tibble: 3 x 5
## outcome covariate3 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 11 55.0 0.0195
## 2 y n 4 20.0 0.0195
## 3 y y 5 25.0 0.0195
我正在尝试使用 dplyr 和 purrr 以编程方式为许多变量生成多个卡方分析。以前都是简单地复制粘贴代码,但这样既费力又容易出错。我设法创建了一个函数来生成我想要的输出表格。但是,当我尝试使用 map() 函数时,它会产生如下所示的错误。我认为这可能是某个我没有理解的语法问题,可能与 covariates_list 中引号的处理方式有关。如果您能提供任何帮助,我将不胜感激,谢谢。下面是使用虚构数据集的代码,其格式与我的数据相同。
library(tidyverse)
# Made-up example data in the same layout as the real data: one row per id,
# an outcome column and three covariate columns, each coded "y" / "n".
df <- data.frame(
  id = seq_len(20L),
  outcome = c(
    "y", "y", "y", "y", "n", "n", "n", "y", "y", "y",
    "y", "y", "n", "n", "n", "n", "n", "n", "n", "n"
  ),
  covariate1 = c(
    "y", "n", "n", "n", "n", "y", "n", "n", "n", "n",
    "y", "n", "n", "n", "n", "y", "n", "n", "n", "y"
  ),
  covariate2 = c(
    "y", "y", "n", "n", "n", "y", "y", "y", "n", "n",
    "n", "y", "n", "n", "n", "y", "n", "n", "y", "y"
  ),
  covariate3 = c(
    "y", "y", "n", "n", "n", "n", "n", "y", "y", "n",
    "y", "n", "n", "n", "n", "n", "n", "n", "n", "n"
  ),
  # Keep the "y" / "n" columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)
### Frequency table of `b` by `a` with a chi-square p-value attached.
###
### a, b : columns of `dat`, given as bare (unquoted) names.
### dat  : data frame holding both columns.
### Returns the count table for every observed (b, a) combination, with two
### extra columns: `percentage` (share of all counted rows) and `chisq_pvalue`
### (the p-value of chisq.test(a, b), repeated on every row).
univariate_table <- function(a, b, dat) {
  # Capture the column arguments unevaluated so they can be unquoted with !!
  # inside the dplyr verbs below.
  col_a <- enquo(a)
  col_b <- enquo(b)

  # One row per (b, a) combination; count() stores the tally in column `n`.
  cell_counts <- count(dat, !!col_b, !!col_a)
  grand_total <- sum(cell_counts$n)
  with_percent <- mutate(cell_counts, percentage = n / grand_total * 100)

  # summarise() returns a one-cell data frame; as.numeric() reduces it to a
  # plain number.
  chisq_p <- as.numeric(
    summarise(dat, chisq.test(!!col_a, !!col_b)$p.value)
  )

  mutate(with_percent, chisq_pvalue = chisq_p)
}
### The function runs when it is called on one covariate at a time, with the
### columns given as bare (unquoted) names.
univariate_table(covariate1,outcome,df)
### Mapping the function over a list of the covariates with purrr::map(), as
### in the code below, fails with this error:
# Error in summarise_impl(.data, dots) :
#   Evaluation error: 'x' and 'y' must have the same length.
### NOTE(review): unlike the working call above, the list elements here are
### character strings rather than bare column names.
covariates_list <- list("covariate1","covariate2","covariate3")
map(covariates_list,univariate_table,outcome,df)
首先,用 suppressWarnings() 屏蔽 chisq.test() 产生的警告:
### Frequency table of `b` by `a` with a chi-square p-value attached.
### Warnings raised by chisq.test() are suppressed.
###
### a, b : columns of `dat`, given as bare (unquoted) names or unquoted
###        symbols (`!!sym`).
### dat  : data frame holding both columns.
### Returns the count table for every observed (b, a) combination, with two
### extra columns: `percentage` and `chisq_pvalue`.
univariate_table <- function(a, b, dat) {
  # Capture the column arguments unevaluated for use with !! below.
  first_var <- enquo(a)
  second_var <- enquo(b)

  # One row per (b, a) combination; count() stores the tally in column `n`.
  tally <- count(dat, !!second_var, !!first_var)
  n_rows_counted <- sum(tally$n)
  tally_pct <- mutate(tally, percentage = n / n_rows_counted * 100)

  # The test runs inside summarise() so the columns resolve against `dat`;
  # the resulting one-cell data frame is then reduced to a plain number.
  test_p <- dat %>%
    summarise(
      suppressWarnings(chisq.test(!!first_var, !!second_var))$p.value
    ) %>%
    as.numeric()

  mutate(tally_pct, chisq_pvalue = test_p)
}
然后稍微修改调用函数的方式:
# Turn each column-name string into a symbol, then unquote that symbol with !!
# so univariate_table() receives it like a bare column name.
map(
  syms(covariates_list),
  function(covariate) {
    univariate_table(!!covariate, outcome, df)
  }
)
## [[1]]
## # A tibble: 4 x 5
## outcome covariate1 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 8 40. 1.
## 2 n y 3 15. 1.
## 3 y n 7 35. 1.
## 4 y y 2 10. 1.
##
## [[2]]
## # A tibble: 4 x 5
## outcome covariate2 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 6 30. 1.
## 2 n y 5 25. 1.
## 3 y n 5 25. 1.
## 4 y y 4 20. 1.
##
## [[3]]
## # A tibble: 3 x 5
## outcome covariate3 n percentage chisq_pvalue
## <chr> <chr> <int> <dbl> <dbl>
## 1 n n 11 55.0 0.0195
## 2 y n 4 20.0 0.0195
## 3 y y 5 25.0 0.0195