在 R 中使用 purrr 和 dplyr(列表列工作流)的函数中的 if 语句
if-statement in a function using purrr and dplyr (List Column Workflow) in R
我正在尝试估计 A 和 B 两家医院的均值差异。每家医院都有不同的 "groups",我在模拟数据集中给了他们第 1 组和第 2 组。也就是说,我想测试第 1 组和第 2 组中医院 A 和 B 之间的均值差异,此外我还有多个变量(例如 value1 和 value2)。因此,我必须在第 1 组和第 2 组中的医院 A 和 B 之间测试 value1。即使我在最后的调用中指定了 method=1,我也得到了第三种方法(else 部分)。我正在使用推断包进行引导(tidyverse 或 tidymodels 的一部分)。
library(tidyverse)
library(lubridate)
library(readxl)
library(infer)
library(stringr)
library(rlang)
set.seed(1)
A <-data.frame(value1=rnorm(n = 1000, mean = 0.8, sd = 0.2), value2= rnorm(n=10 ,mean=1, sd=0.3))
A$hosp <- "A"
A$group <- sample(1:2,nrow(A) , replace=T)
B= data.frame(value1 = rnorm(n=1200, mean =1 , sd = 0.2), value2= rnorm(n=15, mean=1.1, sd=0.4))
B$hosp <- "B"
B$group <- sample(1:2,nrow(B) , replace=T)
forskel <- bind_rows(A, B) %>%
group_by(group) %>%
nest()
rm(A, B)
下面是我的职责。
bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) {
procestid <- enquo(procestid)
diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)
bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")
if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}
return(bootstrap2)
}
procestimes <- list("value1", "value2")
a <- map(syms(procestimes), bootloop , dataset=forskel, method=1 , reps=1000)
a
尽管我在调用中指定了 method=1,但我在 else 语句中得到了第三种形式的置信区间。
[[1]]
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
[[2]]
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
我想你忘了嵌套 if 语句,试试这个:
bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) {
procestid <- enquo(procestid)
diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)
bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")
if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
else { # here you should open a curly brackets with else, and close it of course
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}}
return(bootstrap2)
}
结果:
bootloop (forskel, value1, method=1, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Bias_Corrected_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=2, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Percentile_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=3, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
我正在尝试估计 A 和 B 两家医院的均值差异。每家医院都有不同的 "groups",我在模拟数据集中给了他们第 1 组和第 2 组。也就是说,我想测试第 1 组和第 2 组中医院 A 和 B 之间的均值差异,此外我还有多个变量(例如 value1 和 value2)。因此,我必须在第 1 组和第 2 组中的医院 A 和 B 之间测试 value1。即使我在最后的调用中指定了 method=1,我也得到了第三种方法(else 部分)。我正在使用推断包进行引导(tidyverse 或 tidymodels 的一部分)。
library(tidyverse)
library(lubridate)
library(readxl)
library(infer)
library(stringr)
library(rlang)
set.seed(1)
A <-data.frame(value1=rnorm(n = 1000, mean = 0.8, sd = 0.2), value2= rnorm(n=10 ,mean=1, sd=0.3))
A$hosp <- "A"
A$group <- sample(1:2,nrow(A) , replace=T)
B= data.frame(value1 = rnorm(n=1200, mean =1 , sd = 0.2), value2= rnorm(n=15, mean=1.1, sd=0.4))
B$hosp <- "B"
B$group <- sample(1:2,nrow(B) , replace=T)
forskel <- bind_rows(A, B) %>%
group_by(group) %>%
nest()
rm(A, B)
下面是我的职责。
bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) {
procestid <- enquo(procestid)
diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)
bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")
if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}
return(bootstrap2)
}
procestimes <- list("value1", "value2")
a <- map(syms(procestimes), bootloop , dataset=forskel, method=1 , reps=1000)
a
尽管我在调用中指定了 method=1,但我在 else 语句中得到了第三种形式的置信区间。
[[1]]
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
[[2]]
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [1,000 x 2]> <dbl [1]> <tibble [1 x 2]>
我想你忘了嵌套 if 语句,试试这个:
bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) {
procestid <- enquo(procestid)
diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)
bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")
if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
else { # here you should open a curly brackets with else, and close it of course
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}}
return(bootstrap2)
}
结果:
bootloop (forskel, value1, method=1, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Bias_Corrected_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=2, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Percentile_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=3, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>