需要帮助为 summary_table(dplyr::group_by 编写一个函数
Need help writing a function for summary_table(dplyr::group_by
我正在尝试创建一个函数,以最大限度地减少我必须单独计算所有统计数据(最小值、中值、最大值、平均值、SD 和 NA)的次数。我已经包含了这个大列表的前两部分,以及如何使用该列表。
list("Child Age" =
list("Min" = ~ min(.data$ChildAge,na_rm = TRUE),
"Median" = ~ median(.data$ChildAge,na_rm = TRUE),
"Mean ± SD" = ~ qwraps2::mean_sd(.data$ChildAge,na_rm = TRUE),
"Max" = ~ max(.data$ChildAge,na_rm = TRUE),
"NA (Not factored in analysis)" = ~ percent(sum(is.na(.data$ChildAge)) /length(.data$ChildAge))),
"Child Gender" =
list("Girl" = ~ qwraps2::n_perc(.data$ChildGender == "Girl", na_rm = TRUE),
"Boy" = ~ qwraps2::n_perc(.data$ChildGender == "Boy", na_rm = TRUE))
......
# Build the summary table for df grouped by Clinic, then print it.
# NOTE(review): demographic_summary is presumably the list shown above; its
# assignment is not visible in this snippet.
by_clinic_demographic <- summary_table(
  dplyr::group_by(df, Clinic),
  demographic_summary
)
by_clinic_demographic
我已经尝试设计一个可行的函数:
# First attempt: every statistic is computed immediately, so the returned
# named list holds plain values rather than formulas.
#   x: vector to summarise; defaults to df$StudyID.
analysis_func <- function(x = df$StudyID) {
  list(
    "Min" = min(x, na.rm = TRUE),
    "Median" = median(x, na.rm = TRUE),
    "Mean ± SD" = qwraps2::mean_sd(x, na_rm = TRUE),
    "Max" = max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = percent(sum(is.na(x)) / length(x))
  )
}
当我在一个新列表中调用这个函数时:
assessment_summary <-
list("Mother Age" = analysis_func(.data$MotherAge),,
我收到错误:错误:`x` 必须是公式
当我在 = 符号后添加 ~ 时,例如:
"Min" = ~ min(x,na.rm = TRUE)
然后我得到错误:FUN(X[[i]], ...) 中的错误:仅在具有所有数字变量的数据帧上定义
这里是一个简化的版本来强调我遇到的问题:
# Formula variant: each statistic is wrapped in a one-sided formula, so nothing
# is computed when the function is called. The formulas refer to the argument
# `x` by name.
#   x: vector to summarise; defaults to df$StudyID.
analysis_func <- function(x = df$StudyID) {
  list(
    "Min" = ~ min(x, na.rm = TRUE),
    "Median" = ~ median(x, na.rm = TRUE),
    "Mean ± SD" = ~ qwraps2::mean_sd(x, na_rm = TRUE),
    "Max" = ~ max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = ~ percent(sum(is.na(x)) / length(x))
  )
}
# Summary spec with a single "Scores" section produced by analysis_func.
test_summary <- list("Scores" = analysis_func(.data$StudyID))
# The call that would consume this spec is left disabled:
# test_stack <- summary_table(dplyr::group_by(dataframe, s), test_summary)
# test_stack
# Toy data: ten values of n, split into two groups (s) of five rows each.
n <- c(2, 3, 5, 4, 10, 12, rep(10, 4))
s <- rep(c("aa", "bb"), each = 5)
dataframe <- data.frame(n, s)
# Summary spec for column n: one one-sided formula per statistic, grouped
# under the "Scores" section.
# Base min()/median()/max() spell the missing-value flag `na.rm`. Passing
# `na_rm = TRUE` to them does not drop NAs: min()/max() take it as one more
# value to compare, and median() ignores it. `na_rm` is kept only for
# qwraps2::mean_sd.
test_summary2 <-
  list(
    "Scores" =
      list(
        "Min" = ~ min(.data$n, na.rm = TRUE),
        "Median" = ~ median(.data$n, na.rm = TRUE),
        "Mean ± SD" = ~ qwraps2::mean_sd(.data$n, na_rm = TRUE),
        "Max" = ~ max(.data$n, na.rm = TRUE),
        "NA (Not factored in analysis)" =
          ~ percent(sum(is.na(.data$n)) / length(.data$n))
      )
  )
# Apply the test_summary2 spec to the data grouped by s, then print the result.
test_stack <- summary_table(
  dplyr::group_by(dataframe, s),
  test_summary2
)
test_stack
如有任何帮助,我们将不胜感激。
我们可以使用下面这个函数:
# Compute the summary statistics of x immediately.
#   x: vector to summarise; NAs are dropped for every statistic except the
#      last entry, which is the share of NA values in x.
# Returns a length-one list whose single element is the named list of
# statistics.
analysis_func <- function(x) {
  stats <- list(
    Min = min(x, na.rm = TRUE),
    Median = median(x, na.rm = TRUE),
    Mean = mean(x, na.rm = TRUE),
    SD = sd(x, na.rm = TRUE),
    Max = max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = mean(is.na(x))
  )
  list(stats)
}
然后分组调用。
library(dplyr)
# One row per level of s; summary_list is a list column whose cells hold the
# named list returned by analysis_func.
dataframe %>%
  group_by(s) %>%
  summarise(summary_list = analysis_func(n))
# A tibble: 3 x 2
# s summary_list
# <fct> <list>
#1 aa <named list [6]>
#2 bb <named list [6]>
#3 cc <named list [6]>
如果我们希望输出为单独的列,我们可以添加 unnest_wider
# Same grouped summary, then spread each named list into one column per
# statistic with tidyr::unnest_wider.
dataframe %>%
  group_by(s) %>%
  summarise(summary_list = analysis_func(n)) %>%
  tidyr::unnest_wider(summary_list)
# A tibble: 3 x 7
# s Min Median Mean SD Max `NA (Not factored in analysis)`
# <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 aa 2 3 3 1.41 4 0
#2 bb 3 3 3 NA 3 0
#3 cc 5 5 5 NA 5 0
我正在尝试创建一个函数,以最大限度地减少我必须单独计算所有统计数据(最小值、中值、最大值、平均值、SD 和 NA)的次数。我已经包含了这个大列表的前两部分,以及如何使用该列表。
list("Child Age" =
list("Min" = ~ min(.data$ChildAge,na_rm = TRUE),
"Median" = ~ median(.data$ChildAge,na_rm = TRUE),
"Mean ± SD" = ~ qwraps2::mean_sd(.data$ChildAge,na_rm = TRUE),
"Max" = ~ max(.data$ChildAge,na_rm = TRUE),
"NA (Not factored in analysis)" = ~ percent(sum(is.na(.data$ChildAge)) /length(.data$ChildAge))),
"Child Gender" =
list("Girl" = ~ qwraps2::n_perc(.data$ChildGender == "Girl", na_rm = TRUE),
"Boy" = ~ qwraps2::n_perc(.data$ChildGender == "Boy", na_rm = TRUE))
......
# Build the summary table for df grouped by Clinic, then print it.
# NOTE(review): demographic_summary is presumably the list shown above; its
# assignment is not visible in this snippet.
by_clinic_demographic <- summary_table(
  dplyr::group_by(df, Clinic),
  demographic_summary
)
by_clinic_demographic
我已经尝试设计一个可行的函数:
# First attempt: every statistic is computed immediately, so the returned
# named list holds plain values rather than formulas.
#   x: vector to summarise; defaults to df$StudyID.
analysis_func <- function(x = df$StudyID) {
  list(
    "Min" = min(x, na.rm = TRUE),
    "Median" = median(x, na.rm = TRUE),
    "Mean ± SD" = qwraps2::mean_sd(x, na_rm = TRUE),
    "Max" = max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = percent(sum(is.na(x)) / length(x))
  )
}
当我在一个新列表中调用这个函数时:
assessment_summary <-
list("Mother Age" = analysis_func(.data$MotherAge),,
我收到错误:错误:`x` 必须是公式
当我在 = 符号后添加 ~ 时,例如:
"Min" = ~ min(x,na.rm = TRUE)
然后我得到错误:FUN(X[[i]], ...) 中的错误:仅在具有所有数字变量的数据帧上定义
这里是一个简化的版本来强调我遇到的问题:
# Formula variant: each statistic is wrapped in a one-sided formula, so nothing
# is computed when the function is called. The formulas refer to the argument
# `x` by name.
#   x: vector to summarise; defaults to df$StudyID.
analysis_func <- function(x = df$StudyID) {
  list(
    "Min" = ~ min(x, na.rm = TRUE),
    "Median" = ~ median(x, na.rm = TRUE),
    "Mean ± SD" = ~ qwraps2::mean_sd(x, na_rm = TRUE),
    "Max" = ~ max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = ~ percent(sum(is.na(x)) / length(x))
  )
}
# Summary spec with a single "Scores" section produced by analysis_func.
test_summary <- list("Scores" = analysis_func(.data$StudyID))
# The call that would consume this spec is left disabled:
# test_stack <- summary_table(dplyr::group_by(dataframe, s), test_summary)
# test_stack
# Toy data: ten values of n, split into two groups (s) of five rows each.
n <- c(2, 3, 5, 4, 10, 12, rep(10, 4))
s <- rep(c("aa", "bb"), each = 5)
dataframe <- data.frame(n, s)
# Summary spec for column n: one one-sided formula per statistic, grouped
# under the "Scores" section.
# Base min()/median()/max() spell the missing-value flag `na.rm`. Passing
# `na_rm = TRUE` to them does not drop NAs: min()/max() take it as one more
# value to compare, and median() ignores it. `na_rm` is kept only for
# qwraps2::mean_sd.
test_summary2 <-
  list(
    "Scores" =
      list(
        "Min" = ~ min(.data$n, na.rm = TRUE),
        "Median" = ~ median(.data$n, na.rm = TRUE),
        "Mean ± SD" = ~ qwraps2::mean_sd(.data$n, na_rm = TRUE),
        "Max" = ~ max(.data$n, na.rm = TRUE),
        "NA (Not factored in analysis)" =
          ~ percent(sum(is.na(.data$n)) / length(.data$n))
      )
  )
# Apply the test_summary2 spec to the data grouped by s, then print the result.
test_stack <- summary_table(
  dplyr::group_by(dataframe, s),
  test_summary2
)
test_stack
如有任何帮助,我们将不胜感激。
我们可以使用下面这个函数:
# Compute the summary statistics of x immediately.
#   x: vector to summarise; NAs are dropped for every statistic except the
#      last entry, which is the share of NA values in x.
# Returns a length-one list whose single element is the named list of
# statistics.
analysis_func <- function(x) {
  stats <- list(
    Min = min(x, na.rm = TRUE),
    Median = median(x, na.rm = TRUE),
    Mean = mean(x, na.rm = TRUE),
    SD = sd(x, na.rm = TRUE),
    Max = max(x, na.rm = TRUE),
    "NA (Not factored in analysis)" = mean(is.na(x))
  )
  list(stats)
}
然后分组调用。
library(dplyr)
# One row per level of s; summary_list is a list column whose cells hold the
# named list returned by analysis_func.
dataframe %>%
  group_by(s) %>%
  summarise(summary_list = analysis_func(n))
# A tibble: 3 x 2
# s summary_list
# <fct> <list>
#1 aa <named list [6]>
#2 bb <named list [6]>
#3 cc <named list [6]>
如果我们希望输出为单独的列,我们可以添加 unnest_wider
# Same grouped summary, then spread each named list into one column per
# statistic with tidyr::unnest_wider.
dataframe %>%
  group_by(s) %>%
  summarise(summary_list = analysis_func(n)) %>%
  tidyr::unnest_wider(summary_list)
# A tibble: 3 x 7
# s Min Median Mean SD Max `NA (Not factored in analysis)`
# <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 aa 2 3 3 1.41 4 0
#2 bb 3 3 3 NA 3 0
#3 cc 5 5 5 NA 5 0