将 "Overall" 列添加到 R 中的摘要 table

Question

我正在创建关于来自 4 个不同区域（西部、中西部、东北部和南部）样本的健康调查数据的汇总统计数据 table，编码为区域 1-4。现在我可以创建一个 table 来显示每个地区的统计数据，但我还想添加一个额外的列来显示样本中所有个体的总体平均值或中位数。我怎样才能做到这一点？我已经包含了到目前为止我已经完成的代码。谢谢！

#the data frame 
structure(list(AGE = c(40L, 23L, 24L, 18L, 30L, 33L, 32L, 63L, 
22L, 24L), FAMSIZE = c(2L, 2L, 2L, 3L, 2L, 6L, 2L, 1L, 2L, 1L
), HYPERTEN = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), ALC = c(0, 2, 
3, 0, 2, 0, 3, 0, 2, 2), region_group = c("Region 4", "Region 3", 
"Region 4", "Region 3", "Region 1", "Region 2", "Region 1", "Region 2", 
"Region 4", "Region 4"), PSU = c(2L, 1L, 2L, 2L, 2L, 1L, 2L, 
2L, 1L, 2L), PERWEIGHT_MERGE = c(31.2, 615.2, 37.6, 1626, 44, 
149.8, 745.2, 984.2, 1512, 399.6), SAMPWEIGHT_MERGE = c(65, 860.4, 
94.4, 9146, 170.8, 310.4, 755.2, 1053.4, 3964.4, 706.2), STRATA = c(6296L, 
6165L, 6296L, 6224L, 6045L, 6083L, 6029L, 6073L, 6287L, 6247L
)), row.names = c(NA, 10L), class = "data.frame") 

#Code for the table 
out1<-sample_survey %>% 
  group_by(region_group) %>% 
  summarise("Number of drinks (mean)"=survey_mean(ALC),
            "Number of drinks (median)"=survey_median(ALC),"Hypertension"=survey_mean(HYPERTEN), "Family 
            Size"=survey_mean(FAMSIZE), "Age"=survey_median(AGE))

out1=t(out1)
out1 

                             [,1]         [,2]         [,3]         [,4]        
region_group                 "Region 1"   "Region 2"   "Region 3"   "Region 4"  
Number of drinks (mean)      "1.663778"   "2.131566"   "1.744107"   "2.009594"  
Number of drinks (mean)_se   "0.1375124"  "0.1245772"  "0.0957500"  "0.1199982" 
Number of drinks (median)    "1"          "2"          "1"          "2"         
Number of drinks (median)_se "0.0000000"  "0.2531528"  "0.0000000"  "0.2533324" 
Hypertension                 "0.1340147"  "0.1685102"  "0.1834528"  "0.1225418" 
Hypertension_se              "0.01623974" "0.01529678" "0.01463019" "0.01475651"
Family \n            Size    "3.121062"   "2.883905"   "3.107202"   "3.265012"  
Family \n            Size_se "0.11668906" "0.07435704" "0.08004129" "0.11138869"
Age                          "30"         "27"         "30"         "28"        
Age_se                       "1.3615690"  "1.0126110"  "0.7616152"  "0.7599972"

Answer 1

虽然我很确定有一些包可以开箱即用地实现这一目标，但实现这一目标的一种方法是重复整个数据集的摘要并在转置之前将表绑定在一起。为此，为了减少代码重复，我将总结代码放在辅助函数中。试试这个：

library(dplyr)
library(srvyr)

# Helper function
mysum <- function(d) {
  d %>%
    summarise(
      "Number of drinks (mean)" = survey_mean(ALC),
      "Number of drinks (median)" = survey_median(ALC),
      "Hypertension" = survey_mean(HYPERTEN), 
      "Family Size" = survey_mean(FAMSIZE), 
      "Age" = survey_median(AGE)
    )
}

sample_survey <- sample_survey %>%
  as_survey_design(strata = region_group)

#Code for the table 
out1<-sample_survey %>% 
  group_by(region_group) %>% 
  mysum()

out2<-sample_survey %>% 
  mysum() %>% 
  mutate(region_group = "Total")

bind_rows(out1, out2) %>% 
  t()
#>                              [,1]         [,2]         [,3]        
#> region_group                 "Region 1"   "Region 2"   "Region 3"  
#> Number of drinks (mean)      "2.50"       "0.00"       "1.00"      
#> Number of drinks (mean)_se   "0.5000000"  "0.0000000"  "1.0000000" 
#> Number of drinks (median)    "2"          "0"          "0"         
#> Number of drinks (median)_se "0.03935085" "0.00000000" "0.07870171"
#> Hypertension                 "0.0"        "0.5"        "0.0"       
#> Hypertension_se              "0.0"        "0.5"        "0.0"       
#> Family Size                  "2.00"       "3.50"       "2.50"      
#> Family Size_se               "0.0000000"  "2.5000000"  "0.5000000" 
#> Age                          "30"         "33"         "18"        
#> Age_se                       "0.07870171" "1.18052560" "0.19675427"
#>                              [,4]         [,5]        
#> region_group                 "Region 4"   "Total"     
#> Number of drinks (mean)      "1.75"       "1.40"      
#> Number of drinks (mean)_se   "0.6291529"  "0.3366502" 
#> Number of drinks (median)    "2"          "2"         
#> Number of drinks (median)_se "0.47133552" "0.50276759"
#> Hypertension                 "0.0"        "0.1"       
#> Hypertension_se              "0.0"        "0.1"       
#> Family Size                  "1.75"       "2.30"      
#> Family Size_se               "0.2500000"  "0.5196152" 
#> Age                          "24"         "24"        
#> Age_se                       "2.82801315" "2.02169598"

Answer 2

您也可以运行相同的计算而不包括 group_by，然后将新的结果列绑定到您所做的第一个 table。（我不得不返回并分别将 survey_mean 和 survey_median 更改为 mean 和 median，否则会抛出一个错误）。

library(dplyr)
library(srvyr)
#the data frame 
sample_survey <- structure(list(AGE = c(40L, 23L, 24L, 18L, 30L, 33L, 32L, 63L, 22L, 24L), 
                       FAMSIZE = c(2L, 2L, 2L, 3L, 2L, 6L, 2L, 1L, 2L, 1L), 
                       HYPERTEN = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0), 
                       ALC = c(0, 2, 3, 0, 2, 0, 3, 0, 2, 2), 
                       region_group = c("Region 4", "Region 3", "Region 4", "Region 3", 
                                        "Region 1", "Region 2", "Region 1", "Region 2", 
                                        "Region 4", "Region 4"), 
                       PSU = c(2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L), 
                       PERWEIGHT_MERGE = c(31.2, 615.2, 37.6, 1626, 44, 149.8, 745.2, 984.2, 1512, 399.6), 
                       SAMPWEIGHT_MERGE = c(65, 860.4, 94.4, 9146, 170.8, 310.4, 755.2, 1053.4, 3964.4, 706.2),
                       STRATA = c(6296L, 6165L, 6296L, 6224L, 6045L, 6083L, 6029L, 6073L, 6287L, 6247L )), row.names = c(NA, 10L), class = "data.frame") 

#Code for the table 
out1<-sample_survey %>% 
  group_by(region_group) %>% 
  summarise("Number of drinks (mean)"=mean(ALC),
            "Number of drinks (median)"=median(ALC),"Hypertension"=mean(HYPERTEN), "Family 
            Size"=mean(FAMSIZE), "Age"=median(AGE))

out1=t(out1)

#new code
out2 <- sample_survey %>%
  summarise("All Regions" = "All Regions",
            "Number of drinks (mean)"=mean(ALC),
            "Number of drinks (median)"=median(ALC),"Hypertension"=mean(HYPERTEN), "Family 
            Size"=mean(FAMSIZE), "Age"=median(AGE))
out2 = t(out2)
cbind(out1, out2)

> cbind(out1, out2)
                          [,1]       [,2]       [,3]       [,4]       [,5]         
region_group              "Region 1" "Region 2" "Region 3" "Region 4" "All Regions"
Number of drinks (mean)   "2.50"     "0.00"     "1.00"     "1.75"     "1.4"        
Number of drinks (median) "2.5"      "0.0"      "1.0"      "2.0"      "2"          
Hypertension              "0.0"      "0.5"      "0.0"      "0.0"      "0.1"        
Family \n            Size "2.00"     "3.50"     "2.50"     "1.75"     "2.3"        
Age                       "31.0"     "48.0"     "20.5"     "24.0"     "27"

将 "Overall" 列添加到 R 中的摘要 table

Add an "Overall" column to a summary table in R

datatable

r

survey

cluster-analysis

multiple-columns