R 中长格式数据的描述性统计

Descriptive statistics for R-data in a long format

我有重复测量(5 次或更少)的数据,其中包括血压测量。我已将数据整理为长格式,但由于这是我第一次这样做,我现在不知道该如何获取各变量的描述性统计信息。

我的示例数据:

# Example blood-pressure data in long format: one row per participant (`id`)
# per measurement occasion (`time`), with the columns `cohort`, `systolicBP`,
# `diastolicBP` and `egfr` (NA where no value is given).
# The ids follow the printed tibble: participants 1-3 (cohort 1) have four
# rows each and participants 4-9 (cohort 2) have two rows each (time4, time5),
# so every (id, time) pair occurs exactly once.
questiondata <- structure(
  list(
    id = c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
           4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9),
    time = c("time1", "time2", "time3", "time5",
             "time1", "time2", "time3", "time5",
             "time1", "time2", "time3", "time5",
             "time4", "time5", "time4", "time5",
             "time4", "time5", "time4", "time5",
             "time4", "time5", "time4", "time5"),
    cohort = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
               2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
    systolicBP = c(102, 137, 132, 150, 152, 146, 160.5, 159.5,
                   144, 138, 137.5, 163, 137, 147, 125, 141,
                   147, 150, 148, 167.5, 153.5, 164.5, 159, 123),
    diastolicBP = c(56, 99, 78, 90, 77, 78, 80.5, 82,
                    72, 70, 67.5, 61, 86, 90, 80.5, 84,
                    75, 81, 91, 96, 80, 87.5, 87, 79),
    egfr = c(78.2, NA, 55.8, NA, NA, NA, 87.6, NA,
             NA, NA, 75.6, 70.9, 71.9, 71.8, 47.9, 36.6,
             93.7, 81.5, 93.2, 82.1, 92.9, 79.1, 66.6, 55.2)
  ),
  row.names = c(NA, -24L),
  class = c("tbl_df", "tbl", "data.frame")
)

对应以下 tibble:


# A tibble: 24 x 6
      id time  cohort systolicBP diastolicBP  egfr
   <dbl> <chr>  <dbl>      <dbl>       <dbl> <dbl>
 1 1 time1      1       102         56    78.2
 2 1 time2      1       137         99    NA  
 3 1 time3      1       132         78    55.8
 4 1 time5      1       150         90    NA  
 5 2 time1      1       152         77    NA  
 6 2 time2      1       146         78    NA  
 7 2 time3      1       160.        80.5  87.6
 8 2 time5      1       160.        82    NA  
 9 3 time1      1       144         72    NA  
10 3 time2      1       138         70    NA  
11 3 time3      1       138.        67.5  75.6
12 3 time5      1       163         61    70.9
13 4 time4      2       137         86    71.9
14 4 time5      2       147         90    71.8
15 5 time4      2       125         80.5  47.9
16 5 time5      2       141         84    36.6
17 6 time4      2       147         75    93.7
18 6 time5      2       150         81    81.5
19 7 time4      2       148         91    93.2
20 7 time5      2       168.        96    82.1
21 8 time4      2       154.        80    92.9
22 8 time5      2       164.        87.5  79.1
23 9 time4      2       159         87    66.6
24 9 time5      2       123         79    55.2

例如,现在我想得到:每个队列(cohort)的人数及其平均收缩压,以及每个队列在各个时间点的平均收缩压。

我敢肯定这很容易,但我似乎得不到任何可行的结果。

library(tidyverse)
# Per-cohort summary: the number of distinct participant ids and the mean
# systolic BP over all rows of that cohort (missing values are ignored).
# The result is returned ungrouped.
by_cohort <- group_by(questiondata, cohort)
summarise(
  by_cohort,
  n = n_distinct(id),
  mean_systolic = mean(systolicBP, na.rm = TRUE),
  .groups = "drop"
)
#> # A tibble: 2 x 3
#>   cohort     n mean_systolic
#>    <dbl> <int>         <dbl>
#> 1      1     3          143.
#> 2      2     6          147.


# Mean systolic BP for every cohort/time combination (missing values are
# ignored); the result is returned ungrouped.
by_cohort_time <- group_by(questiondata, cohort, time)
summarise(
  by_cohort_time,
  mean_systolic_time = mean(systolicBP, na.rm = TRUE),
  .groups = "drop"
)
#> # A tibble: 6 x 3
#>   cohort time  mean_systolic_time
#>    <dbl> <chr>              <dbl>
#> 1      1 time1               133.
#> 2      1 time2               140.
#> 3      1 time3               143.
#> 4      1 time5               158.
#> 5      2 time4               145.
#> 6      2 time5               149.

由 reprex 包 (v2.0.0) 于 2021-06-25 创建
# Cohort/time summary of systolic BP reported as text: the mean (missing
# values ignored) is computed first, then rounded to a whole number and
# suffixed with the unit, e.g. "133 mmHg".
questiondata %>%
  group_by(cohort, time) %>%
  summarise(
    mean_systolic_time = mean(systolicBP, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    mean_systolic_time = str_c(round(mean_systolic_time, digits = 0), " mmHg")
  )

# A tibble: 6 x 3
  cohort time  mean_systolic_time
   <dbl> <chr> <chr>             
1      1 time1 133 mmHg          
2      1 time2 140 mmHg          
3      1 time3 143 mmHg          
4      1 time5 158 mmHg          
5      2 time4 145 mmHg          
6      2 time5 149 mmHg