通过频率分布进行数据重塑和汇总

Data reshape and summarization by frequency distribution

我有一个调查得到的数据集:

structure(list(question_1_A...1 = c("A", "A", "B", "C"), question_1_B = c("A", 
"B", "B", "C"), question_1_C = c("A", "A", "A", "B"), question_2_A...4 = c("A", 
"D", "B", "C"), question_2_B = c("D", NA, "B", "C"), question_2_C = c("E", 
"D", "A", "B")), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"))

我想根据这个图得到一个dataframe:

谢谢!

删除列名中的子字符串后我们可以使用split.default

out <-  lapply(split.default(df1, sub(".*_([A-C]).*", "\1", names(df1))),
      function(x)  {
     x1 <- round(t(proportions(table(stack(x)[2:1]), 1)), 2)
     x1 })

-输出

> out
$A
      ind
values question_1_A...1 question_2_A...4
     A             0.50             0.25
     B             0.25             0.25
     C             0.25             0.25
     D             0.00             0.25

$B
      ind
values question_1_B question_2_B
     A         0.25         0.00
     B         0.50         0.33
     C         0.25         0.33
     D         0.00         0.33

$C
      ind
values question_1_C question_2_C
     A         0.75         0.25
     B         0.25         0.25
     D         0.00         0.25
     E         0.00         0.25

如果我们想要 gt,那么我们可以

library(dplyr)
library(tidyr)
library(stringr)
library(gt)
df1 %>%
    rename_with(~ str_remove(., "\.+\d+$")) %>%
    pivot_longer(cols = everything(), names_to = c(".value", "grp"), 
      names_pattern = "(.*)_([A-Z])$", values_drop_na = TRUE) %>%
    pivot_longer(cols = starts_with('question'), 
       names_to = 'question', values_drop_na = TRUE) %>%
    count(grp, question, value)  %>% 
    group_by(grp, question) %>% 
    mutate(n =  sprintf('(%.2f)', round(n/sum(n), 2))) %>% 
    ungroup %>%
    unite(value, value, n, sep = " ") %>% 
    mutate(rn = data.table::rowid(grp, question)) %>% 
    pivot_wider(names_from = question, values_from = value) %>% 
    select(-rn) %>%
    group_by(grp) %>%
    gt(.)

-输出