通过频率分布进行数据重塑和汇总
Data reshape and summarization by frequency distribution
我有一个调查得到的数据集:
# Survey data in the form produced by dput(): a 4-row tibble with six character
# columns named question_<n>_<group> (two names carry a "...<n>" suffix);
# question_2_B contains one NA.
structure(list(question_1_A...1 = c("A", "A", "B", "C"), question_1_B = c("A",
"B", "B", "C"), question_1_C = c("A", "A", "A", "B"), question_2_A...4 = c("A",
"D", "B", "C"), question_2_B = c("D", NA, "B", "C"), question_2_C = c("E",
"D", "A", "B")), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
我想根据这个图得到一个dataframe:
谢谢!
从列名中提取出分组字母(A、B、C)之后,我们可以使用 split.default 按组拆分各列:
# Split the columns of `df1` by the group letter (A, B or C) embedded in each
# column name, then build one table of answer proportions per group.
# The backreference must be written "\\1" in an R string literal: a
# single-backslash "\1" is the octal escape for the control character \001,
# which would replace every name with that character instead of the letter.
out <- lapply(
  split.default(df1, sub(".*_([A-C]).*", "\\1", names(df1))),
  function(x) {
    # stack() yields long format (values, ind); [2:1] puts `ind` (the source
    # column) first, so margin 1 normalises within each question.
    counts <- table(stack(x)[2:1])
    # Transpose so answers become rows and questions become columns.
    round(t(proportions(counts, 1)), 2)
  }
)
-输出
> out
$A
ind
values question_1_A...1 question_2_A...4
A 0.50 0.25
B 0.25 0.25
C 0.25 0.25
D 0.00 0.25
$B
ind
values question_1_B question_2_B
A 0.25 0.00
B 0.50 0.33
C 0.25 0.33
D 0.00 0.33
$C
ind
values question_1_C question_2_C
A 0.75 0.25
B 0.25 0.25
D 0.00 0.25
E 0.00 0.25
如果我们想要得到 gt 表格,那么可以这样做:
library(dplyr)
library(tidyr)
library(stringr)
library(gt)
# Build a gt table of "answer (proportion)" labels, one column per question,
# with rows grouped by the A/B/C suffix of the original column names.
df1 %>%
  # Strip a trailing "...<n>" suffix from column names. Backslashes must be
  # doubled in an R string literal; "\." and "\d" are unrecognized escapes and
  # fail to parse.
  rename_with(~ str_remove(., "\\.+\\d+$")) %>%
  # Split each name into the question stem (kept as a column via ".value")
  # and the group letter.
  pivot_longer(cols = everything(), names_to = c(".value", "grp"),
    names_pattern = "(.*)_([A-Z])$", values_drop_na = TRUE) %>%
  # Go fully long: one row per (grp, question, answer).
  pivot_longer(cols = starts_with('question'),
    names_to = 'question', values_drop_na = TRUE) %>%
  count(grp, question, value) %>%
  group_by(grp, question) %>%
  # Replace the count with its share within the question, formatted "(0.25)".
  mutate(n = sprintf('(%.2f)', round(n/sum(n), 2))) %>%
  ungroup() %>%
  unite(value, value, n, sep = " ") %>%
  # Row index within each (grp, question) so pivot_wider has a unique key.
  mutate(rn = data.table::rowid(grp, question)) %>%
  pivot_wider(names_from = question, values_from = value) %>%
  select(-rn) %>%
  # Grouping by `grp` before gt() produces one row group per letter.
  group_by(grp) %>%
  gt()
-输出
我有一个调查得到的数据集:
# Survey data in the form produced by dput(): a 4-row tibble with six character
# columns named question_<n>_<group> (two names carry a "...<n>" suffix);
# question_2_B contains one NA.
structure(list(question_1_A...1 = c("A", "A", "B", "C"), question_1_B = c("A",
"B", "B", "C"), question_1_C = c("A", "A", "A", "B"), question_2_A...4 = c("A",
"D", "B", "C"), question_2_B = c("D", NA, "B", "C"), question_2_C = c("E",
"D", "A", "B")), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
我想根据这个图得到一个dataframe:
谢谢!
从列名中提取出分组字母(A、B、C)之后,我们可以使用 split.default 按组拆分各列:
# Split the columns of `df1` by the group letter (A, B or C) embedded in each
# column name, then build one table of answer proportions per group.
# The backreference must be written "\\1" in an R string literal: a
# single-backslash "\1" is the octal escape for the control character \001,
# which would replace every name with that character instead of the letter.
out <- lapply(
  split.default(df1, sub(".*_([A-C]).*", "\\1", names(df1))),
  function(x) {
    # stack() yields long format (values, ind); [2:1] puts `ind` (the source
    # column) first, so margin 1 normalises within each question.
    counts <- table(stack(x)[2:1])
    # Transpose so answers become rows and questions become columns.
    round(t(proportions(counts, 1)), 2)
  }
)
-输出
> out
$A
ind
values question_1_A...1 question_2_A...4
A 0.50 0.25
B 0.25 0.25
C 0.25 0.25
D 0.00 0.25
$B
ind
values question_1_B question_2_B
A 0.25 0.00
B 0.50 0.33
C 0.25 0.33
D 0.00 0.33
$C
ind
values question_1_C question_2_C
A 0.75 0.25
B 0.25 0.25
D 0.00 0.25
E 0.00 0.25
如果我们想要得到 gt 表格,那么可以这样做:
library(dplyr)
library(tidyr)
library(stringr)
library(gt)
# Build a gt table of "answer (proportion)" labels, one column per question,
# with rows grouped by the A/B/C suffix of the original column names.
df1 %>%
  # Strip a trailing "...<n>" suffix from column names. Backslashes must be
  # doubled in an R string literal; "\." and "\d" are unrecognized escapes and
  # fail to parse.
  rename_with(~ str_remove(., "\\.+\\d+$")) %>%
  # Split each name into the question stem (kept as a column via ".value")
  # and the group letter.
  pivot_longer(cols = everything(), names_to = c(".value", "grp"),
    names_pattern = "(.*)_([A-Z])$", values_drop_na = TRUE) %>%
  # Go fully long: one row per (grp, question, answer).
  pivot_longer(cols = starts_with('question'),
    names_to = 'question', values_drop_na = TRUE) %>%
  count(grp, question, value) %>%
  group_by(grp, question) %>%
  # Replace the count with its share within the question, formatted "(0.25)".
  mutate(n = sprintf('(%.2f)', round(n/sum(n), 2))) %>%
  ungroup() %>%
  unite(value, value, n, sep = " ") %>%
  # Row index within each (grp, question) so pivot_wider has a unique key.
  mutate(rn = data.table::rowid(grp, question)) %>%
  pivot_wider(names_from = question, values_from = value) %>%
  select(-rn) %>%
  # Grouping by `grp` before gt() produces one row group per letter.
  group_by(grp) %>%
  gt()
-输出