在 R 脚本中创建包含每个问题选项总百分比的报告
Create a report with total percentages per question option in R script
我有一个包含组和一堆 yes/no 值的数据集(0=否,1=是)。
我正在尝试仅使用 R 脚本创建一个包含每组总计和漂亮布局的报告。
这是我目前所知道的。
# test data (real data from API service is larger)
df_visits <- read.table(text = "
PATIENT_ID,RAND,UNIT_GT3,HEMOR_YN
1000,ARM1,1,0
1001,ARM2,0,0
1003,ARM1,1,0
1005,ARM1,1,0
1006,ARM2,1,1
1008,ARM2,0,0
1009,ARM1,1,1
1010,ARM2,1,0
1011,ARM1,1,0
1014,ARM2,0,0
1015,ARM2,1,0
1018,ARM1,1,1
", header = TRUE, sep = ",")
# results per group, using 'ifelse' feels clunky but it works
df_per_group <- data.frame(
cbind(
RAND = tapply(df_visits$RAND, list(df_visits$RAND), max, na.rm=TRUE),
UNIT_GT3_Y = tapply(ifelse(df_visits$UNIT_GT3 == 1, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
UNIT_GT3_N = tapply(ifelse(df_visits$UNIT_GT3 == 0, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
HEMOR_YN_Y = tapply(ifelse(df_visits$HEMOR_YN == 1, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
HEMOR_YN_N = tapply(ifelse(df_visits$HEMOR_YN == 0, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE)
)
)
# t{base} = Matrix Transpose
df_pivot <- as.data.frame(t(df_per_group))
生成的 df_pivot 数据框如下所示,我可以使用 write.csv()
:
导出它
# RND ARM1 ARM2
# ------------+-----+-----+
# UNIT_GT3_Y | 6 | 3 |
# UNIT_GT3_N | 0 | 3 |
# HEMOR_YN_Y | 2 | 1 |
# HEMOR_YN_N | 4 | 5 |
# ------------+-----+-----+
但是,我希望目标数据集也有每个问题的总百分比,并且有更好的问题标签,所以像这样:
# RND ARM1 ARM2
# --------------+-------------+------------+
# Units>3 : Yes | 6 (100%) | 3 (50%) |
# No | 0 (0%) | 3 (50%) |
# --------------+-------------+------------+
# Hemmorage Yes | 2 (33.3%) | 1 (16.7%) |
# No | 4 (66.7%) | 5 (83.3%) |
# --------------+-------------+------------+
有没有办法像这样更改标签并添加每个问题的百分比?
另外,我是 R 脚本的新手,所以是否有更有效的方法来获得所需的结果?
您可以使用 tables
包,它允许您直接从初始数据生成摘要 table :
library(tables)
tables::tabular(((`Units>3` = factor(ifelse(UNIT_GT3==0,'No','Yes'),levels=c('Yes','No'))) +
(Hemmorage = factor(ifelse(HEMOR_YN==0,'No','Yes'),levels=c('Yes','No'))))
~(RND = factor(RAND))* ((n=1) + Percent(Equal(factor(RAND))))
,data = df_visits)
RND
ARM1 ARM2
n Percent n Percent
Units>3 Yes 6 100.00 3 50.00
No 0 0.00 3 50.00
Hemmorage Yes 2 33.33 1 16.67
No 4 66.67 5 83.33
编辑:在评论中关注您的问题:
tables::tabular(((`Units>3` = factor(ifelse(UNIT_GT3==0,'No','Yes'),levels=c('Yes','No'))) +
(Hemmorage = factor(ifelse(HEMOR_YN==0,'No','Yes'),levels=c('Yes','No'))))
~(Total = 1)+(RND = factor(RAND))* ((Nb=1) + Percent(Equal(factor(RAND))))
,data = df_visits)
RND
ARM1 ARM2
Total Nb Percent Nb Percent
Units>3 Yes 9 6 100.00 3 50.00
No 3 0 0.00 3 50.00
Hemmorage Yes 3 2 33.33 1 16.67
No 9 4 66.67 5 83.33
您可以使用 tidyverse 和 kableExtra 包来创建漂亮的表格:
library(tidyverse)
library(kableExtra)
raw <- tibble::tibble(RND = c("UNIT_GT3_Y",
"UNIT_GT3_N",
"HEMOR_YN_Y",
"HEMOR_YN_N"),
ARM1 = c(6,0,2,4),
ARM2 = c(3,3,1,5))
tbl <- raw %>%
mutate(State = if_else(str_detect(str_sub(RND,-1), "Y"), "Yes", "No"),
RND = word(RND, 1,2, sep="_")) %>%
group_by(RND) %>%
mutate_if(is.numeric, ~ paste0(.x, " [",
round((.x /sum(.x)) * 100, 1),
"%]")) %>%
ungroup() %>%
select(RND, State, everything()) %>%
kbl() %>%
kable_paper(full_width = F) %>%
column_spec(1, bold = T) %>%
collapse_rows(columns = 1, valign = "top")
由 reprex package (v0.3.0)
于 2020-12-07 创建
kableExtra 有很多样式选项
我有一个包含组和一堆 yes/no 值的数据集(0=否,1=是)。 我正在尝试仅使用 R 脚本创建一个包含每组总计和漂亮布局的报告。
这是我目前所知道的。
# test data (real data from API service is larger)
df_visits <- read.table(text = "
PATIENT_ID,RAND,UNIT_GT3,HEMOR_YN
1000,ARM1,1,0
1001,ARM2,0,0
1003,ARM1,1,0
1005,ARM1,1,0
1006,ARM2,1,1
1008,ARM2,0,0
1009,ARM1,1,1
1010,ARM2,1,0
1011,ARM1,1,0
1014,ARM2,0,0
1015,ARM2,1,0
1018,ARM1,1,1
", header = TRUE, sep = ",")
# results per group, using 'ifelse' feels clunky but it works
df_per_group <- data.frame(
cbind(
RAND = tapply(df_visits$RAND, list(df_visits$RAND), max, na.rm=TRUE),
UNIT_GT3_Y = tapply(ifelse(df_visits$UNIT_GT3 == 1, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
UNIT_GT3_N = tapply(ifelse(df_visits$UNIT_GT3 == 0, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
HEMOR_YN_Y = tapply(ifelse(df_visits$HEMOR_YN == 1, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE),
HEMOR_YN_N = tapply(ifelse(df_visits$HEMOR_YN == 0, 1, 0), list(df_visits$RAND), sum, na.rm=TRUE)
)
)
# t{base} = Matrix Transpose
df_pivot <- as.data.frame(t(df_per_group))
生成的 df_pivot 数据框如下所示,我可以使用 write.csv()
:
# RND ARM1 ARM2
# ------------+-----+-----+
# UNIT_GT3_Y | 6 | 3 |
# UNIT_GT3_N | 0 | 3 |
# HEMOR_YN_Y | 2 | 1 |
# HEMOR_YN_N | 4 | 5 |
# ------------+-----+-----+
但是,我希望目标数据集也有每个问题的总百分比,并且有更好的问题标签,所以像这样:
# RND ARM1 ARM2
# --------------+-------------+------------+
# Units>3 : Yes | 6 (100%) | 3 (50%) |
# No | 0 (0%) | 3 (50%) |
# --------------+-------------+------------+
# Hemmorage Yes | 2 (33.3%) | 1 (16.7%) |
# No | 4 (66.7%) | 5 (83.3%) |
# --------------+-------------+------------+
有没有办法像这样更改标签并添加每个问题的百分比?
另外,我是 R 脚本的新手,所以是否有更有效的方法来获得所需的结果?
您可以使用 tables
包,它允许您直接从初始数据生成摘要 table :
library(tables)
tables::tabular(((`Units>3` = factor(ifelse(UNIT_GT3==0,'No','Yes'),levels=c('Yes','No'))) +
(Hemmorage = factor(ifelse(HEMOR_YN==0,'No','Yes'),levels=c('Yes','No'))))
~(RND = factor(RAND))* ((n=1) + Percent(Equal(factor(RAND))))
,data = df_visits)
RND
ARM1 ARM2
n Percent n Percent
Units>3 Yes 6 100.00 3 50.00
No 0 0.00 3 50.00
Hemmorage Yes 2 33.33 1 16.67
No 4 66.67 5 83.33
编辑:在评论中关注您的问题:
tables::tabular(((`Units>3` = factor(ifelse(UNIT_GT3==0,'No','Yes'),levels=c('Yes','No'))) +
(Hemmorage = factor(ifelse(HEMOR_YN==0,'No','Yes'),levels=c('Yes','No'))))
~(Total = 1)+(RND = factor(RAND))* ((Nb=1) + Percent(Equal(factor(RAND))))
,data = df_visits)
RND
ARM1 ARM2
Total Nb Percent Nb Percent
Units>3 Yes 9 6 100.00 3 50.00
No 3 0 0.00 3 50.00
Hemmorage Yes 3 2 33.33 1 16.67
No 9 4 66.67 5 83.33
您可以使用 tidyverse 和 kableExtra 包来创建漂亮的表格:
library(tidyverse)
library(kableExtra)
raw <- tibble::tibble(RND = c("UNIT_GT3_Y",
"UNIT_GT3_N",
"HEMOR_YN_Y",
"HEMOR_YN_N"),
ARM1 = c(6,0,2,4),
ARM2 = c(3,3,1,5))
tbl <- raw %>%
mutate(State = if_else(str_detect(str_sub(RND,-1), "Y"), "Yes", "No"),
RND = word(RND, 1,2, sep="_")) %>%
group_by(RND) %>%
mutate_if(is.numeric, ~ paste0(.x, " [",
round((.x /sum(.x)) * 100, 1),
"%]")) %>%
ungroup() %>%
select(RND, State, everything()) %>%
kbl() %>%
kable_paper(full_width = F) %>%
column_spec(1, bold = T) %>%
collapse_rows(columns = 1, valign = "top")
由 reprex package (v0.3.0)
于 2020-12-07 创建kableExtra 有很多样式选项