R markdown 列联表 %>% 只对列变量的指定取值制表
R markdown contingency table %>% tabulate column variables with selective values
我是 R 的新手,之前使用 Stata。下面是带有可重现示例数据的 r markdown 代码块。示例数据与我实际使用的数据类似,只是实际数据中的二元(逻辑)变量和因子变量更多。
库和数据:
# Setup and load package:
library(dplyr)
library(expss)
library(hablar)
library(kableExtra)
library(summarytools)
# Load the built-in mtcars data set and keep the five columns used below.
data("mtcars")
raw_df <- mtcars %>%
  select(wt, cyl, gear, vs, am)

# Attach variable labels (single strings) and value labels (named vectors).
df <- apply_labels(
  raw_df,
  wt = "Facility ID",
  cyl = "Geographical Area",
  cyl = c("Area A" = 4, "Area B" = 6, "Area C" = 8),
  gear = "Tier",
  gear = c("Tier 1" = 3, "Tier 2" = 4, "Tier 3" = 5),
  vs = "E.coli",
  am = "V.choleri"
)

# Coerce column types: wt to character, cyl/gear to factor, vs/am to logical.
df <- convert(df, chr(wt), fct(cyl, gear), lgl(vs, am))
请注意,在我的实际数据中,有更多的分类和逻辑变量。
我已经设法在 r markdown(html 输出)中做出以下 table:
# Cross-tabulate cyl and gear (rows) against vs and am (columns) with expss,
# then render the result through kable/kableExtra.
df %>%
  tab_cells(cyl, gear) %>%
  # Total row goes below each block and holds unweighted row percentages.
  tab_total_row_position("below") %>%
  tab_total_statistic("u_rpct") %>%
  tab_total_label("Total hosts (Row proportions)") %>%
  # Row percentages for the two logical variables.
  tab_cols(vs, am) %>%
  tab_stat_rpct() %>%
  # Case counts in an extra total column.
  tab_cols(total(label = "Number of hosts")) %>%
  tab_stat_cases() %>%
  tab_pivot(stat_position = "outside_columns") %>%
  # Replace NA in numeric columns with 0; copy everything else unchanged.
  recode(as.criterion(is.numeric) & is.na ~ 0, TRUE ~ copy) %>%
  split_table_to_df() %>%
  kable(align = "c", digits = 1) %>%
  kable_styling(
    bootstrap_options = c("striped", "condensed", "responsive"),
    # Spelled out: `F` is an ordinary variable that can be reassigned.
    full_width = FALSE,
    position = "center"
  ) %>%
  row_spec(1:2, bold = TRUE)
问题:
1. 我希望只保留 "TRUE" 列,把 "FALSE" 列从表中删除,同时保持第一行标签("E.coli"、"V.choleri")不变。实际上我并不需要第二行("TRUE"、"FALSE")。
2. 我已经标记了 "Total Row proportion" (#Total hosts),但是无法删除前导的“#”符号。在带有 "Total row proportion" 的行的最右边的列单元格中,它显示“100”。我尝试将其作为列单元格的总和,但失败了。 “100”完全是误导。
3. 我还尝试通过 "summarytools" 包的 "ctable" 函数得到我想要的表。它的结构很出色,比例单元格内还同时给出了观测数:
# Cross-tabulate cyl against am with ctable() and print the result with
# method = 'render'.
print(ctable(df$cyl,df$am), method = 'render')
但问题是它似乎一次只允许一对分类变量,而且无法省略 "FALSE"。不过最后一列的行合计(rowtotals,即观测数)
正是我想要的。
详情:
R:4.0.0
RStudio:1.2.5042
这些包都是最新的。
expss 中的表格就是普通的 data.frame。列标签就是列名,其中表头的各行用“|”符号分隔,因此您可以像操作普通列名一样操作它们。行标签位于 row_labels 列中,我们可以通过查找替换操作删除“#”符号。
"Total row proportion" 显示“100”,因为开始时您将总统计量指定为行百分比,单列的行百分比为 100。
考虑到以上所有因素:
library(dplyr)
library(expss)
library(hablar)
library(kableExtra)
library(summarytools)
# Load the built-in mtcars data set and keep the five columns used below.
data("mtcars")
raw_df <- mtcars %>%
  select(wt, cyl, gear, vs, am)

# Attach variable labels (single strings) and value labels (named vectors).
df <- apply_labels(
  raw_df,
  wt = "Facility ID",
  cyl = "Geographical Area",
  cyl = c("Area A" = 4, "Area B" = 6, "Area C" = 8),
  gear = "Tier",
  gear = c("Tier 1" = 3, "Tier 2" = 4, "Tier 3" = 5),
  vs = "E.coli",
  am = "V.choleri"
)

# Coerce column types: wt to character, cyl/gear to factor, vs/am to logical.
df <- convert(df, chr(wt), fct(cyl, gear), lgl(vs, am))
# Build the cross-tabulation and keep it in `tbl` so that its column names
# can be edited before rendering.
# NOTE(review): recode(), contains() and compute() are presumably meant to be
# the expss versions, which requires expss to be attached after dplyr -- confirm.
tbl <- df %>%
  tab_cells(cyl, gear) %>%
  tab_total_row_position("below") %>%
  tab_total_statistic("u_rpct") %>%
  tab_total_label("Total hosts (Row proportions)") %>%
  tab_cols(vs, am) %>%
  tab_stat_rpct() %>%
  tab_cols(total(label = "Number of hosts")) %>%
  # specify total statistic for last column
  tab_stat_cases(total_statistic = "u_cases") %>%
  tab_pivot(stat_position = "outside_columns") %>%
  # replace NA in numeric columns with 0; copy everything else unchanged
  recode(as.criterion(is.numeric) & is.na ~ 0, TRUE ~ copy) %>%
  # remove columns with FALSE
  except(contains("FALSE")) %>%
  compute(
    # remove '#' sign from row labels
    row_labels = gsub("#", "", row_labels)
  )

# Remove the "|TRUE" level from column labels. "|" is a regex metacharacter,
# so it is escaped; inside an R string literal the backslash itself must be
# doubled ("\|" alone is an unrecognized escape and fails to parse).
colnames(tbl) <- gsub("\\|TRUE", "", colnames(tbl))

# Render the cleaned table through kable/kableExtra.
tbl %>%
  split_table_to_df() %>%
  kable(align = "c", digits = 1) %>%
  kable_styling(
    bootstrap_options = c("striped", "condensed", "responsive"),
    # Spelled out: `F` is an ordinary variable that can be reassigned.
    full_width = FALSE,
    position = "center"
  ) %>%
  row_spec(1:2, bold = TRUE)
我是 R 的新手,之前使用 Stata。下面是带有可重现示例数据的 r markdown 代码块。示例数据与我实际使用的数据类似,只是实际数据中的二元(逻辑)变量和因子变量更多。
库和数据:
# Setup and load package:
library(dplyr)
library(expss)
library(hablar)
library(kableExtra)
library(summarytools)
# Load the built-in mtcars data set and keep the five columns used below.
data("mtcars")
raw_df <- mtcars %>%
  select(wt, cyl, gear, vs, am)

# Attach variable labels (single strings) and value labels (named vectors).
df <- apply_labels(
  raw_df,
  wt = "Facility ID",
  cyl = "Geographical Area",
  cyl = c("Area A" = 4, "Area B" = 6, "Area C" = 8),
  gear = "Tier",
  gear = c("Tier 1" = 3, "Tier 2" = 4, "Tier 3" = 5),
  vs = "E.coli",
  am = "V.choleri"
)

# Coerce column types: wt to character, cyl/gear to factor, vs/am to logical.
df <- convert(df, chr(wt), fct(cyl, gear), lgl(vs, am))
请注意,在我的实际数据中,有更多的分类和逻辑变量。 我已经设法在 r markdown(html 输出)中做出以下 table:
# Cross-tabulate cyl and gear (rows) against vs and am (columns) with expss,
# then render the result through kable/kableExtra.
df %>%
  tab_cells(cyl, gear) %>%
  # Total row goes below each block and holds unweighted row percentages.
  tab_total_row_position("below") %>%
  tab_total_statistic("u_rpct") %>%
  tab_total_label("Total hosts (Row proportions)") %>%
  # Row percentages for the two logical variables.
  tab_cols(vs, am) %>%
  tab_stat_rpct() %>%
  # Case counts in an extra total column.
  tab_cols(total(label = "Number of hosts")) %>%
  tab_stat_cases() %>%
  tab_pivot(stat_position = "outside_columns") %>%
  # Replace NA in numeric columns with 0; copy everything else unchanged.
  recode(as.criterion(is.numeric) & is.na ~ 0, TRUE ~ copy) %>%
  split_table_to_df() %>%
  kable(align = "c", digits = 1) %>%
  kable_styling(
    bootstrap_options = c("striped", "condensed", "responsive"),
    # Spelled out: `F` is an ordinary variable that can be reassigned.
    full_width = FALSE,
    position = "center"
  ) %>%
  row_spec(1:2, bold = TRUE)
问题: 1. 我希望只保留 "TRUE" 列,把 "FALSE" 列从表中删除,同时保持第一行标签("E.coli"、"V.choleri")不变。实际上我并不需要第二行("TRUE"、"FALSE")。 2. 我已经给合计行设置了标签 "Total Row proportion"(#Total hosts),但是无法删除前导的“#”符号。在 "Total row proportion" 所在行最右边的单元格中显示的是“100”。我尝试让它显示该列各单元格之和,但失败了。“100”完全是误导。 3. 我还尝试通过 "summarytools" 包的 "ctable" 函数得到我想要的表。它的结构很出色,比例单元格内还同时给出了观测数:
# Cross-tabulate cyl against am with ctable() and print the result with
# method = 'render'.
print(ctable(df$cyl,df$am), method = 'render')
但问题是它似乎一次只允许一对分类变量,而且无法省略 "FALSE"。不过最后一列的行合计(rowtotals,即观测数)正是我想要的。
详情: R:4.0.0 RStudio:1.2.5042 这些包都是最新的。
expss 中的表格就是普通的 data.frame。列标签就是列名,其中表头的各行用“|”符号分隔,因此您可以像操作普通列名一样操作它们。行标签位于 row_labels 列中,我们可以通过查找替换操作删除“#”符号。
"Total row proportion" 显示“100”,因为开始时您将总统计量指定为行百分比,单列的行百分比为 100。
考虑到以上所有因素:
library(dplyr)
library(expss)
library(hablar)
library(kableExtra)
library(summarytools)
# Load the built-in mtcars data set and keep the five columns used below.
data("mtcars")
raw_df <- mtcars %>%
  select(wt, cyl, gear, vs, am)

# Attach variable labels (single strings) and value labels (named vectors).
df <- apply_labels(
  raw_df,
  wt = "Facility ID",
  cyl = "Geographical Area",
  cyl = c("Area A" = 4, "Area B" = 6, "Area C" = 8),
  gear = "Tier",
  gear = c("Tier 1" = 3, "Tier 2" = 4, "Tier 3" = 5),
  vs = "E.coli",
  am = "V.choleri"
)

# Coerce column types: wt to character, cyl/gear to factor, vs/am to logical.
df <- convert(df, chr(wt), fct(cyl, gear), lgl(vs, am))
# Build the cross-tabulation and keep it in `tbl` so that its column names
# can be edited before rendering.
# NOTE(review): recode(), contains() and compute() are presumably meant to be
# the expss versions, which requires expss to be attached after dplyr -- confirm.
tbl <- df %>%
  tab_cells(cyl, gear) %>%
  tab_total_row_position("below") %>%
  tab_total_statistic("u_rpct") %>%
  tab_total_label("Total hosts (Row proportions)") %>%
  tab_cols(vs, am) %>%
  tab_stat_rpct() %>%
  tab_cols(total(label = "Number of hosts")) %>%
  # specify total statistic for last column
  tab_stat_cases(total_statistic = "u_cases") %>%
  tab_pivot(stat_position = "outside_columns") %>%
  # replace NA in numeric columns with 0; copy everything else unchanged
  recode(as.criterion(is.numeric) & is.na ~ 0, TRUE ~ copy) %>%
  # remove columns with FALSE
  except(contains("FALSE")) %>%
  compute(
    # remove '#' sign from row labels
    row_labels = gsub("#", "", row_labels)
  )

# Remove the "|TRUE" level from column labels. "|" is a regex metacharacter,
# so it is escaped; inside an R string literal the backslash itself must be
# doubled ("\|" alone is an unrecognized escape and fails to parse).
colnames(tbl) <- gsub("\\|TRUE", "", colnames(tbl))

# Render the cleaned table through kable/kableExtra.
tbl %>%
  split_table_to_df() %>%
  kable(align = "c", digits = 1) %>%
  kable_styling(
    bootstrap_options = c("striped", "condensed", "responsive"),
    # Spelled out: `F` is an ordinary variable that can be reassigned.
    full_width = FALSE,
    position = "center"
  ) %>%
  row_spec(1:2, bold = TRUE)