修改列联表
Modifying Contingency Tables
我正在使用 R.
具有以下数据:
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
data_1 = data.frame(var_1 = rnorm(871, 10,10), var_2 = rnorm(871, 5,5))
data_1$dates <- as.factor(sample(v1, 871, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_1$types <- as.factor(sample(v2, 871, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_1$types2 <- as.factor(sample(v3, 871, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
data_2 = data.frame(var_1 = rnorm(412, 10,10), var_2 = rnorm(412, 5,5))
data_2$dates <- as.factor(sample(v1, 412, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_2$types <- as.factor(sample(v2, 412, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_2$types2 <- as.factor(sample(v3, 412, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
data_3 = data.frame(var_1 = rnorm(332, 10,10), var_2 = rnorm(332, 5,5))
data_3$dates <- as.factor(sample(v1, 332, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_3$types <- as.factor(sample(v2, 332, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_3$types2 <- as.factor(sample(v3, 332, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
然后我将它们全部组合成一个数据框 ("problem_data"):
data_1 <- data.frame(name="data_1", data_1)
data_2 <- data.frame(name="data_2", data_2)
data_3 <- data.frame(name="data_3", data_3)
problem_data <- rbind(data_1, data_2, data_3)
然后我做了以下应急措施table:
library(memisc)
summary <- xtabs(~dates+name+types+types2, problem_data)
t = ftable(summary, row.vars=1, col.vars=2:4)
show_html(t)
只使用xtabs()和ftable()命令,是否可以修改上面的偶发事件table,使其看起来像这样?
- data_1 : 2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015"
- data_2: 2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015"
- 等等
我在 xtabs()
命令中尝试了不同的排列:
# permutation 1
summary1 <- xtabs(~dates+name+types+types2, problem_data)
t1 = ftable(summary1, row.vars=1, col.vars=2:4)
show_html(t1)
# permutation 2
summary2 <- xtabs(~name+dates+types+types2, problem_data)
t2 = ftable(summary2, row.vars=1, col.vars=2:4)
show_html(t2)
# permutation 3
summary3 <- xtabs(~types+name+dates+types2, problem_data)
t3 = ftable(summary3, row.vars=1, col.vars=2:4)
show_html(t3)
# permutation 4
summary4 <- xtabs(~types2+dates+name+types2, problem_data)
t4 = ftable(summary4, row.vars=1, col.vars=2:4)
show_html(t4)
但到目前为止,似乎没有任何效果。
有人可以告诉我怎么做吗?
谢谢!
您需要设置一个属性,参见:https://adv-r.hadley.nz/vectors-chap.html?q=attr()#getting-and-setting
attr(t, "col.vars")$name <- c(
"data 1: along list of years",
"data 2: another one",
"data 3: yadada"
)
编辑以提供更多关于属性的信息。
在 R 中,您可以借助属性在向量之上构建对象。
看这个例子:
df <- data.frame( x = 1:2,
y = LETTERS[1:2])
attributes(df)
当你做 names(df)
这非常接近做 attr(df, "names")
。
names(df)
是您可以调用的获取函数,它允许您从对象中获取值。
如果你这样做:
names(df) <- c("foo", "bar")
您只是更改名称,而不是对对象进行任何其他修改,这称为设置。我可能对术语有点不理解,但这是你应该得到的想法。
之后,当您想了解如何在 R 中创建一个对象时,您可以使用 attributes
和 structure
来了解一下。 ftable
上的文档在这里也有帮助 (https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/ftable)。参见:
This information is then re-arranged as a matrix whose rows and columns correspond to unique combinations of the levels of the row and column variables (as specified by row.vars and col.vars, respectively).
之后您只需调用 attr(object, "name_of_attributes")
。这里它给了我们一个包含 3 个向量(名称、类型和类型 2)的列表。我假设你可以用这个函数做更复杂的事情(比如 types3、types4 等等)。最后只是用 names()
做同样的逻辑来“设置”你想要的修改。
您可能可以进一步微调它!
memisc
包允许的自定义程度不足以满足您的要求。对于这类任务,我建议使用像 flextable
这样的包。这是代码
library(flextable)
library(dplyr)
library(tidyr)
x <- ftable(xtabs(~name+dates+types+types2, problem_data), row.vars = 1:2, col.vars = 3:4)
ft <- as.data.frame(x) |>
arrange(name, dates, types, types2) |>
pivot_wider(names_from = c(types, types2), values_from = Freq)
header <- names(ft)
header1 <- replace(sub("([^_]+)_([^_]+)", "\1", header), 1:2, c("", "types"))
names(header1) <- header
header2 <- replace(sub("([^_]+)_([^_]+)", "\2", header), 2L, "types2")
names(header2) <- header
flextable(ft) |>
merge_v(j = "name") |> # create a merged first column
delete_part("header") |> # remove the old header
add_header(values = header2) |>
add_header(values = header1) |> # recreate headers
merge_h(part = "header") |> # merge accordingly
align(align = "left", part = "all") |>
hline_top(border = officer::fp_border(width = 1L), part = "header") |>
hline_bottom(border = officer::fp_border(width = 1L), part = "header") |>
hline_bottom(border = officer::fp_border(width = 1L), part = "body") |>
fix_border_issues() |>
fit_to_width(15L) |> # set the table width to your desired one. I use 15 inches for demonstration.
save_as_html(path = "test.html")
输出(“test.html”)看起来像这样
如果使用 memisc
,AFAIK,这是您可以获得的最佳结果
x <- ftable(xtabs(~name+dates+types+types2, problem_data), row.vars = 1:2, col.vars = 3:4)
memisc::show_html(x)
我正在使用 R.
具有以下数据:
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
data_1 = data.frame(var_1 = rnorm(871, 10,10), var_2 = rnorm(871, 5,5))
data_1$dates <- as.factor(sample(v1, 871, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_1$types <- as.factor(sample(v2, 871, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_1$types2 <- as.factor(sample(v3, 871, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
data_2 = data.frame(var_1 = rnorm(412, 10,10), var_2 = rnorm(412, 5,5))
data_2$dates <- as.factor(sample(v1, 412, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_2$types <- as.factor(sample(v2, 412, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_2$types2 <- as.factor(sample(v3, 412, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
data_3 = data.frame(var_1 = rnorm(332, 10,10), var_2 = rnorm(332, 5,5))
data_3$dates <- as.factor(sample(v1, 332, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
data_3$types <- as.factor(sample(v2, 332, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
data_3$types2 <- as.factor(sample(v3, 332, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
然后我将它们全部组合成一个数据框 ("problem_data"):
data_1 <- data.frame(name="data_1", data_1)
data_2 <- data.frame(name="data_2", data_2)
data_3 <- data.frame(name="data_3", data_3)
problem_data <- rbind(data_1, data_2, data_3)
然后我做了以下应急措施table:
library(memisc)
summary <- xtabs(~dates+name+types+types2, problem_data)
t = ftable(summary, row.vars=1, col.vars=2:4)
show_html(t)
只使用xtabs()和ftable()命令,是否可以修改上面的偶发事件table,使其看起来像这样?
- data_1 : 2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015"
- data_2: 2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015"
- 等等
我在 xtabs()
命令中尝试了不同的排列:
# permutation 1
summary1 <- xtabs(~dates+name+types+types2, problem_data)
t1 = ftable(summary1, row.vars=1, col.vars=2:4)
show_html(t1)
# permutation 2
summary2 <- xtabs(~name+dates+types+types2, problem_data)
t2 = ftable(summary2, row.vars=1, col.vars=2:4)
show_html(t2)
# permutation 3
summary3 <- xtabs(~types+name+dates+types2, problem_data)
t3 = ftable(summary3, row.vars=1, col.vars=2:4)
show_html(t3)
# permutation 4
summary4 <- xtabs(~types2+dates+name+types2, problem_data)
t4 = ftable(summary4, row.vars=1, col.vars=2:4)
show_html(t4)
但到目前为止,似乎没有任何效果。
有人可以告诉我怎么做吗?
谢谢!
您需要设置一个属性,参见:https://adv-r.hadley.nz/vectors-chap.html?q=attr()#getting-and-setting
attr(t, "col.vars")$name <- c(
"data 1: along list of years",
"data 2: another one",
"data 3: yadada"
)
编辑以提供更多关于属性的信息。
在 R 中,您可以借助属性在向量之上构建对象。
看这个例子:
df <- data.frame( x = 1:2,
y = LETTERS[1:2])
attributes(df)
当你做 names(df)
这非常接近做 attr(df, "names")
。
names(df)
是您可以调用的获取函数,它允许您从对象中获取值。
如果你这样做:
names(df) <- c("foo", "bar")
您只是更改名称,而不是对对象进行任何其他修改,这称为设置。我可能对术语有点不理解,但这是你应该得到的想法。
之后,当您想了解如何在 R 中创建一个对象时,您可以使用 attributes
和 structure
来了解一下。 ftable
上的文档在这里也有帮助 (https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/ftable)。参见:
This information is then re-arranged as a matrix whose rows and columns correspond to unique combinations of the levels of the row and column variables (as specified by row.vars and col.vars, respectively).
之后您只需调用 attr(object, "name_of_attributes")
。这里它给了我们一个包含 3 个向量(名称、类型和类型 2)的列表。我假设你可以用这个函数做更复杂的事情(比如 types3、types4 等等)。最后只是用 names()
做同样的逻辑来“设置”你想要的修改。
您可能可以进一步微调它!
memisc
包允许的自定义程度不足以满足您的要求。对于这类任务,我建议使用像 flextable
这样的包。这是代码
library(flextable)
library(dplyr)
library(tidyr)
x <- ftable(xtabs(~name+dates+types+types2, problem_data), row.vars = 1:2, col.vars = 3:4)
ft <- as.data.frame(x) |>
arrange(name, dates, types, types2) |>
pivot_wider(names_from = c(types, types2), values_from = Freq)
header <- names(ft)
header1 <- replace(sub("([^_]+)_([^_]+)", "\1", header), 1:2, c("", "types"))
names(header1) <- header
header2 <- replace(sub("([^_]+)_([^_]+)", "\2", header), 2L, "types2")
names(header2) <- header
flextable(ft) |>
merge_v(j = "name") |> # create a merged first column
delete_part("header") |> # remove the old header
add_header(values = header2) |>
add_header(values = header1) |> # recreate headers
merge_h(part = "header") |> # merge accordingly
align(align = "left", part = "all") |>
hline_top(border = officer::fp_border(width = 1L), part = "header") |>
hline_bottom(border = officer::fp_border(width = 1L), part = "header") |>
hline_bottom(border = officer::fp_border(width = 1L), part = "body") |>
fix_border_issues() |>
fit_to_width(15L) |> # set the table width to your desired one. I use 15 inches for demonstration.
save_as_html(path = "test.html")
输出(“test.html”)看起来像这样
如果使用 memisc
,AFAIK,这是您可以获得的最佳结果
x <- ftable(xtabs(~name+dates+types+types2, problem_data), row.vars = 1:2, col.vars = 3:4)
memisc::show_html(x)