将不同位置的行添加到 R 中的意外事件 Table
Adding Rows at Different Positions to a Contingency Table in R
我正在使用 R。对于我生成的这个随机数据集,我创建了以下偶然事件 table:
library(memisc)
library(dplyr)
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
v4 <- c("data_1", "data_2", "data_3", "data_4" )
dates <- as.factor(sample(v1, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
types <- as.factor(sample(v2,1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
types2 <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
names <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
var = rnorm(1000,10,10)
problem_data = data.frame(var,dates, types, types2, names)
summary <- xtabs(~dates+names+types+types2, problem_data)
t = ftable(summary, row.vars=1, col.vars=2:4)
show_html(t)
如果我想在此 table 的底部添加包含“总计”的行,我可以按如下方式进行:
totals <- problem_data %>% group_by(names, types, types2) %>% summarise(totals = n())
memisc::show_html(rbind(t, totals = totals$totals), varinfront = FALSE)
是否可以在这个偶然事件中的任意位置添加“总计”table?
例如,假设我要查找前两行(2010-2011、2011-2012)的总计,然后将这个总计插入到第三行的table中。我可以计算前两行的总数:
first_two_rows = subset(problem_data, dates %in% c("2010-2011","2011-2012"))
totals_first_two_rows <- first_two_rows %>% group_by(names, types, types2) %>% summarise(totals = n())
但是这个“totals_first_two_rows”怎么能加到偶数的第三个位置table?用这个Whosebugpost( Add new row to dataframe, at specific row-index, not appended?),我尝试使用答案中提供的功能:
insertRow <- function(existingDF, newrow, r) {
existingDF[seq(r+1,nrow(existingDF)+1),] <- existingDF[seq(r,nrow(existingDF)),]
existingDF[r,] <- newrow
existingDF
}
insertRow(t, totals_first_two_rows, 3)
但是这个returns出现以下错误:
Error in `[<-`(`*tmp*`, seq(r + 1, nrow(existingDF) + 1), , value = existingDF[seq(r, :
subscript out of bounds
有人可以告诉我如何解决这个问题吗?
谢谢!
insertRow
不起作用,因为 t
不是 data.frame
(令我惊讶的是 rbind(t, totals = totals$totals)
起作用)。
如果你想要不常见的 table 格式,我认为你无法避免将其设为 semi-manually.
这需要时间,但您可以完全自定义。
我介绍套餐flextable
(下面是例子):
注意:
!!sym(str)
和 !!!syms(strs)
是在 dplyr
函数中使用字符串 colname 的技术。
例如,iris %>% mutate(!!sym("colname") := !!sym("Sepal.Length") * 10)
表示 iris %>% mutate(colname = Sepal.Length * 10)
加载包和数据准备
library(memisc)
library(dplyr)
library(tidyr)
library(flextable)
library(officer)
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
v4 <- c("data_1", "data_2", "data_3", "data_4" )
dates <- as.factor(sample(v1, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
types <- as.factor(sample(v2,1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
types2 <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
names <- as.factor(sample(v4, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1))) # modified
var <- rnorm(1000,10,10)
problem_data <- data.frame(var,dates, types, types2, names)
contingency_table <- problem_data %>% # this code comes from your previous question (a little changed).
group_by(dates, names, types, types2) %>%
summarise(value = n(), .groups = "drop") # I summarize_function (from mean() to n() and colname).
first_two_rows = subset(problem_data, dates %in% c("2010-2011","2011-2012"))
totals_first_two_rows <- first_two_rows %>% group_by(names, types, types2) %>% summarise(totals = n(), .groups = "drop")
制作你要输出的df(但是header被粘贴了)
# variable preparation
ind_cols <- c("names", "types", "types2")
ind_rows <- c("dates")
ind_sep = "__" # use to make temp_header (pasted to single string; e.g., header1__header2__ ...)
# convet long to wide
base_data <- contingency_table %>%
tidyr::pivot_wider(names_from = ind_cols, values_from = value,
names_sep = ind_sep, names_sort = TRUE) %>%
arrange_at(ind_rows) %>%
mutate_if(is.factor, as.character)
# dates data_1__A__W data_1__A__X data_1__A__Y ...
# <chr> <int> <int> <int>
# 1 2010-2011 4 2 21
# 2 2011-2012 2 2 7
# 3 2012-2013 NA NA 2
# 4 2013-2014 2 2 1
# 5 2014-2015 1 NA 6
totals_f2r_wide <- totals_first_two_rows %>%
mutate(dates = "totals") %>% # this colname is important. it must be same as the colname what you want to put the value.
tidyr::pivot_wider(names_from = ind_cols, values_from = totals,
names_sep = ind_sep, names_sort = TRUE)
# dates data_1__A__W data_1__A__X data_1__A__Y
# <chr> <int> <int> <int>
# 1 totals 6 4 28
base_data2 <- bind_rows(base_data[1:2,], totals_f2r_wide, base_data[3:nrow(base_data),]) %>%
mutate_if(is.numeric, ~ replace_na(.x, 0))
## add header description col
# if you don't want it, please skip this part.
base_data3 <- base_data2 %>%
mutate(!!sym(paste0(ind_cols, sep = ":", collapse = ind_sep)) := NA) %>%
select(one_of(ind_rows), paste0(ind_cols, sep = ":", collapse = ind_sep), names(.)) # column order change
# dates `names:__types:__types2:` data_1__A__W data_1__A__X
# <chr> <lgl> <dbl> <dbl>
# 1 2010-2011 NA 4 2
# 2 2011-2012 NA 2 2
# 3 totals NA 6 4
# 4 2012-2013 NA 0 0
# 5 2013-2014 NA 2 2
# 6 2014-2015 NA 1 0
header 资料准备
计算header的每个元素有多少行。
header_info_maker <- function(base_data) {
pasted_ind <- base_data %>% colnames()
ind_num <- length(ind_cols)
ind_d <- tibble(a = pasted_ind) %>%
separate(a, into = letters[1:ind_num], sep = ind_sep, fill = "left") %>% # warning occurred, but no problem
mutate_all(~ replace_na(.x, ""))
# a b c
# <chr> <chr> <chr>
# 1 "" "" dates
# 2 "names:" "types:" types2:
# 3 "data_1" "A" W
# 4 "data_1" "A" X
# 5 "data_1" "A" Y
# 6 "data_1" "A" Z
# 7 "data_1" "B" W
group_ind <- ind_d %>%
mutate_all(~ cumsum(.x != lag(.x, default = "xxxxx")))
# a b c
# <int> <int> <int>
# 1 1 1 1
# 2 2 2 2
# 3 3 3 3
# 4 3 3 4
# 5 3 3 5
# 6 3 3 6
# 7 3 4 7
sapply(1:ncol(ind_d), function(x){
tibble(ind = ind_d[[x]], g_ind = group_ind[[x]]) %>%
count(g_ind, ind) %>%
select(-g_ind) %>%
as.list()
}, simplify = FALSE)
}
header_info <- header_info_maker(base_data3)
制作table
# convert df to flextable and delete origin pasted header.
ft <- flextable(base_data3) %>% delete_part(part = "header")
# add header
for(i in length(header_info):1){
ft <- add_header_row(ft, colwidths = header_info[[i]]$n, values = header_info[[i]]$ind)
}
# change design
ft <- ft %>%
theme_vanilla() %>%
vline(j = 1, border = fp_border(width = 2)) %>%
align(align = "center", part = "all") %>%
hline(i = c(2,3), border = fp_border(width = 2))
ft
我正在使用 R。对于我生成的这个随机数据集,我创建了以下偶然事件 table:
library(memisc)
library(dplyr)
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
v4 <- c("data_1", "data_2", "data_3", "data_4" )
dates <- as.factor(sample(v1, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
types <- as.factor(sample(v2,1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
types2 <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
names <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
var = rnorm(1000,10,10)
problem_data = data.frame(var,dates, types, types2, names)
summary <- xtabs(~dates+names+types+types2, problem_data)
t = ftable(summary, row.vars=1, col.vars=2:4)
show_html(t)
如果我想在此 table 的底部添加包含“总计”的行,我可以按如下方式进行:
totals <- problem_data %>% group_by(names, types, types2) %>% summarise(totals = n())
memisc::show_html(rbind(t, totals = totals$totals), varinfront = FALSE)
是否可以在这个偶然事件中的任意位置添加“总计”table?
例如,假设我要查找前两行(2010-2011、2011-2012)的总计,然后将这个总计插入到第三行的table中。我可以计算前两行的总数:
first_two_rows = subset(problem_data, dates %in% c("2010-2011","2011-2012"))
totals_first_two_rows <- first_two_rows %>% group_by(names, types, types2) %>% summarise(totals = n())
但是这个“totals_first_two_rows”怎么能加到偶数的第三个位置table?用这个Whosebugpost( Add new row to dataframe, at specific row-index, not appended?),我尝试使用答案中提供的功能:
insertRow <- function(existingDF, newrow, r) {
existingDF[seq(r+1,nrow(existingDF)+1),] <- existingDF[seq(r,nrow(existingDF)),]
existingDF[r,] <- newrow
existingDF
}
insertRow(t, totals_first_two_rows, 3)
但是这个returns出现以下错误:
Error in `[<-`(`*tmp*`, seq(r + 1, nrow(existingDF) + 1), , value = existingDF[seq(r, :
subscript out of bounds
有人可以告诉我如何解决这个问题吗?
谢谢!
insertRow
不起作用,因为 t
不是 data.frame
(令我惊讶的是 rbind(t, totals = totals$totals)
起作用)。
如果你想要不常见的 table 格式,我认为你无法避免将其设为 semi-manually.
这需要时间,但您可以完全自定义。
我介绍套餐flextable
(下面是例子):
注意:
!!sym(str)
和 !!!syms(strs)
是在 dplyr
函数中使用字符串 colname 的技术。
例如,iris %>% mutate(!!sym("colname") := !!sym("Sepal.Length") * 10)
表示 iris %>% mutate(colname = Sepal.Length * 10)
加载包和数据准备
library(memisc)
library(dplyr)
library(tidyr)
library(flextable)
library(officer)
set.seed(123)
v1 <- c("2010-2011","2011-2012", "2012-2013", "2013-2014", "2014-2015")
v2 <- c("A", "B", "C", "D", "E")
v3 <- c("Z", "Y", "X", "W" )
v4 <- c("data_1", "data_2", "data_3", "data_4" )
dates <- as.factor(sample(v1, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
types <- as.factor(sample(v2,1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
types2 <- as.factor(sample(v3, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1)))
names <- as.factor(sample(v4, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.1, 0.1))) # modified
var <- rnorm(1000,10,10)
problem_data <- data.frame(var,dates, types, types2, names)
contingency_table <- problem_data %>% # this code comes from your previous question (a little changed).
group_by(dates, names, types, types2) %>%
summarise(value = n(), .groups = "drop") # I summarize_function (from mean() to n() and colname).
first_two_rows = subset(problem_data, dates %in% c("2010-2011","2011-2012"))
totals_first_two_rows <- first_two_rows %>% group_by(names, types, types2) %>% summarise(totals = n(), .groups = "drop")
制作你要输出的df(但是header被粘贴了)
# variable preparation
ind_cols <- c("names", "types", "types2")
ind_rows <- c("dates")
ind_sep = "__" # use to make temp_header (pasted to single string; e.g., header1__header2__ ...)
# convet long to wide
base_data <- contingency_table %>%
tidyr::pivot_wider(names_from = ind_cols, values_from = value,
names_sep = ind_sep, names_sort = TRUE) %>%
arrange_at(ind_rows) %>%
mutate_if(is.factor, as.character)
# dates data_1__A__W data_1__A__X data_1__A__Y ...
# <chr> <int> <int> <int>
# 1 2010-2011 4 2 21
# 2 2011-2012 2 2 7
# 3 2012-2013 NA NA 2
# 4 2013-2014 2 2 1
# 5 2014-2015 1 NA 6
totals_f2r_wide <- totals_first_two_rows %>%
mutate(dates = "totals") %>% # this colname is important. it must be same as the colname what you want to put the value.
tidyr::pivot_wider(names_from = ind_cols, values_from = totals,
names_sep = ind_sep, names_sort = TRUE)
# dates data_1__A__W data_1__A__X data_1__A__Y
# <chr> <int> <int> <int>
# 1 totals 6 4 28
base_data2 <- bind_rows(base_data[1:2,], totals_f2r_wide, base_data[3:nrow(base_data),]) %>%
mutate_if(is.numeric, ~ replace_na(.x, 0))
## add header description col
# if you don't want it, please skip this part.
base_data3 <- base_data2 %>%
mutate(!!sym(paste0(ind_cols, sep = ":", collapse = ind_sep)) := NA) %>%
select(one_of(ind_rows), paste0(ind_cols, sep = ":", collapse = ind_sep), names(.)) # column order change
# dates `names:__types:__types2:` data_1__A__W data_1__A__X
# <chr> <lgl> <dbl> <dbl>
# 1 2010-2011 NA 4 2
# 2 2011-2012 NA 2 2
# 3 totals NA 6 4
# 4 2012-2013 NA 0 0
# 5 2013-2014 NA 2 2
# 6 2014-2015 NA 1 0
header 资料准备
计算header的每个元素有多少行。
header_info_maker <- function(base_data) {
pasted_ind <- base_data %>% colnames()
ind_num <- length(ind_cols)
ind_d <- tibble(a = pasted_ind) %>%
separate(a, into = letters[1:ind_num], sep = ind_sep, fill = "left") %>% # warning occurred, but no problem
mutate_all(~ replace_na(.x, ""))
# a b c
# <chr> <chr> <chr>
# 1 "" "" dates
# 2 "names:" "types:" types2:
# 3 "data_1" "A" W
# 4 "data_1" "A" X
# 5 "data_1" "A" Y
# 6 "data_1" "A" Z
# 7 "data_1" "B" W
group_ind <- ind_d %>%
mutate_all(~ cumsum(.x != lag(.x, default = "xxxxx")))
# a b c
# <int> <int> <int>
# 1 1 1 1
# 2 2 2 2
# 3 3 3 3
# 4 3 3 4
# 5 3 3 5
# 6 3 3 6
# 7 3 4 7
sapply(1:ncol(ind_d), function(x){
tibble(ind = ind_d[[x]], g_ind = group_ind[[x]]) %>%
count(g_ind, ind) %>%
select(-g_ind) %>%
as.list()
}, simplify = FALSE)
}
header_info <- header_info_maker(base_data3)
制作table
# convert df to flextable and delete origin pasted header.
ft <- flextable(base_data3) %>% delete_part(part = "header")
# add header
for(i in length(header_info):1){
ft <- add_header_row(ft, colwidths = header_info[[i]]$n, values = header_info[[i]]$ind)
}
# change design
ft <- ft %>%
theme_vanilla() %>%
vline(j = 1, border = fp_border(width = 2)) %>%
align(align = "center", part = "all") %>%
hline(i = c(2,3), border = fp_border(width = 2))
ft