试图在 R 中构建一个大的 table
Trying to build a large table in R
我正在尝试在 R 中构建一个大型 table。 是的,我听说过 table() 函数 - 事实上,我已经使用了好几次在下面的代码中 - 但我正在构建它是因为我不想每天键入 table() 20 次。我计划只使用 xtable + knitr 导出它。这很有用的原因是,对于我们这些必须反复对数据进行制表的人来说,这会节省很多时间。不幸的是,这里的循环有问题:
ESRD <- rep(c("Y", "N"), each=10)
DIABETES <- rep(c("Y", "N", "Y", "N"), c(5, 5, 5, 5))
BLAH <- rep(c("Y", "N"), each=10)
categoricalvariables <- data.frame(ESRD, DIABETES, BLAH)
descriptives <- function(VARIABLEMATRIX){
desc <- matrix(0, ncol=4, nrow=2*ncol(VARIABLEMATRIX) + ncol(VARIABLEMATRIX))
for (i in 1:ncol(VARIABLEMATRIX)){
matper <- matrix(0, nrow=dim(table(VARIABLEMATRIX[ ,i])), ncol=1)
for (i in 1:dim(table(VARIABLEMATRIX[ ,i]))){
matper[i, ] <- paste(round(prop.table(table(VARIABLEMATRIX[ ,i]))[i]*100, 2), "%")
}
matcount <- matrix(0, nrow=dim(table(VARIABLEMATRIX[ ,i])), ncol=1)
for (i in 1:dim(table(VARIABLEMATRIX[ ,i]))){
matcount[i, ] <- table(VARIABLEMATRIX[ ,i])[i]
}
desc[((3*i)-2), ] <- c(colnames(VARIABLEMATRIX)[i], "", "", "")
desc[((3*i)-1):(3*i), ] <- cbind("", names(table(VARIABLEMATRIX[ ,i])), matcount[ ,1], matper[ ,1])
return(desc)
}
}
descriptives(categoricalvariables)
我得到的输出是(显然有一个错误,但我不确定哪里出了问题):
[,1] [,2] [,3] [,4]
[1,] "0" "0" "0" "0"
[2,] "0" "0" "0" "0"
[3,] "0" "0" "0" "0"
[4,] "DIABETES" "" "" ""
[5,] "" "N" "10" "50 %"
[6,] "" "Y" "10" "50 %"
[7,] "0" "0" "0" "0"
[8,] "0" "0" "0" "0"
[9,] "0" "0" "0" "0"
预期的输出应该是:
[,1] [,2] [,3] [,4]
[1,] "ESRD" "" "" ""
[2,] "" "N" "10" "50 %"
[3,] "" "Y" "10" "50 %"
[4,] "DIABETES" "" "" ""
[5,] "" "N" "10" "50 %"
[6,] "" "Y" "10" "50 %"
[7,] "BLAH" "" "" ""
[8,] "" "N" "10" "50 %"
[9,] "" "Y" "10" "50 %"
这是一种选择:
desc <- function(x) {
af <- table(x)
rf <- prop.table(af) * 100
out <- cbind(Absolute=af, `Relative(%)`=rf)
dimnames(out) <- setNames(dimnames(out), c('Values', 'Frequency'))
out
}
lapply(categoricalvariables, desc)
#$ESRD
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
#
#$DIABETES
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
#
#$BLAH
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
如果你真的想要字符矩阵
tmp <- lapply(categoricalvariables, desc)
out <- do.call(rbind, lapply(names(tmp), function(x) {
rbind(c(x, "", "", ""), cbind("", rownames(tmp[[x]]), tmp[[x]]))
}))
out <- unname(rbind(c("", "", "Abs.Freq", "Rel.Freq"), out))
out
# [,1] [,2] [,3] [,4]
# [1,] "" "" "Abs.Freq" "Rel.Freq"
# [2,] "ESRD" "" "" ""
# [3,] "" "N" "10" "50"
# [4,] "" "Y" "10" "50"
# [5,] "DIABETES" "" "" ""
# [6,] "" "N" "10" "50"
# [7,] "" "Y" "10" "50"
# [8,] "BLAH" "" "" ""
# [9,] "" "N" "10" "50"
#[10,] "" "Y" "10" "50"
我正在尝试在 R 中构建一个大型 table。 是的,我听说过 table() 函数 - 事实上,我已经使用了好几次在下面的代码中 - 但我正在构建它是因为我不想每天键入 table() 20 次。我计划只使用 xtable + knitr 导出它。这很有用的原因是,对于我们这些必须反复对数据进行制表的人来说,这会节省很多时间。不幸的是,这里的循环有问题:
ESRD <- rep(c("Y", "N"), each=10)
DIABETES <- rep(c("Y", "N", "Y", "N"), c(5, 5, 5, 5))
BLAH <- rep(c("Y", "N"), each=10)
categoricalvariables <- data.frame(ESRD, DIABETES, BLAH)
descriptives <- function(VARIABLEMATRIX){
desc <- matrix(0, ncol=4, nrow=2*ncol(VARIABLEMATRIX) + ncol(VARIABLEMATRIX))
for (i in 1:ncol(VARIABLEMATRIX)){
matper <- matrix(0, nrow=dim(table(VARIABLEMATRIX[ ,i])), ncol=1)
for (i in 1:dim(table(VARIABLEMATRIX[ ,i]))){
matper[i, ] <- paste(round(prop.table(table(VARIABLEMATRIX[ ,i]))[i]*100, 2), "%")
}
matcount <- matrix(0, nrow=dim(table(VARIABLEMATRIX[ ,i])), ncol=1)
for (i in 1:dim(table(VARIABLEMATRIX[ ,i]))){
matcount[i, ] <- table(VARIABLEMATRIX[ ,i])[i]
}
desc[((3*i)-2), ] <- c(colnames(VARIABLEMATRIX)[i], "", "", "")
desc[((3*i)-1):(3*i), ] <- cbind("", names(table(VARIABLEMATRIX[ ,i])), matcount[ ,1], matper[ ,1])
return(desc)
}
}
descriptives(categoricalvariables)
我得到的输出是(显然有一个错误,但我不确定哪里出了问题):
[,1] [,2] [,3] [,4]
[1,] "0" "0" "0" "0"
[2,] "0" "0" "0" "0"
[3,] "0" "0" "0" "0"
[4,] "DIABETES" "" "" ""
[5,] "" "N" "10" "50 %"
[6,] "" "Y" "10" "50 %"
[7,] "0" "0" "0" "0"
[8,] "0" "0" "0" "0"
[9,] "0" "0" "0" "0"
预期的输出应该是:
[,1] [,2] [,3] [,4]
[1,] "ESRD" "" "" ""
[2,] "" "N" "10" "50 %"
[3,] "" "Y" "10" "50 %"
[4,] "DIABETES" "" "" ""
[5,] "" "N" "10" "50 %"
[6,] "" "Y" "10" "50 %"
[7,] "BLAH" "" "" ""
[8,] "" "N" "10" "50 %"
[9,] "" "Y" "10" "50 %"
这是一种选择:
desc <- function(x) {
af <- table(x)
rf <- prop.table(af) * 100
out <- cbind(Absolute=af, `Relative(%)`=rf)
dimnames(out) <- setNames(dimnames(out), c('Values', 'Frequency'))
out
}
lapply(categoricalvariables, desc)
#$ESRD
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
#
#$DIABETES
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
#
#$BLAH
# Frequency
#Values Absolute Relative(%)
# N 10 50
# Y 10 50
如果你真的想要字符矩阵
tmp <- lapply(categoricalvariables, desc)
out <- do.call(rbind, lapply(names(tmp), function(x) {
rbind(c(x, "", "", ""), cbind("", rownames(tmp[[x]]), tmp[[x]]))
}))
out <- unname(rbind(c("", "", "Abs.Freq", "Rel.Freq"), out))
out
# [,1] [,2] [,3] [,4]
# [1,] "" "" "Abs.Freq" "Rel.Freq"
# [2,] "ESRD" "" "" ""
# [3,] "" "N" "10" "50"
# [4,] "" "Y" "10" "50"
# [5,] "DIABETES" "" "" ""
# [6,] "" "N" "10" "50"
# [7,] "" "Y" "10" "50"
# [8,] "BLAH" "" "" ""
# [9,] "" "N" "10" "50"
#[10,] "" "Y" "10" "50"