simulate.p.value in chisq.test 导致缺少 df 以及如何抑制

simulate.p.value in chisq.test results in missing df and how to suppress

library(Hmisc)
catTestchisq_sim_p <- function (tab) 
{
  st <- if (!is.matrix(tab) || nrow(tab) < 2 || ncol(tab) < 
            2) 
    list(p.value = NA, statistic = NA, parameter = NA)
  else {
    rowcounts <- tab %*% rep(1, ncol(tab))
    tab <- tab[rowcounts > 0, ]
    if (!is.matrix(tab)) 
      list(p.value = NA, statistic = NA, parameter = NA)
    else chisq.test(tab, correct = FALSE, simulate.p.value = TRUE)
  }
  list(P = st$p.value, stat = st$statistic, df = st$parameter, 
       testname = "Pearson", statname = "Chi-square", namefun = "chisq", 
       latexstat = "\chi^{2}", plotmathstat = "chi^2")
}

基于HmisccatTestchisq函数,我想定义一个类似的函数,catTestchisq_sim_p,其中simulate.p.value选项设置为TRUEchisq.test 中。然后我定义了一个新的summaryM2函数,如下(注意这个和原来的summaryM唯一的区别是我把catTest = catTestchisq改成了catTest = catTestchisq_sim_p)。

summaryM2 <- function (formula, groups = NULL, data = NULL, subset, na.action = na.retain, 
          overall = FALSE, continuous = 10, na.include = FALSE, quant = c(0.025, 
                                                                          0.05, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.95, 
                                                                          0.975), nmin = 100, test = FALSE, conTest = conTestkw, 
          catTest = catTestchisq_sim_p, ordTest = ordTestpo) 
{
  marg <- length(data) && ".marginal." %in% names(data)
  if (marg) 
    formula <- update(formula, . ~ . + .marginal.)
  formula <- Formula(formula)
  Y <- if (!missing(subset) && length(subset)) 
    model.frame(formula, data = data, subset = subset, na.action = na.action)
  else model.frame(formula, data = data, na.action = na.action)
  X <- model.part(formula, data = Y, rhs = 1)
  Y <- model.part(formula, data = Y, lhs = 1)
  getlab <- function(x, default) {
    lab <- attr(x, "label")
    if (!length(lab) || lab == "") 
      default
    else lab
  }
  if (marg) {
    xm <- X$.marginal.
    X$.marginal. <- NULL
  }
  else xm <- rep("", nrow(X))
  if (length(X)) {
    xname <- names(X)
    if (length(xname) == 1 && !length(groups)) 
      groups <- xname
    if (!length(groups) && length(xname) > 1) {
      warnings("Must specify groups when > 1 right hand side variable is present.\ngroups taken as first right hand variable.")
      groups <- xname[1]
    }
    svar <- if (length(xname) == 1) 
      factor(rep(".ALL.", nrow(X)))
    else do.call("interaction", list(X[setdiff(xname, groups)], 
                                     sep = " "))
    group <- X[[groups]]
    glabel <- getlab(group, groups)
  }
  else {
    svar <- factor(rep(".ALL.", nrow(Y)))
    group <- rep("", nrow(Y))
    groups <- group.freq <- NULL
    glabel <- ""
  }
  quants <- unique(c(quant, 0.025, 0.05, 0.125, 0.25, 0.375, 
                     0.5, 0.625, 0.75, 0.875, 0.95, 0.975))
  nv <- ncol(Y)
  nameY <- names(Y)
  R <- list()
  for (strat in levels(svar)) {
    instrat <- svar == strat
    n <- integer(nv)
    type <- n
    comp <- dat <- vector("list", nv)
    names(comp) <- names(dat) <- nameY
    labels <- Units <- vector("character", nv)
    if (test) {
      testresults <- vector("list", nv)
      names(testresults) <- names(comp)
    }
    gr <- group[instrat]
    xms <- xm[instrat]
    if (all(xms != "")) 
      xms <- rep("", length(xms))
    group.freq <- table(gr)
    group.freq <- group.freq[group.freq > 0]
    if (overall) 
      group.freq <- c(group.freq, Combined = sum(group.freq))
    for (i in 1:nv) {
      w <- Y[instrat, i]
      if (length(attr(w, "label"))) 
        labels[i] <- attr(w, "label")
      if (length(attr(w, "units"))) 
        Units[i] <- attr(w, "units")
      if (is.character(w)) 
        w <- as.factor(w)
      if (!inherits(w, "mChoice")) {
        if (!is.factor(w) && !is.logical(w) && length(unique(w[!is.na(w)])) < 
            continuous) 
          w <- as.factor(w)
        s <- !is.na(w)
        if (na.include && !all(s) && length(levels(w))) {
          w <- na.include(w)
          levels(w)[is.na(levels(w))] <- "NA"
          s <- rep(TRUE, length(s))
        }
        n[i] <- sum(s & xms == "")
        w <- w[s]
        g <- gr[s, drop = TRUE]
        if (is.factor(w) || is.logical(w)) {
          tab <- table(w, g)
          if (test) {
            if (is.ordered(w)) 
              testresults[[i]] <- ordTest(g, w)
            else testresults[[i]] <- catTest(tab)
          }
          if (nrow(tab) == 1) {
            b <- casefold(dimnames(tab)[[1]], upper = TRUE)
            pres <- c("1", "Y", "YES", "PRESENT")
            abse <- c("0", "N", "NO", "ABSENT")
            jj <- match(b, pres, nomatch = 0)
            if (jj > 0) 
              bc <- abse[jj]
            else {
              jj <- match(b, abse, nomatch = 0)
              if (jj > 0) 
                bc <- pres[jj]
            }
            if (jj) {
              tab <- rbind(tab, rep(0, ncol(tab)))
              dimnames(tab)[[1]][2] <- bc
            }
          }
          if (overall) 
            tab <- cbind(tab, Combined = apply(tab, 1, 
                                               sum))
          comp[[i]] <- tab
          type[i] <- 1
        }
        else {
          sfn <- function(x, quant) {
            y <- c(quantile(x, quant), Mean = mean(x), 
                   SD = sqrt(var(x)), N = sum(!is.na(x)))
            names(y) <- c(paste0(formatC(100 * quant, 
                                         format = "fg", width = 1, digits = 15), 
                                 "%"), "Mean", "SD", "N")
            y
          }
          qu <- tapply(w, g, sfn, simplify = TRUE, quants)
          if (test) 
            testresults[[i]] <- conTest(g, w)
          if (overall) 
            qu$Combined <- sfn(w, quants)
          comp[[i]] <- matrix(unlist(qu), ncol = length(quants) + 
                                3, byrow = TRUE, dimnames = list(names(qu), 
                                                                 c(format(quants), "Mean", "SD", "N")))
          if (any(group.freq <= nmin)) 
            dat[[i]] <- lapply(split(w, g), nmin = nmin, 
                               function(x, nmin) if (length(x) <= nmin) 
                                 x
                               else NULL)
          type[i] <- 2
        }
      }
      else {
        w <- as.numeric(w) == 1
        n[i] <- sum(!is.na(apply(w, 1, sum)) & xms == 
                      "")
        g <- as.factor(gr)
        ncat <- ncol(w)
        tab <- matrix(NA, nrow = ncat, ncol = length(levels(g)), 
                      dimnames = list(dimnames(w)[[2]], levels(g)))
        if (test) {
          pval <- numeric(ncat)
          names(pval) <- dimnames(w)[[2]]
          d.f. <- stat <- pval
        }
        for (j in 1:ncat) {
          tab[j, ] <- tapply(w[, j], g, sum, simplify = TRUE, 
                             na.rm = TRUE)
          if (test) {
            tabj <- rbind(table(g) - tab[j, ], tab[j, 
                                                   ])
            st <- catTest(tabj)
            pval[j] <- st$P
            stat[j] <- st$stat
            d.f.[j] <- st$df
          }
        }
        if (test) 
          testresults[[i]] <- list(P = pval, stat = stat, 
                                   df = d.f., testname = st$testname, namefun = st$namefun, 
                                   statname = st$statname, latexstat = st$latexstat, 
                                   plotmathstat = st$plotmathstat)
        if (overall) 
          tab <- cbind(tab, Combined = apply(tab, 1, 
                                             sum))
        comp[[i]] <- tab
        type[i] <- 3
      }
    }
    labels <- ifelse(nchar(labels), labels, names(comp))
    R[[strat]] <- list(stats = comp, type = type, group.freq = group.freq, 
                       labels = labels, units = Units, quant = quant, data = dat, 
                       N = sum(!is.na(gr) & xms == ""), n = n, testresults = if (test) testresults)
  }
  structure(list(results = R, group.name = groups, group.label = glabel, 
                 call = call, formula = formula), class = "summaryM")
}

然后我在一个非常简单的测试数据集上使用 summaryM2,但是在打印输出中,d.f 是 NA?

options(digits=3)
set.seed(173)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
treatment <- factor(sample(c("Drug","Placebo"), 500, rep=TRUE))
f = summaryM2(sex ~ treatment, test=TRUE, overall = TRUE)
latex(f, vnames = "labels",long = TRUE, 
      exclude1 = FALSE, prmsd = FALSE, file = "", where = "!h", size = "scriptsize", digits = 2, 
      what = "%", landscape = FALSE, longtable = TRUE, booktabs = TRUE, prtest= c('P', 'stat'),
      lines.page = (.Machine$integer.max + 9)) 

如何确保卡方检验统计下的下标在 Latex longtable 输出中不显示为 NA

查看 class summaryM (Hmisc:::latex.summaryM) 对象的 latex 方法,我看到它有一个参数

prtest = c("P", "stat", "df", "name")

这表明省略 "df",即

latex(f, prtest = c("P","stat","name"))

应该可以。 (更雄心勃勃的是破解 latex.summaryM 以测试 is.na(df) 并在 TRUE 时跳过它,但我的解决方案可能就足够了。)