在 ggpmisc 和 ggplot 中使用 stat_fit_tb() 时编辑表格显示的*行*名称

Editing *row* names of tabular display when using stat_fit_tb() in ggpmisc & ggplot

虽然 stat_poly_eq() 允许使用 eq.with.lhseq.x.rhs 更改变量名称,但根据我的阅读,stat_fit_tb() 中似乎没有类似的功能ggpmisc 文档。

有没有一种方法可以修改以下示例中的 plt 对象以强制 table 显示显示更容易理解且与方程和轴标签更一致的参数名称?

## dummy data
set.seed(1)
df <- data.frame(month = c(1:60))
df$observed <- 2.5 + 0.05*df$month + rnorm(60, sd = 1)
## min plot example
my.formula <- y ~ poly(x,2,raw=TRUE) ## formula with generic variable names

plt <- ggplot(df, aes(x=month, y=observed)) +
  geom_point() +
  ## show fit and CI
  geom_smooth(method = "lm", se=TRUE, level=0.95, formula = my.formula) +
  ## display equation with useful variable names (i.e. not x and y)
  stat_poly_eq(eq.with.lhs = "italic(Obs)~`=`~",
               eq.x.rhs = ".month",
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               formula = my.formula, label.y = 0.9) +
  ## show table of each coefficient's p-value
  stat_fit_tb(method.args = list(formula = my.formula),
              tb.vars = c(parameter = "term", ## can change column headings
                          coeff = "estimate", 
                          "p-val" = "p.value"),
              label.y = 0.8, label.x = "left")

plt

这可能在将 plt 转换为 grob 对象后被破解,但现在我喜欢一次解决问题并完成它,所以我改用底层的 ggproto 对象。

运行如下代码(与原文有不同之处在注释中注明):

library(ggpmisc)

StatFitTb2 <- ggproto(
  "StatFitTb2",
  StatFitTb,
  compute_panel = function (data, scales, method, method.args, tb.type, tb.vars, 
                            tb.row.names, digits, npc.used = TRUE, label.x, label.y) {
    force(data)
    if (length(unique(data$x)) < 2) {
      return(data.frame())
    }
    panel.idx <- as.integer(as.character(data$PANEL[1]))
    if (length(label.x) >= panel.idx) {
      label.x <- label.x[panel.idx]
    }
    else if (length(label.x) > 0) {
      label.x <- label.x[1]
    }
    if (length(label.y) >= panel.idx) {
      label.y <- label.y[panel.idx]
    }
    else if (length(label.y) > 0) {
      label.y <- label.y[1]
    }
    method.args <- c(method.args, list(data = quote(data)))
    if (is.character(method)) 
      method <- match.fun(method)
    mf <- do.call(method, method.args)
    if (tolower(tb.type) %in% c("fit.anova", "anova")) {
      mf_tb <- broom::tidy(stats::anova(mf))
    }
    else if (tolower(tb.type) %in% c("fit.summary", "summary")) {
      mf_tb <- broom::tidy(mf)
    }
    else if (tolower(tb.type) %in% c("fit.coefs", "coefs")) {
      mf_tb <- broom::tidy(mf)[c("term", "estimate")]
    }
    num.cols <- sapply(mf_tb, is.numeric)
    mf_tb[num.cols] <- signif(mf_tb[num.cols], digits = digits)
    if (!is.null(tb.vars)) {
      mf_tb <- dplyr::select(mf_tb, !!tb.vars)
    }
    
    # new condition for modifying row names, if they are specified
    if(!is.null(tb.row.names)) {
      mf_tb[, 1] <- tb.row.names
    }
    
    z <- tibble::tibble(mf_tb = list(mf_tb))
    if (npc.used) {
      margin.npc <- 0.05
    }
    else {
      margin.npc <- 0
    }
    if (is.character(label.x)) {
      label.x <- switch(label.x, right = (1 - margin.npc), 
                        center = 0.5, centre = 0.5, 
                        middle = 0.5, left = (0 + margin.npc))
      if (!npc.used) {
        x.delta <- abs(diff(range(data$x)))
        x.min <- min(data$x)
        label.x <- label.x * x.delta + x.min
      }
    }
    if (is.character(label.y)) {
      label.y <- switch(label.y, top = (1 - margin.npc), center = 0.5, 
                        centre = 0.5, middle = 0.5, bottom = (0 + margin.npc))
      if (!npc.used) {
        y.delta <- abs(diff(range(data$y)))
        y.min <- min(data$y)
        label.y <- label.y * y.delta + y.min
      }
    }
    if (npc.used) {
      z$npcx <- label.x
      z$x <- NA_real_
      z$npcy <- label.y
      z$y <- NA_real_
    }
    else {
      z$x <- label.x
      z$npcx <- NA_real_
      z$y <- label.y
      z$npcy <- NA_real_
    }
    z
  })

stat_fit_tb2 <- function(mapping = NULL, data = NULL, geom = "table_npc",
                         method = "lm", method.args = list(formula = y ~ x), 
                         tb.type = "fit.summary", tb.vars = NULL, digits = 3, 
                         tb.row.names = NULL, # new parameter for row names (defaults to NULL)
                         label.x = "center", label.y = "top", label.x.npc = NULL, 
                         label.y.npc = NULL, position = "identity", table.theme = NULL, 
                         table.rownames = FALSE, table.colnames = TRUE, table.hjust = 1, 
                         parse = FALSE, na.rm = FALSE, show.legend = FALSE, inherit.aes = TRUE, 
                         ...) {
  if (!is.null(label.x.npc)) {
    stopifnot(grepl("_npc", geom))
    label.x <- label.x.npc
  }
  if (!is.null(label.y.npc)) {
    stopifnot(grepl("_npc", geom))
    label.y <- label.y.npc
  }
  ggplot2::layer(stat = StatFitTb2, # reference modified StatFitTb2 instead of the original
                 data = data, mapping = mapping, 
                 geom = geom, position = position, show.legend = show.legend, 
                 inherit.aes = inherit.aes, 
                 params = list(method = method, method.args = method.args, 
                               tb.type = tb.type, tb.vars = tb.vars, 
                               tb.row.names = tb.row.names, # new parameter here
                               digits = digits, label.x = label.x, label.y = label.y, 
                               npc.used = grepl("_npc", geom), table.theme = table.theme, 
                               table.rownames = table.rownames, table.colnames = table.colnames, 
                               table.hjust = table.hjust, parse = parse, na.rm = na.rm, 
                               ...))
}

用法:

ggplot(df, aes(x=month, y=observed)) +
  geom_point() +
  ## show fit and CI
  geom_smooth(method = "lm", se=TRUE, level=0.95, formula = my.formula) +
  ## display equation with useful variable names (i.e. not x and y)
  stat_poly_eq(eq.with.lhs = "italic(Obs)~`=`~",
               eq.x.rhs = ".month",
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               formula = my.formula, label.y = 0.9) +
  ## show table of each coefficient's p-value
  stat_fit_tb2(method.args = list(formula = my.formula),
              tb.vars = c(parameter = "term", ## can change column headings
                          coeff = "estimate", 
                          "p-val" = "p.value"),
              tb.row.names = c("(Intercept)", "month", "month^2"),
              label.y = 0.8, label.x = "left", parse = TRUE)

注意:parse = TRUE 使 month^2 行名称看起来更好,但它也会影响 table 中的所有其他值(例如 p-value 的破折号变成减号,数字四舍五入到不同的位数等)

注意: 如果您仍在使用 'ggpmisc' (<= 0.3.6),此答案可能会有用。否则,请使用 'ggpmisc' (>= 0.3.7).

查看单独的答案

在将其内置到包中之前,一个相当简单的技巧是在 aes() 中即时编辑小标题。我先定义一个函数,以免代码混乱。

library(ggpmisc)

## dummy data
set.seed(1)
df <- data.frame(month = c(1:60))
df$observed <- 2.5 + 0.05*df$month + rnorm(60, sd = 1)

## min plot example
my.formula <- y ~ poly(x,2,raw=TRUE) ## formula with generic variable names

## define function for renaming parameters in tibble(s) returned by the stat
## walk through the list an operate on all the tibbles found so that
## grouping and facets are also supported.
set_param_names <- function(x, names) {
  for (i in seq_along(x)) {
   x[[i]][[1]] <- names
  }
  x
}

plt <- ggplot(df, aes(x=month, y=observed)) +
  geom_point() +
  ## show fit and CI
  geom_smooth(method = "lm", se=TRUE, level=0.95, formula = my.formula) +
  ## display equation with useful variable names (i.e. not x and y)
  stat_poly_eq(eq.with.lhs = "italic(Obs)~`=`~",
               eq.x.rhs = ".month",
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")), 
               parse = TRUE,
               formula = my.formula, label.y = 0.9) +
  ## show table of each coefficient's p-value
  stat_fit_tb(method.args = list(formula = my.formula),
              tb.vars = c(parameter = "term", ## can change column headings
                          coeff = "estimate", 
                          "p-val" = "p.value"),
              label.y = 0.8, label.x = "left",
              aes(label = set_param_names(stat(mf_tb), 
                                          c("intercept", "month", "month^2"))),
              parse = TRUE)

plt

给出:

更新的 'ggpmisc' (>= 0.3.7) 使这个答案成为可能,在我看来应该是首选。

## ggpmisc (>= 0.3.7)
library(ggpmisc)

## dummy data
set.seed(1)
df <- data.frame(month = c(1:60))
df$observed <- 2.5 + 0.05*df$month + rnorm(60, sd = 1)

## min plot example
my.formula <- y ~ poly(x,2,raw=TRUE) ## formula with generic variable names

plt <- ggplot(df, aes(x=month, y=observed)) +
  geom_point() +
  ## show fit and CI
  geom_smooth(method = "lm", se=TRUE, level=0.95, formula = my.formula) +
  ## display equation with useful variable names (i.e. not x and y)
  stat_poly_eq(eq.with.lhs = "italic(Obs)~`=`~",
               eq.x.rhs = '" month"',
               aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
               parse = TRUE,
               formula = my.formula, label.y = 0.9) +
  ## show table of each coefficient's p-value
  stat_fit_tb(method.args = list(formula = my.formula),
              tb.vars = c(parameter = "term", ## can change column headings
                          coeff = "estimate",
                          "p-val" = "p.value"),
              tb.params = c(1, month = 2, "month^2" = 3), ##
              label.y = 0.8, label.x = "left",
              parse = TRUE)

plt

给出以下剧情。 (我确实也将参数更改为 eq.x.rhs 虽然不是问题的直接部分。P-values 的更好格式在 [=21 的新版本中实现=]包。)