在 R 中标记循环图

Question

我正在使用一个循环来绘制直方图，一次按 column_a 的不同值分组，效果非常好。这是代码：

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1") 
}

唯一的问题是我无法根据 column_a 值标记每个数字以将这些数字与另一个数字区分开来。

在此先感谢您的帮助。

我的数据包含列名称为“计数”的损失数，在 column_a(R,I,F)) 中有 3 个不同的值。我想绘制这三个值的损失数量直方图。

Answer 1

有点 hacky 的解决方案是改变函数本身。

下面是 alteret 函数，它不包括 title 参数（并且仅适用于您在问题中的配置！）

plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default", 
                      demp = FALSE, discrete, title = "default", ...) 
{
  def.par <- par(no.readonly = TRUE)
  if (missing(data) || !is.vector(data, mode = "numeric")) 
    stop("data must be a numeric vector")
  if ((missing(distr) & !missing(para)) || (missing(distr) & 
                                            !missing(para))) 
    stop("distr and para must defined")
  if (!histo & !demp) 
    stop("one the arguments histo and demp must be put to TRUE")
  xlim <- c(min(data), max(data))
  s <- sort(data)
  n <- length(data)
  if (missing(distr)) {
    par(mfrow = c(1, 2))
    if (missing(discrete)) 
      discrete <- FALSE
    if (!discrete) {
      obsp <- ppoints(s)
      if (histo) {
        if (demp) {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Empirical density", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Empirical density", breaks = breaks, 
                         ...)
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
        else {
          if (breaks == "default") 
            h <- hist(data, freq = FALSE, xlab = "Data", 
                      main = "Histogram", ...)
          else h <- hist(data, freq = FALSE, xlab = "Data", 
                         main = "Histogram", breaks = breaks, 
                         ...)
        }
      }
      else {
        h <- hist(data, freq = FALSE, xlab = "Data", 
                  main = "Histogram", plot = FALSE, ...)
        plot(density(data)$x, density(data)$y, lty = 1, 
             col = "black", type = "l", xlab = "Data", 
             main = paste("Empirical density"), ylab = "Density", 
             ...)
      }
      plot(s, obsp, main = paste("Cumulative distribution"), 
           xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]), 
           ylab = "CDF", ...)
    }
    else {
      if (breaks != "default") 
        warning("Breaks are\tnot taken into account for discrete data")
      t <- table(data)
      xval <- as.numeric(names(t))
      ydobs <- as.vector(t)/n
      ydmax <- max(ydobs)
      plot(xval, ydobs, type = "h", xlim = xlim, 
           ylim = c(0, ydmax), main = paste0("Empirical distribution ", title), 
           xlab = "Data", ylab = "Density", 
           ...)
      ycdfobs <- cumsum(ydobs)
      plot(xval, ycdfobs, type = "p", xlim = xlim, 
           ylim = c(0, 1), main = paste0("Empirical CDFs ", title), 
           xlab = "Data", ylab = "CDF", ...)
    }
  }
  else {
    if (!is.character(distr)) 
      distname <- substring(as.character(match.call()$distr), 
                            2)
    else distname <- distr
    if (!is.list(para)) 
      stop("'para' must be a named list")
    ddistname <- paste("d", distname, sep = "")
    if (!exists(ddistname, mode = "function")) 
      stop(paste("The ", ddistname, " function must be defined"))
    pdistname <- paste("p", distname, sep = "")
    if (!exists(pdistname, mode = "function")) 
      stop(paste("The ", pdistname, " function must be defined"))
    qdistname <- paste("q", distname, sep = "")
    if (!exists(qdistname, mode = "function")) 
      stop(paste("The ", qdistname, " function must be defined"))
    densfun <- get(ddistname, mode = "function")
    nm <- names(para)
    f <- formals(densfun)
    args <- names(f)
    m <- match(nm, args)
    if (any(is.na(m))) 
      stop(paste("'para' specifies names which are not arguments to ", 
                 ddistname))
    if (missing(discrete)) {
      if (is.element(distname, c("binom", "nbinom", 
                                 "geom", "hyper", "pois"))) 
        discrete <- TRUE
      else discrete <- FALSE
    }
    if (!discrete) {
      par(mfrow = c(2, 2))
      obsp <- ppoints(s)
      if (breaks == "default") 
        h <- hist(data, plot = FALSE)
      else h <- hist(data, breaks = breaks, plot = FALSE, 
                     ...)
      xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
      yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
      if (length(yhist) != length(xhist)) 
        stop("problem when computing densities.")
      ymax <- ifelse(is.finite(max(yhist)), max(max(h$density), 
                                                max(yhist)), max(h$density))
      if (histo) {
        hist(data, freq = FALSE, xlab = "Data", 
             ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."), 
             ...)
        if (demp) {
          lines(density(data)$x, density(data)$y, lty = 2, 
                col = "black")
        }
      }
      else plot(density(data)$x, density(data)$y, lty = 2, 
                col = "black", type = "l", xlab = "Data", 
                main = paste("Empirical and theoretical dens."), 
                ylab = "Density", xlim = c(min(h$breaks), 
                                           max(h$breaks)), ...)
      if (demp) 
        legend("topright", bty = "n", lty = c(2, 
                                              1), col = c("black", "red"), legend = c("empirical", 
                                                                                      "theoretical"), bg = "white", cex = 0.7)
      lines(xhist, yhist, lty = 1, col = "red")
      theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
      if (length(theoq) != length(obsp)) 
        stop("problem when computing quantities.")
      plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles", 
           ylab = "Empirical quantiles", ...)
      abline(0, 1)
      xmin <- h$breaks[1]
      xmax <- h$breaks[length(h$breaks)]
      if (length(s) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(s, obsp, main = paste("Empirical and theoretical CDFs"), 
           xlab = "Data", ylab = "CDF", xlim = c(xmin, 
                                                 xmax), ...)
      sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
      theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
      lines(sfin, theopfin, lty = 1, col = "red")
      theop <- do.call(pdistname, c(list(s), as.list(para)))
      if (length(theop) != length(obsp)) 
        stop("problem when computing probabilities.")
      plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities", 
           ylab = "Empirical probabilities", ...)
      abline(0, 1)
    }
    else {
      par(mfrow = c(1, 2))
      if (breaks != "default") 
        warning("Breaks are not taken into account for discrete distributions")
      t <- table(data)
      xval <- as.numeric(names(t))
      xvalfin <- seq(min(xval), max(xval), by = 1)
      xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
      yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
      if (length(yd) != length(xvalfin)) 
        stop("problem when computing density points.")
      ydobs <- as.vector(t)/n
      ydmax <- max(yd, ydobs)
      plot(xvalfin + xlinesdec, yd, type = "h", xlim = c(min(xval), 
                                                         max(xval) + xlinesdec), ylim = c(0, ydmax), lty = 1, 
           col = "red", main = "Emp. and theo. distr.", 
           xlab = "Data", ylab = "Density", 
           ...)
      points(xval, ydobs, type = "h", lty = 1, col = "black", 
             ...)
      legend("topright", lty = c(1, 1), col = c("black", 
                                                "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
      ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
      if (length(ycdf) != length(xvalfin)) 
        stop("problem when computing probabilities.")
      plot(xvalfin, ycdf, type = "s", xlim = c(min(xval), 
                                               max(xval) + xlinesdec), ylim = c(0, 1), lty = 1, 
           col = "red", main = "Emp. and theo. CDFs", 
           xlab = "Data", ylab = "CDF", ...)
      ycdfobs <- cumsum(ydobs)
      points(xval, ycdfobs, type = "p", col = "black", 
             ...)
      legend("bottomright", lty = c(1, 1), col = c("black", 
                                                   "red"), legend = c("empirical", paste("theoretical")), 
             bty = "o", bg = "white", cex = 0.6, 
             ...)
    }
  }
  par(def.par)
  invisible()
}

现在要为您的情节添加标题，只需使用：

par(ask=F)

for (i in unique(Data$column_a)) {
  dat <- Data[Data$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
           pch = 16, col = "dodgerblue1", title = i) 
}

编辑：添加了虚拟数据以测试提供的循环。

df <- data.frame(column_a = rep(c("a", "b"), each = 50),
                 count = sample(1:1000, 100, replace = T))
par(ask=F)

for (i in unique(df$column_a)) {
  dat <- df[df$column_a== i, ]
  plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
               pch = 16, col = "dodgerblue1", title = i) 
}

在 R 中标记循环图

Label a looped plot in R

plot

r