在 R 中标记循环图
Label a looped plot in R
我正在使用一个循环来绘制直方图,一次按 column_a
的不同值分组,效果非常好。这是代码:
par(ask=F)
for (i in unique(Data$column_a)) {
dat <- Data[Data$column_a== i, ]
plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1")
}
唯一的问题是我无法根据 column_a
值标记每个数字以将这些数字与另一个数字区分开来。
在此先感谢您的帮助。
我的数据包含列名称为“计数”的损失数,在 column_a(R,I,F)) 中有 3 个不同的值。我想绘制这三个值的损失数量直方图。
有点 hacky 的解决方案是改变函数本身。
下面是 alteret 函数,它不包括 title
参数(并且仅适用于您在问题中的配置!)
plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default",
demp = FALSE, discrete, title = "default", ...)
{
def.par <- par(no.readonly = TRUE)
if (missing(data) || !is.vector(data, mode = "numeric"))
stop("data must be a numeric vector")
if ((missing(distr) & !missing(para)) || (missing(distr) &
!missing(para)))
stop("distr and para must defined")
if (!histo & !demp)
stop("one the arguments histo and demp must be put to TRUE")
xlim <- c(min(data), max(data))
s <- sort(data)
n <- length(data)
if (missing(distr)) {
par(mfrow = c(1, 2))
if (missing(discrete))
discrete <- FALSE
if (!discrete) {
obsp <- ppoints(s)
if (histo) {
if (demp) {
if (breaks == "default")
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Empirical density", ...)
else h <- hist(data, freq = FALSE, xlab = "Data",
main = "Empirical density", breaks = breaks,
...)
lines(density(data)$x, density(data)$y, lty = 2,
col = "black")
}
else {
if (breaks == "default")
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", ...)
else h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", breaks = breaks,
...)
}
}
else {
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", plot = FALSE, ...)
plot(density(data)$x, density(data)$y, lty = 1,
col = "black", type = "l", xlab = "Data",
main = paste("Empirical density"), ylab = "Density",
...)
}
plot(s, obsp, main = paste("Cumulative distribution"),
xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]),
ylab = "CDF", ...)
}
else {
if (breaks != "default")
warning("Breaks are\tnot taken into account for discrete data")
t <- table(data)
xval <- as.numeric(names(t))
ydobs <- as.vector(t)/n
ydmax <- max(ydobs)
plot(xval, ydobs, type = "h", xlim = xlim,
ylim = c(0, ydmax), main = paste0("Empirical distribution ", title),
xlab = "Data", ylab = "Density",
...)
ycdfobs <- cumsum(ydobs)
plot(xval, ycdfobs, type = "p", xlim = xlim,
ylim = c(0, 1), main = paste0("Empirical CDFs ", title),
xlab = "Data", ylab = "CDF", ...)
}
}
else {
if (!is.character(distr))
distname <- substring(as.character(match.call()$distr),
2)
else distname <- distr
if (!is.list(para))
stop("'para' must be a named list")
ddistname <- paste("d", distname, sep = "")
if (!exists(ddistname, mode = "function"))
stop(paste("The ", ddistname, " function must be defined"))
pdistname <- paste("p", distname, sep = "")
if (!exists(pdistname, mode = "function"))
stop(paste("The ", pdistname, " function must be defined"))
qdistname <- paste("q", distname, sep = "")
if (!exists(qdistname, mode = "function"))
stop(paste("The ", qdistname, " function must be defined"))
densfun <- get(ddistname, mode = "function")
nm <- names(para)
f <- formals(densfun)
args <- names(f)
m <- match(nm, args)
if (any(is.na(m)))
stop(paste("'para' specifies names which are not arguments to ",
ddistname))
if (missing(discrete)) {
if (is.element(distname, c("binom", "nbinom",
"geom", "hyper", "pois")))
discrete <- TRUE
else discrete <- FALSE
}
if (!discrete) {
par(mfrow = c(2, 2))
obsp <- ppoints(s)
if (breaks == "default")
h <- hist(data, plot = FALSE)
else h <- hist(data, breaks = breaks, plot = FALSE,
...)
xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
if (length(yhist) != length(xhist))
stop("problem when computing densities.")
ymax <- ifelse(is.finite(max(yhist)), max(max(h$density),
max(yhist)), max(h$density))
if (histo) {
hist(data, freq = FALSE, xlab = "Data",
ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."),
...)
if (demp) {
lines(density(data)$x, density(data)$y, lty = 2,
col = "black")
}
}
else plot(density(data)$x, density(data)$y, lty = 2,
col = "black", type = "l", xlab = "Data",
main = paste("Empirical and theoretical dens."),
ylab = "Density", xlim = c(min(h$breaks),
max(h$breaks)), ...)
if (demp)
legend("topright", bty = "n", lty = c(2,
1), col = c("black", "red"), legend = c("empirical",
"theoretical"), bg = "white", cex = 0.7)
lines(xhist, yhist, lty = 1, col = "red")
theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
if (length(theoq) != length(obsp))
stop("problem when computing quantities.")
plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles",
ylab = "Empirical quantiles", ...)
abline(0, 1)
xmin <- h$breaks[1]
xmax <- h$breaks[length(h$breaks)]
if (length(s) != length(obsp))
stop("problem when computing probabilities.")
plot(s, obsp, main = paste("Empirical and theoretical CDFs"),
xlab = "Data", ylab = "CDF", xlim = c(xmin,
xmax), ...)
sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
lines(sfin, theopfin, lty = 1, col = "red")
theop <- do.call(pdistname, c(list(s), as.list(para)))
if (length(theop) != length(obsp))
stop("problem when computing probabilities.")
plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities",
ylab = "Empirical probabilities", ...)
abline(0, 1)
}
else {
par(mfrow = c(1, 2))
if (breaks != "default")
warning("Breaks are not taken into account for discrete distributions")
t <- table(data)
xval <- as.numeric(names(t))
xvalfin <- seq(min(xval), max(xval), by = 1)
xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
if (length(yd) != length(xvalfin))
stop("problem when computing density points.")
ydobs <- as.vector(t)/n
ydmax <- max(yd, ydobs)
plot(xvalfin + xlinesdec, yd, type = "h", xlim = c(min(xval),
max(xval) + xlinesdec), ylim = c(0, ydmax), lty = 1,
col = "red", main = "Emp. and theo. distr.",
xlab = "Data", ylab = "Density",
...)
points(xval, ydobs, type = "h", lty = 1, col = "black",
...)
legend("topright", lty = c(1, 1), col = c("black",
"red"), legend = c("empirical", paste("theoretical")),
bty = "o", bg = "white", cex = 0.6,
...)
ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
if (length(ycdf) != length(xvalfin))
stop("problem when computing probabilities.")
plot(xvalfin, ycdf, type = "s", xlim = c(min(xval),
max(xval) + xlinesdec), ylim = c(0, 1), lty = 1,
col = "red", main = "Emp. and theo. CDFs",
xlab = "Data", ylab = "CDF", ...)
ycdfobs <- cumsum(ydobs)
points(xval, ycdfobs, type = "p", col = "black",
...)
legend("bottomright", lty = c(1, 1), col = c("black",
"red"), legend = c("empirical", paste("theoretical")),
bty = "o", bg = "white", cex = 0.6,
...)
}
}
par(def.par)
invisible()
}
现在要为您的情节添加标题,只需使用:
par(ask=F)
for (i in unique(Data$column_a)) {
dat <- Data[Data$column_a== i, ]
plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1", title = i)
}
编辑:添加了虚拟数据以测试提供的循环。
df <- data.frame(column_a = rep(c("a", "b"), each = 50),
count = sample(1:1000, 100, replace = T))
par(ask=F)
for (i in unique(df$column_a)) {
dat <- df[df$column_a== i, ]
plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1", title = i)
}
我正在使用一个循环来绘制直方图,一次按 column_a
的不同值分组,效果非常好。这是代码:
par(ask=F)
for (i in unique(Data$column_a)) {
dat <- Data[Data$column_a== i, ]
plotdist(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1")
}
唯一的问题是我无法根据 column_a
值标记每个数字以将这些数字与另一个数字区分开来。
在此先感谢您的帮助。
我的数据包含列名称为“计数”的损失数,在 column_a(R,I,F)) 中有 3 个不同的值。我想绘制这三个值的损失数量直方图。
有点 hacky 的解决方案是改变函数本身。
下面是 alteret 函数,它不包括 title
参数(并且仅适用于您在问题中的配置!)
plotdist_alt <- function (data, distr, para, histo = TRUE, breaks = "default",
demp = FALSE, discrete, title = "default", ...)
{
def.par <- par(no.readonly = TRUE)
if (missing(data) || !is.vector(data, mode = "numeric"))
stop("data must be a numeric vector")
if ((missing(distr) & !missing(para)) || (missing(distr) &
!missing(para)))
stop("distr and para must defined")
if (!histo & !demp)
stop("one the arguments histo and demp must be put to TRUE")
xlim <- c(min(data), max(data))
s <- sort(data)
n <- length(data)
if (missing(distr)) {
par(mfrow = c(1, 2))
if (missing(discrete))
discrete <- FALSE
if (!discrete) {
obsp <- ppoints(s)
if (histo) {
if (demp) {
if (breaks == "default")
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Empirical density", ...)
else h <- hist(data, freq = FALSE, xlab = "Data",
main = "Empirical density", breaks = breaks,
...)
lines(density(data)$x, density(data)$y, lty = 2,
col = "black")
}
else {
if (breaks == "default")
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", ...)
else h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", breaks = breaks,
...)
}
}
else {
h <- hist(data, freq = FALSE, xlab = "Data",
main = "Histogram", plot = FALSE, ...)
plot(density(data)$x, density(data)$y, lty = 1,
col = "black", type = "l", xlab = "Data",
main = paste("Empirical density"), ylab = "Density",
...)
}
plot(s, obsp, main = paste("Cumulative distribution"),
xlab = "Data", xlim = c(h$breaks[1], h$breaks[length(h$breaks)]),
ylab = "CDF", ...)
}
else {
if (breaks != "default")
warning("Breaks are\tnot taken into account for discrete data")
t <- table(data)
xval <- as.numeric(names(t))
ydobs <- as.vector(t)/n
ydmax <- max(ydobs)
plot(xval, ydobs, type = "h", xlim = xlim,
ylim = c(0, ydmax), main = paste0("Empirical distribution ", title),
xlab = "Data", ylab = "Density",
...)
ycdfobs <- cumsum(ydobs)
plot(xval, ycdfobs, type = "p", xlim = xlim,
ylim = c(0, 1), main = paste0("Empirical CDFs ", title),
xlab = "Data", ylab = "CDF", ...)
}
}
else {
if (!is.character(distr))
distname <- substring(as.character(match.call()$distr),
2)
else distname <- distr
if (!is.list(para))
stop("'para' must be a named list")
ddistname <- paste("d", distname, sep = "")
if (!exists(ddistname, mode = "function"))
stop(paste("The ", ddistname, " function must be defined"))
pdistname <- paste("p", distname, sep = "")
if (!exists(pdistname, mode = "function"))
stop(paste("The ", pdistname, " function must be defined"))
qdistname <- paste("q", distname, sep = "")
if (!exists(qdistname, mode = "function"))
stop(paste("The ", qdistname, " function must be defined"))
densfun <- get(ddistname, mode = "function")
nm <- names(para)
f <- formals(densfun)
args <- names(f)
m <- match(nm, args)
if (any(is.na(m)))
stop(paste("'para' specifies names which are not arguments to ",
ddistname))
if (missing(discrete)) {
if (is.element(distname, c("binom", "nbinom",
"geom", "hyper", "pois")))
discrete <- TRUE
else discrete <- FALSE
}
if (!discrete) {
par(mfrow = c(2, 2))
obsp <- ppoints(s)
if (breaks == "default")
h <- hist(data, plot = FALSE)
else h <- hist(data, breaks = breaks, plot = FALSE,
...)
xhist <- seq(min(h$breaks), max(h$breaks), length = 1000)
yhist <- do.call(ddistname, c(list(xhist), as.list(para)))
if (length(yhist) != length(xhist))
stop("problem when computing densities.")
ymax <- ifelse(is.finite(max(yhist)), max(max(h$density),
max(yhist)), max(h$density))
if (histo) {
hist(data, freq = FALSE, xlab = "Data",
ylim = c(0, ymax), breaks = h$breaks, main = paste("Empirical and theoretical dens."),
...)
if (demp) {
lines(density(data)$x, density(data)$y, lty = 2,
col = "black")
}
}
else plot(density(data)$x, density(data)$y, lty = 2,
col = "black", type = "l", xlab = "Data",
main = paste("Empirical and theoretical dens."),
ylab = "Density", xlim = c(min(h$breaks),
max(h$breaks)), ...)
if (demp)
legend("topright", bty = "n", lty = c(2,
1), col = c("black", "red"), legend = c("empirical",
"theoretical"), bg = "white", cex = 0.7)
lines(xhist, yhist, lty = 1, col = "red")
theoq <- do.call(qdistname, c(list(obsp), as.list(para)))
if (length(theoq) != length(obsp))
stop("problem when computing quantities.")
plot(theoq, s, main = " Q-Q plot", xlab = "Theoretical quantiles",
ylab = "Empirical quantiles", ...)
abline(0, 1)
xmin <- h$breaks[1]
xmax <- h$breaks[length(h$breaks)]
if (length(s) != length(obsp))
stop("problem when computing probabilities.")
plot(s, obsp, main = paste("Empirical and theoretical CDFs"),
xlab = "Data", ylab = "CDF", xlim = c(xmin,
xmax), ...)
sfin <- seq(xmin, xmax, by = (xmax - xmin)/100)
theopfin <- do.call(pdistname, c(list(sfin), as.list(para)))
lines(sfin, theopfin, lty = 1, col = "red")
theop <- do.call(pdistname, c(list(s), as.list(para)))
if (length(theop) != length(obsp))
stop("problem when computing probabilities.")
plot(theop, obsp, main = "P-P plot", xlab = "Theoretical probabilities",
ylab = "Empirical probabilities", ...)
abline(0, 1)
}
else {
par(mfrow = c(1, 2))
if (breaks != "default")
warning("Breaks are not taken into account for discrete distributions")
t <- table(data)
xval <- as.numeric(names(t))
xvalfin <- seq(min(xval), max(xval), by = 1)
xlinesdec <- min((max(xval) - min(xval))/30, 0.4)
yd <- do.call(ddistname, c(list(xvalfin), as.list(para)))
if (length(yd) != length(xvalfin))
stop("problem when computing density points.")
ydobs <- as.vector(t)/n
ydmax <- max(yd, ydobs)
plot(xvalfin + xlinesdec, yd, type = "h", xlim = c(min(xval),
max(xval) + xlinesdec), ylim = c(0, ydmax), lty = 1,
col = "red", main = "Emp. and theo. distr.",
xlab = "Data", ylab = "Density",
...)
points(xval, ydobs, type = "h", lty = 1, col = "black",
...)
legend("topright", lty = c(1, 1), col = c("black",
"red"), legend = c("empirical", paste("theoretical")),
bty = "o", bg = "white", cex = 0.6,
...)
ycdf <- do.call(pdistname, c(list(xvalfin), as.list(para)))
if (length(ycdf) != length(xvalfin))
stop("problem when computing probabilities.")
plot(xvalfin, ycdf, type = "s", xlim = c(min(xval),
max(xval) + xlinesdec), ylim = c(0, 1), lty = 1,
col = "red", main = "Emp. and theo. CDFs",
xlab = "Data", ylab = "CDF", ...)
ycdfobs <- cumsum(ydobs)
points(xval, ycdfobs, type = "p", col = "black",
...)
legend("bottomright", lty = c(1, 1), col = c("black",
"red"), legend = c("empirical", paste("theoretical")),
bty = "o", bg = "white", cex = 0.6,
...)
}
}
par(def.par)
invisible()
}
现在要为您的情节添加标题,只需使用:
par(ask=F)
for (i in unique(Data$column_a)) {
dat <- Data[Data$column_a== i, ]
plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1", title = i)
}
编辑:添加了虚拟数据以测试提供的循环。
df <- data.frame(column_a = rep(c("a", "b"), each = 50),
count = sample(1:1000, 100, replace = T))
par(ask=F)
for (i in unique(df$column_a)) {
dat <- df[df$column_a== i, ]
plotdist_alt(dat$count,histo = TRUE, demp = TRUE, discrete = T,
pch = 16, col = "dodgerblue1", title = i)
}