假阳性与假阴性权衡图
False positive vs. false negative trade-off plot
我正在进行决策分析,并尝试使用 R 来说明假阳性(假通过)与假阴性(假不通过)之间的权衡。我已经创建了一个包含零假设曲线和备择假设曲线的密度图,但想通过下面这些示例图进一步说明这种关系。希望您能帮助我创建类似示例 1 和示例 2 的图,尤其是示例 1。谢谢!
密度图
示例 1
示例 2
看来您的症结在于如何获取密度曲线在给定 x 或 y 值处的值。
您可以使用 `ggplot_build()` 访问由 `geom_density()` 函数构造的底层 data.frame。这里有一些进一步的讨论。
我使用两个相互偏移的正态分布作为示例,您可以把它们想象成任意的曲线。
library(ggplot2)
# Shared x grid: 201 evenly spaced points on [-2, 4].
xs <- seq(from = -2, to = 4, length.out = 201)
# Long-format data for two normal density curves evaluated on the same grid:
# id "1" is centred at 0 and id "2" is centred at 2.
dat <- data.frame(
  x = rep(xs, times = 2),
  y = c(dnorm(xs), dnorm(xs, mean = 2)),
  id = rep(c("1", "2"), each = length(xs))
)
图 1
# Plot 1: both curves, with the area under curve "1" to the right of the cut
# line and the area under curve "2" to the left of it filled in.
vline <- 1    # x position of the dashed cut line
eps <- 1e-3   # points within eps of the cut line are left out of both areas
ggplot(data = dat, mapping = aes(x = x, y = y, colour = id, group = id)) +
  geom_line() +
  geom_area(
    mapping = aes(fill = id),
    # The layer receives the plot data and keeps only the rows to be shaded.
    data = function(d) {
      right_of_line <- d$id == "1" & d$x > (vline + eps)
      left_of_line <- d$id == "2" & d$x < (vline - eps)
      keep <- right_of_line | left_of_line
      d[keep & !is.na(keep), , drop = FALSE]
    }
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  guides(fill = "none", colour = "none") +
  labs(y = NULL, x = "Hazard Ratio") +
  theme_classic() +
  # Hide the y axis entirely.
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.line.y = element_blank()
  )
这里的 `vline` 是分界线;即使它不位于两条曲线的交点处,这种方法仍然适用。例如,
# Example: place the cut line at x = 1.2 instead of 1; the plot 1 code must be
# re-run after this assignment for the change to show.
vline <- 1.2
图 2
# Gate values (x positions) at which both curves are evaluated.
rng <- c(0.75, 0.85)
# For each curve id, linearly interpolate y at the gate values. approx()
# returns list(x, y), which data.frame() expands into columns x and y; the
# per-id pieces are then stacked into a single data frame.
rngdat <- do.call(
  rbind,
  by(dat, dat$id, function(z) {
    with(z, data.frame(approx(x, y, xout = rng), id = id[1]))
  })
)
# End point of the horizontal guide segment for each row: curve "1" runs to
# the right panel edge (Inf), curve "2" to the left panel edge (-Inf).
# Base ifelse() is used here because fifelse() belongs to data.table, which
# this script never attaches, so calling it fails with "could not find
# function".
rngdat$otherx <- ifelse(rngdat$id == "1", Inf, -Inf)
# Plot 2: both curves with dashed guide segments at the gate values stored in
# `rngdat`, zoomed to x in [0, 2], with a differently named y axis on each side.
ggplot(data = dat, mapping = aes(x = x, y = y, colour = id, group = id)) +
  geom_line(na.rm = TRUE) +
  # Black vertical segments from curve "1" down to y = 0.
  geom_segment(
    mapping = aes(xend = x, yend = 0),
    data = rngdat[rngdat$id == "1", ],
    linetype = "dashed",
    colour = "black"
  ) +
  # Horizontal segments from every interpolated point out to +/-Inf in x,
  # coloured by curve id through the inherited colour mapping.
  geom_segment(
    mapping = aes(xend = otherx, yend = y),
    data = rngdat,
    linetype = "dashed"
  ) +
  scale_colour_manual(values = c("1" = "blue", "2" = "red")) +
  scale_x_continuous(name = "HR gate") +
  scale_y_continuous(
    name = "False Go Probability",
    # Identity transform: the right-hand axis shows the same values under a
    # different title.
    sec.axis = sec_axis(~ ., name = "False No-Go Probability")
  ) +
  # Zoom without dropping data outside the window.
  coord_cartesian(xlim = c(0, 2)) +
  guides(colour = "none") +
  theme_classic() +
  theme(
    axis.line.y.right = element_line(colour = "blue"),
    axis.line.y.left = element_line(colour = "red")
  )
图 3
# Vertical shift for curve "1": 0.1 above its largest interpolated gate value.
offset <- 0.1 + max(rngdat$y[rngdat$id == "1"])
# x value separating the two outcome categories on each curve.
cutoff <- 0
# Baseline of each curve: curve "1" is lifted by offset + 0.05, curve "2"
# stays at 0.
dat$yoff <- with(dat, ifelse(id == "1", 0.05 + offset, 0))
# Outcome label for every point, determined by curve id and side of the cutoff.
dat$cat <- with(dat, ifelse(
  x < cutoff,
  ifelse(id == "1", "True Positive", "False Positive"),
  ifelse(id == "1", "False Negative", "True Negative")
))
# Plot 3: each curve drawn as a ribbon above its own baseline (`yoff`), split
# at the cutoff into categories that share a fill colour per curve and differ
# in opacity.
ggplot(data = dat, mapping = aes(x = x, y = y + yoff)) +
  geom_ribbon(
    mapping = aes(
      ymin = yoff,
      ymax = y + yoff,
      fill = cat,
      alpha = cat,
      group = cat
    ),
    na.rm = TRUE
  ) +
  geom_vline(xintercept = cutoff) +
  # Opacity per category.
  scale_alpha_manual(
    name = NULL,
    values = c(
      "True Positive" = 1, "False Negative" = 0.2,
      "False Positive" = 0.2, "True Negative" = 1
    )
  ) +
  # Fill colour per category.
  scale_fill_manual(
    name = NULL,
    values = c(
      "True Positive" = "red", "False Negative" = "red",
      "False Positive" = "blue", "True Negative" = "blue"
    )
  ) +
  labs(y = NULL, x = NULL) +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "bottom"
  )
我正在进行决策分析,并尝试使用 R 来说明假阳性(假通过)与假阴性(假不通过)之间的权衡。我已经创建了一个包含零假设曲线和备择假设曲线的密度图,但想通过下面这些示例图进一步说明这种关系。希望您能帮助我创建类似示例 1 和示例 2 的图,尤其是示例 1。谢谢!
密度图
示例 1
示例 2
看来您的症结在于如何获取密度曲线在给定 x 或 y 值处的值。
您可以使用 `ggplot_build()` 访问由 `geom_density()` 函数构造的底层 data.frame。这里有一些进一步的讨论。
我使用两个相互偏移的正态分布作为示例,您可以把它们想象成任意的曲线。
library(ggplot2)
# Shared x grid: 201 evenly spaced points on [-2, 4].
xs <- seq(from = -2, to = 4, length.out = 201)
# Long-format data for two normal density curves evaluated on the same grid:
# id "1" is centred at 0 and id "2" is centred at 2.
dat <- data.frame(
  x = rep(xs, times = 2),
  y = c(dnorm(xs), dnorm(xs, mean = 2)),
  id = rep(c("1", "2"), each = length(xs))
)
图 1
# Plot 1: both curves, with the area under curve "1" to the right of the cut
# line and the area under curve "2" to the left of it filled in.
vline <- 1    # x position of the dashed cut line
eps <- 1e-3   # points within eps of the cut line are left out of both areas
ggplot(data = dat, mapping = aes(x = x, y = y, colour = id, group = id)) +
  geom_line() +
  geom_area(
    mapping = aes(fill = id),
    # The layer receives the plot data and keeps only the rows to be shaded.
    data = function(d) {
      right_of_line <- d$id == "1" & d$x > (vline + eps)
      left_of_line <- d$id == "2" & d$x < (vline - eps)
      keep <- right_of_line | left_of_line
      d[keep & !is.na(keep), , drop = FALSE]
    }
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  guides(fill = "none", colour = "none") +
  labs(y = NULL, x = "Hazard Ratio") +
  theme_classic() +
  # Hide the y axis entirely.
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.line.y = element_blank()
  )
这里的 `vline` 是分界线;即使它不位于两条曲线的交点处,这种方法仍然适用。例如,
# Example: place the cut line at x = 1.2 instead of 1; the plot 1 code must be
# re-run after this assignment for the change to show.
vline <- 1.2
图 2
# Gate values (x positions) at which both curves are evaluated.
rng <- c(0.75, 0.85)
# For each curve id, linearly interpolate y at the gate values. approx()
# returns list(x, y), which data.frame() expands into columns x and y; the
# per-id pieces are then stacked into a single data frame.
rngdat <- do.call(
  rbind,
  by(dat, dat$id, function(z) {
    with(z, data.frame(approx(x, y, xout = rng), id = id[1]))
  })
)
# End point of the horizontal guide segment for each row: curve "1" runs to
# the right panel edge (Inf), curve "2" to the left panel edge (-Inf).
# Base ifelse() is used here because fifelse() belongs to data.table, which
# this script never attaches, so calling it fails with "could not find
# function".
rngdat$otherx <- ifelse(rngdat$id == "1", Inf, -Inf)
# Plot 2: both curves with dashed guide segments at the gate values stored in
# `rngdat`, zoomed to x in [0, 2], with a differently named y axis on each side.
ggplot(data = dat, mapping = aes(x = x, y = y, colour = id, group = id)) +
  geom_line(na.rm = TRUE) +
  # Black vertical segments from curve "1" down to y = 0.
  geom_segment(
    mapping = aes(xend = x, yend = 0),
    data = rngdat[rngdat$id == "1", ],
    linetype = "dashed",
    colour = "black"
  ) +
  # Horizontal segments from every interpolated point out to +/-Inf in x,
  # coloured by curve id through the inherited colour mapping.
  geom_segment(
    mapping = aes(xend = otherx, yend = y),
    data = rngdat,
    linetype = "dashed"
  ) +
  scale_colour_manual(values = c("1" = "blue", "2" = "red")) +
  scale_x_continuous(name = "HR gate") +
  scale_y_continuous(
    name = "False Go Probability",
    # Identity transform: the right-hand axis shows the same values under a
    # different title.
    sec.axis = sec_axis(~ ., name = "False No-Go Probability")
  ) +
  # Zoom without dropping data outside the window.
  coord_cartesian(xlim = c(0, 2)) +
  guides(colour = "none") +
  theme_classic() +
  theme(
    axis.line.y.right = element_line(colour = "blue"),
    axis.line.y.left = element_line(colour = "red")
  )
图 3
# Vertical shift for curve "1": 0.1 above its largest interpolated gate value.
offset <- 0.1 + max(rngdat$y[rngdat$id == "1"])
# x value separating the two outcome categories on each curve.
cutoff <- 0
# Baseline of each curve: curve "1" is lifted by offset + 0.05, curve "2"
# stays at 0.
dat$yoff <- with(dat, ifelse(id == "1", 0.05 + offset, 0))
# Outcome label for every point, determined by curve id and side of the cutoff.
dat$cat <- with(dat, ifelse(
  x < cutoff,
  ifelse(id == "1", "True Positive", "False Positive"),
  ifelse(id == "1", "False Negative", "True Negative")
))
# Plot 3: each curve drawn as a ribbon above its own baseline (`yoff`), split
# at the cutoff into categories that share a fill colour per curve and differ
# in opacity.
ggplot(data = dat, mapping = aes(x = x, y = y + yoff)) +
  geom_ribbon(
    mapping = aes(
      ymin = yoff,
      ymax = y + yoff,
      fill = cat,
      alpha = cat,
      group = cat
    ),
    na.rm = TRUE
  ) +
  geom_vline(xintercept = cutoff) +
  # Opacity per category.
  scale_alpha_manual(
    name = NULL,
    values = c(
      "True Positive" = 1, "False Negative" = 0.2,
      "False Positive" = 0.2, "True Negative" = 1
    )
  ) +
  # Fill colour per category.
  scale_fill_manual(
    name = NULL,
    values = c(
      "True Positive" = "red", "False Negative" = "red",
      "False Positive" = "blue", "True Negative" = "blue"
    )
  ) +
  labs(y = NULL, x = NULL) +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    legend.position = "bottom"
  )