ggplot2:每组的四分位数范围
ggplot2: interquartile range for each group
基于ggplot2,我想知道如何可视化每个组(类型)的四分位数范围,其中每个组的密度都在一个大图中,如下所示:
在当前图中,点表示 y 取最大值时对应的 x 值,灰线的最小值/最大值表示每组的 2.5% 和 97.5% 分位数。
在每个分面中都出现了三个点,它们分别对应三个组各自密度最大值处的 x 值。
然而,我需要的是每个组只有一个点(一个 x 值),我从这里迷路了。
请帮帮我!
为了以防万一,我附上了可以重现该图的代码。
# Parameters of the Gaussian density for each group.
my_data <- data.frame(
  mean = c(0.04, 0.015, -0.04),
  stdev = c(0.019, 0.019, 0.02),
  Type = factor(c("A", "B", "C"))
)

# Points at which to evaluate the Gaussian densities.
x <- seq(-0.1, 0.1, by = 0.001)

# Build one density vector per group on the common grid `x`.
pdfs <- mapply(
  dnorm,
  mean = my_data$mean,
  sd = my_data$stdev,
  MoreArgs = list(x = x),
  SIMPLIFY = FALSE
)

# Label each density vector with its group name, then bind them column-wise.
names(pdfs) <- as.character(my_data$Type)
pdfs <- do.call(cbind.data.frame, pdfs)

# Densities side by side with the grid they were evaluated on.
x.com <- data.frame(pdfs, x)

# Per-group summary statistics, one row per group:
#   left / right         2.5% and 97.5% quantiles of the group's distribution
#   left_val / right_val density at those two quantiles
#   max / max_val        grid point with the highest density, and that density
#   variable             group name (factor)
# The quantiles come from qnorm() on the group parameters. Calling quantile()
# on the density heights would rank the heights themselves and would not give
# quantiles of the distribution along x.
sum_stat <- local({
  lower <- qnorm(0.025, mean = my_data$mean, sd = my_data$stdev)
  upper <- qnorm(0.975, mean = my_data$mean, sd = my_data$stdev)
  # which.max() returns a single index even if two grid points tie.
  peak_idx <- vapply(pdfs, which.max, integer(1))

  data.frame(
    left = lower,
    left_val = dnorm(lower, mean = my_data$mean, sd = my_data$stdev),
    max = x[peak_idx],
    max_val = unname(vapply(pdfs, max, numeric(1))),
    right = upper,
    right_val = dnorm(upper, mean = my_data$mean, sd = my_data$stdev),
    variable = factor(names(pdfs))
  )
})
sum_stat
# Convert the wide density table (one column per group) to tall format:
# one row per (x, Type) pair, with the density value in `density`.
library(tidyr)

pdfs$x <- x
# pivot_longer() replaces the superseded gather(); as.data.frame() keeps the
# result a plain data.frame, as gather() returned for a data.frame input.
tall_df <- as.data.frame(
  pivot_longer(pdfs, cols = -x, names_to = "Type", values_to = "density")
)
# Inspect the group labels.
tall_df$Type
# Make `Type` a factor with an explicit level order so facets appear A, B, C.
tt <- transform(
  tall_df,
  Type = factor(Type, levels = c("A", "B", "C"))
)
# Draw each group's density curve in its own facet, plus a point at `max`
# and a horizontal bar from `left` to `right`, both placed at y = 0.
# NOTE: at this point `sum_stat` has no `Type` column (its group column is
# named `variable`), so the point and error-bar layers cannot be split by
# facet_wrap(~ Type) and all three rows of `sum_stat` show up in every panel.
# NOTE(review): ggplot2 must be attached for this to run; no library() call
# is visible in this snippet.
ggplot(tt, aes(x = x, y = density, fill = Type, color = Type)) +
geom_line() +
# inherit.aes = FALSE: this layer ignores the x/y/fill/color mapping above.
geom_point(inherit.aes = FALSE,
data = sum_stat,
# alpha is given inside aes(), so it is a mapped value, not a literal opacity.
aes(x = max, y = 0, alpha=0.4),
show.legend = FALSE) +
geom_errorbarh(inherit.aes = FALSE,
data = sum_stat,
aes(xmin = left, xmax = right, y = 0, alpha=0.4),
height = 0.1, show.legend = FALSE) +
# One panel per group, stacked in a single column.
facet_wrap(~ Type, ncol = 1) +
# Hide the facet strip background and labels.
theme(strip.background = element_blank(),
strip.text.x = element_blank())
您可以重命名 sum_stat 的列名,使其与 tt 的变量名称一致:
# Rename the grouping column of `sum_stat` from "variable" to "Type" so that
# it matches the faceting variable of `tt`. Matching by name instead of by
# position keeps the rename correct if the column order ever changes.
names(sum_stat)[names(sum_stat) == "variable"] <- "Type"
然后,删除 inherit.aes = FALSE,并在各个 geom 中分别传入 aes 参数。这样,在调用 facet_wrap 时,sum_stat 也会按照 Type 变量被拆分到对应的分面中:
# Draw each group's density curve in its own facet, with one point at the
# density peak and one horizontal bar from `left` to `right` per group.
# `sum_stat` must already carry a `Type` column so that facet_wrap() can
# route each of its rows to the matching panel.
library(ggplot2)

ggplot(tt) +
  # Lines have no fill, so only colour is mapped to the group.
  geom_line(aes(x = x, y = density, color = Type)) +
  # alpha is a fixed setting, so it goes outside aes(); inside aes() it would
  # be passed through an alpha scale instead of being used as 0.4.
  geom_point(
    data = sum_stat,
    aes(x = max, y = 0),
    alpha = 0.4, color = "black", show.legend = FALSE
  ) +
  geom_errorbarh(
    data = sum_stat,
    aes(xmin = left, xmax = right, y = 0),
    alpha = 0.4, color = "black", height = 0.1, show.legend = FALSE
  ) +
  # One panel per group, stacked in a single column.
  facet_wrap(~ Type, ncol = 1) +
  # Hide the facet strip background and labels.
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )
这是您想要的效果吗?
基于ggplot2,我想知道如何可视化每个组(类型)的四分位数范围,其中每个组的密度都在一个大图中,如下所示:
在当前图中,点表示 y 取最大值时对应的 x 值,灰线的最小值/最大值表示每组的 2.5% 和 97.5% 分位数。
在每个分面中都出现了三个点,它们分别对应三个组各自密度最大值处的 x 值。
然而,我需要的是每个组只有一个点(一个 x 值),我从这里迷路了。
请帮帮我!
为了以防万一,我附上了可以重现该图的代码。
# Parameters of the Gaussian density for each group.
my_data <- data.frame(
  mean = c(0.04, 0.015, -0.04),
  stdev = c(0.019, 0.019, 0.02),
  Type = factor(c("A", "B", "C"))
)

# Points at which to evaluate the Gaussian densities.
x <- seq(-0.1, 0.1, by = 0.001)

# Build one density vector per group on the common grid `x`.
pdfs <- mapply(
  dnorm,
  mean = my_data$mean,
  sd = my_data$stdev,
  MoreArgs = list(x = x),
  SIMPLIFY = FALSE
)

# Label each density vector with its group name, then bind them column-wise.
names(pdfs) <- as.character(my_data$Type)
pdfs <- do.call(cbind.data.frame, pdfs)

# Densities side by side with the grid they were evaluated on.
x.com <- data.frame(pdfs, x)

# Per-group summary statistics, one row per group:
#   left / right         2.5% and 97.5% quantiles of the group's distribution
#   left_val / right_val density at those two quantiles
#   max / max_val        grid point with the highest density, and that density
#   variable             group name (factor)
# The quantiles come from qnorm() on the group parameters. Calling quantile()
# on the density heights would rank the heights themselves and would not give
# quantiles of the distribution along x.
sum_stat <- local({
  lower <- qnorm(0.025, mean = my_data$mean, sd = my_data$stdev)
  upper <- qnorm(0.975, mean = my_data$mean, sd = my_data$stdev)
  # which.max() returns a single index even if two grid points tie.
  peak_idx <- vapply(pdfs, which.max, integer(1))

  data.frame(
    left = lower,
    left_val = dnorm(lower, mean = my_data$mean, sd = my_data$stdev),
    max = x[peak_idx],
    max_val = unname(vapply(pdfs, max, numeric(1))),
    right = upper,
    right_val = dnorm(upper, mean = my_data$mean, sd = my_data$stdev),
    variable = factor(names(pdfs))
  )
})
sum_stat
# Convert the wide density table (one column per group) to tall format:
# one row per (x, Type) pair, with the density value in `density`.
library(tidyr)

pdfs$x <- x
# pivot_longer() replaces the superseded gather(); as.data.frame() keeps the
# result a plain data.frame, as gather() returned for a data.frame input.
tall_df <- as.data.frame(
  pivot_longer(pdfs, cols = -x, names_to = "Type", values_to = "density")
)
# Inspect the group labels.
tall_df$Type
# Make `Type` a factor with an explicit level order so facets appear A, B, C.
tt <- transform(
  tall_df,
  Type = factor(Type, levels = c("A", "B", "C"))
)
# Draw each group's density curve in its own facet, plus a point at `max`
# and a horizontal bar from `left` to `right`, both placed at y = 0.
# NOTE: at this point `sum_stat` has no `Type` column (its group column is
# named `variable`), so the point and error-bar layers cannot be split by
# facet_wrap(~ Type) and all three rows of `sum_stat` show up in every panel.
# NOTE(review): ggplot2 must be attached for this to run; no library() call
# is visible in this snippet.
ggplot(tt, aes(x = x, y = density, fill = Type, color = Type)) +
geom_line() +
# inherit.aes = FALSE: this layer ignores the x/y/fill/color mapping above.
geom_point(inherit.aes = FALSE,
data = sum_stat,
# alpha is given inside aes(), so it is a mapped value, not a literal opacity.
aes(x = max, y = 0, alpha=0.4),
show.legend = FALSE) +
geom_errorbarh(inherit.aes = FALSE,
data = sum_stat,
aes(xmin = left, xmax = right, y = 0, alpha=0.4),
height = 0.1, show.legend = FALSE) +
# One panel per group, stacked in a single column.
facet_wrap(~ Type, ncol = 1) +
# Hide the facet strip background and labels.
theme(strip.background = element_blank(),
strip.text.x = element_blank())
您可以重命名 sum_stat 的列名,使其与 tt 的变量名称一致:
# Rename the grouping column of `sum_stat` from "variable" to "Type" so that
# it matches the faceting variable of `tt`. Matching by name instead of by
# position keeps the rename correct if the column order ever changes.
names(sum_stat)[names(sum_stat) == "variable"] <- "Type"
然后,删除 inherit.aes = FALSE,并在各个 geom 中分别传入 aes 参数。这样,在调用 facet_wrap 时,sum_stat 也会按照 Type 变量被拆分到对应的分面中:
# Draw each group's density curve in its own facet, with one point at the
# density peak and one horizontal bar from `left` to `right` per group.
# `sum_stat` must already carry a `Type` column so that facet_wrap() can
# route each of its rows to the matching panel.
library(ggplot2)

ggplot(tt) +
  # Lines have no fill, so only colour is mapped to the group.
  geom_line(aes(x = x, y = density, color = Type)) +
  # alpha is a fixed setting, so it goes outside aes(); inside aes() it would
  # be passed through an alpha scale instead of being used as 0.4.
  geom_point(
    data = sum_stat,
    aes(x = max, y = 0),
    alpha = 0.4, color = "black", show.legend = FALSE
  ) +
  geom_errorbarh(
    data = sum_stat,
    aes(xmin = left, xmax = right, y = 0),
    alpha = 0.4, color = "black", height = 0.1, show.legend = FALSE
  ) +
  # One panel per group, stacked in a single column.
  facet_wrap(~ Type, ncol = 1) +
  # Hide the facet strip background and labels.
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )
这是您想要的效果吗?