ggplot2:将离散标度的断点改到两个断点之间
ggplot2: change break points of discrete scale to be between two break points
我有以下数据集:
# Simulated example data: 5000 standard-normal draws, each randomly assigned
# a split (1 or 2), a method ("A" or "B") and one of five count settings.
# No seed is set, so the values differ from run to run.
df <- data.frame(
  dens = rnorm(5000),
  split = as.factor(sample(1:2, 5000, replace = TRUE)),
  method = as.factor(sample(c("A", "B"), 5000, replace = TRUE)),
  counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = TRUE)
)
对于方法 A 和 B 下的每个 counts 取值,我绘制了如下的分半小提琴图(split violin),按 split 1 和 2 拆分。每种设置下有四个组,但这些组之间存在嵌套关系:
library(ggplot2)
# ggproto object for a "split" violin geom, extending GeomViolin. Only
# draw_group is overridden: each group is drawn as one half of a violin, the
# left half for odd group numbers and the right half for even ones.
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
  # data:           rows of a single group; the columns x, xmin, xmax, y,
  #                 violinwidth and group are read here.
  # ...:            forwarded unchanged to the draw_panel() calls below.
  # draw_quantiles: probabilities in [0, 1] at which to draw quantile lines,
  #                 or NULL for none.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    ## By @YAK:
    # Horizontal position of the violin outline on either side of x.
    data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
    grp <- data[1, "group"]
    # Keep a single side of the outline (left for odd groups, right for even
    # ones), ordered along y so the polygon is traced in one direction.
    newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
    # Close the polygon: repeat the first row at the start, then append the
    # last row and the first row again ...
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    # ... and snap those three closing vertices to the rounded x of the first
    # row (presumably the integer position of the discrete x value).
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # Both operands are length-one, so the scalar, short-circuiting `&&` is
    # the right operator for this `if` condition.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
      # Copy every non-positional column of the first row onto each segment row.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      # Quantile lines are always drawn fully opaque.
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
#' Build the line segments that mark quantiles on a (split) violin
#'
#' @param data Data frame for one violin group. The columns `density`, `y`,
#'   `x`, `xminv` and `xmaxv` are read.
#' @param draw_quantiles Numeric vector of probabilities at which to place a
#'   segment.
#' @param split Not consulted by this function; kept so that existing calls
#'   which pass it continue to work.
#' @param grp Group number. Odd: each segment runs from the left outline
#'   (`xminv`) to `x`. Even: from `x` to the right outline (`xmaxv`). `NULL`
#'   (the default): from the left outline to the right outline.
#' @return A data frame with columns `x`, `y` and `group` holding two
#'   consecutive rows (segment start and end) per requested quantile.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  # Normalised cumulative density, inverted by interpolation to find the y
  # value that corresponds to each requested probability.
  dens <- cumsum(data$density) / sum(data$density)
  ecdf <- stats::approxfun(dens, data$y)
  ys <- ecdf(draw_quantiles)

  # Outline and centre positions at those y values.
  violin_xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
  violin_xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
  violin_xs <- (stats::approxfun(data$y, data$x))(ys)

  if (is.null(grp)) {
    # No group given: `NULL %% 2 == 0` has length zero and would make `if`
    # fail, so draw the segment across the full width instead.
    seg_start <- violin_xminvs
    seg_end <- violin_xmaxvs
  } else if (grp %% 2 == 0) {
    seg_start <- violin_xs
    seg_end <- violin_xmaxvs
  } else {
    seg_start <- violin_xminvs
    seg_end <- violin_xs
  }

  data.frame(
    # rbind() stacks starts over ends; flattening column by column gives
    # start1, end1, start2, end2, ... without relying on ggplot2 internals.
    x = as.vector(rbind(seg_start, seg_end)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}
#' Layer constructor for split violins
#'
#' Creates a ggplot2 layer that uses `GeomSplitViolin` as its geom.
#'
#' @param mapping,data,stat,position,show.legend,inherit.aes Passed to
#'   `layer()` under the same names.
#' @param ... Additional entries appended to the layer's `params` list.
#' @param draw_quantiles,trim,scale,na.rm Placed in the layer's `params` list.
#' @return The value returned by `layer()`.
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity",
                              ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    position = position,
    data = data,
    mapping = mapping,
    inherit.aes = inherit.aes,
    show.legend = show.legend,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}
# Fill key: one level per split/method combination. It is built before the
# split levels are renamed on the next line, so it keeps the original labels.
df$key <- factor(paste(df$split, df$method))

# Relabel the existing split levels as "0" and "1" and append a third, unused
# level "2". interaction(split, counts) then has three levels per count value,
# the third of which holds no data.
levels(df$split) <- as.character(0:2)

library(ggplot2)

ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  theme_light() +
  theme(legend.position = "bottom") +
  scale_x_discrete(
    # Every level of the interaction except the last one.
    limits = head(levels(interaction(df$split, df$counts)), -1),
    drop = FALSE,
    name = "Counts"
  )
我得到如下图形:
这很好,只是我希望 x 轴上只显示计数 1、10、100、1000、10000 这几个标签,并且每个标签位于对应的蓝色和绿色小提琴图之间。也就是说,在第一组蓝色和绿色小提琴图之间标记 1,在第二组之间标记 10,在第三组之间标记 100,依此类推。
感谢您就如何执行此操作提供任何建议。
您可以不去更改离散标度的断点,而是直接向图中添加一个文本层(annotate),文本层可以接受离散标度上的非整数位置:
# Split violins on a discrete x axis whose own labels and ticks are hidden;
# the count labels are drawn as a text annotation at non-integer x positions.
ggplot(df,
       # The x variable is computed inline because df has no column named `x`.
       aes(x = interaction(split, counts), y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  # annotate layer with non-integer positions: split has three levels (the
  # third one unused), so 1.5, 4.5, ... lie midway between the first two
  # x positions of each count value
  annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
           label = c("1", "10", "100", "1000", "10000")) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  # drop = FALSE keeps the unused levels on the axis so the positions above
  # stay valid
  scale_x_discrete(name = "Counts", drop = FALSE) +
  theme_minimal() +
  # hide the actual discrete labels / ticks
  theme(legend.position = "bottom",
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())
我通常用分面(facet)来解决这类问题,然后把分面标题条(strip)的格式设置成轴标签的样子。这样每一对小提琴图也会很自然地靠得更近,无需任何技巧;如果需要,还可以通过 theme(panel.spacing = .....) 来调整面板之间的距离。
例如:
# One facet per count value; the facet strips are moved to the x side and
# styled to look like axis labels, while the real x axis text is hidden.
ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  labs(x = "count") +
  facet_grid(~counts, scales = "free_x", switch = "x") +
  theme_light() +
  theme(
    legend.position = "bottom",
    # hide the per-split axis labels and ticks
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    # plain strips with black text
    strip.background = element_blank(),
    strip.text = element_text(colour = "black")
  )
或者换一个让分面边界不那么明显的主题:
# Same faceted layout, drawn with theme_minimal(); strip styling is left at
# the theme's defaults.
ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  labs(x = "count") +
  facet_grid(~counts, scales = "free_x", switch = "x") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # hide the per-split axis labels and ticks
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
我有以下数据集:
# Simulated example data: 5000 standard-normal draws, each randomly assigned
# a split (1 or 2), a method ("A" or "B") and one of five count settings.
# No seed is set, so the values differ from run to run.
df <- data.frame(
  dens = rnorm(5000),
  split = as.factor(sample(1:2, 5000, replace = TRUE)),
  method = as.factor(sample(c("A", "B"), 5000, replace = TRUE)),
  counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = TRUE)
)
对于方法 A 和 B 下的每个 counts 取值,我绘制了如下的分半小提琴图(split violin),按 split 1 和 2 拆分。每种设置下有四个组,但这些组之间存在嵌套关系:
library(ggplot2)
# ggproto object for a "split" violin geom, extending GeomViolin. Only
# draw_group is overridden: each group is drawn as one half of a violin, the
# left half for odd group numbers and the right half for even ones.
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
  # data:           rows of a single group; the columns x, xmin, xmax, y,
  #                 violinwidth and group are read here.
  # ...:            forwarded unchanged to the draw_panel() calls below.
  # draw_quantiles: probabilities in [0, 1] at which to draw quantile lines,
  #                 or NULL for none.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    ## By @YAK:
    # Horizontal position of the violin outline on either side of x.
    data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
    grp <- data[1, "group"]
    # Keep a single side of the outline (left for odd groups, right for even
    # ones), ordered along y so the polygon is traced in one direction.
    newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
    # Close the polygon: repeat the first row at the start, then append the
    # last row and the first row again ...
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    # ... and snap those three closing vertices to the rounded x of the first
    # row (presumably the integer position of the discrete x value).
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # Both operands are length-one, so the scalar, short-circuiting `&&` is
    # the right operator for this `if` condition.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
      # Copy every non-positional column of the first row onto each segment row.
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      # Quantile lines are always drawn fully opaque.
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
#' Build the line segments that mark quantiles on a (split) violin
#'
#' @param data Data frame for one violin group. The columns `density`, `y`,
#'   `x`, `xminv` and `xmaxv` are read.
#' @param draw_quantiles Numeric vector of probabilities at which to place a
#'   segment.
#' @param split Not consulted by this function; kept so that existing calls
#'   which pass it continue to work.
#' @param grp Group number. Odd: each segment runs from the left outline
#'   (`xminv`) to `x`. Even: from `x` to the right outline (`xmaxv`). `NULL`
#'   (the default): from the left outline to the right outline.
#' @return A data frame with columns `x`, `y` and `group` holding two
#'   consecutive rows (segment start and end) per requested quantile.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  # Normalised cumulative density, inverted by interpolation to find the y
  # value that corresponds to each requested probability.
  dens <- cumsum(data$density) / sum(data$density)
  ecdf <- stats::approxfun(dens, data$y)
  ys <- ecdf(draw_quantiles)

  # Outline and centre positions at those y values.
  violin_xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
  violin_xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
  violin_xs <- (stats::approxfun(data$y, data$x))(ys)

  if (is.null(grp)) {
    # No group given: `NULL %% 2 == 0` has length zero and would make `if`
    # fail, so draw the segment across the full width instead.
    seg_start <- violin_xminvs
    seg_end <- violin_xmaxvs
  } else if (grp %% 2 == 0) {
    seg_start <- violin_xs
    seg_end <- violin_xmaxvs
  } else {
    seg_start <- violin_xminvs
    seg_end <- violin_xs
  }

  data.frame(
    # rbind() stacks starts over ends; flattening column by column gives
    # start1, end1, start2, end2, ... without relying on ggplot2 internals.
    x = as.vector(rbind(seg_start, seg_end)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}
#' Layer constructor for split violins
#'
#' Creates a ggplot2 layer that uses `GeomSplitViolin` as its geom.
#'
#' @param mapping,data,stat,position,show.legend,inherit.aes Passed to
#'   `layer()` under the same names.
#' @param ... Additional entries appended to the layer's `params` list.
#' @param draw_quantiles,trim,scale,na.rm Placed in the layer's `params` list.
#' @return The value returned by `layer()`.
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity",
                              ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    position = position,
    data = data,
    mapping = mapping,
    inherit.aes = inherit.aes,
    show.legend = show.legend,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}
# Fill key: one level per split/method combination. It is built before the
# split levels are renamed on the next line, so it keeps the original labels.
df$key <- factor(paste(df$split, df$method))

# Relabel the existing split levels as "0" and "1" and append a third, unused
# level "2". interaction(split, counts) then has three levels per count value,
# the third of which holds no data.
levels(df$split) <- as.character(0:2)

library(ggplot2)

ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  theme_light() +
  theme(legend.position = "bottom") +
  scale_x_discrete(
    # Every level of the interaction except the last one.
    limits = head(levels(interaction(df$split, df$counts)), -1),
    drop = FALSE,
    name = "Counts"
  )
我得到如下图形:
这很好,只是我希望 x 轴上只显示计数 1、10、100、1000、10000 这几个标签,并且每个标签位于对应的蓝色和绿色小提琴图之间。也就是说,在第一组蓝色和绿色小提琴图之间标记 1,在第二组之间标记 10,在第三组之间标记 100,依此类推。
感谢您就如何执行此操作提供任何建议。
您可以不去更改离散标度的断点,而是直接向图中添加一个文本层(annotate),文本层可以接受离散标度上的非整数位置:
# Split violins on a discrete x axis whose own labels and ticks are hidden;
# the count labels are drawn as a text annotation at non-integer x positions.
ggplot(df,
       # The x variable is computed inline because df has no column named `x`.
       aes(x = interaction(split, counts), y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  # annotate layer with non-integer positions: split has three levels (the
  # third one unused), so 1.5, 4.5, ... lie midway between the first two
  # x positions of each count value
  annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
           label = c("1", "10", "100", "1000", "10000")) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  # drop = FALSE keeps the unused levels on the axis so the positions above
  # stay valid
  scale_x_discrete(name = "Counts", drop = FALSE) +
  theme_minimal() +
  # hide the actual discrete labels / ticks
  theme(legend.position = "bottom",
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())
我通常用分面(facet)来解决这类问题,然后把分面标题条(strip)的格式设置成轴标签的样子。这样每一对小提琴图也会很自然地靠得更近,无需任何技巧;如果需要,还可以通过 theme(panel.spacing = .....) 来调整面板之间的距离。
例如:
# One facet per count value; the facet strips are moved to the x side and
# styled to look like axis labels, while the real x axis text is hidden.
ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  labs(x = "count") +
  facet_grid(~counts, scales = "free_x", switch = "x") +
  theme_light() +
  theme(
    legend.position = "bottom",
    # hide the per-split axis labels and ticks
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    # plain strips with black text
    strip.background = element_blank(),
    strip.text = element_text(colour = "black")
  )
或者换一个让分面边界不那么明显的主题:
# Same faceted layout, drawn with theme_minimal(); strip styling is left at
# the theme's defaults.
ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(name = "Paired", n = 4)) +
  labs(x = "count") +
  facet_grid(~counts, scales = "free_x", switch = "x") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # hide the per-split axis labels and ticks
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )