不重叠的抖动点
Jitter dots without overlap
我的数据:
# Simulated data: 100 observations per column, each drawn with replacement
# (a and b from 1..5, c from 1..10, d from 1..40). No seed is set here, so
# the values differ from run to run.
a <- sample(seq_len(5), size = 100, replace = TRUE)
b <- sample(seq_len(5), size = 100, replace = TRUE)
c <- sample(seq_len(10), size = 100, replace = TRUE)
d <- sample(seq_len(40), size = 100, replace = TRUE)
df <- data.frame(a = a, b = b, c = c, d = d)
使用 ggplot2,我绘制了 x = a、y = b 的散点图,并用另外两个维度加权(colour = c 和 size = d)。请注意,x 和 y 的取值被有意限定为 1:5。
很明显,不同大小和颜色的点因此重叠,所以我尝试抖动避免重叠:
# Scatter plot of b against a; colour encodes c and point size encodes d.
# Points are jittered using position_jitter()'s default width and height.
ggplot(data = df, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point(position = position_jitter())
现在我想让这些点聚集得更紧密,所以我为抖动函数尝试了几种 height 和 width 的组合,例如:
# Same scatter plot, but with the jitter limited to +/- 0.2 in both
# directions so points stay closer to their grid position.
tight_jitter <- position_jitter(width = 0.2, height = 0.2)
ggplot(data = df, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point(position = tight_jitter)
抖动使点仍然重叠,并将它们随机分布在给定区域上。
有没有办法让点完全不重叠,但又尽可能紧密地聚集在一起,甚至可以相互接触,同时又不是“并排”(side by side)排列或堆叠?(某种程度上,就是用较小的点组成一种气泡。)
谢谢!
一种有趣的可视化方式是蜂群图(beeswarm plot)。
在 R 中,beeswarm 和 ggbeeswarm 包实现了这种图。
下面是一个使用 ggbeeswarm 的例子:
# Reproducible example data: 100 rows, a and b in 1..5, c in 1..10,
# d in 1..40. `replace` is spelled out instead of relying on partial
# argument matching (`rep = TRUE`).
set.seed(1234)
a <- sample(1:5, 100, replace = TRUE)
b <- sample(1:5, 100, replace = TRUE)
c <- sample(1:10, 100, replace = TRUE)
d <- sample(1:40, 100, replace = TRUE)
df <- data.frame(a, b, c, d)

library(ggbeeswarm)

# Beeswarm plot of b against a; colour encodes c and point size encodes d.
# TRUE is used instead of T because T is an ordinary variable that can be
# reassigned.
# NOTE(review): groupOnX appears to be deprecated in recent ggbeeswarm
# releases -- confirm against the installed version before relying on it.
ggplot(df, aes(x = a, y = b, colour = c, size = d)) +
  geom_beeswarm(priority = "random", cex = 3.5, groupOnX = TRUE) +
  coord_flip()
希望对您有所帮助
这是@Tjebo 抖动问题的另一种可能解决方案。
参数 dst
需要一些调整。
# Reproducible example data: 100 rows, a and b in 1..5, c in 1..10,
# d in 1..40.
set.seed(1234)
a <- sample(1:5, 100, replace = TRUE)
b <- sample(1:5, 100, replace = TRUE)
c <- sample(1:10, 100, replace = TRUE)
d <- sample(1:40, 100, replace = TRUE)
df <- data.frame(a, b, c, d)

# Radius of the ring on which points sharing a grid cell are spread out.
# Needs tuning for the plot at hand.
dst <- 0.2

# df.mod receives the displaced coordinates; df keeps the original ones.
df.mod <- df

# Row indices grouped by (a, b) cell. One split() replaces a nested loop that
# rescanned the whole data frame for every a/b combination; drop = TRUE
# skips combinations that do not occur.
cells <- split(seq_len(nrow(df)), list(df$a, df$b), drop = TRUE)
for (rows in cells) {
  n_pts <- length(rows)
  # A point that is alone in its cell is left where it is.
  if (n_pts > 1) {
    # n_pts evenly spaced angles ending at 2 * pi; the leading 0 is dropped
    # because it points in the same direction as 2 * pi.
    angles <- seq(0, 2 * pi, length.out = n_pts + 1)[-1]
    df.mod[rows, c("a", "b")] <- cbind(
      df$a[rows] + cos(angles) * dst,
      df$b[rows] + sin(angles) * dst
    )
  }
}
library(ggplot2)
# Plot the displaced points: colour encodes c, point size encodes d.
ggplot(data = df.mod, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point()
根据 @Tjebo 的建议,我把点排列成了“堆”(heaps)。
# Reproducible example data: n rows, a and b in 1..5, c in 1..10, d in 1..40.
set.seed(1234)
n <- 100
a <- sample(1:5, n, replace = TRUE)
b <- sample(1:5, n, replace = TRUE)
c <- sample(1:10, n, replace = TRUE)
d <- sample(1:40, n, replace = TRUE)
df0 <- data.frame(a, b, c, d)

# Bounds of the rescaled size d. These parameters need careful tuning.
minr <- 0.05
maxr <- 0.2
# If TRUE, the circles of each cell are sorted by size before placement.
ord <- FALSE

df1 <- df0
# Rescale d linearly onto [minr, maxr]. The rescaled d is used as a diameter:
# the offsets below use (d_i + d_j) / 2 and the plot draws r = d / 2.
d_range <- range(df1$d)
if (diff(d_range) > 0) {
  df1$d <- minr + (maxr - minr) * (df1$d - d_range[1]) / diff(d_range)
} else {
  # All sizes are equal: the linear map would be 0 / 0 (NaN), so use the
  # midpoint of the allowed range instead.
  df1$d <- rep((minr + maxr) / 2, nrow(df1))
}

# Row indices grouped by (a, b) cell, computed once from the original grid
# coordinates; drop = TRUE skips combinations that do not occur.
cells <- split(seq_len(nrow(df1)), list(df1$a, df1$b), drop = TRUE)
for (rows in cells) {
  # A point that is alone in its cell is left where it is.
  if (length(rows) < 2) {
    next
  }
  cell <- df1[rows, ]
  if (ord) {
    cell <- cell[order(cell$d), ]
  }
  # The largest circle stays at the cell centre; the others go on a ring.
  centre <- which.max(cell$d)
  ring <- seq_len(nrow(cell))[-centre]
  # One angle per ring circle, starting at 0; the final 2 * pi is dropped
  # because it points in the same direction as 0.
  angles <- seq(0, 2 * pi, length.out = length(ring) + 1)
  angles <- angles[-length(angles)]
  # Centre-to-centre distance at which each ring circle touches the central
  # one. No check is made for overlap between the ring circles themselves.
  gap <- (cell$d[centre] + cell$d[ring]) / 2
  cell$a[ring] <- cell$a[ring] + cos(angles) * gap
  cell$b[ring] <- cell$b[ring] + sin(angles) * gap
  df1[rows, ] <- cell
}
library(ggplot2)
library(ggforce)
# Draw one circle per row, centred at (a, b) with radius d / 2 and filled by
# c; coord_fixed() keeps the aspect ratio at 1 so circles are not distorted.
ggplot(data = df1) +
  geom_circle(
    mapping = aes(x0 = a, y0 = b, r = d / 2, fill = c),
    alpha = 0.7
  ) +
  coord_fixed()
我的数据:
# Simulated data: 100 observations per column, each drawn with replacement
# (a and b from 1..5, c from 1..10, d from 1..40). No seed is set here, so
# the values differ from run to run.
a <- sample(seq_len(5), size = 100, replace = TRUE)
b <- sample(seq_len(5), size = 100, replace = TRUE)
c <- sample(seq_len(10), size = 100, replace = TRUE)
d <- sample(seq_len(40), size = 100, replace = TRUE)
df <- data.frame(a = a, b = b, c = c, d = d)
使用 ggplot2,我绘制了 x = a、y = b 的散点图,并用另外两个维度加权(colour = c 和 size = d)。请注意,x 和 y 的取值被有意限定为 1:5。
很明显,不同大小和颜色的点因此重叠,所以我尝试抖动避免重叠:
# Scatter plot of b against a; colour encodes c and point size encodes d.
# Points are jittered using position_jitter()'s default width and height.
ggplot(data = df, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point(position = position_jitter())
现在我想让这些点聚集得更紧密,所以我为抖动函数尝试了几种 height 和 width 的组合,例如:
# Same scatter plot, but with the jitter limited to +/- 0.2 in both
# directions so points stay closer to their grid position.
tight_jitter <- position_jitter(width = 0.2, height = 0.2)
ggplot(data = df, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point(position = tight_jitter)
抖动使点仍然重叠,并将它们随机分布在给定区域上。
有没有办法让点完全不重叠,但又尽可能紧密地聚集在一起,甚至可以相互接触,同时又不是“并排”(side by side)排列或堆叠?(某种程度上,就是用较小的点组成一种气泡。)
谢谢!
一种有趣的可视化方式是蜂群图(beeswarm plot)。
在 R 中,beeswarm 和 ggbeeswarm 包实现了这种图。
下面是一个使用 ggbeeswarm 的例子:
# Reproducible example data: 100 rows, a and b in 1..5, c in 1..10,
# d in 1..40. `replace` is spelled out instead of relying on partial
# argument matching (`rep = TRUE`).
set.seed(1234)
a <- sample(1:5, 100, replace = TRUE)
b <- sample(1:5, 100, replace = TRUE)
c <- sample(1:10, 100, replace = TRUE)
d <- sample(1:40, 100, replace = TRUE)
df <- data.frame(a, b, c, d)

library(ggbeeswarm)

# Beeswarm plot of b against a; colour encodes c and point size encodes d.
# TRUE is used instead of T because T is an ordinary variable that can be
# reassigned.
# NOTE(review): groupOnX appears to be deprecated in recent ggbeeswarm
# releases -- confirm against the installed version before relying on it.
ggplot(df, aes(x = a, y = b, colour = c, size = d)) +
  geom_beeswarm(priority = "random", cex = 3.5, groupOnX = TRUE) +
  coord_flip()
这是@Tjebo 抖动问题的另一种可能解决方案。
参数 dst
需要一些调整。
# Reproducible example data: 100 rows, a and b in 1..5, c in 1..10,
# d in 1..40.
set.seed(1234)
a <- sample(1:5, 100, replace = TRUE)
b <- sample(1:5, 100, replace = TRUE)
c <- sample(1:10, 100, replace = TRUE)
d <- sample(1:40, 100, replace = TRUE)
df <- data.frame(a, b, c, d)

# Radius of the ring on which points sharing a grid cell are spread out.
# Needs tuning for the plot at hand.
dst <- 0.2

# df.mod receives the displaced coordinates; df keeps the original ones.
df.mod <- df

# Row indices grouped by (a, b) cell. One split() replaces a nested loop that
# rescanned the whole data frame for every a/b combination; drop = TRUE
# skips combinations that do not occur.
cells <- split(seq_len(nrow(df)), list(df$a, df$b), drop = TRUE)
for (rows in cells) {
  n_pts <- length(rows)
  # A point that is alone in its cell is left where it is.
  if (n_pts > 1) {
    # n_pts evenly spaced angles ending at 2 * pi; the leading 0 is dropped
    # because it points in the same direction as 2 * pi.
    angles <- seq(0, 2 * pi, length.out = n_pts + 1)[-1]
    df.mod[rows, c("a", "b")] <- cbind(
      df$a[rows] + cos(angles) * dst,
      df$b[rows] + sin(angles) * dst
    )
  }
}
library(ggplot2)
# Plot the displaced points: colour encodes c, point size encodes d.
ggplot(data = df.mod, mapping = aes(x = a, y = b, colour = c, size = d)) +
  geom_point()
根据 @Tjebo 的建议,我把点排列成了“堆”(heaps)。
# Reproducible example data: n rows, a and b in 1..5, c in 1..10, d in 1..40.
set.seed(1234)
n <- 100
a <- sample(1:5, n, replace = TRUE)
b <- sample(1:5, n, replace = TRUE)
c <- sample(1:10, n, replace = TRUE)
d <- sample(1:40, n, replace = TRUE)
df0 <- data.frame(a, b, c, d)

# Bounds of the rescaled size d. These parameters need careful tuning.
minr <- 0.05
maxr <- 0.2
# If TRUE, the circles of each cell are sorted by size before placement.
ord <- FALSE

df1 <- df0
# Rescale d linearly onto [minr, maxr]. The rescaled d is used as a diameter:
# the offsets below use (d_i + d_j) / 2 and the plot draws r = d / 2.
d_range <- range(df1$d)
if (diff(d_range) > 0) {
  df1$d <- minr + (maxr - minr) * (df1$d - d_range[1]) / diff(d_range)
} else {
  # All sizes are equal: the linear map would be 0 / 0 (NaN), so use the
  # midpoint of the allowed range instead.
  df1$d <- rep((minr + maxr) / 2, nrow(df1))
}

# Row indices grouped by (a, b) cell, computed once from the original grid
# coordinates; drop = TRUE skips combinations that do not occur.
cells <- split(seq_len(nrow(df1)), list(df1$a, df1$b), drop = TRUE)
for (rows in cells) {
  # A point that is alone in its cell is left where it is.
  if (length(rows) < 2) {
    next
  }
  cell <- df1[rows, ]
  if (ord) {
    cell <- cell[order(cell$d), ]
  }
  # The largest circle stays at the cell centre; the others go on a ring.
  centre <- which.max(cell$d)
  ring <- seq_len(nrow(cell))[-centre]
  # One angle per ring circle, starting at 0; the final 2 * pi is dropped
  # because it points in the same direction as 0.
  angles <- seq(0, 2 * pi, length.out = length(ring) + 1)
  angles <- angles[-length(angles)]
  # Centre-to-centre distance at which each ring circle touches the central
  # one. No check is made for overlap between the ring circles themselves.
  gap <- (cell$d[centre] + cell$d[ring]) / 2
  cell$a[ring] <- cell$a[ring] + cos(angles) * gap
  cell$b[ring] <- cell$b[ring] + sin(angles) * gap
  df1[rows, ] <- cell
}
library(ggplot2)
library(ggforce)
# Draw one circle per row, centred at (a, b) with radius d / 2 and filled by
# c; coord_fixed() keeps the aspect ratio at 1 so circles are not distorted.
ggplot(data = df1) +
  geom_circle(
    mapping = aes(x0 = a, y0 = b, r = d / 2, fill = c),
    alpha = 0.7
  ) +
  coord_fixed()