添加自定义图像到 geom_polygon 填写 ggplot

Adding custom image to geom_polygon fill in ggplot

一位学生问我是否可以使用 R 重新创建类似于下面的图:

这是来自 this paper....

这类东西不是我的专长,但使用以下代码我能够创建 95% CI 椭圆并用 geom_polygon() 绘制它们。我用 rphylopic 包从系统库中抓取的图像填充图像。

#example data/ellipses
set.seed(101)
n <- 1000
x1 <- rnorm(n, mean=2)
y1 <- 1.75 + 0.4*x1 + rnorm(n)
df <- data.frame(x=x1, y=y1, group="A")
x2 <- rnorm(n, mean=8)
y2 <- 0.7*x2 + 2 + rnorm(n)
df <- rbind(df, data.frame(x=x2, y=y2, group="B"))
x3 <- rnorm(n, mean=6)
y3 <- x3 - 5 - rnorm(n)
df <- rbind(df, data.frame(x=x3, y=y3, group="C"))


#calculating ellipses
library(ellipse)
df_ell <- data.frame()
for(g in levels(df$group)){
  df_ell <- rbind(df_ell, cbind(as.data.frame(with(df[df$group==g,], ellipse(cor(x, y), 
                                                                             scale=c(sd(x),sd(y)), 
                                                                             centre=c(mean(x),mean(y))))),group=g))
}
#drawing
library(ggplot2)
p <- ggplot(data=df, aes(x=x, y=y,colour=group)) + 
  #geom_point(size=1.5, alpha=.6) +
  geom_polygon(data=df_ell, aes(x=x, y=y,colour=group, fill=group), alpha=0.1, size=1, linetype=1)


### get center points of ellipses
library(dplyr)
ell_center <- df_ell %>% group_by(group) %>% summarise(x=mean(x), y=mean(y))

### animal images
library(rphylopic)
lion <- get_image("e2015ba3-4f7e-4950-9bde-005e8678d77b", size = "512")[[1]]
mouse <- get_image("6b2b98f6-f879-445f-9ac2-2c2563157025", size="512")[[1]] 
bug <- get_image("136edfe2-2731-4acd-9a05-907262dd1311", size="512")[[1]]

### overlay images on center points
p + add_phylopic(lion, alpha=0.9, x=ell_center[[1,2]], y=ell_center[[1,3]], ysize=2, color="firebrick1")  + 
    add_phylopic(mouse, alpha=1, x=ell_center[[2,2]], y=ell_center[[2,3]], ysize=2, color="darkgreen") +
    add_phylopic(bug, alpha=0.9, x=ell_center[[3,2]], y=ell_center[[3,3]], ysize=2, color="mediumblue") + 
  theme_bw()

给出以下内容:

这没问题,但我真正想做的是将图像直接添加到 geom_polygon 的 'fill' 命令中。这可能吗 ?

#example data/ellipses set.seed(101) n <- 1000 x1 <- rnorm(n, mean=2) y1 <- 1.75 + 0.4*x1 + rnorm(n) df <- data.frame(x=x1, y=y1,
    group="A") x2 <- rnorm(n, mean=8) y2 <- 0.7*x2 + 2 + rnorm(n) df <-
    rbind(df, data.frame(x=x2, y=y2, group="B")) x3 <- rnorm(n, mean=6)
    y3 <- x3 - 5 - rnorm(n) df <- rbind(df, data.frame(x=x3, y=y3,
    group="C"))


#calculating ellipses library(ellipse) df_ell <- data.frame() for(g in levels(df$group)){
  df_ell <- rbind(df_ell,
    cbind(as.data.frame(with(df[df$group==g,], ellipse(cor(x, y),                                                                            
   scale=c(sd(x),sd(y)),                                                                                  
   centre=c(mean(x),mean(y))))),group=g)) }

#drawing library(ggplot2) p <- ggplot(data=df, aes(x=x, y=y,colour=group)) +    
    #geom_point(size=1.5, alpha=.6) +  
    geom_polygon(data=df_ell, aes(x=x, y=y,colour=group, fill=group),
    alpha=0.1, size=1, linetype=1)

一个不使用 ggplot 的快速而丑陋的解决方案可能是使用 rasterImagerpackage(jpg)(或 png,具体取决于您的图像格式):

set.seed(101)
n <- 1000
x1 <- rnorm(n, mean=2)
y1 <- 1.75 + 0.4*x1 + rnorm(n)
df <- data.frame(x=x1, y=y1, group="1")
x2 <- rnorm(n, mean=8)
y2 <- 0.7*x2 + 2 + rnorm(n)
df <- rbind(df, data.frame(x=x2, y=y2, group="2"))
x3 <- rnorm(n, mean=6)
y3 <- x3 - 5 - rnorm(n)
df <- rbind(df, data.frame(x=x3, y=y3, group="3"))

plot(df$x,df$y,type="n")
for(g in unique(df$group)){
    ifile=readJPEG(paste(g,".jpg",sep=""),FALSE)
    x=df$x[df$group == g]
    y=df$y[df$group == g]
    xmin=mean(x)-sd(x)*2
    ymin=mean(y)-sd(y)*2
    xmax=mean(x)+sd(x)*2
    ymax=mean(y)+sd(y)*2
    rasterImage(ifile,xmin,ymin,xmax,ymax)
}

(图片是 "random"在维基媒体上找到的图片,因此重命名)

在这里,我只是将图像以每组的平均值为中心(如文章中所示),并使它们的大小与标准差成正比。使其符合文章中使用的 95% 置信区间并不难。

这不完全是所需的结果,但它很容易做到(尽管如果你真的想让你的图像适合椭圆,我更愿意使用 gimp 解决方案,正如@Mike 所建议的)

我们无法为 ggplot 设置图案填充,但我们可以在 geom_tile 的帮助下做出一个非常简单的解决方法。重现您的初始数据:

#example data/ellipses
set.seed(101)
n <- 1000
x1 <- rnorm(n, mean=2)
y1 <- 1.75 + 0.4*x1 + rnorm(n)
df <- data.frame(x=x1, y=y1, group="A")
x2 <- rnorm(n, mean=8)
y2 <- 0.7*x2 + 2 + rnorm(n)
df <- rbind(df, data.frame(x=x2, y=y2, group="B"))
x3 <- rnorm(n, mean=6)
y3 <- x3 - 5 - rnorm(n)
df <- rbind(df, data.frame(x=x3, y=y3, group="C"))

#calculating ellipses
library(ellipse)
df_ell <- data.frame()
for(g in levels(df$group)){
  df_ell <- 
    rbind(df_ell, cbind(as.data.frame(
      with(df[df$group==g,], ellipse(cor(x, y), scale=c(sd(x),sd(y)), 
                                     centre=c(mean(x),mean(y))))),group=g))
}

我想展示的主要功能是将光栅图像转换为 data.frame,列为 XYcolor,这样我们稍后可以用 geom_tile

require("dplyr")
require("tidyr")
require("ggplot2")
require("png")

# getting sample pictures
download.file("http://content.mycutegraphics.com/graphics/alligator/alligator-reading-a-book.png", "alligator.png", mode = "wb")
download.file("http://content.mycutegraphics.com/graphics/animal/elephant-and-bird.png", "elephant.png", mode = "wb")
download.file("http://content.mycutegraphics.com/graphics/turtle/girl-turtle.png", "turtle.png", mode = "wb")
pic_allig <- readPNG("alligator.png")
pic_eleph <- readPNG("elephant.png")
pic_turtl <- readPNG("turtle.png")

# converting raster image to plottable data.frame
ggplot_rasterdf <- function(color_matrix, bottom = 0, top = 1, left = 0, right = 1) {
  require("dplyr")
  require("tidyr")

  if (dim(color_matrix)[3] > 3) hasalpha <- T else hasalpha <- F

  outMatrix <- matrix("#00000000", nrow = dim(color_matrix)[1], ncol = dim(color_matrix)[2])

  for (i in 1:dim(color_matrix)[1])
    for (j in 1:dim(color_matrix)[2]) 
      outMatrix[i, j] <- rgb(color_matrix[i,j,1], color_matrix[i,j,2], color_matrix[i,j,3], ifelse(hasalpha, color_matrix[i,j,4], 1))

  colnames(outMatrix) <- seq(1, ncol(outMatrix))
  rownames(outMatrix) <- seq(1, nrow(outMatrix))
  as.data.frame(outMatrix) %>% mutate(Y = nrow(outMatrix):1) %>% gather(X, color, -Y) %>% 
    mutate(X = left + as.integer(as.character(X))*(right-left)/ncol(outMatrix), Y = bottom + Y*(top-bottom)/nrow(outMatrix))
}

正在转换图像:

# preparing image data
pic_allig_dat <- 
  ggplot_rasterdf(pic_allig, 
                  left = min(df_ell[df_ell$group == "A",]$x), 
                  right = max(df_ell[df_ell$group == "A",]$x),
                  bottom = min(df_ell[df_ell$group == "A",]$y),
                  top = max(df_ell[df_ell$group == "A",]$y) )

pic_eleph_dat <- 
  ggplot_rasterdf(pic_eleph, left = min(df_ell[df_ell$group == "B",]$x), 
                  right = max(df_ell[df_ell$group == "B",]$x),
                  bottom = min(df_ell[df_ell$group == "B",]$y),
                  top = max(df_ell[df_ell$group == "B",]$y) )

pic_turtl_dat <- 
  ggplot_rasterdf(pic_turtl, left = min(df_ell[df_ell$group == "C",]$x), 
                  right = max(df_ell[df_ell$group == "C",]$x),
                  bottom = min(df_ell[df_ell$group == "C",]$y),
                  top = max(df_ell[df_ell$group == "C",]$y) )

据我所知,作者只想在椭圆内绘制图像,而不是原始矩形。我们可以借助包 sp.

中的 point.in.polygon 函数来实现它
# filter image-data.frames keeping only rows inside ellipses
require("sp")

gr_A_df <- 
  pic_allig_dat[point.in.polygon(pic_allig_dat$X, pic_allig_dat$Y, 
                                 df_ell[df_ell$group == "A",]$x, 
                                 df_ell[df_ell$group == "A",]$y ) %>% as.logical,]
gr_B_df <- 
  pic_eleph_dat[point.in.polygon(pic_eleph_dat$X, pic_eleph_dat$Y, 
                                 df_ell[df_ell$group == "B",]$x, 
                                 df_ell[df_ell$group == "B",]$y ) %>% as.logical,]
gr_C_df <- 
  pic_turtl_dat[point.in.polygon(pic_turtl_dat$X, pic_turtl_dat$Y, 
                                 df_ell[df_ell$group == "C",]$x, 
                                 df_ell[df_ell$group == "C",]$y ) %>% as.logical,]

最后...

#drawing
p <- ggplot(data=df) + 
  geom_polygon(data=df_ell, aes(x=x, y=y,colour=group, fill=group), alpha=0.1, size=1, linetype=1)

p + geom_tile(data = gr_A_df, aes(x = X, y = Y), fill = gr_A_df$color) + 
  geom_tile(data = gr_B_df, aes(x = X, y = Y), fill = gr_B_df$color) + 
  geom_tile(data = gr_C_df, aes(x = X, y = Y), fill = gr_C_df$color) + theme_bw()

我们可以轻松地调整图的大小而无需更改代码。

当然,您应该牢记机器的性能,并且可能不会选择 20MP 图片在 ggplot =)

中打印