使用自定义 geom 分组失败 - 如何从 draw_panel() 检查内部对象

Grouping with custom geom fails - how to inspect internal object from draw_panel()

这是一个与从 修改而来的自定义 geom 相关的问题。给定的 geom 分组失败,所以我将 coord_munch 包含在 draw_panel 中,受到 GeomLineGeomPath 的启发。它在很多情况下确实有效,但我感觉它经常失败。

特别是,它似乎在两人一组时失败(请参见下面的示例),并且在使用拼凑时它在某些情节中奇怪地失败了。我opened an issue,但是还没有得到回复,对此我并不感到意外,我同意并觉得这实际上是geom写得不好的问题,而不是拼凑的问题。

我相信用于 GeomPath 的分组(在代码中,用 ## Work out grouping variables for grobs 标记)对于这个 grob 失败,但我不知道如何检查在两者之间创建的 munch 对象。

我的主要问题是,如何检查这个对象?

如果有人看到并理解我的 geom 的问题,我将更加感激。干杯

示例:

library(tidyverse)

## this is not an arrange problem, as shown by the correct plot using geom_path
testdf <- testdf %>% arrange(id, group, x) 

适用于 geom_path

ggplot(testdf, aes(x, y)) +
  geom_path(aes(group = id))

失败 geom_trail

ggplot(testdf, aes(x, y)) +
  geom_trail(aes(group = id))

使用颜色时更糟

ggplot(testdf, aes(x, y)) +
  geom_trail(aes(group = id, color = group))

reprex package (v0.3.0)

于 2020-07-02 创建

GeomTrail

geom_trail <-
  function (mapping = NULL, data = NULL, stat = "identity", position = "identity",
            na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) {
    layer(data = data, mapping = mapping, stat = stat, geom = GeomTrail,
          position = position, show.legend = show.legend, inherit.aes = inherit.aes,
          params = list(na.rm = na.rm, ...))
  }

GeomTrail <- ggplot2::ggproto(
  "GeomTrail", ggplot2::GeomPoint,
  
  default_aes = ggplot2::aes(
    shape = 19, colour = "black", size = 1.5, fill = NA, alpha = NA, stroke = 0.5,
    linesize = 0.5, linetype = 1, gap = .9,
  ),
  
  ## tjebo: 
  ## here is a function handle_na(), which does have no effect on the problem
  
  draw_panel = function(data, panel_params, coord, arrow = NULL,
                        lineend = "butt", linejoin = "round", linemitre = 10,
                        na.rm = FALSE) {
    if (!anyDuplicated(data$group)) {
      message_wrap("geom_path: Each group consists of only one observation. ",
                   "Do you need to adjust the group aesthetic?")
    }
    
    
    # ggplot: 
    ##must be sorted on group
    data <- data[order(data$group), , drop = FALSE]
    munched <- coord_munch(coord, data, panel_params)
    
    # ggplot: 
    ##Default geom point behaviour
    if (is.character(data$shape)) {
      data$shape <- translate_shape_string(data$shape)
    }
    coords <- coord$transform(data, panel_params)
    
    if (unique(coords$size == 0)) {
      my_points <- NULL
    } else {
      my_points <- grid::pointsGrob(
        coords$x,
        coords$y,
        pch = coords$shape,
        gp = grid::gpar(
          col = alpha(coords$colour, coords$alpha),
          fill = alpha(coords$fill, coords$alpha),
          fontsize = coords$size * .pt + coords$stroke * .stroke / 2,
          lwd = coords$stroke * .stroke / 2
        )
      )
    }
    
    # ggplot: 
    ##Silently drop lines with less than two points, preserving order
    rows <- stats::ave(seq_len(nrow(munched)), munched$group, FUN = length)
    munched <- munched[rows >= 2, ]
    if (nrow(munched) < 2) {
      return(zeroGrob())
    }
    
    ## tjebo: 
    ## here, ggplot2:::dapply() checks which grob to use (segment or lines), 
    ## but it also does not seem to have an effect, or at least I don't know 
    ## to change the grob in this case
    
    # teunbrand: 
    # New behaviour
    ## Convert x and y to units
    x <- unit(munched$x, "npc")
    y <- unit(munched$y, "npc")
    
    ## Work out grouping variables for grobs 
    n <- nrow(munched)
    group_diff <- munched$group[-1] != munched$group[-n]
    start <- c(TRUE, group_diff)
    end <- c(group_diff, TRUE)
    
    ## teunbrand: Custom grob class
    my_path <- grid::grob(
      x = x, y = y,
      mult = munched$gap * .pt,
      name = "trail",
      gp = grid::gpar(
        col = alpha(munched$colour, munched$alpha)[!end], # this could also be [start]
        fill = alpha(munched$colour, munched$alpha)[!end],
        lwd = munched$linesize * .pt,
        lty = munched$linetype,
        lineend = "butt",
        linejoin = "round",
        linemitre = 10
      ),
      vp = NULL,
      cl = "trail"
    )

    ggplot2:::ggname(
      "geom_trail",
      grid::grobTree(my_path, my_points)
    )
  }
)

# not modified hook
makeContent.trail <- function(x){ 
  # Convert npcs to absolute units
  x_new <- grid::convertX(x$x, "mm", TRUE)
  y_new <- grid::convertY(x$y, "mm", TRUE)
  
  # Do trigonometry stuff
  hyp <- sqrt(diff(x_new)^2 + diff(y_new)^2)
  sin_plot <- diff(y_new) / hyp
  cos_plot <- diff(x_new) / hyp
  
  diff_x0_seg <- head(x$mult, -1) * cos_plot
  diff_x1_seg <- (hyp - head(x$mult, -1)) * cos_plot
  diff_y0_seg <- head(x$mult, -1) * sin_plot
  diff_y1_seg <- (hyp - head(x$mult, -1)) * sin_plot
  
  x0 = head(x_new, -1) + diff_x0_seg
  x1 = head(x_new, -1) + diff_x1_seg
  y0 = head(y_new, -1) + diff_y0_seg
  y1 = head(y_new, -1) + diff_y1_seg
  keep <- unclass(x0) < unclass(x1)
  
  # Remove old xy coordinates
  x$x <- NULL
  x$y <- NULL
  
  # Supply new xy coordinates
  x$x0 <- unit(x0, "mm")[keep]
  x$x1 <- unit(x1, "mm")[keep]
  x$y0 <- unit(y0, "mm")[keep]
  x$y1 <- unit(y1, "mm")[keep]
  
  # Set to segments class
  class(x)[1] <- 'segments'
  x
}

数据

testdf <- tibble(
  id = c("A", "B", "B", "C", "D", "A", "E", "E", "F", "F", "G", "H", "I", "J", "I", "J", "K", "L", "M", "N", "M", "O", "P", "Q", "R", "R", "S", "T", "S", "T"),
  group = c("a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", "c", "c", "d", "d", "d", "d", "d", "d", "e", "e", "e", "e", "e", "e"),
  x = c(41, 43, 45, 45, 45, 46, 41, 46, 53, 54, 54, 56, 35, 35, 37, 37, 44, 44, 43, 44, 45, 45, 46, 46, 44, 48, 50, 52, 53, 54),
  y = structure(c(2.2, 1.8, 1.8, 2.3, 2.2, 2.2, 5.3, 2.3, 4.6, 4.6, 4.8, 4.8, 3.9, 4.1, 3.9, 4.1, 3.6, 3.7, 2.8, 2.6, 2.8, 3.1, 3.1, 2.9, 0.7, 0.7, 1, 0.8, 1, 0.8), .Names = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""))
)

这里有几点要注意

调试 ggproto 方法

以下是我最喜欢的三种调试方法:

如果您自己编写 ggproto,可以使用 RStudio 的浏览器,或从代码中超级分配对象。奖励:您可以从调试屏幕进行超级分配。

GeomDummy <- ggproto(
  "GeomDummy", Geom,
  draw_panel = function(...) { # Doesn't really matter

   # If in RStudio, put this somewhere in the beginning
   browser()

   {...} # Useful code here

   # Superassign data to be debugged to global environment
   debugdata <<- problemdata
  }
)

如果调试不可变代码(比如 ggplot 自己的代码,除非你分叉它),你仍然可以用浏览器调试它,但是要按照正确的路径找到有问题的代码需要一些努力:

debugonce(ggplot2:::ggplot_build.ggplot)
# The above is triggered whenever a plot is build before drawing
ggplot(mtcars, aes(wt, mpg)) + geom_point()

您也可以 debug(ggplot2:::ggplot_build.ggplot),但完成后您必须 undebug()

发现改进

在以下位中:

if (!anyDuplicated(data$group)) {
    message_wrap("geom_path: Each group consists of only one observation. ",
                 "Do you need to adjust the group aesthetic?")
}
{...}
if (nrow(munched) < 2) {
  return(zeroGrob())
}

这根本不会绘制任何内容,即使要绘制的 1 个点不需要线段连接到自身。

在下面的代码中:

if (unique(coords$size == 0)) {
  my_points <- NULL
}

通常人们会使用 shape = NA 来省略绘图点,但我不能决定您应该如何编写自己的几何图形。另外,我以前从未见过 if(unique(x == y)) 模式,但是如果同时存在 TRUEFALSE 情况,这不会引发警告吗?将其替换为 if (all(coords$size == 0)).

可能会有用

也就是说,整个条件点绘制可以简化为如下结构:

GeomTrail <- ggproto(
  "GeomTrail", GeomPoint,
  draw_panel = function(self, ...usual_arguments...) { # Important to include self
    # Default geom point behaviour
    my_points <- ggproto_parent(GeomPoint, self)$draw_panel(
      data, panel_params, coord, na.rm = na.rm
    )

  {..rest of code goes here..}

  },
  non_missing_aes = c("size", "colour") # Omit shape here
)

改进的ggproto/网格代码

我做的主要事情是将 (x,y) 参数化更改为 geom_segments() 使用的 ([x0,x1],[y0,y1]) 参数化,这使得其他计算在网格代码也更容易理解。

我也从 makeContent() 切换到 makeContext(),因为出于我无法理解的原因,颜色不会更新。

GeomTrail <- ggplot2::ggproto(
  "GeomTrail", ggplot2::GeomPoint,
  
  default_aes = ggplot2::aes(
    shape = 19, colour = "black", size = 1.5, fill = NA, alpha = NA, stroke = 0.5,
    linesize = 0.5, linetype = 1, gap = .9,
  ),
  
  ## tjebo: 
  ## here is a function handle_na(), which does have no effect on the problem
  
  draw_panel = function(data, panel_params, coord, arrow = NULL,
                        lineend = "butt", linejoin = "round", linemitre = 10,
                        na.rm = FALSE) {
    if (!anyDuplicated(data$group)) {
      message_wrap("geom_path: Each group consists of only one observation. ",
                   "Do you need to adjust the group aesthetic?")
    }
    
    
    # ggplot: 
    ##must be sorted on group
    data <- data[order(data$group), , drop = FALSE]
    
    # ggplot: 
    ##Default geom point behaviour
    if (is.character(data$shape)) {
      data$shape <- translate_shape_string(data$shape)
    }
    coords <- coord$transform(data, panel_params)
    
    if (unique(coords$size == 0)) {
      my_points <- NULL
    } else {
      my_points <- grid::pointsGrob(
        coords$x,
        coords$y,
        pch = coords$shape,
        gp = grid::gpar(
          col = alpha(coords$colour, coords$alpha),
          fill = alpha(coords$fill, coords$alpha),
          fontsize = coords$size * .pt + coords$stroke * .stroke / 2,
          lwd = coords$stroke * .stroke / 2
        )
      )
    }
    
    data <- coord_munch(coord, data, panel_params)
    
    data <- transform(data,
                      xend = c(tail(x, -1), NA),
                      yend = c(tail(y, -1), NA),
                      keep = c(group[-1] == head(group, -1), FALSE))
    data <- data[data$keep, ]
    
    ## Make custom grob class
    my_path <- grid::grob(
      x0 = unit(data$x, "npc"), x1 = unit(data$xend, "npc"),
      y0 = unit(data$y, "npc"), y1 = unit(data$yend, "npc"),
      mult = data$gap * .pt,
      name = "pointpath",
      gp = grid::gpar(
        col = alpha(data$colour, data$alpha),
        lwd = (data$linesize * .pt),
        lty = data$linetype,
        lineend = "butt",
        linejoin = "round", linemitre = 10
      ),
      vp = NULL,
      ### Now this is the important bit:
      cl = "trail"
    )
    
    ggplot2:::ggname(
      "geom_trail",
      grid::grobTree(my_path, my_points)
    )
  }
)

makeContext.trail <- function(x) {
  # Convert npcs to absolute units
  x0 <- grid::convertX(x$x0, "mm", TRUE)
  y0 <- grid::convertY(x$y0, "mm", TRUE)
  x1 <- grid::convertX(x$x1, "mm", TRUE)
  y1 <- grid::convertY(x$y1, "mm", TRUE)
  
  # Do trigonometry stuff
  dx <- x1 - x0
  dy <- y1 - y0
  hyp <- sqrt(dx ^ 2 + dy ^ 2)
  nudge_y <- (dy / hyp) * x$mult
  nudge_x <- (dx / hyp) * x$mult
  
  # Calculate new positions
  x0 <- x0 + nudge_x
  x1 <- x1 - nudge_x
  y0 <- y0 + nudge_y
  y1 <- y1 - nudge_y
  
  # Filter overshoot
  keep <- (sign(dx) == sign(x1 - x0)) & (sign(dy) == sign(y1 - y0))
  x$gp[] <- lapply(x$gp, function(x) {
    if (length(x) == 1L) return(x) else x[keep]
  })
  
  # Supply new xy coordinates
  x$x0 <- unit(x0[keep], "mm")
  x$x1 <- unit(x1[keep], "mm")
  x$y0 <- unit(y0[keep], "mm")
  x$y1 <- unit(y1[keep], "mm")
  
  # Set to segments class
  x$mult <- NULL
  x$id <- NULL
  class(x)[1] <- "segments"
  x
}

最终结果

现在绘制成这样:

ggplot(testdf, aes(x, y)) +
    geom_trail(aes(group = id, color = group))

注:

我实际上并没有临时想出这个来回答 SO 问题,我最近不得不处理这个 geom 的 very similar problems with my own version

一年后,我发现了一种调试 ggplot2 ggproto 方法的新方法,我认为由于它的简单性,应该得到不同的答案。

我们可以声明两个辅助函数:

ggdebug <- function(x, once = TRUE) {
  fun <- if (once) debugonce else debug
  fun(environment(x)$f)
}

ggundebug <- function(x) {
  undebug(environment(x)$f)
}

接下来,我们可以标记一个用于调试的ggproto方法

ggdebug(GeomPoint$draw_panel)

执行一些使用该方法的代码,我们就有了!

ggplot(mpg, aes(displ, hwy)) +
  geom_point()