为什么 plotly sankey 图中节点的固定位置被覆盖或忽略?

Why are fixed positions for nodes in a plotly sankey graph being overridden or ignored?

我需要为时间 1 的 5 个节点与时间 2 的 5 个节点之间的流绘制大量桑基图。我希望无论节点或流的大小如何,每次都按相同的顺序绘制节点。然而,其中一些图中的节点顺序是乱的。我尝试动态计算所需的 node.y 位置,但这些位置似乎被覆盖或忽略了。

以下代码将生成四张图:

library(plotly)
library(tidyverse)

# Display labels for the ten nodes, "Node 0" through "Node 9". Link source and
# target ids below refer to these labels by zero-based position.
my_labels <- paste("Node", 0:9)

# Original data set: every source node (0-4) links to every target node (5-9),
# and some flows are much larger than others.
# Ids are kept as doubles, matching a plain c(0, 0, ...) literal.
source_ids <- rep(c(0, 1, 2, 3, 4), each = 5)
target_ids <- rep(c(5, 6, 7, 8, 9), times = 5)

# One row of five values per source node, in target order 5..9.
values1 <- c(
  60, 23, 1, 0, 9,   # from Node 0
  15, 33, 13, 4, 3,  # from Node 1
  0, 9, 8, 2, 1,     # from Node 2
  0, 4, 12, 127, 9,  # from Node 3
  4, 4, 1, 11, 1     # from Node 4
)

my_test_data1 <- data.frame(
  source_ids = source_ids,
  target_ids = target_ids,
  values1 = values1
)

# fig1: baseline Sankey of the original flows. Only labels and padding are set
# on the nodes; no x/y positions are supplied.
fig1 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(label = my_labels, pad = 10), # pad: 10 pixels
  link = list(
    source = my_test_data1[["source_ids"]],
    target = my_test_data1[["target_ids"]],
    value = my_test_data1[["values1"]]
  )
) %>%
  layout(title = list(text = "fig1"))

# Observed result: nodes are not in the intended order. Node 3, the largest
# node, is drawn below Node 4, and the right-hand nodes are out of order too.
fig1

# Second data set: same source/target pairs, but every flow has the same
# value (10), so all nodes end up the same size.
values2 <- rep_len(10, length.out = 25)
my_test_data2 <- data.frame(
  source_ids = source_ids,
  target_ids = target_ids,
  values2 = values2
)

# fig2: same chart definition as fig1, but drawn from the uniform-flow data
# set. No node x/y positions are supplied.
fig2 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(label = my_labels, pad = 10),
  link = list(
    source = my_test_data2[["source_ids"]],
    target = my_test_data2[["target_ids"]],
    value = my_test_data2[["values2"]]
  )
) %>%
  layout(title = list(text = "fig2"))

# Observed result: nodes are displayed in the intended order.
fig2

# Second attempt with the original data: derive node.y values dynamically, with
# the aim of forcing the nodes into the intended order.
#
# For each side of the diagram (source nodes, then target nodes):
#   n         = total flow through the node
#   label.pos = 1 - (midpoint of the node's share of the cumulative flow)
# The result is a two-element list of tibbles with columns node.name, n and
# label.pos.
label_pos_dfs <- lapply(
  c("source_ids", "target_ids"),
  function(id_col) {
    my_test_data1 %>%
      rename(node.name = all_of(id_col)) %>%
      group_by(node.name) %>%
      summarize(n = sum(values1)) %>%
      mutate(label.pos = 1 - (cumsum(n) - n / 2) / sum(n))
  }
)

# Flatten to one numeric vector: source-node positions first, then target-node
# positions, i.e. the same order as my_labels.
my_node_label_y_positions <-
  unlist(lapply(label_pos_dfs, function(df) df[["label.pos"]]))

# fig3: original data again, this time passing explicit node positions.
#   x: 0 for nodes 0-4 (sources) and 1 for nodes 5-9 (targets)
#   y: the dynamically computed values in my_node_label_y_positions
# This figure is kept as a demonstration of the problem; see the observed
# result noted before the final print.
# NOTE(review): https://github.com/plotly/plotly.py/issues/3002 reports that
# node coordinates of exactly 0 are not honoured, which would explain why
# these positions have no effect - confirm against the plotly version in use.
fig3 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(
    label = my_labels,
    x = rep(c(0, 1), each = 5),
    y = my_node_label_y_positions,
    pad = 10
  ),
  link = list(
    source = my_test_data1$source_ids,
    target = my_test_data1$target_ids,
    # Use the full column name. `$values` only resolved to `values1` through
    # partial matching on a data.frame, and would be NULL on a tibble.
    value = my_test_data1$values1
  )
)

fig3 <- fig3 %>%
  layout(
    title = list(
      text = "fig3"
    )
  )

# Nodes do not appear in intended order. Just like in fig1, Node 3, the largest
# node, appears below Node 4, and the right hand nodes are also out of order.
fig3

# fig4: uniform-flow data with hand-written node positions. The y values for
# the right-hand nodes deliberately swap Node 6 and Node 8.
fig4 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(
    label = my_labels,
    x = rep(c(0, 1), each = 5),
    y = c(
      1,    # Node 0 in top position
      0.7,  # Node 1 in second position
      0.5,  # Node 2 in third/middle position
      0.3,  # Node 3 in fourth position
      0.1,  # Node 4 in bottom/fifth position
      1,    # Node 5 in top position
      0.3,  # Node 6 in fourth position
      0.5,  # Node 7 in middle position
      0.7,  # Node 8 in second position
      0.2   # Node 9 in bottom position
    ),
    pad = 10
  ),
  link = list(
    source = my_test_data2[["source_ids"]],
    target = my_test_data2[["target_ids"]],
    value = my_test_data2[["values2"]]
  )
) %>%
  layout(title = list(text = "fig4"))

# Observed result: nodes are displayed in the intended swapped order, with
# Node 8 and Node 6 switched. Nodes 5 through 9 all seem to sit lower than
# expected, for an unknown reason.
fig4

我在 GitHub 上查阅了尚未关闭的 issue 后找到了解决方案。显然,node.x 和 node.y 不能等于 0:https://github.com/plotly/plotly.py/issues/3002

我不确定为什么在解决该问题后,动态计算出的 y 位置会使节点顺序与预期相反。也许 y 应该从上往下计,而不是从下往上计?


# fig5: working version of the original-data chart. It uses the dynamically
# computed y positions, but keeps every node coordinate away from exactly 0
# and 1.
fig5 <- plot_ly(
  type = "sankey",
  arrangement = "snap",
  node = list(
    label = my_labels,

    # Nodes cannot be positioned at 0 or 1! Use values just inside the range:
    # 1e-09 for the left column (nodes 0-4), 0.99 for the right (nodes 5-9).
    x = rep(c(1e-09, 0.99), each = 5),

    # The dynamically computed positions came out in reversed order, so they
    # are flipped back here (1 - y is the same as y * -1 + 1).
    y = 1 - my_node_label_y_positions,

    pad = 5
  ),
  link = list(
    source = my_test_data1$source_ids,
    target = my_test_data1$target_ids,
    # Use the full column name. `$values` only resolved to `values1` through
    # partial matching on a data.frame, and would be NULL on a tibble.
    value = my_test_data1$values1
  )
)

fig5 <- fig5 %>%
  layout(
    title = list(
      text = "fig5"
    )
  )

# Nodes DO appear in the intended order, at last!!!
fig5