如何使用 R 的 networkD3 库生成包含自引用和循环引用的桑基图(Sankey 图)
how to produce Sankey diagram with auto-references and circular-references using NetworkD3 library R
我有这个数据:
# dput() representation of the `landuse` list used in the question:
#   nodes - data frame with 33 rows and a single `name` column (mostly NA)
#   links - data frame with 31 rows and columns `source`, `target`, `value`
list(nodes = structure(list(name = c(NA, NA, "1.1.1. Formação Florestal",
"1.1.2. Formação Savanica", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "3.1. Pastagem", NA, NA, NA, "3.2.1. Cultura Anual e Perene",
NA, "3.3. Mosaico de Agricultura e Pastagem", NA, NA, "4.2. Infraestrutura Urbana",
"4.5. Outra Área não Vegetada", NA, NA, NA, NA, NA, NA, NA, "5.1 Rio ou Lago ou Oceano"
)), class = "data.frame", row.names = c(NA, -33L)), links = structure(list(
source = c(3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 19L, 19L, 19L, 19L, 21L, 21L, 21L,
21L, 21L, 21L, 24L, 25L, 25L, 25L, 33L), target = c(3L, 21L,
4L, 21L, 15L, 3L, 25L, 4L, 33L, 19L, 15L, 21L, 3L, 25L, 4L,
33L, 15L, 19L, 4L, 21L, 4L, 21L, 25L, 33L, 15L, 3L, 4L, 25L,
4L, 33L, 33L), value = c(0.544859347827813, 0.00354385993588971,
0.494359662221154, 4.67602736159475, 2.20248911690968, 0.501437742068369,
0.00354375594818463, 24.8427814053755, 0.439418727642527,
0.0079740332093807, 11.8060486886398, 2.76329829691466, 0.000886029792298199,
0.00177186270758855, 3.35504921147758, 0.14263144351167,
1.12170804870686, 0.0478454594554582, 0.217079959877658,
0.00620223918980076, 1.79754946594068, 9.02868098124075,
0.00442981113709027, 0.242743895018645, 0.498770814980772,
0.00265782877794886, 0.000885894856554407, 0.379188333632346,
0.00265793188317263, 0.00265771537700804, 0.39158027235054
)), row.names = c(NA, -31L), class = "data.frame"))
我正在尝试使用 networkD3
包和这个简单的代码来生成桑基图:
# Plot the Sankey diagram directly from the two data frames in `landuse`;
# the link and node columns are referenced by name.
sankeyNetwork(
  Links = landuse$links,
  Nodes = landuse$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "km²",
  fontSize = 12,
  nodeWidth = 30
)
我收到了这条消息:
Warning message:
It looks like Source/Target is not zero-indexed. This is required in JavaScript and so your plot may not render.
但即使我把 target/source 改成从零开始的索引,也没有绘制出任何内容。我的数据与这个示例(example)的格式相同,所以我想知道问题可能出在哪里。
编辑:
我的数据中存在自引用和循环引用。是否可以用这个包基于此类数据绘制图表?
好吧,由于 sankeyNetwork 的构建方式,links 中的索引需要从 0 开始。从 landuse 可以看出,你的数据是从 3 开始的。
您可以重新索引 links,使其从 0 开始:
# Re-index the links so that they are zero-based, as networkD3 requires.
# `source`/`target` hold 1-based row positions in `landuse$nodes`, so simply
# subtracting a fixed offset of 3 would attach every link to the wrong
# (mostly NA) node name. Instead, keep only the nodes that are actually
# linked and renumber the links against that reduced node table.
used_rows <- sort(unique(c(landuse$links$source, landuse$links$target)))
landuse$links$source <- match(landuse$links$source, used_rows) - 1L
landuse$links$target <- match(landuse$links$target, used_rows) - 1L
landuse$nodes <- landuse$nodes[used_rows, , drop = FALSE]

sankeyNetwork(Links = landuse$links, Nodes = landuse$nodes, Source = "source",
              Target = "target", Value = "value", NodeID = "name",
              units = "km²", fontSize = 12, nodeWidth = 30)
当然,它看起来并不像您在问题中链接的那个桑基图。为什么?因为您的数据:
- 您有 "autoreferences": links,其中源和目标是同一个节点。这会生成那些在同一节点开始和结束的怪异半圆
- 您有 "circular references":links,其中源 'X' 转到目标 'Y',源 'Y' 转到目标 'Z',然后源 'Z' 又转回目标 'X'。这会生成那些奇怪的曲线
- 您的一些值比其他值小几个数量级,所以这些小值在图中很难看清。
您可能需要全面检查您的数据:
- 你真的对 "autoreferences" 感兴趣吗?如果没有,删除它们
- 您对循环引用感到满意,还是更喜欢复制节点以显示线性桑基?
- 您对显示非常小的节点感兴趣吗?如果没有,删除它们
根据您在其中一条评论(here)中链接的示例,您实际上并不需要自引用和循环引用;您想要的是为每个类别设置两个不同的节点,一个位于左列,一个位于右列(例如 left/1985 列中的 "Formação Florestal" 和 right/2017 列中的 "Formação Florestal")。
您可以使用您提供的数据来实现这一点,方法是将具有相同索引的源节点和目标节点区分为单独的节点,就像这样...
# Rebuild the `landuse` object from the question: a list with a `nodes` data
# frame (33 rows, one `name` column) and a `links` data frame (31 rows with
# `source`, `target` and `value`). The link endpoints are later used as row
# positions into `nodes`.
landuse <- local({
  # Only 8 of the 33 node slots carry a label; every other slot stays NA.
  node_names <- rep(NA_character_, 33L)
  node_names[c(3L, 4L, 15L, 19L, 21L, 24L, 25L, 33L)] <- c(
    "1.1.1. Formação Florestal",
    "1.1.2. Formação Savanica",
    "3.1. Pastagem",
    "3.2.1. Cultura Anual e Perene",
    "3.3. Mosaico de Agricultura e Pastagem",
    "4.2. Infraestrutura Urbana",
    "4.5. Outra Área não Vegetada",
    "5.1 Rio ou Lago ou Oceano"
  )

  # The links are listed grouped by source node, so the source column is a
  # run-length expansion of the distinct source indices.
  link_source <- rep(
    c(3L, 4L, 15L, 19L, 21L, 24L, 25L, 33L),
    times = c(3L, 6L, 7L, 4L, 6L, 1L, 3L, 1L)
  )

  link_target <- c(
    3L, 21L, 4L,                        # from node 3
    21L, 15L, 3L, 25L, 4L, 33L,         # from node 4
    19L, 15L, 21L, 3L, 25L, 4L, 33L,    # from node 15
    15L, 19L, 4L, 21L,                  # from node 19
    4L, 21L, 25L, 33L, 15L, 3L,         # from node 21
    4L,                                 # from node 24
    25L, 4L, 33L,                       # from node 25
    33L                                 # from node 33
  )

  link_value <- c(
    # from node 3
    0.544859347827813, 0.00354385993588971, 0.494359662221154,
    # from node 4
    4.67602736159475, 2.20248911690968, 0.501437742068369,
    0.00354375594818463, 24.8427814053755, 0.439418727642527,
    # from node 15
    0.0079740332093807, 11.8060486886398, 2.76329829691466,
    0.000886029792298199, 0.00177186270758855, 3.35504921147758,
    0.14263144351167,
    # from node 19
    1.12170804870686, 0.0478454594554582, 0.217079959877658,
    0.00620223918980076,
    # from node 21
    1.79754946594068, 9.02868098124075, 0.00442981113709027,
    0.242743895018645, 0.498770814980772, 0.00265782877794886,
    # from node 24
    0.000885894856554407,
    # from node 25
    0.379188333632346, 0.00265793188317263, 0.00265771537700804,
    # from node 33
    0.39158027235054
  )

  list(
    nodes = data.frame(name = node_names, stringsAsFactors = FALSE),
    links = data.frame(
      source = link_source,
      target = link_target,
      value = link_value,
      stringsAsFactors = FALSE
    )
  )
})
library(networkD3)

# Give every land-use class two distinct labels -- one for the left (1985)
# column and one for the right (2017) column -- so that a class flowing into
# itself becomes an ordinary left-to-right link.
links <- with(landuse, data.frame(
  source = paste0(nodes$name[links$source], " (1985)"),
  target = paste0(nodes$name[links$target], " (2017)"),
  value = links$value,
  stringsAsFactors = FALSE
))

# One node per distinct label, in order of first appearance (sources first).
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# Replace the endpoint labels by their zero-based position in `nodes`.
links[c("source", "target")] <- lapply(
  links[c("source", "target")],
  function(label) match(label, nodes$name) - 1
)

# Strip the 7-character " (1985)" / " (2017)" suffix so both columns display
# the bare class name.
nodes$name <- substring(nodes$name, first = 1, last = nchar(nodes$name) - 7)

# Draw the diagram.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "km²",
  fontSize = 12,
  nodeWidth = 30
)
我有这个数据:
# dput() representation of the `landuse` list used in the question:
#   nodes - data frame with 33 rows and a single `name` column (mostly NA)
#   links - data frame with 31 rows and columns `source`, `target`, `value`
list(nodes = structure(list(name = c(NA, NA, "1.1.1. Formação Florestal",
"1.1.2. Formação Savanica", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "3.1. Pastagem", NA, NA, NA, "3.2.1. Cultura Anual e Perene",
NA, "3.3. Mosaico de Agricultura e Pastagem", NA, NA, "4.2. Infraestrutura Urbana",
"4.5. Outra Área não Vegetada", NA, NA, NA, NA, NA, NA, NA, "5.1 Rio ou Lago ou Oceano"
)), class = "data.frame", row.names = c(NA, -33L)), links = structure(list(
source = c(3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 19L, 19L, 19L, 19L, 21L, 21L, 21L,
21L, 21L, 21L, 24L, 25L, 25L, 25L, 33L), target = c(3L, 21L,
4L, 21L, 15L, 3L, 25L, 4L, 33L, 19L, 15L, 21L, 3L, 25L, 4L,
33L, 15L, 19L, 4L, 21L, 4L, 21L, 25L, 33L, 15L, 3L, 4L, 25L,
4L, 33L, 33L), value = c(0.544859347827813, 0.00354385993588971,
0.494359662221154, 4.67602736159475, 2.20248911690968, 0.501437742068369,
0.00354375594818463, 24.8427814053755, 0.439418727642527,
0.0079740332093807, 11.8060486886398, 2.76329829691466, 0.000886029792298199,
0.00177186270758855, 3.35504921147758, 0.14263144351167,
1.12170804870686, 0.0478454594554582, 0.217079959877658,
0.00620223918980076, 1.79754946594068, 9.02868098124075,
0.00442981113709027, 0.242743895018645, 0.498770814980772,
0.00265782877794886, 0.000885894856554407, 0.379188333632346,
0.00265793188317263, 0.00265771537700804, 0.39158027235054
)), row.names = c(NA, -31L), class = "data.frame"))
我正在尝试使用 networkD3
包和这个简单的代码来生成桑基图:
# Plot the Sankey diagram directly from the two data frames in `landuse`;
# the link and node columns are referenced by name.
sankeyNetwork(
  Links = landuse$links,
  Nodes = landuse$nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "km²",
  fontSize = 12,
  nodeWidth = 30
)
我收到了这条消息:
Warning message:
It looks like Source/Target is not zero-indexed. This is required in JavaScript and so your plot may not render.
但即使我把 target/source 改成从零开始的索引,也没有绘制出任何内容。我的数据与这个示例(example)的格式相同,所以我想知道问题可能出在哪里。
编辑:
我的数据中存在自引用和循环引用。是否可以用这个包基于此类数据绘制图表?
好吧,由于 sankeyNetwork 的构建方式,links 中的索引需要从 0 开始。从 landuse 可以看出,你的数据是从 3 开始的。
您可以重新索引 links,使其从 0 开始:
# Re-index the links so that they start from 0 (zero-based), as networkD3
# requires.
# `source`/`target` hold 1-based row positions in `landuse$nodes`, so simply
# subtracting a fixed offset of 3 would attach every link to the wrong
# (mostly NA) node name. Instead, keep only the nodes that are actually
# linked and renumber the links against that reduced node table.
used_rows <- sort(unique(c(landuse$links$source, landuse$links$target)))
landuse$links$source <- match(landuse$links$source, used_rows) - 1L
landuse$links$target <- match(landuse$links$target, used_rows) - 1L
landuse$nodes <- landuse$nodes[used_rows, , drop = FALSE]

sankeyNetwork(Links = landuse$links, Nodes = landuse$nodes, Source = "source",
              Target = "target", Value = "value", NodeID = "name",
              units = "km²", fontSize = 12, nodeWidth = 30)
当然,它看起来并不像您在问题中链接的那个桑基图。为什么?因为您的数据:
- 您有 "autoreferences": links,其中源和目标是同一个节点。这会生成那些在同一节点开始和结束的怪异半圆
- 您有 "circular references":links,其中源 'X' 转到目标 'Y',源 'Y' 转到目标 'Z',然后源 'Z' 又转回目标 'X'。这会生成那些奇怪的曲线
- 您的一些值比其他值小几个数量级,所以这些小值在图中很难看清。
您可能需要全面检查您的数据:
- 你真的对 "autoreferences" 感兴趣吗?如果没有,删除它们
- 您对循环引用感到满意,还是更喜欢复制节点以显示线性桑基?
- 您对显示非常小的节点感兴趣吗?如果没有,删除它们
根据您在其中一条评论(here)中链接的示例,您实际上并不需要自引用和循环引用;您想要的是为每个类别设置两个不同的节点,一个位于左列,一个位于右列(例如 left/1985 列中的 "Formação Florestal" 和 right/2017 列中的 "Formação Florestal")。
您可以使用您提供的数据来实现这一点,方法是将具有相同索引的源节点和目标节点区分为单独的节点,就像这样...
# Rebuild the `landuse` object from the question: a list with a `nodes` data
# frame (33 rows, one `name` column) and a `links` data frame (31 rows with
# `source`, `target` and `value`). The link endpoints are later used as row
# positions into `nodes`.
landuse <- local({
  # Only 8 of the 33 node slots carry a label; every other slot stays NA.
  node_names <- rep(NA_character_, 33L)
  node_names[c(3L, 4L, 15L, 19L, 21L, 24L, 25L, 33L)] <- c(
    "1.1.1. Formação Florestal",
    "1.1.2. Formação Savanica",
    "3.1. Pastagem",
    "3.2.1. Cultura Anual e Perene",
    "3.3. Mosaico de Agricultura e Pastagem",
    "4.2. Infraestrutura Urbana",
    "4.5. Outra Área não Vegetada",
    "5.1 Rio ou Lago ou Oceano"
  )

  # The links are listed grouped by source node, so the source column is a
  # run-length expansion of the distinct source indices.
  link_source <- rep(
    c(3L, 4L, 15L, 19L, 21L, 24L, 25L, 33L),
    times = c(3L, 6L, 7L, 4L, 6L, 1L, 3L, 1L)
  )

  link_target <- c(
    3L, 21L, 4L,                        # from node 3
    21L, 15L, 3L, 25L, 4L, 33L,         # from node 4
    19L, 15L, 21L, 3L, 25L, 4L, 33L,    # from node 15
    15L, 19L, 4L, 21L,                  # from node 19
    4L, 21L, 25L, 33L, 15L, 3L,         # from node 21
    4L,                                 # from node 24
    25L, 4L, 33L,                       # from node 25
    33L                                 # from node 33
  )

  link_value <- c(
    # from node 3
    0.544859347827813, 0.00354385993588971, 0.494359662221154,
    # from node 4
    4.67602736159475, 2.20248911690968, 0.501437742068369,
    0.00354375594818463, 24.8427814053755, 0.439418727642527,
    # from node 15
    0.0079740332093807, 11.8060486886398, 2.76329829691466,
    0.000886029792298199, 0.00177186270758855, 3.35504921147758,
    0.14263144351167,
    # from node 19
    1.12170804870686, 0.0478454594554582, 0.217079959877658,
    0.00620223918980076,
    # from node 21
    1.79754946594068, 9.02868098124075, 0.00442981113709027,
    0.242743895018645, 0.498770814980772, 0.00265782877794886,
    # from node 24
    0.000885894856554407,
    # from node 25
    0.379188333632346, 0.00265793188317263, 0.00265771537700804,
    # from node 33
    0.39158027235054
  )

  list(
    nodes = data.frame(name = node_names, stringsAsFactors = FALSE),
    links = data.frame(
      source = link_source,
      target = link_target,
      value = link_value,
      stringsAsFactors = FALSE
    )
  )
})
library(networkD3)

# Give every land-use class two distinct labels -- one for the left (1985)
# column and one for the right (2017) column -- so that a class flowing into
# itself becomes an ordinary left-to-right link.
links <- with(landuse, data.frame(
  source = paste0(nodes$name[links$source], " (1985)"),
  target = paste0(nodes$name[links$target], " (2017)"),
  value = links$value,
  stringsAsFactors = FALSE
))

# One node per distinct label, in order of first appearance (sources first).
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# Replace the endpoint labels by their zero-based position in `nodes`.
links[c("source", "target")] <- lapply(
  links[c("source", "target")],
  function(label) match(label, nodes$name) - 1
)

# Strip the 7-character " (1985)" / " (2017)" suffix so both columns display
# the bare class name.
nodes$name <- substring(nodes$name, first = 1, last = nchar(nodes$name) - 7)

# Draw the diagram.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  units = "km²",
  fontSize = 12,
  nodeWidth = 30
)