riverplot 图中的节点排序
ordering nodes in riverplot
我目前正在使用 riverplot 包绘制河流图(riverplot)。
但是,我很难控制数据在图上的排列顺序。让我用一个例子来说明:
library(riverplot)
# Node table: one row per node, holding its ID and its horizontal position x.
df.nodes <- data.frame(
  ID = c(LETTERS[1:9], "K"),
  x = rep(c(1, 2, 3), times = c(4, 4, 2))
)
# Edge table: each edge runs from node N1 to node N2 with weight Value = 1.
df.edges <- data.frame(
  N1 = LETTERS[1:8],
  N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
  Value = rep(1, times = 8)
)
# Build the riverplot object from the node and edge tables and draw it.
ex.river <- makeRiver(nodes = df.nodes, edges = df.edges)
plot(ex.river)
如您所见,本例中的边相互交叉。当节点数量比这个示例更多时,图会变得非常混乱。
我的问题:有没有什么办法可以把节点排成下面这样的顺序?(以下不是代码,而是我希望在图上得到的节点排列)
D F
C E K
B H I
A G
各条边应该尽可能少地相互交叉。据我了解,节点的顺序由 df.nodes 决定,因此我应该以某种方式对 df.nodes 重新排序。
当然,我可以手动对 df.nodes 排序,但如果节点很多,就比较麻烦了。
非常感谢任何想法。
这是一个棘手的问题,下面的解法使用 data.table 进行了多次连接(join)操作。也许存在更巧妙的解决方案,但这个方法对给定的数据集是有效的。
基本思想是从左到右依次对节点和边进行排序。
数据
# Node table: character IDs and integer x positions (4 nodes at x = 1,
# 4 nodes at x = 2, 2 nodes at x = 3).
df.nodes <- data.frame(
  ID = c(LETTERS[1:9], "K"),
  x = rep(1:3, times = c(4L, 4L, 2L)),
  stringsAsFactors = FALSE
)
# Edge table: source node N1, target node N2, integer weight 1 for each edge.
df.edges <- data.frame(
  N1 = LETTERS[1:8],
  N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
  Value = rep.int(1L, 8L),
  stringsAsFactors = FALSE
)
library(data.table) # CRAN version 1.10.4 used
# Turn both data frames into data.tables. setDT() converts by reference, so
# df.edges / df.nodes themselves become data.tables; edt and ndt are merely
# shorter names bound to those same objects.
setDT(df.edges)
edt <- df.edges
setDT(df.nodes)
ndt <- df.nodes
获取边的位置
# Look up the x position of both end nodes of every edge:
# the inner join attaches x of the target node (N2), the outer join attaches
# x of the source node (N1). The result has the columns ID, x, i.ID, i.x, Value.
edt2 <- ndt[ndt[edt, on = c(ID = "N2")], on = c(ID = "N1")]
# give the joined columns descriptive names (renamed in place)
setnames(edt2, c("N1", "x1", "N2", "x2", "Value"))
# number the distinct (x1, x2) combinations from left to right;
# edges that connect the same pair of x positions share one e.pos id
edt2[order(x1, x2), e.pos := rleid(x1, x2)]
edt2
# N1 x1 N2 x2 Value e.pos
#1: A 1 G 2 1 1
#2: B 1 H 2 1 1
#3: C 1 E 2 1 1
#4: D 1 F 2 1 1
#5: E 2 K 3 1 2
#6: F 2 K 3 1 2
#7: G 2 I 3 1 2
#8: H 2 I 3 1 2
从左到右排序
# Initialize: number the nodes in the leftmost x position and attach that
# row number (rn1) to every edge starting there; all other edges get NA for now.
# min(x) is used instead of a hard-coded 1 so the layout need not start at 1.
edt2 <- ndt[x == min(x), .(N1 = ID, rn1 = .I)][edt2, on = "N1"]
# Walk over the edge layers from left to right and hand the sort order on.
# e.pos ids increase from left to right, but the rows of edt2 are not
# necessarily stored in that order, hence the explicit sort().
# The rightmost layer has no successor and is handled after the loop.
for (p in edt2[, head(sort(unique(e.pos)), -1L)]) {
  # target nodes of this layer inherit the row number of their source node
  edt2[p == e.pos, rn2 := rn1]
  # pass the row number of each target node on to the edges starting at it;
  # mult = "first" keeps exactly one row per edge even if a node has several
  # incoming edges in this layer (the first row number wins)
  edt2 <- edt2[p == e.pos, .(N1 = N2, rn1 = rn2)][
    edt2, on = "N1", mult = "first"]
  # prefer the freshly propagated row number, otherwise keep the previous one
  # (base R equivalent of coalesce(), so no additional package is needed)
  edt2[, rn1 := ifelse(is.na(rn1), i.rn1, rn1)][, i.rn1 := NULL]
}
# rightmost layer: target nodes inherit the row number of their source node
edt2[e.pos == max(e.pos), rn2 := rn1]
edt2
# N1 rn1 x1 N2 x2 Value e.pos rn2
#1: A 1 1 G 2 1 1 1
#2: B 2 1 H 2 1 1 2
#3: C 3 1 E 2 1 1 3
#4: D 4 1 F 2 1 1 4
#5: E 3 2 K 3 1 2 3
#6: F 4 2 K 3 1 2 4
#7: G 1 2 I 3 1 2 1
#8: H 2 2 I 3 1 2 2
从边表(edge table)中提取节点的排序顺序
# Collect the sort order of every node from the edge table: stack the
# (source node, rn1) pairs on top of the (target node, rn2) pairs, keep the
# first entry per node ID, and join the result onto the node table.
ndt <- unique(
  rbindlist(list(
    edt2[, .(ID = N1, rn = rn1)],
    edt2[, .(ID = N2, rn = rn2)]
  )),
  by = "ID"
)[ndt, on = "ID"]
ndt
# ID rn x
# 1: A 1 1
# 2: B 2 1
# 3: C 3 1
# 4: D 4 1
# 5: E 3 2
# 6: F 4 2
# 7: G 1 2
# 8: H 2 2
# 9: I 1 3
#10: K 3 3
创建河图
library(riverplot)
# Hand the node table over sorted by x position and sort order.
# makeRiver() type-checks its inputs, so both tables are converted back to
# plain data.frames with setDF() first.
ex.river <- makeRiver(
  nodes = setDF(ndt[order(x, rn), list(ID, x)]),
  edges = setDF(edt)
)
plot(ex.river)
我目前正在使用 riverplot 包绘制河流图(riverplot)。
但是,我很难控制数据在图上的排列顺序。让我用一个例子来说明:
library(riverplot)
# Node table: one row per node, holding its ID and its horizontal position x.
df.nodes <- data.frame(
  ID = c(LETTERS[1:9], "K"),
  x = rep(c(1, 2, 3), times = c(4, 4, 2))
)
# Edge table: each edge runs from node N1 to node N2 with weight Value = 1.
df.edges <- data.frame(
  N1 = LETTERS[1:8],
  N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
  Value = rep(1, times = 8)
)
# Build the riverplot object from the node and edge tables and draw it.
ex.river <- makeRiver(nodes = df.nodes, edges = df.edges)
plot(ex.river)
如您所见,本例中的边相互交叉。当节点数量比这个示例更多时,图会变得非常混乱。 我的问题:有没有什么办法可以把节点排成下面这样的顺序?(以下不是代码,而是我希望在图上得到的节点排列)
D F
C E K
B H I
A G
各条边应该尽可能少地相互交叉。据我了解,节点的顺序由 df.nodes 决定,因此我应该以某种方式对 df.nodes 重新排序。
当然,我可以手动对 df.nodes 排序,但如果节点很多,就比较麻烦了。
非常感谢任何想法。
这是一个棘手的问题,下面的解法使用 data.table 进行了多次连接(join)操作。也许存在更巧妙的解决方案,但这个方法对给定的数据集是有效的。
基本思想是从左到右依次对节点和边进行排序。
数据
# Node table: character IDs and integer x positions (4 nodes at x = 1,
# 4 nodes at x = 2, 2 nodes at x = 3).
df.nodes <- data.frame(
  ID = c(LETTERS[1:9], "K"),
  x = rep(1:3, times = c(4L, 4L, 2L)),
  stringsAsFactors = FALSE
)
# Edge table: source node N1, target node N2, integer weight 1 for each edge.
df.edges <- data.frame(
  N1 = LETTERS[1:8],
  N2 = c("G", "H", "E", "F", "K", "K", "I", "I"),
  Value = rep.int(1L, 8L),
  stringsAsFactors = FALSE
)
library(data.table) # CRAN version 1.10.4 used
# Turn both data frames into data.tables. setDT() converts by reference, so
# df.edges / df.nodes themselves become data.tables; edt and ndt are merely
# shorter names bound to those same objects.
setDT(df.edges)
edt <- df.edges
setDT(df.nodes)
ndt <- df.nodes
获取边的位置
# Look up the x position of both end nodes of every edge:
# the inner join attaches x of the target node (N2), the outer join attaches
# x of the source node (N1). The result has the columns ID, x, i.ID, i.x, Value.
edt2 <- ndt[ndt[edt, on = c(ID = "N2")], on = c(ID = "N1")]
# give the joined columns descriptive names (renamed in place)
setnames(edt2, c("N1", "x1", "N2", "x2", "Value"))
# number the distinct (x1, x2) combinations from left to right;
# edges that connect the same pair of x positions share one e.pos id
edt2[order(x1, x2), e.pos := rleid(x1, x2)]
edt2
# N1 x1 N2 x2 Value e.pos
#1: A 1 G 2 1 1
#2: B 1 H 2 1 1
#3: C 1 E 2 1 1
#4: D 1 F 2 1 1
#5: E 2 K 3 1 2
#6: F 2 K 3 1 2
#7: G 2 I 3 1 2
#8: H 2 I 3 1 2
从左到右排序
# Initialize: number the nodes in the leftmost x position and attach that
# row number (rn1) to every edge starting there; all other edges get NA for now.
# min(x) is used instead of a hard-coded 1 so the layout need not start at 1.
edt2 <- ndt[x == min(x), .(N1 = ID, rn1 = .I)][edt2, on = "N1"]
# Walk over the edge layers from left to right and hand the sort order on.
# e.pos ids increase from left to right, but the rows of edt2 are not
# necessarily stored in that order, hence the explicit sort().
# The rightmost layer has no successor and is handled after the loop.
for (p in edt2[, head(sort(unique(e.pos)), -1L)]) {
  # target nodes of this layer inherit the row number of their source node
  edt2[p == e.pos, rn2 := rn1]
  # pass the row number of each target node on to the edges starting at it;
  # mult = "first" keeps exactly one row per edge even if a node has several
  # incoming edges in this layer (the first row number wins)
  edt2 <- edt2[p == e.pos, .(N1 = N2, rn1 = rn2)][
    edt2, on = "N1", mult = "first"]
  # prefer the freshly propagated row number, otherwise keep the previous one
  # (base R equivalent of coalesce(), so no additional package is needed)
  edt2[, rn1 := ifelse(is.na(rn1), i.rn1, rn1)][, i.rn1 := NULL]
}
# rightmost layer: target nodes inherit the row number of their source node
edt2[e.pos == max(e.pos), rn2 := rn1]
edt2
# N1 rn1 x1 N2 x2 Value e.pos rn2
#1: A 1 1 G 2 1 1 1
#2: B 2 1 H 2 1 1 2
#3: C 3 1 E 2 1 1 3
#4: D 4 1 F 2 1 1 4
#5: E 3 2 K 3 1 2 3
#6: F 4 2 K 3 1 2 4
#7: G 1 2 I 3 1 2 1
#8: H 2 2 I 3 1 2 2
从边表(edge table)中提取节点的排序顺序
# Collect the sort order of every node from the edge table: stack the
# (source node, rn1) pairs on top of the (target node, rn2) pairs, keep the
# first entry per node ID, and join the result onto the node table.
ndt <- unique(
  rbindlist(list(
    edt2[, .(ID = N1, rn = rn1)],
    edt2[, .(ID = N2, rn = rn2)]
  )),
  by = "ID"
)[ndt, on = "ID"]
ndt
# ID rn x
# 1: A 1 1
# 2: B 2 1
# 3: C 3 1
# 4: D 4 1
# 5: E 3 2
# 6: F 4 2
# 7: G 1 2
# 8: H 2 2
# 9: I 1 3
#10: K 3 3
创建河图
library(riverplot)
# Hand the node table over sorted by x position and sort order.
# makeRiver() type-checks its inputs, so both tables are converted back to
# plain data.frames with setDF() first.
ex.river <- makeRiver(
  nodes = setDF(ndt[order(x, rn), list(ID, x)]),
  edges = setDF(edt)
)
plot(ex.river)