按属性聚类网络节点
Clustering network nodes by attributes
我有一个类似下面 d 的 data.frame。
# Example data: 48 records, one per row.
#   ID   - record identifier (character)
#   Type - "A" or "B" (character)
#   Set  - integer set number; rows sharing a value belong to the same set
#   Loc  - integer location code
d <- data.frame(
  ID = c(
    "KP1009", "GP3040", "KP1757", "GP2243",
    "KP682", "KP1789", "KP1933", "KP1662", "KP1718", "GP3339", "GP4007",
    "GP3398", "GP6720", "KP808", "KP1154", "KP748", "GP4263", "GP1132",
    "GP5881", "GP6291", "KP1004", "KP1998", "GP4123", "GP5930", "KP1070",
    "KP905", "KP579", "KP1100", "KP587", "GP913", "GP4864", "KP1513",
    "GP5979", "KP730", "KP1412", "KP615", "KP1315", "KP993", "GP1521",
    "KP1034", "KP651", "GP2876", "GP4715", "GP5056", "GP555", "GP408",
    "GP4217", "GP641"
  ),
  Type = c(
    "B", "A", "B", "A", "B", "B", "B",
    "B", "B", "A", "A", "A", "A", "B", "B", "B", "A", "A", "A", "A",
    "B", "B", "A", "A", "B", "B", "B", "B", "B", "A", "A", "B", "A",
    "B", "B", "B", "B", "B", "A", "B", "B", "A", "A", "A", "A", "A",
    "A", "A"
  ),
  Set = c(
    15L, 1L, 10L, 21L, 5L, 9L, 12L, 15L, 16L,
    19L, 22L, 3L, 12L, 22L, 15L, 25L, 10L, 25L, 12L, 3L, 10L, 8L,
    8L, 20L, 20L, 19L, 25L, 15L, 6L, 21L, 9L, 5L, 24L, 9L, 20L, 5L,
    2L, 2L, 11L, 9L, 16L, 10L, 21L, 4L, 1L, 8L, 5L, 11L
  ),
  Loc = c(
    3L,
    2L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 3L,
    2L, 2L, 2L, 3L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L,
    1L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L
  ),
  stringsAsFactors = FALSE
)
我正在尝试将 data.frame 中的集合(d$Set)可视化为网络图。
# Build an undirected co-membership graph from `d`: every row of `d` becomes a
# vertex, and two IDs are linked when they share the same `Set` value.
library(data.table)
library(igraph)

# Only sets that occur more than once can contribute an edge.
sets <- unique(d$Set[duplicated(d$Set)])

# One two-column edge table (V1, V2) per set, holding every unordered pair of
# member IDs. which() drops NA comparisons, as subset() did in the loop form.
rel <- lapply(sets, function(s) {
  as.data.frame(t(combn(d$ID[which(d$Set == s)], 2)))
})

# With no shared set at all, fall back to an empty two-column edge table so
# the graph can still be built from the vertex table alone.
rel <- if (length(rel) > 0L) {
  rbindlist(rel)
} else {
  data.frame(V1 = character(0), V2 = character(0))
}

# The remaining columns of `d` are passed along as vertex metadata.
g <- graph_from_data_frame(rel, directed = FALSE, vertices = d)

# Type "A" is drawn red, every other type green.
V(g)$color <- ifelse(V(g)$Type == "A", "red", "green")

layout <- layout_with_fr(g, niter = 500)
plot(g, vertex.size = 8, vertex.label.cex = 0.9, layout = layout)
我根据 d$Type(通过 V(g)$Type)对节点进行了着色。
现在,我想把只含一种类型成员的集合归到一起,作为单独的一组来绘制,这样最终会得到三组集合:
- 包含 A 类型成员的集合
- 包含 B 类型成员的集合
- 包含类型 A 和 B 成员的集合
类似这样
如何使用 igraph 包实现这种聚类?
下面是我的解决方法,前提是已用上面的代码创建了 g 对象。这个问题比乍看起来更棘手,因为需要在组(连通分量/簇)这一级判断其成员是否包含多种颜色:
## Split `g` into three sub-graphs according to the vertex colours present in
## each connected component: green only, red only, or both.

## Find cluster membership:
comps <- components(g)
comp_id <- comps$membership

## Per-component counts of red and green vertices. Dedicated names are used so
## that neither base::c nor the input data frame `d` gets overwritten.
red_members <- tapply(V(g)$color == "red", comp_id, sum)
green_members <- tapply(V(g)$color == "green", comp_id, sum)

## Flag every vertex with whether its component holds any red / green vertex.
## Counts are looked up by component id (as a name) rather than by position.
V(g)$group_has_red <- as.vector(red_members[as.character(comp_id)] > 0)
V(g)$group_has_green <- as.vector(green_members[as.character(comp_id)] > 0)

## Create sub-graphs containing the appropriate membership:
has_red <- V(g)$group_has_red
has_green <- V(g)$group_has_green
g_mixed <- delete_vertices(g, which(!(has_red & has_green)))
g_red <- delete_vertices(g, which(!(has_red & !has_green)))
g_green <- delete_vertices(g, which(!(has_green & !has_red)))

## Draw the three groups side by side, then restore the previous panel layout.
old_par <- par(mfrow = c(1, 3))
plot(g_green, vertex.size = 8, vertex.label = NA)
plot(g_mixed, vertex.size = 8, vertex.label = NA)
plot(g_red, vertex.size = 8, vertex.label = NA)
par(old_par)
我有一个类似下面 d 的 data.frame。
# Example data: 48 records, one per row.
#   ID   - record identifier (character)
#   Type - "A" or "B" (character)
#   Set  - integer set number; rows sharing a value belong to the same set
#   Loc  - integer location code
d <- data.frame(
  ID = c(
    "KP1009", "GP3040", "KP1757", "GP2243",
    "KP682", "KP1789", "KP1933", "KP1662", "KP1718", "GP3339", "GP4007",
    "GP3398", "GP6720", "KP808", "KP1154", "KP748", "GP4263", "GP1132",
    "GP5881", "GP6291", "KP1004", "KP1998", "GP4123", "GP5930", "KP1070",
    "KP905", "KP579", "KP1100", "KP587", "GP913", "GP4864", "KP1513",
    "GP5979", "KP730", "KP1412", "KP615", "KP1315", "KP993", "GP1521",
    "KP1034", "KP651", "GP2876", "GP4715", "GP5056", "GP555", "GP408",
    "GP4217", "GP641"
  ),
  Type = c(
    "B", "A", "B", "A", "B", "B", "B",
    "B", "B", "A", "A", "A", "A", "B", "B", "B", "A", "A", "A", "A",
    "B", "B", "A", "A", "B", "B", "B", "B", "B", "A", "A", "B", "A",
    "B", "B", "B", "B", "B", "A", "B", "B", "A", "A", "A", "A", "A",
    "A", "A"
  ),
  Set = c(
    15L, 1L, 10L, 21L, 5L, 9L, 12L, 15L, 16L,
    19L, 22L, 3L, 12L, 22L, 15L, 25L, 10L, 25L, 12L, 3L, 10L, 8L,
    8L, 20L, 20L, 19L, 25L, 15L, 6L, 21L, 9L, 5L, 24L, 9L, 20L, 5L,
    2L, 2L, 11L, 9L, 16L, 10L, 21L, 4L, 1L, 8L, 5L, 11L
  ),
  Loc = c(
    3L,
    2L, 3L, 1L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 3L,
    2L, 2L, 2L, 3L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L,
    1L, 3L, 2L, 3L, 1L, 1L, 1L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L
  ),
  stringsAsFactors = FALSE
)
我正在尝试将 data.frame 中的集合(d$Set)可视化为网络图。
# Build an undirected co-membership graph from `d`: every row of `d` becomes a
# vertex, and two IDs are linked when they share the same `Set` value.
library(data.table)
library(igraph)

# Only sets that occur more than once can contribute an edge.
sets <- unique(d$Set[duplicated(d$Set)])

# One two-column edge table (V1, V2) per set, holding every unordered pair of
# member IDs. which() drops NA comparisons, as subset() did in the loop form.
rel <- lapply(sets, function(s) {
  as.data.frame(t(combn(d$ID[which(d$Set == s)], 2)))
})

# With no shared set at all, fall back to an empty two-column edge table so
# the graph can still be built from the vertex table alone.
rel <- if (length(rel) > 0L) {
  rbindlist(rel)
} else {
  data.frame(V1 = character(0), V2 = character(0))
}

# The remaining columns of `d` are passed along as vertex metadata.
g <- graph_from_data_frame(rel, directed = FALSE, vertices = d)

# Type "A" is drawn red, every other type green.
V(g)$color <- ifelse(V(g)$Type == "A", "red", "green")

layout <- layout_with_fr(g, niter = 500)
plot(g, vertex.size = 8, vertex.label.cex = 0.9, layout = layout)
我根据 d$Type(通过 V(g)$Type)对节点进行了着色。
现在,我想把只含一种类型成员的集合归到一起,作为单独的一组来绘制,这样最终会得到三组集合:
- 包含 A 类型成员的集合
- 包含 B 类型成员的集合
- 包含类型 A 和 B 成员的集合
类似这样
如何使用 igraph 包实现这种聚类?
下面是我的解决方法,前提是已用上面的代码创建了 g 对象。这个问题比乍看起来更棘手,因为需要在组(连通分量/簇)这一级判断其成员是否包含多种颜色:
## Split `g` into three sub-graphs according to the vertex colours present in
## each connected component: green only, red only, or both.

## Find cluster membership:
comps <- components(g)
comp_id <- comps$membership

## Per-component counts of red and green vertices. Dedicated names are used so
## that neither base::c nor the input data frame `d` gets overwritten.
red_members <- tapply(V(g)$color == "red", comp_id, sum)
green_members <- tapply(V(g)$color == "green", comp_id, sum)

## Flag every vertex with whether its component holds any red / green vertex.
## Counts are looked up by component id (as a name) rather than by position.
V(g)$group_has_red <- as.vector(red_members[as.character(comp_id)] > 0)
V(g)$group_has_green <- as.vector(green_members[as.character(comp_id)] > 0)

## Create sub-graphs containing the appropriate membership:
has_red <- V(g)$group_has_red
has_green <- V(g)$group_has_green
g_mixed <- delete_vertices(g, which(!(has_red & has_green)))
g_red <- delete_vertices(g, which(!(has_red & !has_green)))
g_green <- delete_vertices(g, which(!(has_green & !has_red)))

## Draw the three groups side by side, then restore the previous panel layout.
old_par <- par(mfrow = c(1, 3))
plot(g_green, vertex.size = 8, vertex.label = NA)
plot(g_mixed, vertex.size = 8, vertex.label = NA)
plot(g_red, vertex.size = 8, vertex.label = NA)
par(old_par)