在 R 中使用 presence/absence 数据构建共现网络
Co-occurrence networks using presence/absence data in R
我正在尝试为我的 presence/absence 细菌物种数据制作一个共现网络图,但我不确定如何处理它。我希望最终得到这样的结果(enter image description here):如果两个物种同时出现在同一位患者体内,它们之间就有一条连线,并且出现频率越高的物种圆圈越大。我最初尝试使用 widyr 和 tidygraph 包,但我不确定我的数据集(enter image description here)是否与它们兼容,因为它将患者作为列,将个体物种作为行。我最想知道的是可以使用哪些包/代码来处理我的数据集,或者应该如何调整我的数据集以使用这些包。
您可以使用矩阵叉积来获得共现矩阵。然后用 igraph 包把邻接矩阵转换成图就很简单了。试试这个:
library(igraph)

# Create fake presence/absence data set ----
# rows = patients
# cols = species
# (if your data has species in rows and patients in columns, transpose it
# with t() before continuing)
set.seed(2222)
df <- matrix(sample(c(TRUE, FALSE), 50, replace = TRUE), nrow = 5)
colnames(df) <- letters[1:10]

# Species-by-species co-occurrence matrix ----
# crossprod(df) computes t(df) %*% df: cell [i, j] is the number of patients
# in which species i and species j are both present. The column names of `df`
# are carried over as the dimnames of the result.
co_mat <- crossprod(df)

# The diagonal holds each species' own frequency; zero it so that the graph
# gets no self-loops.
diag(co_mat) <- 0

# Create graph from adjacency matrix ----
# Only the upper triangle is read (the matrix is symmetric).
# Edge weights are equal to the frequency of co-occurrence.
g <- graph_from_adjacency_matrix(co_mat, mode = "upper", weighted = TRUE)

# Store each species' frequency (number of patients it occurs in) as the
# vertex attribute `v_weight`, used below to scale the node size.
g <- set_vertex_attr(g, "v_weight", value = colSums(df))

plot(g, vertex.size = V(g)$v_weight * 5 + 5, edge.width = E(g)$weight * 5)
这是我们的假数据
a b c d e f g h i j
[1,] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
[2,] TRUE FALSE FALSE FALSE TRUE TRUE TRUE FALSE TRUE FALSE
[3,] FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
[5,] FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE
结果如下:
和 Istrel 一样,我也推荐 igraph。下面是使用 ggplot 的第二种解决方案:
library(ggnetwork)
library(ggplot2)
library(igraph)

# Sample data ----
# 100 x 15 binary matrix. Draw exactly one value per cell: drawing fewer
# values would be silently recycled by matrix() and produce duplicated
# columns.
set.seed(1)
mat <- matrix(rbinom(100 * 15, 1, 0.1), nrow = 100, ncol = 15)

# The rows of `mat` become the nodes of the network. This is not necessary for
# the example data, but in your case, if you want species as nodes and they
# are stored in columns, you have to transpose:
# mat <- t(mat)

# Optional: remove "empty cases" ----
# drop = FALSE keeps `mat` a matrix even if a single row or column is left.
# remove 0-sum columns
mat <- mat[, colSums(mat != 0) > 0, drop = FALSE]
# remove 0-sum rows
mat <- mat[rowSums(mat != 0) > 0, , drop = FALSE]

# Row-by-row adjacency matrix ----
# tcrossprod(mat) computes mat %*% t(mat): cell [i, j] counts the columns in
# which rows i and j are both 1.
mat.t <- tcrossprod(mat)

# Build the graph ----
# diag = FALSE ignores the diagonal, so no self-loops are created.
g <- graph_from_adjacency_matrix(
  mat.t,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)

# Node coordinates (see https://igraph.org/r/doc/layout_.html for different
# layouts)
layout <- as.matrix(layout_with_lgl(g))

# Node degree drives the node size in the plot.
node_degree <- centr_degree(g)$res

# Each node is drawn twice: a larger dark disc underneath a smaller light one,
# which gives every node a dark border.
# NOTE(review): `vertex.names` is the label column of the fortified data; its
# name may depend on the installed ggnetwork version -- confirm.
p <- ggplot(g, layout = layout, aes(x = x, y = y, xend = xend, yend = yend)) +
  # add e.g. curvature = 0.15 for curved edges
  geom_edges(color = "grey20", alpha = 0.2, size = 2) +
  geom_nodes(size = node_degree + 3, color = "darkolivegreen4", alpha = 1) +
  geom_nodes(size = node_degree, color = "darkolivegreen2", alpha = 1) +
  geom_nodetext(aes(label = vertex.names), size = 5) +
  theme_blank()
p
enter image description here
使用 ggplot 的美学映射(aesthetics):
# Variant that maps node size through a ggplot aesthetic.
# Reuses `g` (the igraph object) and `layout` (the coordinate matrix) created
# by the preceding ggnetwork snippet.

# Store each node's degree as the vertex attribute `Degree` so that it can be
# referenced inside aes().
V(g)$Degree <- centr_degree(g)$res

p <- ggplot(g, layout = layout, aes(x = x, y = y, xend = xend, yend = yend)) +
  # add e.g. curvature = 0.15 for curved edges
  geom_edges(color = "grey20", alpha = 0.2, size = 2) +
  geom_nodes(aes(size = Degree), color = "darkolivegreen2", alpha = 1) +
  # rescale the mapped degrees to point sizes between 5 and 16
  scale_size_continuous(range = c(5, 16)) +
  geom_nodetext(aes(label = vertex.names), size = 5) +
  theme_blank()
p
我正在尝试为我的 presence/absence 细菌物种数据制作一个共现网络图,但我不确定如何处理它。我希望最终得到这样的结果(enter image description here):如果两个物种同时出现在同一位患者体内,它们之间就有一条连线,并且出现频率越高的物种圆圈越大。我最初尝试使用 widyr 和 tidygraph 包,但我不确定我的数据集(enter image description here)是否与它们兼容,因为它将患者作为列,将个体物种作为行。我最想知道的是可以使用哪些包/代码来处理我的数据集,或者应该如何调整我的数据集以使用这些包。
您可以使用矩阵叉积来获得共现矩阵。然后用 igraph 包把邻接矩阵转换成图就很简单了。试试这个:
library(igraph)

# Create fake presence/absence data set ----
# rows = patients
# cols = species
# (if your data has species in rows and patients in columns, transpose it
# with t() before continuing)
set.seed(2222)
df <- matrix(sample(c(TRUE, FALSE), 50, replace = TRUE), nrow = 5)
colnames(df) <- letters[1:10]

# Species-by-species co-occurrence matrix ----
# crossprod(df) computes t(df) %*% df: cell [i, j] is the number of patients
# in which species i and species j are both present. The column names of `df`
# are carried over as the dimnames of the result.
co_mat <- crossprod(df)

# The diagonal holds each species' own frequency; zero it so that the graph
# gets no self-loops.
diag(co_mat) <- 0

# Create graph from adjacency matrix ----
# Only the upper triangle is read (the matrix is symmetric).
# Edge weights are equal to the frequency of co-occurrence.
g <- graph_from_adjacency_matrix(co_mat, mode = "upper", weighted = TRUE)

# Store each species' frequency (number of patients it occurs in) as the
# vertex attribute `v_weight`, used below to scale the node size.
g <- set_vertex_attr(g, "v_weight", value = colSums(df))

plot(g, vertex.size = V(g)$v_weight * 5 + 5, edge.width = E(g)$weight * 5)
这是我们的假数据
a b c d e f g h i j
[1,] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE
[2,] TRUE FALSE FALSE FALSE TRUE TRUE TRUE FALSE TRUE FALSE
[3,] FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
[5,] FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE FALSE
结果如下:
和 Istrel 一样,我也推荐 igraph。下面是使用 ggplot 的第二种解决方案:
library(ggnetwork)
library(ggplot2)
library(igraph)

# Sample data ----
# 100 x 15 binary matrix. Draw exactly one value per cell: drawing fewer
# values would be silently recycled by matrix() and produce duplicated
# columns.
set.seed(1)
mat <- matrix(rbinom(100 * 15, 1, 0.1), nrow = 100, ncol = 15)

# The rows of `mat` become the nodes of the network. This is not necessary for
# the example data, but in your case, if you want species as nodes and they
# are stored in columns, you have to transpose:
# mat <- t(mat)

# Optional: remove "empty cases" ----
# drop = FALSE keeps `mat` a matrix even if a single row or column is left.
# remove 0-sum columns
mat <- mat[, colSums(mat != 0) > 0, drop = FALSE]
# remove 0-sum rows
mat <- mat[rowSums(mat != 0) > 0, , drop = FALSE]

# Row-by-row adjacency matrix ----
# tcrossprod(mat) computes mat %*% t(mat): cell [i, j] counts the columns in
# which rows i and j are both 1.
mat.t <- tcrossprod(mat)

# Build the graph ----
# diag = FALSE ignores the diagonal, so no self-loops are created.
g <- graph_from_adjacency_matrix(
  mat.t,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)

# Node coordinates (see https://igraph.org/r/doc/layout_.html for different
# layouts)
layout <- as.matrix(layout_with_lgl(g))

# Node degree drives the node size in the plot.
node_degree <- centr_degree(g)$res

# Each node is drawn twice: a larger dark disc underneath a smaller light one,
# which gives every node a dark border.
# NOTE(review): `vertex.names` is the label column of the fortified data; its
# name may depend on the installed ggnetwork version -- confirm.
p <- ggplot(g, layout = layout, aes(x = x, y = y, xend = xend, yend = yend)) +
  # add e.g. curvature = 0.15 for curved edges
  geom_edges(color = "grey20", alpha = 0.2, size = 2) +
  geom_nodes(size = node_degree + 3, color = "darkolivegreen4", alpha = 1) +
  geom_nodes(size = node_degree, color = "darkolivegreen2", alpha = 1) +
  geom_nodetext(aes(label = vertex.names), size = 5) +
  theme_blank()
p
enter image description here
使用 ggplot 的美学映射(aesthetics):
# Variant that maps node size through a ggplot aesthetic.
# Reuses `g` (the igraph object) and `layout` (the coordinate matrix) created
# by the preceding ggnetwork snippet.

# Store each node's degree as the vertex attribute `Degree` so that it can be
# referenced inside aes().
V(g)$Degree <- centr_degree(g)$res

p <- ggplot(g, layout = layout, aes(x = x, y = y, xend = xend, yend = yend)) +
  # add e.g. curvature = 0.15 for curved edges
  geom_edges(color = "grey20", alpha = 0.2, size = 2) +
  geom_nodes(aes(size = Degree), color = "darkolivegreen2", alpha = 1) +
  # rescale the mapped degrees to point sizes between 5 and 16
  scale_size_continuous(range = c(5, 16)) +
  geom_nodetext(aes(label = vertex.names), size = 5) +
  theme_blank()
p