R igraph:查找 DAG 中某些边之间层次结构的算法

R igraph: algoritm to find the hierachy between certain edges in a DAG

我有一个表示河流网络的有向图 (DAG)。在一些河流(边缘)有流量测量站。我想根据最上游河流段的层次结构对这些站点进行排名。最上游的站将有一个 class 1。上游只有 1 个站等级的站将有一个 class 2,上游有 2 个站等级的站将有一个 class 3,依此类推. igraph 中是否有算法可以做到这一点?我在文档中搜索了“等级”、“层次结构”、“顺序”等术语,但没有找到与我想要执行的类似的内容。

我还使用了距最下游边缘(河网出口)的“距离”进行 class化,但它没有考虑站点之间的关系(距离非常不同的边缘可以根据河网配置具有相同的等级)...

有没有关于图形算法的建议?

这是class化的说明:

这是我用来测试的数据(与图片无关):

library(igraph)

vertices_df <- data.frame(
  id = c(418,410,402,394,386,427,378,370,362,354,346,338,330,322,314,306,298,290,282,274,595,266,419,395,258,250,242,234,226,218,210,202,194,186,178,170,162,146,138,130,122,114,106,98,90,82,74,66,58,50,42,34,26,18,10,2,587,579,571,563,555,547,539,531,523,515,28,12,154,507,499,491,483,475,467,459,451,443,435,411,403,387,379,371,363,355,347,339,331,323,315,307,299,291,283,275,267,259,251,243,235,227,219,211,203,195,187,179,171,163,155,147,139,131,123,115,107,99,91,83,75,67,59,51,43,35,27,19,11,3,20,4),
  station = c(1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0)
)

edges_df <- data.frame(
  from = c(410,402,394,378,427,403,370,362,354,346,338,330,322,314,306,298,290,282,274,595,587,395,107,3,250,218,226,26,58,66,146,34,18,98,106,74,130,507,571,563,443,547,539,531,523,515,28,20,499,491,483,475,467,459,451,435,411,355,387,379,347,371,331,307,195,291,115,147,171,43,99,123,227,259,27,2,234,10,386,419,194,202,186,122,170,162,258,178,266,242,114,210,154,11,75,90,42,82,50,138,579,67,323,555,179,211,243,219,267,12,4,35,315,363,19,299,51,187,91,155,275,163,339,203,283,131,139,59,83,235,251),
  to = c(418,410,402,394,386,427,378,370,362,354,346,338,330,322,314,306,298,290,282,274,595,266,419,395,258,250,242,234,226,218,210,202,194,186,178,170,162,587,579,571,563,555,547,539,531,523,515,28,507,499,491,483,475,467,459,451,443,435,411,403,387,379,371,363,355,347,339,331,323,315,307,299,291,283,275,418,410,402,394,386,370,362,354,346,338,330,322,314,306,298,282,274,595,419,395,250,234,226,218,210,587,579,571,563,555,547,539,531,523,515,28,507,491,483,467,459,451,443,435,411,403,387,379,355,347,331,323,307,299,291,283)
)

net <- graph.data.frame(
  edges_df[, c("from", "to")],
  directed=TRUE,
  vertices=vertices_df
)

所以我知道边 418、394、386、499、355、331、35 有一个站点,我想用属性 1、2、3、4 对它们进行排名...

在 igraph 中尝试了“距离”和“topo_sort”算法后,我最后一次尝试是使用 dfs:

res <- dfs(
  net,
  root = "418",
  neimode = "in",
  unreachable = TRUE,
  order = TRUE,
  order.out = TRUE,
  father = TRUE,
  dist = TRUE
)

res$dist[names(res$dist) %in% as.character(vertices_df[vertices_df$station == 1, "id"])]

所以我知道从最下游边开始的边顺序,但是我丢失了上游边之间的关系信息(不同距离的边可以具有相同的等级)。我不确定它会把我引向何方,所以我想知道是否有另一种图形算法更适合这个目的...

我会优先使用 R,因为工作流程的其他部分是使用该语言...

您可以试试下面的代码

# subset vertices of interest, i.e., `station == 1`
v <- as.character(with(vertices_df, id[station == 1]))

# find the sink node among `v`
snk <- v[which(colSums(distances(net, v, v, "out") == Inf) == 0)]

# find the paths to the sink node from all other nodes and assign ranks along the path
lst <- lapply(
  v[!v %in% snk],
  function(p) {
    s <- intersect(names(shortest_paths(net, p, snk)$vpath[[1]]), v)
    data.frame(id = s, vrank = seq_along(s))
  }
)

# aggreagte all info and take the max rank for each id
dfout <- aggregate(. ~ id, do.call(rbind, lst), max)

你会看到

> dfout
   id vrank
1 331     1
2  35     1
3 355     1
4 386     2
5 394     3
6 418     4
7 499     2