生成给定大小的所有有向图直到同构
Generate all digraphs of a given size up to isomorphism
我正在尝试生成具有最多 graph isomorphism 个给定节点数的所有有向图,以便我可以将它们提供给另一个 Python 程序。这是一个使用 NetworkX 的简单参考实现,我想加快它的速度:
from itertools import combinations, product
import networkx as nx
def generate_digraphs(n):
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for edge_mask in product([True, False], repeat=len(possible_edges)):
edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
g = nx.DiGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
graphs_so_far.append(g)
return graphs_so_far
assert len(generate_digraphs(1)) == 1
assert len(generate_digraphs(2)) == 3
assert len(generate_digraphs(3)) == 16
此类图表的数量似乎增长得相当快,由此 OEIS sequence 给出。我正在寻找一种能够在合理的时间内生成最多 7 个节点(总共大约十亿张图)的所有图的解决方案。
将图形表示为 NetworkX 对象不是很重要;例如,用邻接表表示图形或使用不同的库对我来说很好。
98-99% 的计算时间用于同构测试,因此游戏的名称是减少必要测试的数量。
在这里,我分批创建图,以便只在一个批次内测试图的同构。
在第一个变体(下面的版本 2)中,一个批次中的所有图都具有相同数量的边。这导致 运行 时间有明显但适度的改进(对于大小为 4 的图快 2.5 倍,对于更大的图速度提高更大)。
在第二个变体(下面的版本 3)中,一个批次中的所有图形都具有相同的 out-degree 序列。这导致 运行 时间的显着改进(对于大小为 4 的图快 35 倍,对于更大的图速度提高更大)。
在第三个变体(下面的版本 4)中,一个批次中的所有图都具有相同的 out-degree 序列。此外,在一个批次中,所有图形都按 in-degree 序列排序。与版本 3 相比,这导致速度略有提高(对于大小为 4 的图形快 1.3 倍;对于大小为 5 的图形快 2.1 倍)。
#!/usr/bin/env python
"""
Efficient motif generation.
"""
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from timeit import timeit
from itertools import combinations, product, chain, combinations_with_replacement
# for profiling with kernprof/line_profiler
try:
profile
except NameError:
profile = lambda x: x
@profile
def version_1(n):
"""Original implementation by @hilberts_drinking_problem"""
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for edge_mask in product([True, False], repeat=len(possible_edges)):
edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
g = nx.DiGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
graphs_so_far.append(g)
return graphs_so_far
@profile
def version_2(n):
"""Creates graphs in batches, where each batch contains graphs with
the same number of edges. Only graphs within a batch have to be tested
for isomorphisms."""
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for ii in range(len(possible_edges)+1):
tmp = []
for edges in combinations(possible_edges, ii):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
tmp.append(g)
graphs_so_far.extend(tmp)
return graphs_so_far
@profile
def version_3(n):
"""Creates graphs in batches, where each batch contains graphs with
the same out-degree sequence. Only graphs within a batch have to be tested
for isomorphisms."""
graphs_so_far = list()
outdegree_sequences_so_far = list()
for outdegree_sequence in product(*[range(n) for _ in range(n)]):
# skip degree sequences which we have already seen as the resulting graphs will be isomorphic
if sorted(outdegree_sequence) not in outdegree_sequences_so_far:
tmp = []
for edges in generate_graphs(outdegree_sequence):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
tmp.append(g)
graphs_so_far.extend(tmp)
outdegree_sequences_so_far.append(sorted(outdegree_sequence))
return graphs_so_far
def generate_graphs(outdegree_sequence):
"""Generates all directed graphs with a given out-degree sequence."""
for edges in product(*[generate_edges(node, degree, len(outdegree_sequence)) \
for node, degree in enumerate(outdegree_sequence)]):
yield(list(chain(*edges)))
def generate_edges(node, outdegree, total_nodes):
"""Generates all edges for a given node with a given out-degree and a given graph size."""
for targets in combinations(set(range(total_nodes)) - {node}, outdegree):
yield([(node, target) for target in targets])
@profile
def version_4(n):
"""Creates graphs in batches, where each batch contains graphs with
the same out-degree sequence. Within a batch, graphs are sorted
by in-degree sequence, such that only graphs with the same
in-degree sequence have to be tested for isomorphism.
"""
graphs_so_far = list()
for outdegree_sequence in combinations_with_replacement(range(n), n):
tmp = dict()
for edges in generate_graphs(outdegree_sequence):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
indegree_sequence = tuple(sorted(degree for _, degree in g.in_degree()))
if indegree_sequence in tmp:
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp[indegree_sequence]):
tmp[indegree_sequence].append(g)
else:
tmp[indegree_sequence] = [g]
for graphs in tmp.values():
graphs_so_far.extend(graphs)
return graphs_so_far
if __name__ == '__main__':
order = range(1, 5)
t1 = [timeit(lambda : version_1(n), number=3) for n in order]
t2 = [timeit(lambda : version_2(n), number=3) for n in order]
t3 = [timeit(lambda : version_3(n), number=3) for n in order]
t4 = [timeit(lambda : version_4(n), number=3) for n in order]
fig, ax = plt.subplots()
for ii, t in enumerate([t1, t2, t3, t4]):
ax.plot(t, label=f"Version no. {ii+1}")
ax.set_yscale('log')
ax.set_ylabel('Execution time [s]')
ax.set_xlabel('Graph order')
ax.legend()
plt.show()
除了使用 nx.is_isomorphic
比较两个图 G1 和 G2,您还可以生成所有与 G1 同构的图并检查 G2 是否在此集合中。
起初,这听起来比较麻烦,但它允许您不仅可以检查 G2 是否与 G1 同构,还可以检查任何图是否与 G1 同构,而 nx.is_isomorphic
在比较两个图时总是从头开始。
为了方便起见,每个图都存储为边列表。
如果所有边的集合相同,则两个图相同(非同构)。
始终确保边列表是一个排序的元组,这样 ==
就可以准确地测试相等性并使边列表可散列。
import itertools
def all_digraphs(n):
possible_edges = [
(i, j) for i, j in itertools.product(range(n), repeat=2) if i != j
]
for edge_mask in itertools.product([True, False], repeat=len(possible_edges)):
# The result is already sorted
yield tuple(edge for include, edge in zip(edge_mask, possible_edges) if include)
def unique_digraphs(n):
already_seen = set()
for graph in all_digraphs(n):
if graph not in already_seen:
yield graph
already_seen |= {
tuple(sorted((perm[i], perm[j]) for i, j in graph))
for perm in itertools.permutations(range(n))
}
与之前解决方案的变体相比,这在我的机器上给出了以下计时:
这一切看起来很有希望,但是对于 6 个节点,我的 16GiB 内存已经不够用了,Python 进程被操作系统终止了。
我确定您可以将此代码与为每个 outdegree_sequence
批量生成图表结合起来,如上一个答案中所述。
这将允许在每批后清空 already_seen
并大大减少内存消耗。
我从 Brendan McKay 的论文中学到了一个有用的想法
“Isomorph-free exhaustive generation”(虽然我相信它早于
那篇论文)。
想法是我们可以将同构classes组织成一棵树,
其中空图的单例 class 是根,每个
class 具有 n > 0 个节点的图具有 parent class 具有图
有 n − 1 个节点。枚举图的同构 classes
n > 0 个节点,枚举 n − 1 图的同构 classes
节点,并且对于每个这样的class,将其代表扩展到所有
n 个节点并过滤掉实际上不是的节点的可能方法
children.
下面的 Python 代码用一个基本的但是
非平凡图同构子例程。 n = 需要几分钟
6 和(此处估计)n = 7 的大约几天。对于额外的
速度,将其移植到 C++,也许会找到更好的算法来处理
置换群(可能在 TAoCP 中,尽管大多数图没有
对称性,所以不清楚收益有多大)。
import cProfile
import collections
import itertools
import random
# Returns labels approximating the orbits of graph. Two nodes in the same orbit
# have the same label, but two nodes in different orbits don't necessarily have
# different labels.
def invariant_labels(graph, n):
labels = [1] * n
for r in range(2):
incoming = [0] * n
outgoing = [0] * n
for i, j in graph:
incoming[j] += labels[i]
outgoing[i] += labels[j]
for i in range(n):
labels[i] = hash((incoming[i], outgoing[i]))
return labels
# Returns the inverse of perm.
def inverse_permutation(perm):
n = len(perm)
inverse = [None] * n
for i in range(n):
inverse[perm[i]] = i
return inverse
# Returns the permutation that sorts by label.
def label_sorting_permutation(labels):
n = len(labels)
return inverse_permutation(sorted(range(n), key=lambda i: labels[i]))
# Returns the graph where node i becomes perm[i] .
def permuted_graph(perm, graph):
perm_graph = [(perm[i], perm[j]) for (i, j) in graph]
perm_graph.sort()
return perm_graph
# Yields each permutation generated by swaps of two consecutive nodes with the
# same label.
def label_stabilizer(labels):
n = len(labels)
factors = (
itertools.permutations(block)
for (_, block) in itertools.groupby(range(n), key=lambda i: labels[i])
)
for subperms in itertools.product(*factors):
yield [i for subperm in subperms for i in subperm]
# Returns the canonical labeled graph isomorphic to graph.
def canonical_graph(graph, n):
labels = invariant_labels(graph, n)
sorting_perm = label_sorting_permutation(labels)
graph = permuted_graph(sorting_perm, graph)
labels.sort()
return max(
(permuted_graph(perm, graph), perm[sorting_perm[n - 1]])
for perm in label_stabilizer(labels)
)
# Returns the list of permutations that stabilize graph.
def graph_stabilizer(graph, n):
return [
perm
for perm in label_stabilizer(invariant_labels(graph, n))
if permuted_graph(perm, graph) == graph
]
# Yields the subsets of range(n) .
def power_set(n):
for r in range(n + 1):
for s in itertools.combinations(range(n), r):
yield list(s)
# Returns the set where i becomes perm[i] .
def permuted_set(perm, s):
perm_s = [perm[i] for i in s]
perm_s.sort()
return perm_s
# If s is canonical, returns the list of permutations in group that stabilize s.
# Otherwise, returns None.
def set_stabilizer(s, group):
stabilizer = []
for perm in group:
perm_s = permuted_set(perm, s)
if perm_s < s:
return None
if perm_s == s:
stabilizer.append(perm)
return stabilizer
# Yields one representative of each isomorphism class.
def enumerate_graphs(n):
assert 0 <= n
if 0 == n:
yield []
return
for subgraph in enumerate_graphs(n - 1):
sub_stab = graph_stabilizer(subgraph, n - 1)
for incoming in power_set(n - 1):
in_stab = set_stabilizer(incoming, sub_stab)
if not in_stab:
continue
for outgoing in power_set(n - 1):
out_stab = set_stabilizer(outgoing, in_stab)
if not out_stab:
continue
graph, i_star = canonical_graph(
subgraph
+ [(i, n - 1) for i in incoming]
+ [(n - 1, j) for j in outgoing],
n,
)
if i_star == n - 1:
yield graph
def test():
print(sum(1 for graph in enumerate_graphs(5)))
cProfile.run("test()")
我正在尝试生成具有最多 graph isomorphism 个给定节点数的所有有向图,以便我可以将它们提供给另一个 Python 程序。这是一个使用 NetworkX 的简单参考实现,我想加快它的速度:
from itertools import combinations, product
import networkx as nx
def generate_digraphs(n):
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for edge_mask in product([True, False], repeat=len(possible_edges)):
edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
g = nx.DiGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
graphs_so_far.append(g)
return graphs_so_far
assert len(generate_digraphs(1)) == 1
assert len(generate_digraphs(2)) == 3
assert len(generate_digraphs(3)) == 16
此类图表的数量似乎增长得相当快,由此 OEIS sequence 给出。我正在寻找一种能够在合理的时间内生成最多 7 个节点(总共大约十亿张图)的所有图的解决方案。
将图形表示为 NetworkX 对象不是很重要;例如,用邻接表表示图形或使用不同的库对我来说很好。
98-99% 的计算时间用于同构测试,因此游戏的名称是减少必要测试的数量。 在这里,我分批创建图,以便只在一个批次内测试图的同构。
在第一个变体(下面的版本 2)中,一个批次中的所有图都具有相同数量的边。这导致 运行 时间有明显但适度的改进(对于大小为 4 的图快 2.5 倍,对于更大的图速度提高更大)。
在第二个变体(下面的版本 3)中,一个批次中的所有图形都具有相同的 out-degree 序列。这导致 运行 时间的显着改进(对于大小为 4 的图快 35 倍,对于更大的图速度提高更大)。
在第三个变体(下面的版本 4)中,一个批次中的所有图都具有相同的 out-degree 序列。此外,在一个批次中,所有图形都按 in-degree 序列排序。与版本 3 相比,这导致速度略有提高(对于大小为 4 的图形快 1.3 倍;对于大小为 5 的图形快 2.1 倍)。
#!/usr/bin/env python
"""
Efficient motif generation.
"""
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from timeit import timeit
from itertools import combinations, product, chain, combinations_with_replacement
# for profiling with kernprof/line_profiler
try:
profile
except NameError:
profile = lambda x: x
@profile
def version_1(n):
"""Original implementation by @hilberts_drinking_problem"""
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for edge_mask in product([True, False], repeat=len(possible_edges)):
edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
g = nx.DiGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)
if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
graphs_so_far.append(g)
return graphs_so_far
@profile
def version_2(n):
"""Creates graphs in batches, where each batch contains graphs with
the same number of edges. Only graphs within a batch have to be tested
for isomorphisms."""
graphs_so_far = list()
nodes = list(range(n))
possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
for ii in range(len(possible_edges)+1):
tmp = []
for edges in combinations(possible_edges, ii):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
tmp.append(g)
graphs_so_far.extend(tmp)
return graphs_so_far
@profile
def version_3(n):
"""Creates graphs in batches, where each batch contains graphs with
the same out-degree sequence. Only graphs within a batch have to be tested
for isomorphisms."""
graphs_so_far = list()
outdegree_sequences_so_far = list()
for outdegree_sequence in product(*[range(n) for _ in range(n)]):
# skip degree sequences which we have already seen as the resulting graphs will be isomorphic
if sorted(outdegree_sequence) not in outdegree_sequences_so_far:
tmp = []
for edges in generate_graphs(outdegree_sequence):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
tmp.append(g)
graphs_so_far.extend(tmp)
outdegree_sequences_so_far.append(sorted(outdegree_sequence))
return graphs_so_far
def generate_graphs(outdegree_sequence):
"""Generates all directed graphs with a given out-degree sequence."""
for edges in product(*[generate_edges(node, degree, len(outdegree_sequence)) \
for node, degree in enumerate(outdegree_sequence)]):
yield(list(chain(*edges)))
def generate_edges(node, outdegree, total_nodes):
"""Generates all edges for a given node with a given out-degree and a given graph size."""
for targets in combinations(set(range(total_nodes)) - {node}, outdegree):
yield([(node, target) for target in targets])
@profile
def version_4(n):
"""Creates graphs in batches, where each batch contains graphs with
the same out-degree sequence. Within a batch, graphs are sorted
by in-degree sequence, such that only graphs with the same
in-degree sequence have to be tested for isomorphism.
"""
graphs_so_far = list()
for outdegree_sequence in combinations_with_replacement(range(n), n):
tmp = dict()
for edges in generate_graphs(outdegree_sequence):
g = nx.from_edgelist(edges, create_using=nx.DiGraph)
indegree_sequence = tuple(sorted(degree for _, degree in g.in_degree()))
if indegree_sequence in tmp:
if not any(nx.is_isomorphic(g_before, g) for g_before in tmp[indegree_sequence]):
tmp[indegree_sequence].append(g)
else:
tmp[indegree_sequence] = [g]
for graphs in tmp.values():
graphs_so_far.extend(graphs)
return graphs_so_far
if __name__ == '__main__':
order = range(1, 5)
t1 = [timeit(lambda : version_1(n), number=3) for n in order]
t2 = [timeit(lambda : version_2(n), number=3) for n in order]
t3 = [timeit(lambda : version_3(n), number=3) for n in order]
t4 = [timeit(lambda : version_4(n), number=3) for n in order]
fig, ax = plt.subplots()
for ii, t in enumerate([t1, t2, t3, t4]):
ax.plot(t, label=f"Version no. {ii+1}")
ax.set_yscale('log')
ax.set_ylabel('Execution time [s]')
ax.set_xlabel('Graph order')
ax.legend()
plt.show()
除了使用 nx.is_isomorphic
比较两个图 G1 和 G2,您还可以生成所有与 G1 同构的图并检查 G2 是否在此集合中。
起初,这听起来比较麻烦,但它允许您不仅可以检查 G2 是否与 G1 同构,还可以检查任何图是否与 G1 同构,而 nx.is_isomorphic
在比较两个图时总是从头开始。
为了方便起见,每个图都存储为边列表。
如果所有边的集合相同,则两个图相同(非同构)。
始终确保边列表是一个排序的元组,这样 ==
就可以准确地测试相等性并使边列表可散列。
import itertools
def all_digraphs(n):
possible_edges = [
(i, j) for i, j in itertools.product(range(n), repeat=2) if i != j
]
for edge_mask in itertools.product([True, False], repeat=len(possible_edges)):
# The result is already sorted
yield tuple(edge for include, edge in zip(edge_mask, possible_edges) if include)
def unique_digraphs(n):
already_seen = set()
for graph in all_digraphs(n):
if graph not in already_seen:
yield graph
already_seen |= {
tuple(sorted((perm[i], perm[j]) for i, j in graph))
for perm in itertools.permutations(range(n))
}
与之前解决方案的变体相比,这在我的机器上给出了以下计时:
这一切看起来很有希望,但是对于 6 个节点,我的 16GiB 内存已经不够用了,Python 进程被操作系统终止了。
我确定您可以将此代码与为每个 outdegree_sequence
批量生成图表结合起来,如上一个答案中所述。
这将允许在每批后清空 already_seen
并大大减少内存消耗。
我从 Brendan McKay 的论文中学到了一个有用的想法 “Isomorph-free exhaustive generation”(虽然我相信它早于 那篇论文)。
想法是我们可以将同构classes组织成一棵树, 其中空图的单例 class 是根,每个 class 具有 n > 0 个节点的图具有 parent class 具有图 有 n − 1 个节点。枚举图的同构 classes n > 0 个节点,枚举 n − 1 图的同构 classes 节点,并且对于每个这样的class,将其代表扩展到所有 n 个节点并过滤掉实际上不是的节点的可能方法 children.
下面的 Python 代码用一个基本的但是 非平凡图同构子例程。 n = 需要几分钟 6 和(此处估计)n = 7 的大约几天。对于额外的 速度,将其移植到 C++,也许会找到更好的算法来处理 置换群(可能在 TAoCP 中,尽管大多数图没有 对称性,所以不清楚收益有多大)。
import cProfile
import collections
import itertools
import random
# Returns labels approximating the orbits of graph. Two nodes in the same orbit
# have the same label, but two nodes in different orbits don't necessarily have
# different labels.
def invariant_labels(graph, n):
labels = [1] * n
for r in range(2):
incoming = [0] * n
outgoing = [0] * n
for i, j in graph:
incoming[j] += labels[i]
outgoing[i] += labels[j]
for i in range(n):
labels[i] = hash((incoming[i], outgoing[i]))
return labels
# Returns the inverse of perm.
def inverse_permutation(perm):
n = len(perm)
inverse = [None] * n
for i in range(n):
inverse[perm[i]] = i
return inverse
# Returns the permutation that sorts by label.
def label_sorting_permutation(labels):
n = len(labels)
return inverse_permutation(sorted(range(n), key=lambda i: labels[i]))
# Returns the graph where node i becomes perm[i] .
def permuted_graph(perm, graph):
perm_graph = [(perm[i], perm[j]) for (i, j) in graph]
perm_graph.sort()
return perm_graph
# Yields each permutation generated by swaps of two consecutive nodes with the
# same label.
def label_stabilizer(labels):
n = len(labels)
factors = (
itertools.permutations(block)
for (_, block) in itertools.groupby(range(n), key=lambda i: labels[i])
)
for subperms in itertools.product(*factors):
yield [i for subperm in subperms for i in subperm]
# Returns the canonical labeled graph isomorphic to graph.
def canonical_graph(graph, n):
labels = invariant_labels(graph, n)
sorting_perm = label_sorting_permutation(labels)
graph = permuted_graph(sorting_perm, graph)
labels.sort()
return max(
(permuted_graph(perm, graph), perm[sorting_perm[n - 1]])
for perm in label_stabilizer(labels)
)
# Returns the list of permutations that stabilize graph.
def graph_stabilizer(graph, n):
return [
perm
for perm in label_stabilizer(invariant_labels(graph, n))
if permuted_graph(perm, graph) == graph
]
# Yields the subsets of range(n) .
def power_set(n):
for r in range(n + 1):
for s in itertools.combinations(range(n), r):
yield list(s)
# Returns the set where i becomes perm[i] .
def permuted_set(perm, s):
perm_s = [perm[i] for i in s]
perm_s.sort()
return perm_s
# If s is canonical, returns the list of permutations in group that stabilize s.
# Otherwise, returns None.
def set_stabilizer(s, group):
stabilizer = []
for perm in group:
perm_s = permuted_set(perm, s)
if perm_s < s:
return None
if perm_s == s:
stabilizer.append(perm)
return stabilizer
# Yields one representative of each isomorphism class.
def enumerate_graphs(n):
assert 0 <= n
if 0 == n:
yield []
return
for subgraph in enumerate_graphs(n - 1):
sub_stab = graph_stabilizer(subgraph, n - 1)
for incoming in power_set(n - 1):
in_stab = set_stabilizer(incoming, sub_stab)
if not in_stab:
continue
for outgoing in power_set(n - 1):
out_stab = set_stabilizer(outgoing, in_stab)
if not out_stab:
continue
graph, i_star = canonical_graph(
subgraph
+ [(i, n - 1) for i in incoming]
+ [(n - 1, j) for j in outgoing],
n,
)
if i_star == n - 1:
yield graph
def test():
print(sum(1 for graph in enumerate_graphs(5)))
cProfile.run("test()")