通过迭代删除高度节点来计算图中的三角形
Counting triangles in a graph by iteratively removing high-degree nodes
在具有大约 15 万个节点和 200 万条边的无向图上计算 nx.triangles(G)
目前非常慢(80 小时的规模)。如果节点度分布严重偏斜,使用以下过程计算三角形是否有问题?
import networkx as nx
def largest_degree_node(G):
    """Return the id of the node with the maximal degree in *G*.

    (Improved using a suggestion by Stef in the comments.)
    """
    # `G.degree()` yields (node, degree) pairs; pick the pair with the
    # largest degree and return only its node id.
    node, _degree = max(G.degree(), key=lambda pair: pair[1])
    return node
def count_triangles(G):
    """Count all triangles of *G* by iteratively peeling off the
    highest-degree node.

    Each triangle is counted exactly once: when the first of its three
    vertices is removed, that vertex's per-node triangle count includes
    the triangle, and the removal deletes the triangle from the working
    copy so the other two vertices never see it again.
    """
    work = G.copy()  # never mutate the caller's graph
    total = 0
    while len(work.nodes()):
        node = largest_degree_node(work)
        total += nx.triangles(work, nodes=[node])[node]
        work.remove_node(node)
    return total
# Build a random benchmark graph.
G = nx.erdos_renyi_graph(1000, 0.1)
# Reference: nx.triangles counts each triangle once per vertex, so the
# per-node counts sum to 3x the number of triangles.
triangles_nx = int(sum(nx.triangles(G).values()) / 3)
# Candidate: the iterative node-removal method.
triangles_iterative = count_triangles(G)
# Both counting strategies must agree.
assert int(triangles_nx) == int(triangles_iterative)
断言通过了,但我担心在某些边缘情况下这种迭代方法不起作用。
假设图是无向的(即 G.is_directed() == False),可以通过寻找既是某节点邻居的邻居、又是同一节点直接邻居的节点,来高效地统计三角形数量。预先计算并预先过滤各节点的邻居集合,使每个三角形只被统计一次,可以大幅缩短执行时间。代码如下:
# Keep, for every node, only the neighbours with a strictly larger id so
# that each triangle is counted exactly once (from its smallest vertex).
nodeNeighbours = {
    node: {neighbour for neighbour in edgeInfos.keys() if neighbour > node}
    for node, edgeInfos in G.adjacency()
}
# For every pair (node1 -> node2) in the filtered adjacency, each common
# element of the two filtered neighbour sets closes one distinct triangle.
triangleCount = sum(
    len(nodeNeighbours[node1] & nodeNeighbours[node2])
    for node1 in nodeNeighbours
    for node2 in nodeNeighbours[node1]
)
以上代码在示例图上比原始迭代解决方案快 12 倍,在 nx.erdos_renyi_graph(15000, 0.005) 上快 72 倍。
一个更快的解决方案在于使用Numba并使用多线程执行计算。不幸的是,这种方法复杂得多(因此这个单独的答案)。
思路是先将图转换为Numpy数组,然后平衡线程之间的工作,最后应用备选答案中提供的算法(使用多线程和Numpy数组)。这些图使用四个 Numpy 数组进行编码:
nodes:包含所有节点的 ID;
allNeighbours:包含所有节点的全部邻居;
neighbourSlices:包含每个节点在 allNeighbours 数组中的切片(起始+结束索引),以便能够获取该节点的邻居;
nodeIdToPos:根据节点的 ID 给出该节点在 nodes 中的索引。
代码如下:
# Split the nodes so the work (based on the slices) is balanced.
# Returns the (start,stop) slices of the nodes
@nb.njit('(int_[:,::1], int_)')
def splitSlices(neighbourSlices, count):
    # Total number of nodes.
    n = np.int64(neighbourSlices.shape[0])
    # Total amount of work: end index of the last node's slice (0 if empty).
    m = neighbourSlices[-1, 1] if neighbourSlices.size > 0 else 0
    # workSize[i] = (first node, one-past-last node) assigned to chunk i.
    workSize = np.empty((count, 2), dtype=np.int_)
    curPos = 0
    for i in range(count):
        # Walk forward until chunk i covers roughly (i+1)/count of the work.
        for j in range(curPos, n):
            stop = neighbourSlices[j, 1]
            if stop >= m * (i + 1) // count:
                workSize[i, 0] = curPos
                curPos = j + 1
                workSize[i, 1] = curPos
                break
        # NOTE(review): if the inner loop never breaks for some i (e.g. when
        # m == 0), that row of workSize stays uninitialized except for the
        # two cells patched below — confirm callers never hit this case.
    if count > 0:
        # Clamp the first/last chunk so all n nodes are covered exactly.
        workSize[0, 0] = 0
        workSize[-1, 1] = n
    return workSize
# Build a filtered adjacency that keeps, for each node, only the neighbours
# with a strictly larger id, then sorts each neighbour list (in parallel).
@nb.njit('(int_[::1], int_[:,::1], int_[::1])', parallel=True)
def filterNeighbours(nodes, neighbourSlices, allNeighbours):
    outNeighbourSlices = np.empty(neighbourSlices.shape, dtype=np.int_)
    # Each undirected edge appears twice in allNeighbours and exactly one of
    # the two copies satisfies `neighbour > node`, hence size // 2.
    outAllNeighbours = np.empty(allNeighbours.size//2, dtype=np.int_)
    curPos = 0
    # Sequential pass: copy the surviving neighbours and record the slices.
    for i in range(nodes.size):
        curNode = nodes[i]
        start, stop = neighbourSlices[i]
        outNeighbourSlices[i, 0] = curPos
        for neighbour in allNeighbours[start:stop]:
            if neighbour > curNode:
                outAllNeighbours[curPos] = neighbour
                curPos += 1
        outNeighbourSlices[i, 1] = curPos
    # Parallel pass: sort each node's neighbour list so the caller can use
    # binary search (np.searchsorted) on it.
    threadCount = nb.np.ufunc.parallel.get_num_threads()
    nodeSlides = splitSlices(outNeighbourSlices, threadCount)
    for threadId in nb.prange(threadCount):
        start, stop = nodeSlides[threadId]
        for i in range(start, stop):
            # Rebinding start/stop here is safe: the enclosing range(start,
            # stop) was already evaluated.
            start, stop = outNeighbourSlices[i]
            outAllNeighbours[start:stop].sort()
    return (outNeighbourSlices, outAllNeighbours)
# Count the triangles of the graph encoded by (nodes, neighbourSlices,
# allNeighbours), distributing the per-node work over threads.
@nb.njit('int_(int_[::1], int_[:,::1], int_[::1])', parallel=True)
def computeTriangle(nodes, neighbourSlices, allNeighbours):
    # Inverse index: node id -> position in `nodes`.
    # NOTE(review): assumes node ids are small non-negative integers, since
    # the lookup table is sized by max(nodes) + 1 — confirm for general ids.
    nodeIdToPos = np.empty(np.max(nodes)+1, dtype=np.int_)
    for i, node in enumerate(nodes):
        nodeIdToPos[node] = i
    # Keep only sorted neighbours with a larger id, so every triangle
    # (a < b < c) is enumerated exactly once, starting from a.
    neighbourSlices, allNeighbours = filterNeighbours(nodes, neighbourSlices, allNeighbours)
    threadCount = nb.np.ufunc.parallel.get_num_threads()
    # Balance threads by total neighbour count rather than node count.
    nodeSlides = splitSlices(neighbourSlices, threadCount)
    s = 0
    for threadId in nb.prange(threadCount):
        start, stop = nodeSlides[threadId]
        for i in range(start, stop):
            node1 = nodes[i]
            # NOTE(review): `stop2` is (re)used for node1's slice end here;
            # harmless because neighbours1 is sliced immediately below.
            start1, stop2 = neighbourSlices[i]
            neighbours1 = allNeighbours[start1:stop2]
            for node2 in neighbours1:
                start2, stop2 = neighbourSlices[nodeIdToPos[node2]]
                neighbours2 = allNeighbours[start2:stop2]
                for node3 in neighbours2:
                    if node3 > node2:
                        # Binary search: node3 closes a triangle iff it is
                        # also a (sorted, filtered) neighbour of node1.
                        foundId = np.searchsorted(neighbours1, node3)
                        s += foundId < neighbours1.size and neighbours1[foundId] == node3
    return s
# Conversion to Numpy arrays.
# Flatten the NetworkX adjacency into the arrays expected by the Numba
# kernels: `nodes` (ids), `allNeighbours` (concatenated neighbour lists)
# and `neighbourSlices` ([start, stop) of each node inside allNeighbours).
edgeCount = sum(len(adjacency) for _, adjacency in G.adjacency())
nodes = np.fromiter(G.nodes(), dtype=np.int_)
neighbourSlices = np.empty((len(G), 2), dtype=np.int_)
allNeighbours = np.empty(edgeCount, dtype=np.int_)
curPos = 0
for i, (node, adjacency) in enumerate(G.adjacency()):
    neighbourIds = np.fromiter(adjacency.keys(), dtype=np.int_)
    neighbourSlices[i, 0] = curPos
    neighbourSlices[i, 1] = curPos + neighbourIds.size
    allNeighbours[curPos:curPos + neighbourIds.size] = neighbourIds
    curPos += neighbourIds.size
# Actual computation (very fast)
triangleCount = computeTriangle(nodes, neighbourSlices, allNeighbours)
在我的 6 核机器上,此解决方案在示例图上比原始迭代解决方案快 45 倍,在 nx.erdos_renyi_graph(15000, 0.005) 上快 290 倍。当平均度数(N_edges/N_nodes)很大时,它比其他方法快得多。
不幸的是,大部分时间花在了将图(顺序地)转换为 Numpy 数组上。因此,如果您想要更快的方法(例如快 2~3 倍),那么您肯定需要使用原生语言(如 C 或 C++)实现。或者主要使用 Numpy 数组而不是 NetworkX 图,但这种方法显然不够灵活,而且实现起来更复杂。
在具有大约 15 万个节点和 200 万条边的无向图上计算 nx.triangles(G)
目前非常慢(80 小时的规模)。如果节点度分布严重偏斜,使用以下过程计算三角形是否有问题?
import networkx as nx
def largest_degree_node(G):
    """Return the id of the node with the maximal degree in *G*.

    (Improved using a suggestion by Stef in the comments.)
    """
    # `G.degree()` yields (node, degree) pairs; pick the pair with the
    # largest degree and return only its node id.
    node, _degree = max(G.degree(), key=lambda pair: pair[1])
    return node
def count_triangles(G):
    """Count all triangles of *G* by iteratively peeling off the
    highest-degree node.

    Each triangle is counted exactly once: when the first of its three
    vertices is removed, that vertex's per-node triangle count includes
    the triangle, and the removal deletes the triangle from the working
    copy so the other two vertices never see it again.
    """
    work = G.copy()  # never mutate the caller's graph
    total = 0
    while len(work.nodes()):
        node = largest_degree_node(work)
        total += nx.triangles(work, nodes=[node])[node]
        work.remove_node(node)
    return total
# Build a random benchmark graph.
G = nx.erdos_renyi_graph(1000, 0.1)
# Reference: nx.triangles counts each triangle once per vertex, so the
# per-node counts sum to 3x the number of triangles.
triangles_nx = int(sum(nx.triangles(G).values()) / 3)
# Candidate: the iterative node-removal method.
triangles_iterative = count_triangles(G)
# Both counting strategies must agree.
assert int(triangles_nx) == int(triangles_iterative)
断言通过了,但我担心在某些边缘情况下这种迭代方法不起作用。
假设图是无向的(即 G.is_directed() == False),可以通过寻找既是某节点邻居的邻居、又是同一节点直接邻居的节点,来高效地统计三角形数量。预先计算并预先过滤各节点的邻居集合,使每个三角形只被统计一次,可以大幅缩短执行时间。代码如下:
# Keep, for every node, only the neighbours with a strictly larger id so
# that each triangle is counted exactly once (from its smallest vertex).
nodeNeighbours = {
    node: {neighbour for neighbour in edgeInfos.keys() if neighbour > node}
    for node, edgeInfos in G.adjacency()
}
# For every pair (node1 -> node2) in the filtered adjacency, each common
# element of the two filtered neighbour sets closes one distinct triangle.
triangleCount = sum(
    len(nodeNeighbours[node1] & nodeNeighbours[node2])
    for node1 in nodeNeighbours
    for node2 in nodeNeighbours[node1]
)
以上代码在示例图上比原始迭代解决方案快 12 倍,在 nx.erdos_renyi_graph(15000, 0.005) 上快 72 倍。
一个更快的解决方案在于使用Numba并使用多线程执行计算。不幸的是,这种方法复杂得多(因此这个单独的答案)。
思路是先将图转换为Numpy数组,然后平衡线程之间的工作,最后应用备选答案中提供的算法(使用多线程和Numpy数组)。这些图使用四个 Numpy 数组进行编码:
nodes:包含所有节点的 ID;
allNeighbours:包含所有节点的全部邻居;
neighbourSlices:包含每个节点在 allNeighbours 数组中的切片(起始+结束索引),以便能够获取该节点的邻居;
nodeIdToPos:根据节点的 ID 给出该节点在 nodes 中的索引。
代码如下:
# Split the nodes so the work (based on the slices) is balanced.
# Returns the (start,stop) slices of the nodes
@nb.njit('(int_[:,::1], int_)')
def splitSlices(neighbourSlices, count):
    # Total number of nodes.
    n = np.int64(neighbourSlices.shape[0])
    # Total amount of work: end index of the last node's slice (0 if empty).
    m = neighbourSlices[-1, 1] if neighbourSlices.size > 0 else 0
    # workSize[i] = (first node, one-past-last node) assigned to chunk i.
    workSize = np.empty((count, 2), dtype=np.int_)
    curPos = 0
    for i in range(count):
        # Walk forward until chunk i covers roughly (i+1)/count of the work.
        for j in range(curPos, n):
            stop = neighbourSlices[j, 1]
            if stop >= m * (i + 1) // count:
                workSize[i, 0] = curPos
                curPos = j + 1
                workSize[i, 1] = curPos
                break
        # NOTE(review): if the inner loop never breaks for some i (e.g. when
        # m == 0), that row of workSize stays uninitialized except for the
        # two cells patched below — confirm callers never hit this case.
    if count > 0:
        # Clamp the first/last chunk so all n nodes are covered exactly.
        workSize[0, 0] = 0
        workSize[-1, 1] = n
    return workSize
# Build a filtered adjacency that keeps, for each node, only the neighbours
# with a strictly larger id, then sorts each neighbour list (in parallel).
@nb.njit('(int_[::1], int_[:,::1], int_[::1])', parallel=True)
def filterNeighbours(nodes, neighbourSlices, allNeighbours):
    outNeighbourSlices = np.empty(neighbourSlices.shape, dtype=np.int_)
    # Each undirected edge appears twice in allNeighbours and exactly one of
    # the two copies satisfies `neighbour > node`, hence size // 2.
    outAllNeighbours = np.empty(allNeighbours.size//2, dtype=np.int_)
    curPos = 0
    # Sequential pass: copy the surviving neighbours and record the slices.
    for i in range(nodes.size):
        curNode = nodes[i]
        start, stop = neighbourSlices[i]
        outNeighbourSlices[i, 0] = curPos
        for neighbour in allNeighbours[start:stop]:
            if neighbour > curNode:
                outAllNeighbours[curPos] = neighbour
                curPos += 1
        outNeighbourSlices[i, 1] = curPos
    # Parallel pass: sort each node's neighbour list so the caller can use
    # binary search (np.searchsorted) on it.
    threadCount = nb.np.ufunc.parallel.get_num_threads()
    nodeSlides = splitSlices(outNeighbourSlices, threadCount)
    for threadId in nb.prange(threadCount):
        start, stop = nodeSlides[threadId]
        for i in range(start, stop):
            # Rebinding start/stop here is safe: the enclosing range(start,
            # stop) was already evaluated.
            start, stop = outNeighbourSlices[i]
            outAllNeighbours[start:stop].sort()
    return (outNeighbourSlices, outAllNeighbours)
# Count the triangles of the graph encoded by (nodes, neighbourSlices,
# allNeighbours), distributing the per-node work over threads.
@nb.njit('int_(int_[::1], int_[:,::1], int_[::1])', parallel=True)
def computeTriangle(nodes, neighbourSlices, allNeighbours):
    # Inverse index: node id -> position in `nodes`.
    # NOTE(review): assumes node ids are small non-negative integers, since
    # the lookup table is sized by max(nodes) + 1 — confirm for general ids.
    nodeIdToPos = np.empty(np.max(nodes)+1, dtype=np.int_)
    for i, node in enumerate(nodes):
        nodeIdToPos[node] = i
    # Keep only sorted neighbours with a larger id, so every triangle
    # (a < b < c) is enumerated exactly once, starting from a.
    neighbourSlices, allNeighbours = filterNeighbours(nodes, neighbourSlices, allNeighbours)
    threadCount = nb.np.ufunc.parallel.get_num_threads()
    # Balance threads by total neighbour count rather than node count.
    nodeSlides = splitSlices(neighbourSlices, threadCount)
    s = 0
    for threadId in nb.prange(threadCount):
        start, stop = nodeSlides[threadId]
        for i in range(start, stop):
            node1 = nodes[i]
            # NOTE(review): `stop2` is (re)used for node1's slice end here;
            # harmless because neighbours1 is sliced immediately below.
            start1, stop2 = neighbourSlices[i]
            neighbours1 = allNeighbours[start1:stop2]
            for node2 in neighbours1:
                start2, stop2 = neighbourSlices[nodeIdToPos[node2]]
                neighbours2 = allNeighbours[start2:stop2]
                for node3 in neighbours2:
                    if node3 > node2:
                        # Binary search: node3 closes a triangle iff it is
                        # also a (sorted, filtered) neighbour of node1.
                        foundId = np.searchsorted(neighbours1, node3)
                        s += foundId < neighbours1.size and neighbours1[foundId] == node3
    return s
# Conversion to Numpy arrays.
# Flatten the NetworkX adjacency into the arrays expected by the Numba
# kernels: `nodes` (ids), `allNeighbours` (concatenated neighbour lists)
# and `neighbourSlices` ([start, stop) of each node inside allNeighbours).
edgeCount = sum(len(adjacency) for _, adjacency in G.adjacency())
nodes = np.fromiter(G.nodes(), dtype=np.int_)
neighbourSlices = np.empty((len(G), 2), dtype=np.int_)
allNeighbours = np.empty(edgeCount, dtype=np.int_)
curPos = 0
for i, (node, adjacency) in enumerate(G.adjacency()):
    neighbourIds = np.fromiter(adjacency.keys(), dtype=np.int_)
    neighbourSlices[i, 0] = curPos
    neighbourSlices[i, 1] = curPos + neighbourIds.size
    allNeighbours[curPos:curPos + neighbourIds.size] = neighbourIds
    curPos += neighbourIds.size
# Actual computation (very fast)
triangleCount = computeTriangle(nodes, neighbourSlices, allNeighbours)
在我的 6 核机器上,此解决方案在示例图上比原始迭代解决方案快 45 倍,在 nx.erdos_renyi_graph(15000, 0.005) 上快 290 倍。当平均度数(N_edges/N_nodes)很大时,它比其他方法快得多。
不幸的是,大部分时间花在了将图(顺序地)转换为 Numpy 数组上。因此,如果您想要更快的方法(例如快 2~3 倍),那么您肯定需要使用原生语言(如 C 或 C++)实现。或者主要使用 Numpy 数组而不是 NetworkX 图,但这种方法显然不够灵活,而且实现起来更复杂。