优化深度优先搜索 python
optimizing depth first search python
我有 parent 和 child 行的级别。所以我使用下面的代码来识别 parents ,child 和它们的级别如下:
instanceID parentInstanceID dependencyInstanceID isCycle level
0 44909 103565 103565 False 0
1 45461 103565 103565 False 0
2 45465 103565 103565 False 0
def traverse_graph(graph):
n_stack = Stack([n for n in graph.root_parents])
p_stack = Stack()
track_of_node = {}
cycles = {}
print("Beginning of whileloop", datetime.now())
while n_stack.stack:
if n_stack.last not in graph.visited_nodes:
if n_stack.last != p_stack.last:
if graph.nodes_dict[n_stack.last]:
p_stack.add(n_stack.last)
if p_stack.last not in track_of_node.keys():
track_of_node[p_stack.last] = 1
else:
track_of_node[p_stack.last] += 1
if track_of_node[p_stack.last] >= 3 and check_cycle(p_stack):
if p_stack.stack[-2] not in cycles.keys():
cycles[p_stack.stack[-2]] = [p_stack.last]
else:
cycles[p_stack.stack[-2]].append(p_stack.last)
n_stack.pop()
e = p_stack.pop()
track_of_node[e] -= 1
else:
discard = []
if n_stack.last in cycles.keys():
discard += cycles[n_stack.last]
n_stack.add_multiple(graph.nodes_dict[n_stack.last], discard)
else:
graph.visited_nodes[n_stack.last] = set([])
n_stack.pop()
else:
cycle_nodes = []
if n_stack.last in cycles.keys():
cycle_nodes += cycles[n_stack.last]
descendants = []
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
graph.visited_nodes[n_stack.last] = set([(c, n_stack.last, False, 0)
for c in graph.nodes_dict[n_stack.last]] +
descendants +
[(n, n_stack.last, True, -1) for n in cycle_nodes])
n_stack.pop()
e = p_stack.pop()
if e in track_of_node.keys():
#print("within e",e)
track_of_node[e] -= 1
else:
if n_stack.last == p_stack.last:
e = p_stack.pop()
if e in track_of_node.keys():
track_of_node[e] -= 1
n_stack.pop()
for 循环花费的时间最长。
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
作为参数传递的图形是这样的
[42345] []
[42345, 42547] []
[42345, 42547, 44025] []
[42345, 42547, 44025, 44032] []
[42345, 42547, 44025, 44032, 44085] []
[42345, 42547, 44025, 44032, 44085, 44097] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099, 44327] []
我也尝试了 LRU 缓存和使用 ThreadPoolExecutor 的多线程执行。但它没有用。如果您对更快地执行 for 循环有任何想法,请告诉我。谢谢。
我分块调用了 traverse_graph(graph) 并解决了问题。
我有 parent 和 child 行的级别。所以我使用下面的代码来识别 parents ,child 和它们的级别如下:
instanceID parentInstanceID dependencyInstanceID isCycle level
0 44909 103565 103565 False 0
1 45461 103565 103565 False 0
2 45465 103565 103565 False 0
def traverse_graph(graph):
n_stack = Stack([n for n in graph.root_parents])
p_stack = Stack()
track_of_node = {}
cycles = {}
print("Beginning of whileloop", datetime.now())
while n_stack.stack:
if n_stack.last not in graph.visited_nodes:
if n_stack.last != p_stack.last:
if graph.nodes_dict[n_stack.last]:
p_stack.add(n_stack.last)
if p_stack.last not in track_of_node.keys():
track_of_node[p_stack.last] = 1
else:
track_of_node[p_stack.last] += 1
if track_of_node[p_stack.last] >= 3 and check_cycle(p_stack):
if p_stack.stack[-2] not in cycles.keys():
cycles[p_stack.stack[-2]] = [p_stack.last]
else:
cycles[p_stack.stack[-2]].append(p_stack.last)
n_stack.pop()
e = p_stack.pop()
track_of_node[e] -= 1
else:
discard = []
if n_stack.last in cycles.keys():
discard += cycles[n_stack.last]
n_stack.add_multiple(graph.nodes_dict[n_stack.last], discard)
else:
graph.visited_nodes[n_stack.last] = set([])
n_stack.pop()
else:
cycle_nodes = []
if n_stack.last in cycles.keys():
cycle_nodes += cycles[n_stack.last]
descendants = []
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
graph.visited_nodes[n_stack.last] = set([(c, n_stack.last, False, 0)
for c in graph.nodes_dict[n_stack.last]] +
descendants +
[(n, n_stack.last, True, -1) for n in cycle_nodes])
n_stack.pop()
e = p_stack.pop()
if e in track_of_node.keys():
#print("within e",e)
track_of_node[e] -= 1
else:
if n_stack.last == p_stack.last:
e = p_stack.pop()
if e in track_of_node.keys():
track_of_node[e] -= 1
n_stack.pop()
for 循环花费的时间最长。
for c in graph.nodes_dict[n_stack.last]:
if c in graph.visited_nodes.keys():
for d in graph.visited_nodes[c]:
c, ip, ic, lv = list(d)
if lv != -1:
descendants.append((c, ip, ic, lv+1))
作为参数传递的图形是这样的
[42345] []
[42345, 42547] []
[42345, 42547, 44025] []
[42345, 42547, 44025, 44032] []
[42345, 42547, 44025, 44032, 44085] []
[42345, 42547, 44025, 44032, 44085, 44097] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099] []
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099, 44327] []
我也尝试了 LRU 缓存和使用 ThreadPoolExecutor 的多线程执行。但它没有用。如果您对更快地执行 for 循环有任何想法,请告诉我。谢谢。
我分块调用了 traverse_graph(graph) 并解决了问题。