优化深度优先搜索 python

optimizing depth first search python

我有 parent 和 child 行的级别。所以我使用下面的代码来识别 parents ,child 和它们的级别如下:

       instanceID     parentInstanceID dependencyInstanceID isCycle  level  
0         44909            103565                103565    False      0 
1         45461            103565                103565    False      0 
2         45465            103565                103565    False      0 
    def traverse_graph(graph):
        n_stack = Stack([n for n in graph.root_parents])
        p_stack = Stack()
    
        track_of_node = {}
        cycles = {}
        print("Beginning of whileloop", datetime.now())
        while n_stack.stack:
            if n_stack.last not in graph.visited_nodes:
                if n_stack.last != p_stack.last:
                    if graph.nodes_dict[n_stack.last]:
                        p_stack.add(n_stack.last)
                        if p_stack.last not in track_of_node.keys():
                            track_of_node[p_stack.last] = 1
                        else:
                            track_of_node[p_stack.last] += 1
                        if track_of_node[p_stack.last] >= 3 and check_cycle(p_stack):
                            if p_stack.stack[-2] not in cycles.keys():
                                cycles[p_stack.stack[-2]] = [p_stack.last]
                            else:
                                cycles[p_stack.stack[-2]].append(p_stack.last)
                            n_stack.pop()
                            e = p_stack.pop()
                            track_of_node[e] -= 1
                        else:
                            discard = []
                            if n_stack.last in cycles.keys():
                                discard += cycles[n_stack.last]
    
                            n_stack.add_multiple(graph.nodes_dict[n_stack.last], discard)
                    else:
                        graph.visited_nodes[n_stack.last] = set([])
                        n_stack.pop()
    
                else:
                    cycle_nodes = []
                    if n_stack.last in cycles.keys():
                        cycle_nodes += cycles[n_stack.last]
                    descendants = []
                    for c in graph.nodes_dict[n_stack.last]:
                        if c in graph.visited_nodes.keys():
                            for d in graph.visited_nodes[c]:
                                c, ip, ic, lv = list(d)
                                if lv != -1:
                                    descendants.append((c, ip, ic, lv+1))
                    graph.visited_nodes[n_stack.last] = set([(c, n_stack.last, False, 0)
                                                             for c in graph.nodes_dict[n_stack.last]] +
                                                            descendants +
                                                            [(n, n_stack.last, True, -1) for n in cycle_nodes])
                    n_stack.pop()
                    e = p_stack.pop()
                    if e in track_of_node.keys():
                        #print("within e",e)
                        track_of_node[e] -= 1
    
            else:
                if n_stack.last == p_stack.last:
                    e = p_stack.pop()
                    if e in track_of_node.keys():
                        track_of_node[e] -= 1
    
                n_stack.pop()

for 循环花费的时间最长。

for c in graph.nodes_dict[n_stack.last]:
    if c in graph.visited_nodes.keys():
        for d in graph.visited_nodes[c]:
            c, ip, ic, lv = list(d)
            if lv != -1:
               descendants.append((c, ip, ic, lv+1))

作为参数传递的图形是这样的

[42345] []  
[42345, 42547] []   
[42345, 42547, 44025] []    
[42345, 42547, 44025, 44032] [] 
[42345, 42547, 44025, 44032, 44085] []  
[42345, 42547, 44025, 44032, 44085, 44097] []   
[42345, 42547, 44025, 44032, 44085, 44097, 44098] []    
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099] [] 
[42345, 42547, 44025, 44032, 44085, 44097, 44098, 44099, 44327] []  

我也尝试了 LRU 缓存和使用 ThreadPoolExecutor 的多线程执行。但它没有用。如果您对更快地执行 for 循环有任何想法,请告诉我。谢谢。

我分块调用了 traverse_graph(graph) 并解决了问题。