优化python DFS(for循环效率低下)
Optimizing python DFS (for loop is inefficient)
给定以下函数,归档相同(且更快)结果的正确和 pythonic 方法是什么?
我的代码效率不高,我相信我错过了一些盯着我看的东西。
我们的想法是找到一个 [[A,B],[A,C],[C,B]] 的模式,而不必生成额外的排列(因为这会导致更长的处理时间比较)。
在现实生活中输入 find_path
的字典长度大约为 10,000,因此必须使用下面的当前代码版本迭代该数量是效率不高的。
from time import perf_counter
from typing import List, Generator, Dict
def find_path(data: Dict) -> Generator:
for first_pair in data:
pair1: List[str] = first_pair.split("/")
for second_pair in data:
pair2: List[str] = second_pair.split("/")
if pair2[0] == pair1[0] and pair2[1] != pair1[1]:
for third_pair in data:
pair3: List[str] = third_pair.split("/")
if pair3[0] == pair2[1] and pair3[1] == pair1[1]:
amount_pair_1: int = data.get(first_pair)[
"amount"
]
id_pair_1: int = data.get(first_pair)["id"]
amount_pair_2: int = data.get(second_pair)[
"amount"
]
id_pair_2: int = data.get(second_pair)["id"]
amount_pair_3: int = data.get(third_pair)[
"amount"
]
id_pair_3: int = data.get(third_pair)["id"]
yield (
pair1,
amount_pair_1,
id_pair_1,
pair2,
amount_pair_2,
id_pair_2,
pair3,
amount_pair_3,
id_pair_3,
)
raw_data = {
"EZ/TC": {"id": 1, "amount": 9},
"LM/TH": {"id": 2, "amount": 8},
"CD/EH": {"id": 3, "amount": 7},
"EH/TC": {"id": 4, "amount": 6},
"LM/TC": {"id": 5, "amount": 5},
"CD/TC": {"id": 6, "amount": 4},
"BT/TH": {"id": 7, "amount": 3},
"BT/TX": {"id": 8, "amount": 2},
"TX/TH": {"id": 9, "amount": 1},
}
processed_data = list(find_path(raw_data))
for i in processed_data:
print(("The path to traverse is:", i))
>> ('The path to traverse is:', (['CD', 'TC'], 4, 6, ['CD', 'EH'], 7, 3, ['EH', 'TC'], 6, 4))
>> ('The path to traverse is:', (['BT', 'TH'], 3, 7, ['BT', 'TX'], 2, 8, ['TX', 'TH'], 1, 9))
>> ('Time to complete', 5.748599869548343e-05)
# Timing for a simple ref., as mentioned above, the raw_data is a dict containing about 10,000 keys
你不能用这种图形表示来做到这一点。该算法具有 O(|E|^3)
时间复杂度。将边存储为列表数组是个好主意,每个列表将仅存储相邻的顶点。然后很容易做你需要的。幸运的是,您可以在 O(|E|)
时间内重新表示图形。
怎么做
我们将图存储为顶点数组(但在这种情况下,由于字符串顶点值,我们采用词典)。我们想通过一个顶点访问所有邻居。让我们这样做——我们将在数组 中存储给定顶点的所有邻居列表 。
现在我们只需要通过一组边来构建我们的结构(又名row_data)。
如何在图形中添加边?简单!我们应该在 array 中找到一个顶点 from 并将顶点 to 添加到它的邻居列表中
所以,construct_graph 函数可以是这样的:
def construct_graph(raw_data): # here we will change representation
graph = defaultdict(list) # our graph
for pair in raw_data: # go through every edge
u, v = pair.split("/") # get from and to vertexes
graph[u].append(v) # and add this edge in our structure
return graph # return our new graph to other functions
如何找到路径长度 2
我们将在图表上使用 dfs
。
def dfs(g, u, dist): # this is a simple dfs function
if dist == 2: # we has a 'dist' from our start
return [u] # and if we found already answer, return it
for v in g.get(u, []): # otherwise check all neighbours of current vertex
ans = dfs(g, v, dist + 1) # run dfs in every neighbour with dist+1
if ans: # and if that dfs found something
ans.append(u) # store it in ouy answer
return ans # and return it
return [] # otherwise we found nothing
然后我们对每个顶点进行尝试。
def main():
graph = construct_graph(raw_data)
for v in graph.keys(): # here we will try to find path
ans = dfs(graph, v, 0) # starting with 0 dist
if ans: # and if we found something
print(list(reversed(ans))) # return it, but answer will be reversed
给定以下函数,归档相同(且更快)结果的正确和 pythonic 方法是什么?
我的代码效率不高,我相信我错过了一些盯着我看的东西。
我们的想法是找到一个 [[A,B],[A,C],[C,B]] 的模式,而不必生成额外的排列(因为这会导致更长的处理时间比较)。
在现实生活中输入 find_path
的字典长度大约为 10,000,因此必须使用下面的当前代码版本迭代该数量是效率不高的。
from time import perf_counter
from typing import List, Generator, Dict
def find_path(data: Dict) -> Generator:
for first_pair in data:
pair1: List[str] = first_pair.split("/")
for second_pair in data:
pair2: List[str] = second_pair.split("/")
if pair2[0] == pair1[0] and pair2[1] != pair1[1]:
for third_pair in data:
pair3: List[str] = third_pair.split("/")
if pair3[0] == pair2[1] and pair3[1] == pair1[1]:
amount_pair_1: int = data.get(first_pair)[
"amount"
]
id_pair_1: int = data.get(first_pair)["id"]
amount_pair_2: int = data.get(second_pair)[
"amount"
]
id_pair_2: int = data.get(second_pair)["id"]
amount_pair_3: int = data.get(third_pair)[
"amount"
]
id_pair_3: int = data.get(third_pair)["id"]
yield (
pair1,
amount_pair_1,
id_pair_1,
pair2,
amount_pair_2,
id_pair_2,
pair3,
amount_pair_3,
id_pair_3,
)
raw_data = {
"EZ/TC": {"id": 1, "amount": 9},
"LM/TH": {"id": 2, "amount": 8},
"CD/EH": {"id": 3, "amount": 7},
"EH/TC": {"id": 4, "amount": 6},
"LM/TC": {"id": 5, "amount": 5},
"CD/TC": {"id": 6, "amount": 4},
"BT/TH": {"id": 7, "amount": 3},
"BT/TX": {"id": 8, "amount": 2},
"TX/TH": {"id": 9, "amount": 1},
}
processed_data = list(find_path(raw_data))
for i in processed_data:
print(("The path to traverse is:", i))
>> ('The path to traverse is:', (['CD', 'TC'], 4, 6, ['CD', 'EH'], 7, 3, ['EH', 'TC'], 6, 4))
>> ('The path to traverse is:', (['BT', 'TH'], 3, 7, ['BT', 'TX'], 2, 8, ['TX', 'TH'], 1, 9))
>> ('Time to complete', 5.748599869548343e-05)
# Timing for a simple ref., as mentioned above, the raw_data is a dict containing about 10,000 keys
你不能用这种图形表示来做到这一点。该算法具有 O(|E|^3)
时间复杂度。将边存储为列表数组是个好主意,每个列表将仅存储相邻的顶点。然后很容易做你需要的。幸运的是,您可以在 O(|E|)
时间内重新表示图形。
怎么做
我们将图存储为顶点数组(但在这种情况下,由于字符串顶点值,我们采用词典)。我们想通过一个顶点访问所有邻居。让我们这样做——我们将在数组 中存储给定顶点的所有邻居列表 。
现在我们只需要通过一组边来构建我们的结构(又名row_data)。 如何在图形中添加边?简单!我们应该在 array 中找到一个顶点 from 并将顶点 to 添加到它的邻居列表中
所以,construct_graph 函数可以是这样的:
def construct_graph(raw_data): # here we will change representation
graph = defaultdict(list) # our graph
for pair in raw_data: # go through every edge
u, v = pair.split("/") # get from and to vertexes
graph[u].append(v) # and add this edge in our structure
return graph # return our new graph to other functions
如何找到路径长度 2
我们将在图表上使用 dfs
。
def dfs(g, u, dist): # this is a simple dfs function
if dist == 2: # we has a 'dist' from our start
return [u] # and if we found already answer, return it
for v in g.get(u, []): # otherwise check all neighbours of current vertex
ans = dfs(g, v, dist + 1) # run dfs in every neighbour with dist+1
if ans: # and if that dfs found something
ans.append(u) # store it in ouy answer
return ans # and return it
return [] # otherwise we found nothing
然后我们对每个顶点进行尝试。
def main():
graph = construct_graph(raw_data)
for v in graph.keys(): # here we will try to find path
ans = dfs(graph, v, 0) # starting with 0 dist
if ans: # and if we found something
print(list(reversed(ans))) # return it, but answer will be reversed