加速旅行商问题的动态规划解决方案的建议?
Suggestions for speeding up a dynamic programming solution to the Traveling Salesman Problem?
我正在学习在线课程,其中一项作业是实现动态规划算法来解决旅行商问题 (TSP)。我的 Python 实施适用于小案例(~5 个城市),但对于 25 个城市的 'real' 应用程序,它似乎非常慢。我正在寻找加速算法的建议。
算法描述如下:
http://www.geeksforgeeks.org/travelling-salesman-problem-set-1/ 中也描述了动态规划解决方案,其中提供了额外的参考资料。
作业的问题陈述是:
我已经使用数组 A
的 pandas DataFrame
对象实现了伪代码。由于集合不可散列且不能用作索引,我改为使用元组,注意对它们进行排序以使它们成为集合的唯一表示。这是代码以及几个不断增加的测试用例:
import functools
from itertools import combinations
import numpy as np
import pandas as pd
from cached_property import cached_property
import pytest
def powerset_list(s):
'''Return a list of tuples representing all subsets of s'''
return functools.reduce(lambda x, y: x + y, [list(combinations(s, r)) for r in range(len(s)+1)])
class Graph(object):
def __init__(self, edges):
self.edges = edges
@cached_property
def nodes(self):
_nodes = set()
for edge in self.edges:
u, v, weight = edge
_nodes.add(u)
_nodes.add(v)
return list(_nodes)
@cached_property
def W(self):
'''Matrix of edge weights'''
n = len(self.nodes)
w = np.full((n, n), np.inf)
np.fill_diagonal(w, 0)
w = pd.DataFrame(w, index=range(1, n+1), columns=range(1, n+1))
for edge in self.edges:
u, v, weight = edge
w.set_value(u, v, weight)
w.set_value(v, u, weight)
return w
def tsp(self):
'''Solve the traveling salesman problem using a dynamic programming method'''
n = len(self.nodes)
sets = [(1,) + x for x in powerset_list(range(2, n+1))]
A = pd.DataFrame(np.full((len(sets), n), np.inf), index=sets, columns=range(1, n+1))
A.set_value((1,), 1, 0)
for m in range(2, n+1):
for S in [(1,) + perm for perm in combinations(range(2, n+1), m-1)]:
for j in set(S) - set([1]):
S_min_j = tuple(sorted(set(S) - set([j])))
A.set_value(S, j, min(A.get_value(S_min_j, k) + self.W.get_value(k, j) for k in S_min_j))
return min(A.get_value(tuple(range(1, n+1)), j) + self.W.get_value(j, 1) for j in range(2, n+1))
@pytest.fixture
def edges_geeksforgeeks():
'''Edges from the example graph on http://www.geeksforgeeks.org/travelling-salesman-problem-set-1/'''
return [(1, 2, 10), (1, 3, 15), (1, 4, 20), (2, 3, 35), (2, 4, 25), (3, 4, 30)]
def test_tsp(edges_geeksforgeeks):
graph = Graph(edges_geeksforgeeks)
min_cost = graph.tsp()
assert min_cost == 80
def dist(coord1, coord2):
return np.linalg.norm(np.array(coord1) - np.array(coord2))
def edges_from_coords(filename):
with open(filename) as f:
coords = [tuple(map(float, line.split())) for line in f.read().splitlines()[1:]]
nodes = list(range(1, len(coords) + 1))
coords = dict(zip(nodes, coords))
return [(comb[0], comb[1], dist(coords[comb[0]], coords[comb[1]])) for comb in combinations(nodes, 2)]
@pytest.mark.parametrize("test_input, expected", [("Hulburd_1.txt", 10.24), ("Hulburd_2.txt", 12.36), ("Hulburd_3.txt", 14.00)])
def test_Hulburd(test_input, expected):
'''Test data supplied by Eric Hulburd on the course forum'''
edges = edges_from_coords(test_input)
graph = Graph(edges)
min_cost = graph.tsp()
assert np.around(min_cost, decimals=2) == expected
@pytest.fixture
def edges_cities():
return edges_from_coords('tsp.txt')
@pytest.mark.skip(reason="This takes too long to run")
def test_tsp_cities(edges_cities):
graph = Graph(edges_cities)
min_cost = graph.tsp()
print("The minimum cost rounded down to the nearest integer is {}".format(int(np.floor(min_cost))))
if __name__ == "__main__":
pytest.main([__file__, "-s"])
测试中使用的文件是Hulburd_1.txt, Hulburd_2.txt, Hulburd_3.txt, and the main file for the actual assignment, tsp.txt。问题是涉及 tsp.txt
的最后一个(跳过的)测试花费的时间太长 运行。
我怎样才能加快算法速度?在课程论坛上,有人说他们使用位掩码和并行化在大约 3 分钟内达到 运行;另一个建议是简化数组的索引而不是使用元组。
关于如何提高性能的一些想法:
- 使用 32 位整数代替元组来表示您的子集 - 如果您的城市不超过 32 个,这应该足够了
- 在每个步骤中,您只需要为大小为 m - 1 的子集计算的值(您不必为大小为 m-2、m 的子集存储任何值-3 等)- 这可能会大大减少您的内存使用量
我正在学习在线课程,其中一项作业是实现动态规划算法来解决旅行商问题 (TSP)。我的 Python 实施适用于小案例(~5 个城市),但对于 25 个城市的 'real' 应用程序,它似乎非常慢。我正在寻找加速算法的建议。
算法描述如下:
http://www.geeksforgeeks.org/travelling-salesman-problem-set-1/ 中也描述了动态规划解决方案,其中提供了额外的参考资料。
作业的问题陈述是:
我已经使用数组 A
的 pandas DataFrame
对象实现了伪代码。由于集合不可散列且不能用作索引,我改为使用元组,注意对它们进行排序以使它们成为集合的唯一表示。这是代码以及几个不断增加的测试用例:
import functools
from itertools import combinations
import numpy as np
import pandas as pd
from cached_property import cached_property
import pytest
def powerset_list(s):
'''Return a list of tuples representing all subsets of s'''
return functools.reduce(lambda x, y: x + y, [list(combinations(s, r)) for r in range(len(s)+1)])
class Graph(object):
def __init__(self, edges):
self.edges = edges
@cached_property
def nodes(self):
_nodes = set()
for edge in self.edges:
u, v, weight = edge
_nodes.add(u)
_nodes.add(v)
return list(_nodes)
@cached_property
def W(self):
'''Matrix of edge weights'''
n = len(self.nodes)
w = np.full((n, n), np.inf)
np.fill_diagonal(w, 0)
w = pd.DataFrame(w, index=range(1, n+1), columns=range(1, n+1))
for edge in self.edges:
u, v, weight = edge
w.set_value(u, v, weight)
w.set_value(v, u, weight)
return w
def tsp(self):
'''Solve the traveling salesman problem using a dynamic programming method'''
n = len(self.nodes)
sets = [(1,) + x for x in powerset_list(range(2, n+1))]
A = pd.DataFrame(np.full((len(sets), n), np.inf), index=sets, columns=range(1, n+1))
A.set_value((1,), 1, 0)
for m in range(2, n+1):
for S in [(1,) + perm for perm in combinations(range(2, n+1), m-1)]:
for j in set(S) - set([1]):
S_min_j = tuple(sorted(set(S) - set([j])))
A.set_value(S, j, min(A.get_value(S_min_j, k) + self.W.get_value(k, j) for k in S_min_j))
return min(A.get_value(tuple(range(1, n+1)), j) + self.W.get_value(j, 1) for j in range(2, n+1))
@pytest.fixture
def edges_geeksforgeeks():
'''Edges from the example graph on http://www.geeksforgeeks.org/travelling-salesman-problem-set-1/'''
return [(1, 2, 10), (1, 3, 15), (1, 4, 20), (2, 3, 35), (2, 4, 25), (3, 4, 30)]
def test_tsp(edges_geeksforgeeks):
graph = Graph(edges_geeksforgeeks)
min_cost = graph.tsp()
assert min_cost == 80
def dist(coord1, coord2):
return np.linalg.norm(np.array(coord1) - np.array(coord2))
def edges_from_coords(filename):
with open(filename) as f:
coords = [tuple(map(float, line.split())) for line in f.read().splitlines()[1:]]
nodes = list(range(1, len(coords) + 1))
coords = dict(zip(nodes, coords))
return [(comb[0], comb[1], dist(coords[comb[0]], coords[comb[1]])) for comb in combinations(nodes, 2)]
@pytest.mark.parametrize("test_input, expected", [("Hulburd_1.txt", 10.24), ("Hulburd_2.txt", 12.36), ("Hulburd_3.txt", 14.00)])
def test_Hulburd(test_input, expected):
'''Test data supplied by Eric Hulburd on the course forum'''
edges = edges_from_coords(test_input)
graph = Graph(edges)
min_cost = graph.tsp()
assert np.around(min_cost, decimals=2) == expected
@pytest.fixture
def edges_cities():
return edges_from_coords('tsp.txt')
@pytest.mark.skip(reason="This takes too long to run")
def test_tsp_cities(edges_cities):
graph = Graph(edges_cities)
min_cost = graph.tsp()
print("The minimum cost rounded down to the nearest integer is {}".format(int(np.floor(min_cost))))
if __name__ == "__main__":
pytest.main([__file__, "-s"])
测试中使用的文件是Hulburd_1.txt, Hulburd_2.txt, Hulburd_3.txt, and the main file for the actual assignment, tsp.txt。问题是涉及 tsp.txt
的最后一个(跳过的)测试花费的时间太长 运行。
我怎样才能加快算法速度?在课程论坛上,有人说他们使用位掩码和并行化在大约 3 分钟内达到 运行;另一个建议是简化数组的索引而不是使用元组。
关于如何提高性能的一些想法:
- 使用 32 位整数代替元组来表示您的子集 - 如果您的城市不超过 32 个,这应该足够了
- 在每个步骤中,您只需要为大小为 m - 1 的子集计算的值(您不必为大小为 m-2、m 的子集存储任何值-3 等)- 这可能会大大减少您的内存使用量