最近对实现 Python
Closest Pair Implemetation Python
我正在尝试使用分而治之的方法在 Python 中实现最近对问题,一切似乎都正常,只是在某些输入情况下,答案是错误的。我的代码如下:
def closestSplitPair(Px,Py,d):
X = Px[len(Px)-1][0]
Sy = [item for item in Py if item[0]>=X-d and item[0]<=X+d]
best,p3,q3 = d,None,None
for i in xrange(0,len(Sy)-2):
for j in xrange(1,min(7,len(Sy)-1-i)):
if dist(Sy[i],Sy[i+j]) < best:
best = (Sy[i],Sy[i+j])
p3,q3 = Sy[i],Sy[i+j]
return (p3,q3,best)
我是通过递归函数调用上面的函数,如下:
def closestPair(Px,Py): """Px and Py are input arrays sorted according to
their x and y coordinates respectively"""
if len(Px) <= 3:
return min_dist(Px)
else:
mid = len(Px)/2
Qx = Px[:mid] ### x-sorted left side of P
Qy = Py[:mid] ### y-sorted left side of P
Rx = Px[mid:] ### x-sorted right side of P
Ry = Py[mid:] ### y-sorted right side of P
(p1,q1,d1) = closestPair(Qx,Qy)
(p2,q2,d2) = closestPair(Rx,Ry)
d = min(d1,d2)
(p3,q3,d3) = closestSplitPair(Px,Py,d)
return min((p1,q1,d1),(p2,q2,d2),(p3,q3,d3),key=lambda tup: tup[2])
其中 min_dist(P)
是具有 3 个或更少元素的列表 P 和 returns 包含最近点对及其距离的元组的最近对算法的蛮力实现。
如果我的输入是 P = [(0,0),(7,6),(2,20),(12,5),(16,16),(5,8),(19,7),(14,22),(8,19),(7,29),(10,11),(1,13)]
,那么我的输出是 ((5,8),(7,6),2.8284271)
,这是正确的输出。但是当我的输入是 P = [(94, 5), (96, -79), (20, 73), (8, -50), (78, 2), (100, 63), (-14, -69), (99, -8), (-11, -7), (-78, -46)]
时,我得到的输出是 ((78, 2), (94, 5), 16.278820596099706)
而正确的输出应该是 ((94, 5), (99, -8), 13.92838827718412)
你有两个问题,你忘记调用 dist 来更新最佳距离。但主要问题是发生了不止一个递归调用,所以当你找到一个更接近的默认拆分对时,你可能会最终覆盖,best,p3,q3 = d,None,None
。我将 closest_pair
中的最佳对作为参数传递给 closest_split_pair
,因此我不会潜在地覆盖该值。
def closest_split_pair(p_x, p_y, delta, best_pair): # <- a parameter
ln_x = len(p_x)
mx_x = p_x[ln_x // 2][0]
s_y = [x for x in p_y if mx_x - delta <= x[0] <= mx_x + delta]
best = delta
for i in range(len(s_y) - 1):
for j in range(1, min(i + 7, (len(s_y) - i))):
p, q = s_y[i], s_y[i + j]
dst = dist(p, q)
if dst < best:
best_pair = p, q
best = dst
return best_pair
closest_pair 的结尾如下所示:
p_1, q_1 = closest_pair(srt_q_x, srt_q_y)
p_2, q_2 = closest_pair(srt_r_x, srt_r_y)
closest = min(dist(p_1, q_1), dist(p_2, q_2))
# get min of both and then pass that as an arg to closest_split_pair
mn = min((p_1, q_1), (p_2, q_2), key=lambda x: dist(x[0], x[1]))
p_3, q_3 = closest_split_pair(p_x, p_y, closest,mn)
# either return mn or we have a closer split pair
return min(mn, (p_3, q_3), key=lambda x: dist(x[0], x[1]))
你还有其他一些逻辑问题,你的切片逻辑不正确,我对你的代码做了一些修改,其中 brute 只是一个简单的 bruteforce 双循环:
def closestPair(Px, Py):
if len(Px) <= 3:
return brute(Px)
mid = len(Px) / 2
# get left and right half of Px
q, r = Px[:mid], Px[mid:]
# sorted versions of q and r by their x and y coordinates
Qx, Qy = [x for x in q if Py and x[0] <= Px[-1][0]], [x for x in q if x[1] <= Py[-1][1]]
Rx, Ry = [x for x in r if Py and x[0] <= Px[-1][0]], [x for x in r if x[1] <= Py[-1][1]]
(p1, q1) = closestPair(Qx, Qy)
(p2, q2) = closestPair(Rx, Ry)
d = min(dist(p1, p2), dist(p2, q2))
mn = min((p1, q1), (p2, q2), key=lambda x: dist(x[0], x[1]))
(p3, q3) = closest_split_pair(Px, Py, d, mn)
return min(mn, (p3, q3), key=lambda x: dist(x[0], x[1]))
我今天刚做了这个算法,所以毫无疑问需要做一些改进,但这会给你正确的答案。
蛮力可以更快地使用 stdlib 函数。因此,它可以有效地应用于3点以上。
from itertools import combinations
def closest(points_list):
return min((dist(p1, p2), p1, p2)
for p1, p2 in combinations(points_list, r=2))
最有效的分点方式是分块。如果您没有异常值,您可以将 space 分成相等的部分,并仅比较相同或相邻图块中的点。
瓷砖的数量必须尽可能多。但是,为了避免孤立的图块,当每个点在相邻图块中没有点时,您必须通过点数限制图块的数量。
完整列表:
from math import sqrt
from itertools import combinations, product
from collections import defaultdict
import sys
max_float = sys.float_info.max
def dist((x1, y1), (x2, y2)):
return sqrt((x1 - x2) ** 2 + (y1 - y2) **2)
def closest(points_list):
if len(points_list) < 2:
return (max_float, None, None) # default value compatible with min function
return min((dist(p1, p2), p1, p2)
for p1, p2 in combinations(points_list, r=2))
def closest_between(pnt_lst1, pnt_lst2):
if not pnt_lst1 or not pnt_lst2:
return (max_float, None, None) # default value compatible with min function
return min((dist(p1, p2), p1, p2)
for p1, p2 in product(pnt_lst1, pnt_lst2))
def divide_on_tiles(points_list):
side = int(sqrt(len(points_list))) # number of tiles on one side of square
tiles = defaultdict(list)
min_x = min(x for x, y in points_list)
max_x = max(x for x, y in points_list)
min_y = min(x for x, y in points_list)
max_y = max(x for x, y in points_list)
tile_x_size = float(max_x - min_x) / side
tile_y_size = float(max_y - min_y) / side
for x, y in points_list:
x_tile = int((x - min_x) / tile_x_size)
y_tile = int((y - min_y) / tile_y_size)
tiles[(x_tile, y_tile)].append((x, y))
return tiles
def closest_for_tile(tiles, (x_tile, y_tile)):
points = tiles[(x_tile, y_tile)]
return min(closest(points),
# use dict.get to avoid creating empty tiles
# we compare current tile only with half of neighbours (right/top),
# because another half (left/bottom) make it in another iteration by themselves
closest_between(points, tiles.get((x_tile+1, y_tile))),
closest_between(points, tiles.get((x_tile, y_tile+1))),
closest_between(points, tiles.get((x_tile+1, y_tile+1))),
closest_between(points, tiles.get((x_tile-1, y_tile+1))))
def find_closest_in_tiles(tiles):
return min(closest_for_tile(tiles, coord) for coord in tiles.keys())
P1 = [(0,0),(7,6),(2,20),(12,5),(16,16),(5,8),(19,7),(14,22),(8,19),(7,29),(10,11),(1,13)]
P2 = [(94, 5), (96, -79), (20, 73), (8, -50), (78, 2), (100, 63), (-14, -69), (99, -8), (-11, -7), (-78, -46)]
print find_closest_in_tiles(divide_on_tiles(P1)) # (2.8284271247461903, (7, 6), (5, 8))
print find_closest_in_tiles(divide_on_tiles(P2)) # (13.92838827718412, (94, 5), (99, -8))
print find_closest_in_tiles(divide_on_tiles(P1 + P2)) # (2.8284271247461903, (7, 6), (5, 8))
这里是基于堆数据结构的最近点问题的递归分治python实现。它还占负整数。它可以通过使用 heappop() 在堆中弹出 k 个节点来 return k 个最近点。
from __future__ import division
from collections import namedtuple
from random import randint
import math as m
import heapq as hq
def get_key(item):
return(item[0])
def closest_point_problem(points):
point = []
heap = []
pt = namedtuple('pt', 'x y')
for i in range(len(points)):
point.append(pt(points[i][0], points[i][1]))
point = sorted(point, key=get_key)
visited_index = []
find_min(0, len(point) - 1, point, heap, visited_index)
print(hq.heappop(heap))
def find_min(start, end, point, heap, visited_index):
if len(point[start:end + 1]) & 1:
mid = start + ((len(point[start:end + 1]) + 1) >> 1)
else:
mid = start + (len(point[start:end + 1]) >> 1)
if start in visited_index:
start = start + 1
if end in visited_index:
end = end - 1
if len(point[start:end + 1]) > 3:
if start < mid - 1:
distance1 = m.sqrt((point[start].x - point[start + 1].x) ** 2 + (point[start].y - point[start + 1].y) ** 2)
distance2 = m.sqrt((point[mid].x - point[mid - 1].x) ** 2 + (point[mid].y - point[mid - 1].y) ** 2)
if distance1 < distance2:
hq.heappush(heap, (distance1, ((point[start].x, point[start].y), (point[start + 1].x, point[start + 1].y))))
else:
hq.heappush(heap, (distance2, ((point[mid].x, point[mid].y), (point[mid - 1].x, point[mid - 1].y))))
visited_index.append(start)
visited_index.append(start + 1)
visited_index.append(mid)
visited_index.append(mid - 1)
find_min(start, mid, point, heap, visited_index)
if mid + 1 < end:
distance1 = m.sqrt((point[mid].x - point[mid + 1].x) ** 2 + (point[mid].y - point[mid + 1].y) ** 2)
distance2 = m.sqrt((point[end].x - point[end - 1].x) ** 2 + (point[end].y - point[end - 1].y) ** 2)
if distance1 < distance2:
hq.heappush(heap, (distance1, ((point[mid].x, point[mid].y), (point[mid + 1].x, point[mid + 1].y))))
else:
hq.heappush(heap, (distance2, ((point[end].x, point[end].y), (point[end - 1].x, point[end - 1].y))))
visited_index.append(end)
visited_index.append(end - 1)
visited_index.append(mid)
visited_index.append(mid + 1)
find_min(mid, end, point, heap, visited_index)
x = []
num_points = 10
for i in range(num_points):
x.append((randint(- num_points << 2, num_points << 2), randint(- num_points << 2, num_points << 2)))
closest_point_problem(x)
:)
您只需将 closestSplitPair 函数 def 中的第七行从 best=(Sy[i],Sy[i+j])
更改为 best=dist(Sy[i],Sy[i+j])
,您将得到正确的 answer: ((94, 5), (99, -8), 13.92838827718412).
您错过了对 dist 函数的调用.
Padraic Cunningham 的回答指出这是第一个问题。
此致。
我正在尝试使用分而治之的方法在 Python 中实现最近对问题,一切似乎都正常,只是在某些输入情况下,答案是错误的。我的代码如下:
def closestSplitPair(Px,Py,d):
X = Px[len(Px)-1][0]
Sy = [item for item in Py if item[0]>=X-d and item[0]<=X+d]
best,p3,q3 = d,None,None
for i in xrange(0,len(Sy)-2):
for j in xrange(1,min(7,len(Sy)-1-i)):
if dist(Sy[i],Sy[i+j]) < best:
best = (Sy[i],Sy[i+j])
p3,q3 = Sy[i],Sy[i+j]
return (p3,q3,best)
我是通过递归函数调用上面的函数,如下:
def closestPair(Px,Py): """Px and Py are input arrays sorted according to
their x and y coordinates respectively"""
if len(Px) <= 3:
return min_dist(Px)
else:
mid = len(Px)/2
Qx = Px[:mid] ### x-sorted left side of P
Qy = Py[:mid] ### y-sorted left side of P
Rx = Px[mid:] ### x-sorted right side of P
Ry = Py[mid:] ### y-sorted right side of P
(p1,q1,d1) = closestPair(Qx,Qy)
(p2,q2,d2) = closestPair(Rx,Ry)
d = min(d1,d2)
(p3,q3,d3) = closestSplitPair(Px,Py,d)
return min((p1,q1,d1),(p2,q2,d2),(p3,q3,d3),key=lambda tup: tup[2])
其中 min_dist(P)
是具有 3 个或更少元素的列表 P 和 returns 包含最近点对及其距离的元组的最近对算法的蛮力实现。
如果我的输入是 P = [(0,0),(7,6),(2,20),(12,5),(16,16),(5,8),(19,7),(14,22),(8,19),(7,29),(10,11),(1,13)]
,那么我的输出是 ((5,8),(7,6),2.8284271)
,这是正确的输出。但是当我的输入是 P = [(94, 5), (96, -79), (20, 73), (8, -50), (78, 2), (100, 63), (-14, -69), (99, -8), (-11, -7), (-78, -46)]
时,我得到的输出是 ((78, 2), (94, 5), 16.278820596099706)
而正确的输出应该是 ((94, 5), (99, -8), 13.92838827718412)
你有两个问题,你忘记调用 dist 来更新最佳距离。但主要问题是发生了不止一个递归调用,所以当你找到一个更接近的默认拆分对时,你可能会最终覆盖,best,p3,q3 = d,None,None
。我将 closest_pair
中的最佳对作为参数传递给 closest_split_pair
,因此我不会潜在地覆盖该值。
def closest_split_pair(p_x, p_y, delta, best_pair): # <- a parameter
ln_x = len(p_x)
mx_x = p_x[ln_x // 2][0]
s_y = [x for x in p_y if mx_x - delta <= x[0] <= mx_x + delta]
best = delta
for i in range(len(s_y) - 1):
for j in range(1, min(i + 7, (len(s_y) - i))):
p, q = s_y[i], s_y[i + j]
dst = dist(p, q)
if dst < best:
best_pair = p, q
best = dst
return best_pair
closest_pair 的结尾如下所示:
p_1, q_1 = closest_pair(srt_q_x, srt_q_y)
p_2, q_2 = closest_pair(srt_r_x, srt_r_y)
closest = min(dist(p_1, q_1), dist(p_2, q_2))
# get min of both and then pass that as an arg to closest_split_pair
mn = min((p_1, q_1), (p_2, q_2), key=lambda x: dist(x[0], x[1]))
p_3, q_3 = closest_split_pair(p_x, p_y, closest,mn)
# either return mn or we have a closer split pair
return min(mn, (p_3, q_3), key=lambda x: dist(x[0], x[1]))
你还有其他一些逻辑问题,你的切片逻辑不正确,我对你的代码做了一些修改,其中 brute 只是一个简单的 bruteforce 双循环:
def closestPair(Px, Py):
if len(Px) <= 3:
return brute(Px)
mid = len(Px) / 2
# get left and right half of Px
q, r = Px[:mid], Px[mid:]
# sorted versions of q and r by their x and y coordinates
Qx, Qy = [x for x in q if Py and x[0] <= Px[-1][0]], [x for x in q if x[1] <= Py[-1][1]]
Rx, Ry = [x for x in r if Py and x[0] <= Px[-1][0]], [x for x in r if x[1] <= Py[-1][1]]
(p1, q1) = closestPair(Qx, Qy)
(p2, q2) = closestPair(Rx, Ry)
d = min(dist(p1, p2), dist(p2, q2))
mn = min((p1, q1), (p2, q2), key=lambda x: dist(x[0], x[1]))
(p3, q3) = closest_split_pair(Px, Py, d, mn)
return min(mn, (p3, q3), key=lambda x: dist(x[0], x[1]))
我今天刚做了这个算法,所以毫无疑问需要做一些改进,但这会给你正确的答案。
蛮力可以更快地使用 stdlib 函数。因此,它可以有效地应用于3点以上。
from itertools import combinations
def closest(points_list):
return min((dist(p1, p2), p1, p2)
for p1, p2 in combinations(points_list, r=2))
最有效的分点方式是分块。如果您没有异常值,您可以将 space 分成相等的部分,并仅比较相同或相邻图块中的点。 瓷砖的数量必须尽可能多。但是,为了避免孤立的图块,当每个点在相邻图块中没有点时,您必须通过点数限制图块的数量。 完整列表:
from math import sqrt
from itertools import combinations, product
from collections import defaultdict
import sys
max_float = sys.float_info.max
def dist((x1, y1), (x2, y2)):
return sqrt((x1 - x2) ** 2 + (y1 - y2) **2)
def closest(points_list):
if len(points_list) < 2:
return (max_float, None, None) # default value compatible with min function
return min((dist(p1, p2), p1, p2)
for p1, p2 in combinations(points_list, r=2))
def closest_between(pnt_lst1, pnt_lst2):
if not pnt_lst1 or not pnt_lst2:
return (max_float, None, None) # default value compatible with min function
return min((dist(p1, p2), p1, p2)
for p1, p2 in product(pnt_lst1, pnt_lst2))
def divide_on_tiles(points_list):
side = int(sqrt(len(points_list))) # number of tiles on one side of square
tiles = defaultdict(list)
min_x = min(x for x, y in points_list)
max_x = max(x for x, y in points_list)
min_y = min(x for x, y in points_list)
max_y = max(x for x, y in points_list)
tile_x_size = float(max_x - min_x) / side
tile_y_size = float(max_y - min_y) / side
for x, y in points_list:
x_tile = int((x - min_x) / tile_x_size)
y_tile = int((y - min_y) / tile_y_size)
tiles[(x_tile, y_tile)].append((x, y))
return tiles
def closest_for_tile(tiles, (x_tile, y_tile)):
points = tiles[(x_tile, y_tile)]
return min(closest(points),
# use dict.get to avoid creating empty tiles
# we compare current tile only with half of neighbours (right/top),
# because another half (left/bottom) make it in another iteration by themselves
closest_between(points, tiles.get((x_tile+1, y_tile))),
closest_between(points, tiles.get((x_tile, y_tile+1))),
closest_between(points, tiles.get((x_tile+1, y_tile+1))),
closest_between(points, tiles.get((x_tile-1, y_tile+1))))
def find_closest_in_tiles(tiles):
return min(closest_for_tile(tiles, coord) for coord in tiles.keys())
P1 = [(0,0),(7,6),(2,20),(12,5),(16,16),(5,8),(19,7),(14,22),(8,19),(7,29),(10,11),(1,13)]
P2 = [(94, 5), (96, -79), (20, 73), (8, -50), (78, 2), (100, 63), (-14, -69), (99, -8), (-11, -7), (-78, -46)]
print find_closest_in_tiles(divide_on_tiles(P1)) # (2.8284271247461903, (7, 6), (5, 8))
print find_closest_in_tiles(divide_on_tiles(P2)) # (13.92838827718412, (94, 5), (99, -8))
print find_closest_in_tiles(divide_on_tiles(P1 + P2)) # (2.8284271247461903, (7, 6), (5, 8))
这里是基于堆数据结构的最近点问题的递归分治python实现。它还占负整数。它可以通过使用 heappop() 在堆中弹出 k 个节点来 return k 个最近点。
from __future__ import division
from collections import namedtuple
from random import randint
import math as m
import heapq as hq
def get_key(item):
return(item[0])
def closest_point_problem(points):
point = []
heap = []
pt = namedtuple('pt', 'x y')
for i in range(len(points)):
point.append(pt(points[i][0], points[i][1]))
point = sorted(point, key=get_key)
visited_index = []
find_min(0, len(point) - 1, point, heap, visited_index)
print(hq.heappop(heap))
def find_min(start, end, point, heap, visited_index):
if len(point[start:end + 1]) & 1:
mid = start + ((len(point[start:end + 1]) + 1) >> 1)
else:
mid = start + (len(point[start:end + 1]) >> 1)
if start in visited_index:
start = start + 1
if end in visited_index:
end = end - 1
if len(point[start:end + 1]) > 3:
if start < mid - 1:
distance1 = m.sqrt((point[start].x - point[start + 1].x) ** 2 + (point[start].y - point[start + 1].y) ** 2)
distance2 = m.sqrt((point[mid].x - point[mid - 1].x) ** 2 + (point[mid].y - point[mid - 1].y) ** 2)
if distance1 < distance2:
hq.heappush(heap, (distance1, ((point[start].x, point[start].y), (point[start + 1].x, point[start + 1].y))))
else:
hq.heappush(heap, (distance2, ((point[mid].x, point[mid].y), (point[mid - 1].x, point[mid - 1].y))))
visited_index.append(start)
visited_index.append(start + 1)
visited_index.append(mid)
visited_index.append(mid - 1)
find_min(start, mid, point, heap, visited_index)
if mid + 1 < end:
distance1 = m.sqrt((point[mid].x - point[mid + 1].x) ** 2 + (point[mid].y - point[mid + 1].y) ** 2)
distance2 = m.sqrt((point[end].x - point[end - 1].x) ** 2 + (point[end].y - point[end - 1].y) ** 2)
if distance1 < distance2:
hq.heappush(heap, (distance1, ((point[mid].x, point[mid].y), (point[mid + 1].x, point[mid + 1].y))))
else:
hq.heappush(heap, (distance2, ((point[end].x, point[end].y), (point[end - 1].x, point[end - 1].y))))
visited_index.append(end)
visited_index.append(end - 1)
visited_index.append(mid)
visited_index.append(mid + 1)
find_min(mid, end, point, heap, visited_index)
x = []
num_points = 10
for i in range(num_points):
x.append((randint(- num_points << 2, num_points << 2), randint(- num_points << 2, num_points << 2)))
closest_point_problem(x)
:)
您只需将 closestSplitPair 函数 def 中的第七行从 best=(Sy[i],Sy[i+j])
更改为 best=dist(Sy[i],Sy[i+j])
,您将得到正确的 answer: ((94, 5), (99, -8), 13.92838827718412).
您错过了对 dist 函数的调用.
Padraic Cunningham 的回答指出这是第一个问题。
此致。