将 numpy 中的 Ramer–Douglas–Peucker (RDP) 算法修改为 return 掩码而不是值
Modify the Ramer–Douglas–Peucker (RDP) algorithm in numpy to return a mask instead of the values
我正在尝试将 numpy implementation of the Ramer–Douglas–Peucker (RDP) algorithm 修改为 return 过滤值的掩码,而不是具有这些值的过滤数组。
问题是如何递归地return正确的掩码。
嗯,有助于正确理解算法。如果每个递归调用中第一个和最后一个段之间的所有点都在 epsilon 内,则这些点应标记为 False,而开始和结束应为 True。这是我的解决方案:
import numpy as np
def pldist(x0, x1, x2):
return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])),
np.linalg.norm(x2 - x1))
def rdp_index(M, epsilon=0, dist=pldist):
dmax = 0.0
index = -1
for i in xrange(1, M.shape[0]):
d = dist(M[i], M[0], M[-1])
if d > dmax:
index = i
dmax = d
if dmax > epsilon:
r1 = rdp_index(M[:index + 1], epsilon, dist)
r2 = rdp_index(M[index:], epsilon, dist)
return np.vstack((r1[:-1], r2))
else:
part_mask = np.empty_like(M, dtype = bool)
part_mask.fill(False)
part_mask[0] = True
part_mask[-1] = True
return part_mask
请注意,此实现使用递归,并且此解决方案在处理大型数据集时出现问题 。 IE。超过了递归调用的最大数量。下面是一个使用堆栈和 while 循环而不是递归调用的解决方案。此外,距离的计算效率更高一些。
def dsquared_line_points(P1, P2, points):
'''
Calculate only squared distance, only needed for comparison
http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
'''
xdiff = P2[0] - P1[0]
ydiff = P2[1] - P1[1]
nom = (
ydiff*points[:,0] - \
xdiff*points[:,1] + \
P2[0]*P1[1] - \
P2[1]*P1[0]
)**2
denom = ydiff**2 + xdiff**2
return np.divide(nom, denom)
def rdp_numpy(M, epsilon = 0):
# initiate mask array
# same amount of points
mask = np.empty(M.shape[0], dtype = bool)
# Assume all points are valid and falsify those which are found
mask.fill(True)
# The stack to select start and end index
stack = [(0 , M.shape[0]-1)]
while (len(stack) > 0):
# Pop the last item
(start, end) = stack.pop()
# nothing to calculate if no points in between
if end - start <= 1:
continue
# Calculate distance to points
P1 = M[start]
P2 = M[end]
points = M[start + 1:end]
dsq = dsquared_line_points(P1, P2, points)
mask_eps = dsq > epsilon**2
if mask_eps.any():
# max point outside eps
# Include index that was sliced out
# Also include the start index to get absolute index
# And not relative
mid = np.argmax(dsq) + 1 + start
stack.append((start, mid))
stack.append((mid, end))
else:
# Points in between are redundant
mask[start + 1:end] = False
return mask
我正在尝试将 numpy implementation of the Ramer–Douglas–Peucker (RDP) algorithm 修改为 return 过滤值的掩码,而不是具有这些值的过滤数组。
问题是如何递归地return正确的掩码。
嗯,有助于正确理解算法。如果每个递归调用中第一个和最后一个段之间的所有点都在 epsilon 内,则这些点应标记为 False,而开始和结束应为 True。这是我的解决方案:
import numpy as np
def pldist(x0, x1, x2):
return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])),
np.linalg.norm(x2 - x1))
def rdp_index(M, epsilon=0, dist=pldist):
dmax = 0.0
index = -1
for i in xrange(1, M.shape[0]):
d = dist(M[i], M[0], M[-1])
if d > dmax:
index = i
dmax = d
if dmax > epsilon:
r1 = rdp_index(M[:index + 1], epsilon, dist)
r2 = rdp_index(M[index:], epsilon, dist)
return np.vstack((r1[:-1], r2))
else:
part_mask = np.empty_like(M, dtype = bool)
part_mask.fill(False)
part_mask[0] = True
part_mask[-1] = True
return part_mask
请注意,此实现使用递归,并且此解决方案在处理大型数据集时出现问题 。 IE。超过了递归调用的最大数量。下面是一个使用堆栈和 while 循环而不是递归调用的解决方案。此外,距离的计算效率更高一些。
def dsquared_line_points(P1, P2, points):
'''
Calculate only squared distance, only needed for comparison
http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
'''
xdiff = P2[0] - P1[0]
ydiff = P2[1] - P1[1]
nom = (
ydiff*points[:,0] - \
xdiff*points[:,1] + \
P2[0]*P1[1] - \
P2[1]*P1[0]
)**2
denom = ydiff**2 + xdiff**2
return np.divide(nom, denom)
def rdp_numpy(M, epsilon = 0):
# initiate mask array
# same amount of points
mask = np.empty(M.shape[0], dtype = bool)
# Assume all points are valid and falsify those which are found
mask.fill(True)
# The stack to select start and end index
stack = [(0 , M.shape[0]-1)]
while (len(stack) > 0):
# Pop the last item
(start, end) = stack.pop()
# nothing to calculate if no points in between
if end - start <= 1:
continue
# Calculate distance to points
P1 = M[start]
P2 = M[end]
points = M[start + 1:end]
dsq = dsquared_line_points(P1, P2, points)
mask_eps = dsq > epsilon**2
if mask_eps.any():
# max point outside eps
# Include index that was sliced out
# Also include the start index to get absolute index
# And not relative
mid = np.argmax(dsq) + 1 + start
stack.append((start, mid))
stack.append((mid, end))
else:
# Points in between are redundant
mask[start + 1:end] = False
return mask