将 numpy 中的 Ramer–Douglas–Peucker (RDP) 算法修改为 return 掩码而不是值

Modify the Ramer–Douglas–Peucker (RDP) algorithm in numpy to return a mask instead of the values

我正在尝试将 numpy implementation of the Ramer–Douglas–Peucker (RDP) algorithm 修改为 return 过滤值的掩码,而不是具有这些值的过滤数组。

问题是如何递归地return正确的掩码。

嗯,有助于正确理解算法。如果每个递归调用中第一个和最后一个段之间的所有点都在 epsilon 内,则这些点应标记为 False,而开始和结束应为 True。这是我的解决方案:

import numpy as np

def pldist(x0, x1, x2):
    return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])),
                     np.linalg.norm(x2 - x1))

def rdp_index(M, epsilon=0, dist=pldist):

    dmax = 0.0
    index = -1

    for i in xrange(1, M.shape[0]):
        d = dist(M[i], M[0], M[-1])

        if d > dmax:
            index = i
            dmax = d

    if dmax > epsilon:
        r1 = rdp_index(M[:index + 1], epsilon, dist)
        r2 = rdp_index(M[index:], epsilon, dist)

        return np.vstack((r1[:-1], r2))
    else:
        part_mask = np.empty_like(M, dtype = bool)
        part_mask.fill(False)
        part_mask[0] = True
        part_mask[-1] = True
        return part_mask

请注意,此实现使用递归,并且此解决方案在处理大型数据集时出现问题 。 IE。超过了递归调用的最大数量。下面是一个使用堆栈和 while 循环而不是递归调用的解决方案。此外,距离的计算效率更高一些。

def dsquared_line_points(P1, P2, points):
    '''
    Calculate only squared distance, only needed for comparison
    http://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line
    '''
    xdiff = P2[0] - P1[0]
    ydiff = P2[1] - P1[1]
    nom  = (
        ydiff*points[:,0] - \
        xdiff*points[:,1] + \
        P2[0]*P1[1] - \
        P2[1]*P1[0]
    )**2
    denom = ydiff**2 + xdiff**2
    return np.divide(nom, denom)

def rdp_numpy(M, epsilon = 0):

    # initiate mask array
    # same amount of points
    mask = np.empty(M.shape[0], dtype = bool)

    # Assume all points are valid and falsify those which are found
    mask.fill(True)

    # The stack to select start and end index
    stack = [(0 , M.shape[0]-1)]

    while (len(stack) > 0):
        # Pop the last item
        (start, end) = stack.pop()

        # nothing to calculate if no points in between
        if end - start <= 1:
            continue

        # Calculate distance to points
        P1 = M[start]
        P2 = M[end]
        points = M[start + 1:end]
        dsq = dsquared_line_points(P1, P2, points)

        mask_eps = dsq > epsilon**2

        if mask_eps.any():
            # max point outside eps
            # Include index that was sliced out
            # Also include the start index to get absolute index
            # And not relative 
            mid = np.argmax(dsq) + 1 + start
            stack.append((start, mid))
            stack.append((mid, end))

        else:
            # Points in between are redundant
            mask[start + 1:end] = False

    return mask