使用 Cython 处理可变大小的列表

Question

我想对 python implementation of the Sutherland-Hogman algorithm 进行 cythonise。该算法根据非常简单的规则（在边的内部或外部等）更新顶点列表，但细节并不重要。这是 python 版本，它接受顺时针方向的多边形顶点列表。例如那些：

sP=[(50, 150),  (200, 50),  (350, 150), (350, 300), (250, 300), (200, 250), (150, 350),(100, 250), (100, 200)]
cP=[(100, 100), (300, 100), (300, 300), (100, 300)]

并计算它们的交集：

inter=clip(sP, cP)

这是在 rosettacode 上找到的代码，如果没有交集，稍微修改为 return 一个空列表。

def clip(subjectPolygon, clipPolygon):
   def inside(p):
      return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])

   def computeIntersection():
      dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ]
      dp = [ s[0] - e[0], s[1] - e[1] ]
      n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
      n2 = s[0] * e[1] - s[1] * e[0] 
      n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
      return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3]

   outputList = subjectPolygon
   cp1 = clipPolygon[-1]

   for clipVertex in clipPolygon:
      cp2 = clipVertex
      inputList = outputList
      outputList = []
      s = inputList[-1]

      for subjectVertex in inputList:
         e = subjectVertex
         if inside(e):
            if not inside(s):
               outputList.append(computeIntersection())
            outputList.append(e)
         elif inside(s):
            outputList.append(computeIntersection())
         s = e
      if len(outputList)<1:
          return []
      cp1 = cp2
   return(outputList)

这个函数对于我的应用程序来说非常慢，所以我尝试使用 numpy 对其进行 cythonize。这是我的 cython 版本。我必须在 clip 外部定义这两个函数，因为我有关于缓冲区输入的错误消息。

cython1

cimport cython
import numpy as np
cimport numpy as np

def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):


    outputList = list(subjectPolygon)
    cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
    cdef np.ndarray[np.float32_t, ndim=1] cp2 

    for i in xrange(clipPolygon.shape[0]):
       cp2 = clipPolygon[i]
       inputList = outputList
       outputList = []
       s = inputList[-1]

       for subjectVertex in inputList:
          e = subjectVertex
          if inside(e, cp1, cp2):
             if not inside(s, cp1, cp2):
                outputList.append(computeIntersection(cp1, cp2, e, s))
             outputList.append(e)
          elif inside(s, cp1, cp2):
             outputList.append(computeIntersection(cp1, cp2, e, s))
          s = e
       if len(outputList)<1:
         return []
       cp1 = cp2

    return(outputList)


def computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
    cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
    cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0] 
    cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
    cdef np.ndarray[np.float32_t, ndim=1] res=np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
    return res

def inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    cdef bint b=(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
    return b

当我对这两个版本进行计时时，我只获得了两倍的加速，我需要至少 10 倍（或 100 倍！）。有事可做吗？如何用 Cython 处理列表？

编辑 1: 我听从了@DavidW 的建议，我分配了 numpy 数组并 trim 它们而不是使用列表，我现在正在使用 cdef 函数，这些函数应该带来 10 倍的加速，不幸的是我没有看到任何加速！

cython2

cimport cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
    return clip_in_c(subjectPolygon, clipPolygon)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.float32_t, ndim=2] clip_in_c(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):





    cdef int cp_size=clipPolygon.shape[0]
    cdef int outputList_effective_size=subjectPolygon.shape[0]
    cdef int inputList_effective_size=outputList_effective_size
    #We allocate a fixed size array of size 
    cdef int max_size_inter=outputList_effective_size*cp_size
    cdef int k=-1

    cdef np.ndarray[np.float32_t, ndim=2] outputList=np.empty((max_size_inter,2), dtype=np.float32)
    cdef np.ndarray[np.float32_t, ndim=2] inputList=np.empty((max_size_inter,2), dtype=np.float32)
    cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[cp_size-1,:]
    cdef np.ndarray[np.float32_t, ndim=1] cp2=np.empty((2,), dtype=np.float32)

    outputList[:outputList_effective_size]=subjectPolygon
    for i in xrange(cp_size):

        cp2 = clipPolygon[i]
        inputList[:outputList_effective_size] = outputList[:outputList_effective_size]
        inputList_effective_size=outputList_effective_size
        outputList_effective_size=0
        s = inputList[inputList_effective_size-1]

        for j in xrange(inputList_effective_size):
            e = inputList[j]
            if inside(e, cp1, cp2):
                if not inside(s, cp1, cp2):                    
                    k+=1
                    outputList[k]=computeIntersection(cp1, cp2, e, s)

                k+=1
                outputList[k]=e


            elif inside(s, cp1, cp2):
                k+=1
                outputList[k]=computeIntersection(cp1, cp2, e, s)

            s = e

        if k<0:
            return np.empty((0,0),dtype=np.float32)





        outputList_effective_size=k+1

        cp1 = cp2
        k=-1

    return outputList[:outputList_effective_size]


cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
    cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
    cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0] 
    cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
    return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)


cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    return (cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])

这是基准测试：

import numpy as np
from cython1 import clip_cython1
from cython2 import clip_cython2
import time


sp=np.array([[50, 150],[200,50],[350,150],[250,300],[200,250],[150,350],[100,250],[100,200]],dtype=np.float32)
cp=np.array([[100,100],[300,100],[300,300],[100,300]],dtype=np.float32)

t1=time.time()
for i in xrange(120000):
    a=clip_cython1(sp, cp)
t2=time.time()
print (t2-t1)

t1=time.time()
for i in xrange(120000):
    a=clip_cython2(sp, cp)
t2=time.time()
print (t2-t1)

39.45

44.12

第二个更惨！

EDIT 2 来自 CodeReview 的 @Peter Taylor 的最佳答案使用了这样一个事实，即每次计算 inside_s 它都是多余的，因为 s=e 而你已经计算 inside_e（并从函数中分解出 dc 和 n1，但这并没有多大帮助）。

cimport cython
import numpy as np
cimport numpy as np


def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):


    outputList = list(subjectPolygon)
    cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
    cdef np.ndarray[np.float32_t, ndim=1] cp2 
    cdef bint inside_e, inside_s
    cdef np.float32_t n1
    cdef np.ndarray[np.float32_t, ndim=1] dc 
    cdef int i



    for i in range(clipPolygon.shape[0]):

       cp2 = clipPolygon[i]
       #intermediate
       n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
       dc=cp1-cp2
       inputList = outputList
       outputList = []
       s = inputList[-1]

       inside_s=inside(s, cp1, dc)
       for index, subjectVertex in enumerate(inputList):

           e = subjectVertex
           inside_e=inside(e, cp1, dc)
           if inside_e:

               if not inside_s:
                   outputList.append(computeIntersection(dc, n1, e, s))
               outputList.append(e)

           elif inside_s:
               outputList.append(computeIntersection(dc, n1, e, s))

           s = e
           inside_s=inside_e

       if len(outputList)<1:
           return []
       cp1 = cp2

    return(outputList)


cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] dc, np.float32_t n1, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
    cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0] 
    cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
    return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)


cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] dc):
    return (-dc[0])*(p[1]-cp1[1]) > (-dc[1])*(p[0]-cp1[0])

混合两个版本（只有 numpy 数组和@Peter Taylor 的技巧效果稍差）。不知道为什么？可能是因为我们必须分配一长串 sP.shape[0]*cp.shape[0] ?

Answer 1

在弄乱了你的 Cython 代码之后，我发现找到你的库已经在其他地方实现了要容易得多，所以检查一下 scikit-image 版本，它只是几行 Numpy 代码和你正在寻找的算法来自 matplotlib:

import numpy as np
from matplotlib import path, transforms

def polygon_clip(rp, cp, r0, c0, r1, c1):
"""Clip a polygon to the given bounding box.

Parameters
----------
rp, cp : (N,) ndarray of double
    Row and column coordinates of the polygon.
(r0, c0), (r1, c1) : double
    Top-left and bottom-right coordinates of the bounding box.

Returns
-------
r_clipped, c_clipped : (M,) ndarray of double
    Coordinates of clipped polygon.

Notes
-----
This makes use of Sutherland-Hodgman clipping as implemented in
AGG 2.4 and exposed in Matplotlib.

"""
    poly = path.Path(np.vstack((rp, cp)).T, closed=True)
    clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
    poly_clipped = poly.clip_to_bbox(clip_rect).to_polygons()[0]

    # This should be fixed in matplotlib >1.5
    if np.all(poly_clipped[-1] == poly_clipped[-2]):
        poly_clipped = poly_clipped[:-1]

    return poly_clipped[:, 0], poly_clipped[:, 1]

如果没有别的，上面的内容应该更简单地转换为 Cython。

[更新] 从其他 Cython 答案分析中尝试这个包，它已经实现了从 C++ 到 Python 的多边形裁剪，称为 https://pypi.python.org/pypi/pyclipper 用法：

导入pyclipper

主题 = ( ((180, 200), (260, 200), (260, 150), (180, 150)), ((215, 160), (230, 190), (200, 190)) )

剪辑 = ((190, 210), (240, 210), (240, 130), (190, 130))

pc = pyclipper.Pyclipper() pc.AddPath（剪辑，pyclipper.PT_CLIP，正确） pc.AddPaths(主题, pyclipper.PT_SUBJECT, 真)

解决方案 = pc.Execute(pyclipper.CT_INTERSECTION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD)

以上速度与我糟糕的 AMD 工作 PC BTW 9us 上的快速 Cython 代码答案大致相同。

Answer 2

我的速度提高了 15 倍：

In [12]: timeit clippy.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 126 µs per loop
In [13]: timeit clippy.clip1(clippy.sP, clippy.cP)
10000 loops, best of 3: 75.9 µs per loop
In [14]: timeit myclip.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 47.1 µs per loop
In [15]: timeit myclip.clip1(clippy.sP, clippy.cP)
100000 loops, best of 3: 8.2 µs per loop

clippy.clip是你原来的函数。

clippy.clip1 也是 Python，但用元组解包替换了大部分列表索引。

def clip1(subjectPolygon, clipPolygon):

   def inside(p0,p1):
      return(cp20-cp10)*(p1-cp11) > (cp21-cp11)*(p0-cp10)

   def computeIntersection():
      dc0, dc1 = cp10 - cp20, cp11 - cp21
      dp0, dp1 =  s0 - e0, s1 - e1
      n1 = cp10 * cp21 - cp11 * cp20
      n2 = s0 * e1 - s1 * e0
      n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
      return [(n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3]

   outputList = subjectPolygon
   cp10, cp11 = clipPolygon[-1]

   for cp20, cp21 in clipPolygon:

      inputList = outputList
      #print(inputList)
      outputList = []
      s0,s1 = inputList[-1]
      s_in = inside(s0, s1)
      for e0, e1  in inputList:
         e_in = inside(e0, e1)
         if e_in:
            if not s_in:
               outputList.append(computeIntersection())
            outputList.append((e0, e1))
         elif s_in:
            outputList.append(computeIntersection())
         s0,s1,s_in = e0,e1,e_in
      if len(outputList)<1:
          return []
      cp10, cp11 = cp20, cp21
   return outputList

myclip.clip原来是cythonized；仍在使用列表，而不是数组。

myclip.clip1 是第二个 cythonized:

cdef computeIntersection1(double cp10, double cp11, double cp20, double cp21,double s0, double s1,double e0, double e1):
  dc0, dc1 = cp10 - cp20, cp11 - cp21
  dp0, dp1 =  s0 - e0, s1 - e1
  n1 = cp10 * cp21 - cp11 * cp20
  n2 = s0 * e1 - s1 * e0
  n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
  return (n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3

cdef cclip1(subjectPolygon, clipPolygon):
   cdef double cp10, cp11, cp20, cp21
   cdef double s0, s1, e0, e1
   cdef double s_in, e_in

   outputList = subjectPolygon
   cp10, cp11 = clipPolygon[-1]

   for cp20, cp21 in clipPolygon:

      inputList = outputList
      #print(inputList)
      outputList = []
      s0,s1 = inputList[-1]
      #s_in = inside(s0, s1)
      s_in = (cp20-cp10)*(s1-cp11) - (cp21-cp11)*(s0-cp10)
      for e0, e1  in inputList:
         #e_in = inside(e0, e1)
         e_in = (cp20-cp10)*(e1-cp11) - (cp21-cp11)*(e0-cp10)
         if e_in>0:
            if s_in<=0:
               outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
            outputList.append((e0, e1))
         elif s_in>0:
            outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
         s0,s1,s_in = e0,e1,e_in
      if len(outputList)<1:
          return []
      cp10, cp11 = cp20, cp21
   return outputList

def clip1(subjectPolygon, clipPolygon):
    return cclip1(subjectPolygon, clipPolygon)

注解html的-a还是偏黄，但大部分计算不需要Python。在 compute 函数中有一个 Python 检查 0 除数，并且 Python 调用构建 return 元组。并且元组解包仍然调用Python。所以还有改进的空间。

在 Python 代码中，使用 numpy 没有任何优势。列表很小，列表元素访问速度更快。但在 cython 中，数组可能是 typed memoryviews 和纯 C 代码的垫脚石。

其他时间。

您的第二次编辑：

In [24]: timeit edit2.clip(np.array(clippy.sP,np.float32), np.array(clippy.cP,np
    ...: .float32))
1000 loops, best of 3: 228 µs per loop

@Matt's边界框

In [25]: timeit clippy.polygon_clip(clippy.rp,clippy.cp,100,100,300,300)
1000 loops, best of 3: 208 µs per loop

分机 class

我通过定义扩展来清理代码 class

cdef class Point:
    cdef public double x, y
    def __init__(self, x, y):
        self.x = x
        self.y = y

这让我可以这样写：

  s = inputList[-1]
  s_in =  insideP(s, cp1, cp2)

'cover' 函数必须将元组列表转换为点列表和 v.v。

sP = [Point(*x) for x in subjectPolygon]

这会有轻微的速度损失。

使用 Cython 处理可变大小的列表

Working with variable sized lists with Cython

python

algorithm

numpy

cython

cythonize

分机 class