使用 Cython 处理可变大小的列表
Working with variable sized lists with Cython
我想对 python implementation of the Sutherland-Hogman algorithm 进行 cythonise。该算法根据非常简单的规则(在边的内部或外部等)更新顶点列表,但细节并不重要。这是 python 版本,它接受顺时针方向的多边形顶点列表。例如那些:
sP=[(50, 150), (200, 50), (350, 150), (350, 300), (250, 300), (200, 250), (150, 350),(100, 250), (100, 200)]
cP=[(100, 100), (300, 100), (300, 300), (100, 300)]
并计算它们的交集:
inter=clip(sP, cP)
这是在 rosettacode 上找到的代码,如果没有交集,稍微修改为 return 一个空列表。
def clip(subjectPolygon, clipPolygon):
def inside(p):
return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
def computeIntersection():
dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ]
dp = [ s[0] - e[0], s[1] - e[1] ]
n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
n2 = s[0] * e[1] - s[1] * e[0]
n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3]
outputList = subjectPolygon
cp1 = clipPolygon[-1]
for clipVertex in clipPolygon:
cp2 = clipVertex
inputList = outputList
outputList = []
s = inputList[-1]
for subjectVertex in inputList:
e = subjectVertex
if inside(e):
if not inside(s):
outputList.append(computeIntersection())
outputList.append(e)
elif inside(s):
outputList.append(computeIntersection())
s = e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
这个函数对于我的应用程序来说非常慢,所以我尝试使用 numpy 对其进行 cythonize。这是我的 cython 版本。我必须在 clip 外部定义这两个函数,因为我有关于缓冲区输入的错误消息。
cython1
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
outputList = list(subjectPolygon)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2
for i in xrange(clipPolygon.shape[0]):
cp2 = clipPolygon[i]
inputList = outputList
outputList = []
s = inputList[-1]
for subjectVertex in inputList:
e = subjectVertex
if inside(e, cp1, cp2):
if not inside(s, cp1, cp2):
outputList.append(computeIntersection(cp1, cp2, e, s))
outputList.append(e)
elif inside(s, cp1, cp2):
outputList.append(computeIntersection(cp1, cp2, e, s))
s = e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
def computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
cdef np.ndarray[np.float32_t, ndim=1] res=np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
return res
def inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
cdef bint b=(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
return b
当我对这两个版本进行计时时,我只获得了两倍的加速,我需要至少 10 倍(或 100 倍!)。有事可做吗?
如何用 Cython 处理列表?
编辑 1: 我听从了@DavidW 的建议,我分配了 numpy 数组并 trim 它们而不是使用列表,我现在正在使用 cdef 函数,这些函数应该带来 10 倍的加速,不幸的是我没有看到任何加速!
cython2
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
return clip_in_c(subjectPolygon, clipPolygon)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.float32_t, ndim=2] clip_in_c(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
cdef int cp_size=clipPolygon.shape[0]
cdef int outputList_effective_size=subjectPolygon.shape[0]
cdef int inputList_effective_size=outputList_effective_size
#We allocate a fixed size array of size
cdef int max_size_inter=outputList_effective_size*cp_size
cdef int k=-1
cdef np.ndarray[np.float32_t, ndim=2] outputList=np.empty((max_size_inter,2), dtype=np.float32)
cdef np.ndarray[np.float32_t, ndim=2] inputList=np.empty((max_size_inter,2), dtype=np.float32)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[cp_size-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2=np.empty((2,), dtype=np.float32)
outputList[:outputList_effective_size]=subjectPolygon
for i in xrange(cp_size):
cp2 = clipPolygon[i]
inputList[:outputList_effective_size] = outputList[:outputList_effective_size]
inputList_effective_size=outputList_effective_size
outputList_effective_size=0
s = inputList[inputList_effective_size-1]
for j in xrange(inputList_effective_size):
e = inputList[j]
if inside(e, cp1, cp2):
if not inside(s, cp1, cp2):
k+=1
outputList[k]=computeIntersection(cp1, cp2, e, s)
k+=1
outputList[k]=e
elif inside(s, cp1, cp2):
k+=1
outputList[k]=computeIntersection(cp1, cp2, e, s)
s = e
if k<0:
return np.empty((0,0),dtype=np.float32)
outputList_effective_size=k+1
cp1 = cp2
k=-1
return outputList[:outputList_effective_size]
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
return (cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
这是基准测试:
import numpy as np
from cython1 import clip_cython1
from cython2 import clip_cython2
import time
sp=np.array([[50, 150],[200,50],[350,150],[250,300],[200,250],[150,350],[100,250],[100,200]],dtype=np.float32)
cp=np.array([[100,100],[300,100],[300,300],[100,300]],dtype=np.float32)
t1=time.time()
for i in xrange(120000):
a=clip_cython1(sp, cp)
t2=time.time()
print (t2-t1)
t1=time.time()
for i in xrange(120000):
a=clip_cython2(sp, cp)
t2=time.time()
print (t2-t1)
39.45
44.12
第二个更惨!
EDIT 2 来自 CodeReview 的 @Peter Taylor 的最佳答案使用了这样一个事实,即每次计算 inside_s 它都是多余的,因为 s=e 而你已经计算 inside_e(并从函数中分解出 dc 和 n1,但这并没有多大帮助)。
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
outputList = list(subjectPolygon)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2
cdef bint inside_e, inside_s
cdef np.float32_t n1
cdef np.ndarray[np.float32_t, ndim=1] dc
cdef int i
for i in range(clipPolygon.shape[0]):
cp2 = clipPolygon[i]
#intermediate
n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
dc=cp1-cp2
inputList = outputList
outputList = []
s = inputList[-1]
inside_s=inside(s, cp1, dc)
for index, subjectVertex in enumerate(inputList):
e = subjectVertex
inside_e=inside(e, cp1, dc)
if inside_e:
if not inside_s:
outputList.append(computeIntersection(dc, n1, e, s))
outputList.append(e)
elif inside_s:
outputList.append(computeIntersection(dc, n1, e, s))
s = e
inside_s=inside_e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] dc, np.float32_t n1, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] dc):
return (-dc[0])*(p[1]-cp1[1]) > (-dc[1])*(p[0]-cp1[0])
混合两个版本(只有 numpy 数组和@Peter Taylor 的技巧效果稍差)。不知道为什么?可能是因为我们必须分配一长串 sP.shape[0]*cp.shape[0] ?
在弄乱了你的 Cython 代码之后,我发现找到你的库已经在其他地方实现了要容易得多,所以检查一下 scikit-image 版本,它只是几行 Numpy 代码和你正在寻找的算法来自 matplotlib:
import numpy as np
from matplotlib import path, transforms
def polygon_clip(rp, cp, r0, c0, r1, c1):
"""Clip a polygon to the given bounding box.
Parameters
----------
rp, cp : (N,) ndarray of double
Row and column coordinates of the polygon.
(r0, c0), (r1, c1) : double
Top-left and bottom-right coordinates of the bounding box.
Returns
-------
r_clipped, c_clipped : (M,) ndarray of double
Coordinates of clipped polygon.
Notes
-----
This makes use of Sutherland-Hodgman clipping as implemented in
AGG 2.4 and exposed in Matplotlib.
"""
poly = path.Path(np.vstack((rp, cp)).T, closed=True)
clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
poly_clipped = poly.clip_to_bbox(clip_rect).to_polygons()[0]
# This should be fixed in matplotlib >1.5
if np.all(poly_clipped[-1] == poly_clipped[-2]):
poly_clipped = poly_clipped[:-1]
return poly_clipped[:, 0], poly_clipped[:, 1]
如果没有别的,上面的内容应该更简单地转换为 Cython。
[更新]
从其他 Cython 答案分析中尝试这个包,它已经实现了从 C++ 到 Python 的多边形裁剪,称为 https://pypi.python.org/pypi/pyclipper 用法:
导入pyclipper
主题 = (
((180, 200), (260, 200), (260, 150), (180, 150)),
((215, 160), (230, 190), (200, 190))
)
剪辑 = ((190, 210), (240, 210), (240, 130), (190, 130))
pc = pyclipper.Pyclipper()
pc.AddPath(剪辑,pyclipper.PT_CLIP,正确)
pc.AddPaths(主题, pyclipper.PT_SUBJECT, 真)
解决方案 = pc.Execute(pyclipper.CT_INTERSECTION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD)
以上速度与我糟糕的 AMD 工作 PC BTW 9us 上的快速 Cython 代码答案大致相同。
我的速度提高了 15 倍:
In [12]: timeit clippy.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 126 µs per loop
In [13]: timeit clippy.clip1(clippy.sP, clippy.cP)
10000 loops, best of 3: 75.9 µs per loop
In [14]: timeit myclip.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 47.1 µs per loop
In [15]: timeit myclip.clip1(clippy.sP, clippy.cP)
100000 loops, best of 3: 8.2 µs per loop
clippy.clip
是你原来的函数。
clippy.clip1
也是 Python,但用元组解包替换了大部分列表索引。
def clip1(subjectPolygon, clipPolygon):
def inside(p0,p1):
return(cp20-cp10)*(p1-cp11) > (cp21-cp11)*(p0-cp10)
def computeIntersection():
dc0, dc1 = cp10 - cp20, cp11 - cp21
dp0, dp1 = s0 - e0, s1 - e1
n1 = cp10 * cp21 - cp11 * cp20
n2 = s0 * e1 - s1 * e0
n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
return [(n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3]
outputList = subjectPolygon
cp10, cp11 = clipPolygon[-1]
for cp20, cp21 in clipPolygon:
inputList = outputList
#print(inputList)
outputList = []
s0,s1 = inputList[-1]
s_in = inside(s0, s1)
for e0, e1 in inputList:
e_in = inside(e0, e1)
if e_in:
if not s_in:
outputList.append(computeIntersection())
outputList.append((e0, e1))
elif s_in:
outputList.append(computeIntersection())
s0,s1,s_in = e0,e1,e_in
if len(outputList)<1:
return []
cp10, cp11 = cp20, cp21
return outputList
myclip.clip
原来是cythonized
;仍在使用列表,而不是数组。
myclip.clip1
是第二个 cythonized
:
cdef computeIntersection1(double cp10, double cp11, double cp20, double cp21,double s0, double s1,double e0, double e1):
dc0, dc1 = cp10 - cp20, cp11 - cp21
dp0, dp1 = s0 - e0, s1 - e1
n1 = cp10 * cp21 - cp11 * cp20
n2 = s0 * e1 - s1 * e0
n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
return (n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3
cdef cclip1(subjectPolygon, clipPolygon):
cdef double cp10, cp11, cp20, cp21
cdef double s0, s1, e0, e1
cdef double s_in, e_in
outputList = subjectPolygon
cp10, cp11 = clipPolygon[-1]
for cp20, cp21 in clipPolygon:
inputList = outputList
#print(inputList)
outputList = []
s0,s1 = inputList[-1]
#s_in = inside(s0, s1)
s_in = (cp20-cp10)*(s1-cp11) - (cp21-cp11)*(s0-cp10)
for e0, e1 in inputList:
#e_in = inside(e0, e1)
e_in = (cp20-cp10)*(e1-cp11) - (cp21-cp11)*(e0-cp10)
if e_in>0:
if s_in<=0:
outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
outputList.append((e0, e1))
elif s_in>0:
outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
s0,s1,s_in = e0,e1,e_in
if len(outputList)<1:
return []
cp10, cp11 = cp20, cp21
return outputList
def clip1(subjectPolygon, clipPolygon):
return cclip1(subjectPolygon, clipPolygon)
注解html
的-a
还是偏黄,但大部分计算不需要Python。在 compute
函数中有一个 Python 检查 0 除数,并且 Python 调用构建 return 元组。并且元组解包仍然调用Python。所以还有改进的空间。
在 Python 代码中,使用 numpy
没有任何优势。列表很小,列表元素访问速度更快。但在 cython
中,数组可能是 typed memoryviews
和纯 C 代码的垫脚石。
其他时间。
您的第二次编辑:
In [24]: timeit edit2.clip(np.array(clippy.sP,np.float32), np.array(clippy.cP,np
...: .float32))
1000 loops, best of 3: 228 µs per loop
@Matt's
边界框
In [25]: timeit clippy.polygon_clip(clippy.rp,clippy.cp,100,100,300,300)
1000 loops, best of 3: 208 µs per loop
分机 class
我通过定义扩展来清理代码 class
cdef class Point:
cdef public double x, y
def __init__(self, x, y):
self.x = x
self.y = y
这让我可以这样写:
s = inputList[-1]
s_in = insideP(s, cp1, cp2)
'cover' 函数必须将元组列表转换为点列表和 v.v。
sP = [Point(*x) for x in subjectPolygon]
这会有轻微的速度损失。
我想对 python implementation of the Sutherland-Hogman algorithm 进行 cythonise。该算法根据非常简单的规则(在边的内部或外部等)更新顶点列表,但细节并不重要。这是 python 版本,它接受顺时针方向的多边形顶点列表。例如那些:
sP=[(50, 150), (200, 50), (350, 150), (350, 300), (250, 300), (200, 250), (150, 350),(100, 250), (100, 200)]
cP=[(100, 100), (300, 100), (300, 300), (100, 300)]
并计算它们的交集:
inter=clip(sP, cP)
这是在 rosettacode 上找到的代码,如果没有交集,稍微修改为 return 一个空列表。
def clip(subjectPolygon, clipPolygon):
def inside(p):
return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
def computeIntersection():
dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ]
dp = [ s[0] - e[0], s[1] - e[1] ]
n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
n2 = s[0] * e[1] - s[1] * e[0]
n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3]
outputList = subjectPolygon
cp1 = clipPolygon[-1]
for clipVertex in clipPolygon:
cp2 = clipVertex
inputList = outputList
outputList = []
s = inputList[-1]
for subjectVertex in inputList:
e = subjectVertex
if inside(e):
if not inside(s):
outputList.append(computeIntersection())
outputList.append(e)
elif inside(s):
outputList.append(computeIntersection())
s = e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
这个函数对于我的应用程序来说非常慢,所以我尝试使用 numpy 对其进行 cythonize。这是我的 cython 版本。我必须在 clip 外部定义这两个函数,因为我有关于缓冲区输入的错误消息。
cython1
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
outputList = list(subjectPolygon)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2
for i in xrange(clipPolygon.shape[0]):
cp2 = clipPolygon[i]
inputList = outputList
outputList = []
s = inputList[-1]
for subjectVertex in inputList:
e = subjectVertex
if inside(e, cp1, cp2):
if not inside(s, cp1, cp2):
outputList.append(computeIntersection(cp1, cp2, e, s))
outputList.append(e)
elif inside(s, cp1, cp2):
outputList.append(computeIntersection(cp1, cp2, e, s))
s = e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
def computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
cdef np.ndarray[np.float32_t, ndim=1] res=np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
return res
def inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
cdef bint b=(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
return b
当我对这两个版本进行计时时,我只获得了两倍的加速,我需要至少 10 倍(或 100 倍!)。有事可做吗? 如何用 Cython 处理列表?
编辑 1: 我听从了@DavidW 的建议,我分配了 numpy 数组并 trim 它们而不是使用列表,我现在正在使用 cdef 函数,这些函数应该带来 10 倍的加速,不幸的是我没有看到任何加速!
cython2
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
return clip_in_c(subjectPolygon, clipPolygon)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.float32_t, ndim=2] clip_in_c(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
cdef int cp_size=clipPolygon.shape[0]
cdef int outputList_effective_size=subjectPolygon.shape[0]
cdef int inputList_effective_size=outputList_effective_size
#We allocate a fixed size array of size
cdef int max_size_inter=outputList_effective_size*cp_size
cdef int k=-1
cdef np.ndarray[np.float32_t, ndim=2] outputList=np.empty((max_size_inter,2), dtype=np.float32)
cdef np.ndarray[np.float32_t, ndim=2] inputList=np.empty((max_size_inter,2), dtype=np.float32)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[cp_size-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2=np.empty((2,), dtype=np.float32)
outputList[:outputList_effective_size]=subjectPolygon
for i in xrange(cp_size):
cp2 = clipPolygon[i]
inputList[:outputList_effective_size] = outputList[:outputList_effective_size]
inputList_effective_size=outputList_effective_size
outputList_effective_size=0
s = inputList[inputList_effective_size-1]
for j in xrange(inputList_effective_size):
e = inputList[j]
if inside(e, cp1, cp2):
if not inside(s, cp1, cp2):
k+=1
outputList[k]=computeIntersection(cp1, cp2, e, s)
k+=1
outputList[k]=e
elif inside(s, cp1, cp2):
k+=1
outputList[k]=computeIntersection(cp1, cp2, e, s)
s = e
if k<0:
return np.empty((0,0),dtype=np.float32)
outputList_effective_size=k+1
cp1 = cp2
k=-1
return outputList[:outputList_effective_size]
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dc = cp1-cp2
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
return (cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0])
这是基准测试:
import numpy as np
from cython1 import clip_cython1
from cython2 import clip_cython2
import time
sp=np.array([[50, 150],[200,50],[350,150],[250,300],[200,250],[150,350],[100,250],[100,200]],dtype=np.float32)
cp=np.array([[100,100],[300,100],[300,300],[100,300]],dtype=np.float32)
t1=time.time()
for i in xrange(120000):
a=clip_cython1(sp, cp)
t2=time.time()
print (t2-t1)
t1=time.time()
for i in xrange(120000):
a=clip_cython2(sp, cp)
t2=time.time()
print (t2-t1)
39.45
44.12
第二个更惨!
EDIT 2 来自 CodeReview 的 @Peter Taylor 的最佳答案使用了这样一个事实,即每次计算 inside_s 它都是多余的,因为 s=e 而你已经计算 inside_e(并从函数中分解出 dc 和 n1,但这并没有多大帮助)。
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
outputList = list(subjectPolygon)
cdef np.ndarray[np.float32_t, ndim=1] cp1 = clipPolygon[-1,:]
cdef np.ndarray[np.float32_t, ndim=1] cp2
cdef bint inside_e, inside_s
cdef np.float32_t n1
cdef np.ndarray[np.float32_t, ndim=1] dc
cdef int i
for i in range(clipPolygon.shape[0]):
cp2 = clipPolygon[i]
#intermediate
n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
dc=cp1-cp2
inputList = outputList
outputList = []
s = inputList[-1]
inside_s=inside(s, cp1, dc)
for index, subjectVertex in enumerate(inputList):
e = subjectVertex
inside_e=inside(e, cp1, dc)
if inside_e:
if not inside_s:
outputList.append(computeIntersection(dc, n1, e, s))
outputList.append(e)
elif inside_s:
outputList.append(computeIntersection(dc, n1, e, s))
s = e
inside_s=inside_e
if len(outputList)<1:
return []
cp1 = cp2
return(outputList)
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] dc, np.float32_t n1, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
cdef np.ndarray[np.float32_t, ndim=1] dp = s-e
cdef np.float32_t n2 = s[0] * e[1] - s[1] * e[0]
cdef np.float32_t n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
return np.array([(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3], dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] dc):
return (-dc[0])*(p[1]-cp1[1]) > (-dc[1])*(p[0]-cp1[0])
混合两个版本(只有 numpy 数组和@Peter Taylor 的技巧效果稍差)。不知道为什么?可能是因为我们必须分配一长串 sP.shape[0]*cp.shape[0] ?
在弄乱了你的 Cython 代码之后,我发现找到你的库已经在其他地方实现了要容易得多,所以检查一下 scikit-image 版本,它只是几行 Numpy 代码和你正在寻找的算法来自 matplotlib:
import numpy as np
from matplotlib import path, transforms
def polygon_clip(rp, cp, r0, c0, r1, c1):
"""Clip a polygon to the given bounding box.
Parameters
----------
rp, cp : (N,) ndarray of double
Row and column coordinates of the polygon.
(r0, c0), (r1, c1) : double
Top-left and bottom-right coordinates of the bounding box.
Returns
-------
r_clipped, c_clipped : (M,) ndarray of double
Coordinates of clipped polygon.
Notes
-----
This makes use of Sutherland-Hodgman clipping as implemented in
AGG 2.4 and exposed in Matplotlib.
"""
poly = path.Path(np.vstack((rp, cp)).T, closed=True)
clip_rect = transforms.Bbox([[r0, c0], [r1, c1]])
poly_clipped = poly.clip_to_bbox(clip_rect).to_polygons()[0]
# This should be fixed in matplotlib >1.5
if np.all(poly_clipped[-1] == poly_clipped[-2]):
poly_clipped = poly_clipped[:-1]
return poly_clipped[:, 0], poly_clipped[:, 1]
如果没有别的,上面的内容应该更简单地转换为 Cython。
[更新] 从其他 Cython 答案分析中尝试这个包,它已经实现了从 C++ 到 Python 的多边形裁剪,称为 https://pypi.python.org/pypi/pyclipper 用法:
导入pyclipper
主题 = ( ((180, 200), (260, 200), (260, 150), (180, 150)), ((215, 160), (230, 190), (200, 190)) )
剪辑 = ((190, 210), (240, 210), (240, 130), (190, 130))
pc = pyclipper.Pyclipper() pc.AddPath(剪辑,pyclipper.PT_CLIP,正确) pc.AddPaths(主题, pyclipper.PT_SUBJECT, 真)
解决方案 = pc.Execute(pyclipper.CT_INTERSECTION, pyclipper.PFT_EVENODD, pyclipper.PFT_EVENODD)
以上速度与我糟糕的 AMD 工作 PC BTW 9us 上的快速 Cython 代码答案大致相同。
我的速度提高了 15 倍:
In [12]: timeit clippy.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 126 µs per loop
In [13]: timeit clippy.clip1(clippy.sP, clippy.cP)
10000 loops, best of 3: 75.9 µs per loop
In [14]: timeit myclip.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 47.1 µs per loop
In [15]: timeit myclip.clip1(clippy.sP, clippy.cP)
100000 loops, best of 3: 8.2 µs per loop
clippy.clip
是你原来的函数。
clippy.clip1
也是 Python,但用元组解包替换了大部分列表索引。
def clip1(subjectPolygon, clipPolygon):
def inside(p0,p1):
return(cp20-cp10)*(p1-cp11) > (cp21-cp11)*(p0-cp10)
def computeIntersection():
dc0, dc1 = cp10 - cp20, cp11 - cp21
dp0, dp1 = s0 - e0, s1 - e1
n1 = cp10 * cp21 - cp11 * cp20
n2 = s0 * e1 - s1 * e0
n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
return [(n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3]
outputList = subjectPolygon
cp10, cp11 = clipPolygon[-1]
for cp20, cp21 in clipPolygon:
inputList = outputList
#print(inputList)
outputList = []
s0,s1 = inputList[-1]
s_in = inside(s0, s1)
for e0, e1 in inputList:
e_in = inside(e0, e1)
if e_in:
if not s_in:
outputList.append(computeIntersection())
outputList.append((e0, e1))
elif s_in:
outputList.append(computeIntersection())
s0,s1,s_in = e0,e1,e_in
if len(outputList)<1:
return []
cp10, cp11 = cp20, cp21
return outputList
myclip.clip
原来是cythonized
;仍在使用列表,而不是数组。
myclip.clip1
是第二个 cythonized
:
cdef computeIntersection1(double cp10, double cp11, double cp20, double cp21,double s0, double s1,double e0, double e1):
dc0, dc1 = cp10 - cp20, cp11 - cp21
dp0, dp1 = s0 - e0, s1 - e1
n1 = cp10 * cp21 - cp11 * cp20
n2 = s0 * e1 - s1 * e0
n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
return (n1*dp0 - n2*dc0) * n3, (n1*dp1 - n2*dc1) * n3
cdef cclip1(subjectPolygon, clipPolygon):
cdef double cp10, cp11, cp20, cp21
cdef double s0, s1, e0, e1
cdef double s_in, e_in
outputList = subjectPolygon
cp10, cp11 = clipPolygon[-1]
for cp20, cp21 in clipPolygon:
inputList = outputList
#print(inputList)
outputList = []
s0,s1 = inputList[-1]
#s_in = inside(s0, s1)
s_in = (cp20-cp10)*(s1-cp11) - (cp21-cp11)*(s0-cp10)
for e0, e1 in inputList:
#e_in = inside(e0, e1)
e_in = (cp20-cp10)*(e1-cp11) - (cp21-cp11)*(e0-cp10)
if e_in>0:
if s_in<=0:
outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
outputList.append((e0, e1))
elif s_in>0:
outputList.append(computeIntersection1(cp10,cp11,cp20,cp21,s0,s1,e0,e1))
s0,s1,s_in = e0,e1,e_in
if len(outputList)<1:
return []
cp10, cp11 = cp20, cp21
return outputList
def clip1(subjectPolygon, clipPolygon):
return cclip1(subjectPolygon, clipPolygon)
注解html
的-a
还是偏黄,但大部分计算不需要Python。在 compute
函数中有一个 Python 检查 0 除数,并且 Python 调用构建 return 元组。并且元组解包仍然调用Python。所以还有改进的空间。
在 Python 代码中,使用 numpy
没有任何优势。列表很小,列表元素访问速度更快。但在 cython
中,数组可能是 typed memoryviews
和纯 C 代码的垫脚石。
其他时间。
您的第二次编辑:
In [24]: timeit edit2.clip(np.array(clippy.sP,np.float32), np.array(clippy.cP,np
...: .float32))
1000 loops, best of 3: 228 µs per loop
@Matt's
边界框
In [25]: timeit clippy.polygon_clip(clippy.rp,clippy.cp,100,100,300,300)
1000 loops, best of 3: 208 µs per loop
分机 class
我通过定义扩展来清理代码 class
cdef class Point:
cdef public double x, y
def __init__(self, x, y):
self.x = x
self.y = y
这让我可以这样写:
s = inputList[-1]
s_in = insideP(s, cp1, cp2)
'cover' 函数必须将元组列表转换为点列表和 v.v。
sP = [Point(*x) for x in subjectPolygon]
这会有轻微的速度损失。