Cython 编译器因数组而崩溃——为什么?

Cython compiler crash with array -- why?

我正在尝试借助 memoryview 把以前的 Python 代码转换为 Cython,但在第一处用到 bolo 的那一行(靠近底部)总是出现编译器崩溃:

from __future__ import print_function
from builtins import range
from builtins import object
import cython
from cython.view cimport array as cvarray
from cpython cimport bool
from libc.math cimport round
from libc.stdlib cimport malloc, free
import numpy as np
cimport numpy as np

class LegCache(object):
    """Memoize results of the module-level ``prep_legendre`` function.

    Results are stored in ``self.d`` keyed by the ``(n, polyorder)`` pair, so
    repeated requests for the same pair reuse the first computed value.
    """

    def __init__(self):
        # Maps (n, polyorder) -> the value prep_legendre returned for that pair.
        self.d = {}

    def prep_legendre(self, n, polyorder):
        """Return the cached result for ``(n, polyorder)``, computing it once.

        On a cache miss the module-level ``prep_legendre(n, polyorder)`` is
        called and its return value is stored before being returned.
        """
        key = (n, polyorder)
        if key not in self.d:
            # Inside a method the bare name resolves to the module-level
            # function, not to this method of the same name.
            self.d[key] = prep_legendre(n, polyorder)
        return self.d[key]

# prep_legendre: allocates an (n, polyorder + 1) array intended to hold
# Legendre polynomials (per the docstring), takes its QR decomposition and
# returns (legendres, rinv, qt): the array itself, the inverse of the R factor
# and a copy of the transposed Q factor.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef prep_legendre(int n, int polyorder):
'''make array of legendre's'''
# NOTE(review): both arguments are already declared as C ints in the signature.
assert type(n) == int and type(polyorder) == int

# NOTE(review): the view is typed int and allocated with dtype=int, yet column 1
# below receives np.linspace(-1, 1, n) values, which are fractional.
cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef int l = 0

# Column 0 is all ones; column 1 (when requested) is a linear ramp on [-1, 1].
legendres[:, 0] = np.ones(n)
if polyorder > 0:
    legendres[:, 1] = np.linspace(-1, 1, n)
for i in range(polyorder - 1):
    l = i + 1
    # NOTE(review): the product is computed but never stored, so columns 2 and
    # above keep whatever np.empty left in them.
    np.multiply(l/(l+1), legendres[:,l-1])

# NOTE(review): `double` is used as a Python-level dtype argument here; no
# visible import binds that name -- confirm np.float64 was intended.
# These four buffers are placeholders; q, r, rinv and qt are all reassigned below.
cdef double[:,:] q = np.empty([polyorder + 1, polyorder + 1], dtype=double)
cdef double[:,:] r = np.empty([n, polyorder + 1], dtype=double)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

# QR-decompose the array, invert R, and keep a copy of Q transposed.
q, r = np.linalg.qr(legendres)
rinv = np.linalg.inv(r)
qt = q.T.copy()
return legendres, rinv, qt

def filter_slice_legendre_qr_mask_precalc(bolo, mask, legendres):
    """Fit the ``legendres`` basis to ``bolo`` on unmasked samples and subtract it.

    Parameters
    ----------
    bolo : array-like, length n
        Samples to filter.
    mask : array-like, length n
        Per-sample weights; with a 0/1 mask, samples whose entry is 0 do not
        contribute to the fit.
    legendres : array-like, shape (n, m)
        Basis matrix with one column per basis function.

    Returns
    -------
    out : numpy.ndarray
        ``bolo`` minus the fitted combination of basis columns, evaluated on
        every sample (masked ones included).
    coeff : numpy.ndarray
        The m fitted coefficients.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the masked basis is rank deficient, so that R cannot be inverted.
    """
    legendres = np.asarray(legendres)
    bolo = np.asarray(bolo)
    n = legendres.shape[0]

    # Scale each row of the basis by its mask entry; broadcasting the (n, 1)
    # column gives the same product as tiling the mask across all m columns.
    masked_basis = legendres * np.asarray(mask).reshape(n, 1)
    q, r = np.linalg.qr(masked_basis)

    # Rows of q belonging to zero-masked samples are zero, so projecting the
    # unmasked bolo onto q only picks up the samples that were kept.
    rinv = np.linalg.inv(r)
    p = np.dot(q.T, bolo)
    coeff = np.dot(rinv, p)
    out = bolo - np.dot(legendres, coeff)
    return out, coeff

# poly_filter_array: for every scan in `scan_list` and every row of `array`,
# removes a polynomial of order `polyorder` (the mean when polyorder == 0) and
# records the fitted coefficients in `coeff_out`, which is returned.
# `mask` selects the samples used for fitting; regions that cannot be fitted
# are zeroed in both `mask` and `mask_remove`.
# NOTE(review): DTYPE3_t is not defined in the visible code -- confirm where it
# is declared.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef poly_filter_array(
    double[:,:] array,
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask_remove, # I think this casting should still work like this
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask,
    int[:] scan_list,
    int ibegin,
    int polyorder,
    double minfrac=.75):
""" writes over input array
"""
# Remembers the scan length the Legendre factors were last prepared for.
cdef double nold = -1
# do nothing
if polyorder < 0:
    return array
#damn, work
cdef int nch = array.shape[0]
cdef int nt = array.shape[1]
cdef int ns = len(scan_list)

# Output coefficients, indexed [row, scan, coefficient].
cdef double[:,:,:] coeff_out = np.empty([nch, ns, nt], dtype = double)

legcache = LegCache()

cdef int istart = 0
cdef int n = 0
cdef int start = 0
cdef double mean = 0.0

# Placeholders only: n is still 0 here, and all three are reassigned from
# legcache.prep_legendre() inside the loop below.
cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

#cdef double[:,:] bolo # I think you can get away without giving it a value. bolo changes in size throughout the loop
cdef int[:] goodhits = np.empty(np.shape(mask)[1], dtype = int)
# I'm not sure about the size of these
# NOTE(review): np.empty([]) is 0-dimensional, which does not match the
# 2-D view it is assigned to.
cdef double[:,:] coeff = np.empty([]) # I can't remember how dot product work right now but this should be easy to sort out

# remove mean
if polyorder == 0:
    for s in range(len(scan_list)):
        # NOTE(review): scan_list is declared int[:], so scan_list[s] is a
        # single int and cannot be unpacked into (istart, n) -- confirm the
        # intended element type.
        istart, n = scan_list[s]
        # Convert the absolute start index into an offset within `array`.
        start = istart - ibegin
        for i in range(nch):
            # Only rows with at least one unmasked sample in this scan are touched.
            if np.any(mask[i, start:start + n]):
                mean = np.average(
                    array[i, start:start + n], weights=mask[i, start:start + n])
                # NOTE(review): in-place arithmetic on a memoryview slice;
                # Cython memoryviews do not support whole-view arithmetic.
                array[i, start:start + n] -= mean
                coeff_out[i, s, 0] = mean

# other cases
if polyorder > 0:
    for s in range(len(scan_list)):
        istart, n = scan_list[s]
        start = istart - ibegin
        if n <= polyorder:  # otherwise cannot compute legendre polynomials
            for i in range(nch):
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                # NOTE(review): this sits inside the row loop, so the message
                # is printed once per row.
                print('Not enough points (%d) to build legendre of order (%d)' % (n, polyorder))
            continue
        # Sum of the mask over this scan for every row.
        goodhits = np.sum(mask[:, start:start + n], axis=1)
        # Re-prepare the Legendre factors only when the scan length changes.
        if n != nold:
            legendres, rinv, qt = legcache.prep_legendre(n, polyorder)
            rinvqt = np.dot(rinv, qt)
            nold = n
        # handle no masked ones

        # First pass: rows whose mask sum equals n reuse the shared rinvqt.
        for i in range(nch):
            if goodhits[i] != n:
                continue  # skip for now

            # NOTE(review): `bolo` has not been declared or assigned before
            # this point (its cdef above is commented out), and nothing visible
            # writes the filtered `bolo` back into `array` in this pass.
            bolo[i, :] = array[i, start:start + n] #where problem starts
            coeff = np.dot(rinvqt, bolo)
            coeff_out[i, s, :] = coeff
            bolo -= np.dot(legendres, coeff)


        # Second pass: rows with some masked samples get their own masked fit.
        for i in range(nch):
            if goodhits[i] == n:
                continue  # skip since dealt with above
            if goodhits[i] < minfrac * n:  # not enough points
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                continue
            bolo, coeff = filter_slice_legendre_qr_mask_precalc(
                array[i, start:start + n], mask[i, start:start + n], legendres)
            array[i, start:start + n] = bolo
            coeff_out[i, s, :] = coeff
return coeff_out

当我尝试编译这段代码时,它报出了一个含义不明的错误 "Compiler crash in ExpandInplaceOperators"。我完全摸不着头脑。

用来复现错误的最小代码并不需要能够运行,它只要足以产生这条错误消息即可。我不断删减代码,直到只剩下仍会产生该错误消息的最小片段,也就是下面这段:

# Minimal reproducer: it is not meant to run, only to trigger the compiler crash.
cdef poly_filter_array(
    double[:,:] array):

    cdef double mean = 0.0

    cdef int i =0

    # In-place arithmetic on a memoryview slice: this is the statement the
    # crash is attributed to.
    array[i, :] -= mean

编译这段代码给出

Compiler crash in ExpandInplaceOperators

ModuleNode.body = StatListNode(cycrashdelme.pyx:7:5)
StatListNode.stats[0] = CFuncDefNode(cycrashdelme.pyx:7:5,
    args = [...]/1,
    modifiers = [...]/0,
    visibility = 'private')
CFuncDefNode.body = StatListNode(cycrashdelme.pyx:10:4)
StatListNode.stats[2] = InPlaceAssignmentNode(cycrashdelme.pyx:14:9,
    operator = '-')
File 'UtilNodes.py', line 146, in __init__: ResultRefNode(may_hold_none = True,
    result_is_used = True,
    use_managed_ref = True)

Compiler crash traceback from this point on:
  File "<some path on my computer>\lib\site-packages\Cython\Compiler\UtilNodes.py", line 146, in __init__
    assert self.pos is not None
AssertionError:

用这样的最小示例来提问会好得多……


这基本上说明问题出在 array[i,:] -= mean 这一行(在您的原始版本中,对应的是 array[i, start:start + n] -= mean)。

值得尝试一些简化版本,看看会发生什么:

array -= mean
array[i,:] = array[i,:] - mean

会得到

Invalid operand types for '-' (double[:, :]; double)
Invalid operand types for '-' (double[:]; double)

分别为两行。所以我们知道问题是 Cython 内存视图不支持整个视图的算术(不过你可以对单个元素进行算术)。

你可以通过临时将其转换为 numpy 数组来进行算术运算

tmp_as_array = np.asarray(array[i,:])
tmp_as_array -= mean

(为 tmp_as_array 声明类型并不会带来速度上的优势)。这样做会就地修改现有数据,而不是复制一份;您可以打印 tmp_as_array.flags.owndata 来验证,它应该是 False。


总结-

  1. 您不能在 Cython 内存视图上进行算术运算(它们仅提供数组存储,但并非旨在提供数学运算)。
  2. 您收到的错误消息非常无用,可能是一个错误,应该报告给 https://github.com/cython/cython/issues
  3. 复制一份代码,然后逐步删除其中的内容,直到找出真正导致错误的部分。