在 Python 中高效地执行映射

Question

我有一些把数据映射到矩阵的代码……其中大部分只是准备工作，目的是让我的问题可以很容易地重现；我唯一需要加速的部分是注释 # the part I want to speed up 之后的内容。

import numpy as np
# config
matrix_height = 100
matrix_width = 200

# fake data: 10000 x values, each paired with its own {0: 0, ..., 99: 99} dict
x_data = np.arange(10000)
y_data = [dict(zip(range(100), range(100))) for _ in range(len(x_data))]

# fake mapping: every x value gets a random column index
x_to_index = {}
for x in x_data:
    x_to_index[x] = np.random.randint(matrix_width)

# fake mapping: every y key gets a random two-row span (start, start + 2);
# later dictionaries overwrite the span stored for a key by earlier ones
y_to_index = {}
for y_dict in y_data:
    for y_key in y_dict:
        y_start = np.random.randint(matrix_height - 2)
        y_to_index[y_key] = (y_start, y_start + 2)

# data that must be stored (three independent integer matrices)
matrix_shape = (matrix_height, matrix_width)
total_matrix = np.zeros(matrix_shape, dtype=int)
n_matrix = np.zeros(matrix_shape, dtype=int)
latest_value = np.zeros(matrix_shape, dtype=int)

# the part I want to speed up
# For every x, look up its column; for every (key, value) of the matching
# dictionary, look up the row span and update the three matrices there.
for x, y_dict in zip(x_data, y_data):
    x_index = x_to_index[x]
    # The value is bound to y_value rather than y_data so that the y_data
    # list is not overwritten and this section can be run again.
    for y_key, y_value in y_dict.items():
        y_slice = slice(*y_to_index[y_key])
        total_matrix[y_slice, x_index] += y_value   # running sum
        latest_value[y_slice, x_index] = y_value    # last write wins
        n_matrix[y_slice, x_index] += 1             # number of writes

再次说明，我只关心注释 # the part I want to speed up 下方的那部分代码。

我不确定该如何加速，但似乎应该可以使用某种映射（map）函数，把所有操作并行执行……

我希望最后这一部分的速度能有显著的提升。有什么想法吗？

Answer 1

根据核心数定制。

对于 total_matrix，加法是可交换的，因此各个进程算出的部分结果可以按任意顺序相加。

对于 latest_value，赋值操作不可交换，因此需要按拆分列的相反顺序合并各部分结果（靠后的分块优先）。

import numpy as np
import time
import multiprocessing as mp

def func(cols, zz, tm, nm, lv, setOrder):
    """Apply every (row span, value) update to each listed column.

    Args:
        cols: Iterable of column indices. A column that appears several
            times is updated once per occurrence.
        zz: Iterable of ``(span, value)`` items, where ``span`` holds the
            arguments for ``slice`` (e.g. ``(start, stop)``) selecting rows.
        tm: 2-D array accumulating the sum of values; modified in place.
        nm: 2-D array counting the updates per cell; modified in place.
        lv: 2-D array holding the last value written; modified in place.
        setOrder: Tag returned unchanged so the caller can identify which
            chunk of work this result belongs to.

    Returns:
        The list ``[tm, nm, lv, setOrder]``.
    """
    # Build each slice once instead of three times per (column, row) pair.
    # Materialising the list also keeps a one-shot iterator passed as zz
    # from being exhausted after the first column.
    updates = [(slice(*t[0]), t[1]) for t in zz]
    for c in cols:
        for rows, value in updates:
            tm[rows, c] += value
            lv[rows, c] = value
            nm[rows, c] += 1
    return [tm, nm, lv, setOrder]

if __name__ == '__main__':
    # Benchmark: run the original nested loop, then the same work split
    # across one process per CPU core, and check both give equal matrices.
    mp.freeze_support()

    matrix_height = 100
    matrix_width = 200
    total_range = 10000
    shape = (matrix_height, matrix_width)

    # Fake data: every x value carries the same {key: value} dictionary.
    x_data = np.arange(total_range)
    y_data = [{i: i for i in range(matrix_height)} for _ in range(total_range)]

    x_to_index = {x: np.random.randint(matrix_width) for x in x_data}
    # potential general purpose cols list
    #cols = np.random.randint(0, total_range, (1, total_range))[0]
    # Column index of every x, in x order. Built-in int() is used because
    # the np.int alias no longer exists in current NumPy releases.
    cols = [int(col) for col in x_to_index.values()]

    y_to_index = {}
    for y_dict in y_data:
        for y_key in y_dict:
            y_start = np.random.randint(matrix_height - 2)
            y_to_index[y_key] = (y_start, y_start + 2)

    # potential general purpose rows list
    #rows = [(np.random.randint(matrix_height), np.random.randint(matrix_height)) for x in range(matrix_height)]
    rows = list(y_to_index.values())

    # potential general purpose y data
    #y_dat = np.random.randint(0, matrix_height, (1, matrix_height))[0]
    y_dat = list(range(matrix_height))

    o_total_matrix = np.zeros(shape, dtype=int)
    o_n_matrix = np.zeros(shape, dtype=int)
    o_latest_value = np.zeros(shape, dtype=int)

    # Reference run: the original single-process loop.
    startTime = time.time()
    for x, y_dict in zip(x_data, y_data):
        x_index = x_to_index[x]
        # y_value (not y_data) so the y_data list is not overwritten.
        for y_key, y_value in y_dict.items():
            y_slice = slice(*y_to_index[y_key])
            o_total_matrix[y_slice, x_index] += y_value
            o_latest_value[y_slice, x_index] = y_value
            o_n_matrix[y_slice, x_index] += 1
    print('original time was {0:5.2f} sec'.format(time.time() - startTime))

    procs = mp.cpu_count()

    # NOTE: one (rows, values) list is shared by every worker, so the
    # parallel path relies on all x values having the same dictionary,
    # which holds for the fake data built above.
    zz = list(zip(rows, y_dat))

    # One contiguous chunk of columns per process, each with its own
    # zeroed matrices (a comprehension, so no array object is shared).
    procs_split = np.array_split(cols, procs)
    itup = [
        (
            list(chunk),
            zz,
            np.zeros(shape, dtype=int),
            np.zeros(shape, dtype=int),
            np.zeros(shape, dtype=int),
            order,
        )
        for order, chunk in enumerate(procs_split)
    ]

    startTime = time.time()
    with mp.Pool(processes=procs) as pool:
        ret = pool.starmap(func, itup)

    # Sums and counts are order independent: add the partial results.
    i_total_matrix = np.sum([part[0] for part in ret], axis=0)
    i_n_matrix = np.sum([part[1] for part in ret], axis=0)

    # The latest value is order dependent: walk the chunks from last to
    # first and let each one fill only the cells no later chunk wrote.
    # A cell counts as written when its update count is non-zero, because
    # 0 is itself a legitimate stored value.
    i_latest_value = np.zeros(shape, dtype=int)
    filled = np.zeros(shape, dtype=bool)
    for _, nm_part, lv_part, _ in sorted(ret, key=lambda part: part[3], reverse=True):
        take = (nm_part > 0) & ~filled
        i_latest_value[take] = lv_part[take]
        filled |= take

    print('improved time was {0:5.2f} sec'.format(time.time() - startTime))

    # Verify the parallel result against the reference run.
    if not np.array_equal(i_total_matrix, o_total_matrix):
        print('ERROR TOTAL MATRIX')
    if not np.array_equal(i_n_matrix, o_n_matrix):
        print('ERROR N MATRIX')
    if not np.array_equal(i_latest_value, o_latest_value):
        print('ERROR LATEST VALUE')

试运行的结果显示，速度大约提升了两倍：

原时间为 7.12 秒

改进的时间为 2.29 秒

在 Python 中高效地执行映射

Execute mapping efficiently in Python

python

mapping

big-o

numpy