用 mpi4py 替换多处理 pool.map

Question

我是使用 MPI 的初学者，我仍在阅读文档。然而，当谈到 mpi4py 时，几乎没有什么可做的。我编写了一个代码，目前在许多内核上使用多处理模块运行，但我需要用 mpi4py 替换它，以便我可以使用多个节点来运行我的代码。我的代码在下面，当使用多处理模块时，也没有。

使用多处理，

import numpy as np
import multiprocessing 


start_time = time.time()

E = 0.1
M = 5
n = 1000
G = 1
c = 1
stretch = [10, 1]


#Point-Distribution Generator Function 
def CDF_inv(x, e, m):
    A = 1/(1 + np.log(m/e))
    if x == 1:
        return m
    elif 0 <= x <= A:
        return e * x / A
    elif A < x < 1:
        return e * np.exp((x / A) - 1)

#Elliptical point distribution Generator Function

def get_coor_ellip(dist=CDF_inv, params=[E, M], stretch=stretch):
    R = dist(random.random(), *params)
    theta = random.random() * 2 * np.pi
    return (R * np.cos(theta) * stretch[0], R * np.sin(theta) * stretch[1])


def get_dist_sq(x_array, y_array):
    return x_array**2 + y_array**2


#Function to obtain alpha

def get_alpha(args):
    zeta_list_part, M_list_part, X, Y = args
    alpha_x = 0
    alpha_y = 0
    for key in range(len(M_list_part)):
        z_m_z_x = X - zeta_list_part[key][0]
        z_m_z_y = Y - zeta_list_part[key][1]
        dist_z_m_z = get_dist_sq(z_m_z_x, z_m_z_y)
        alpha_x += M_list_part[key] * z_m_z_x / dist_z_m_z
        alpha_y += M_list_part[key] * z_m_z_y / dist_z_m_z
    return (alpha_x, alpha_y)

#The part of the process containing the loop that needs to be parallelised, where I use pool.map()

if __name__ == '__main__':
    # n processes, scale accordingly
    num_processes = 10
    pool = multiprocessing.Pool(processes=num_processes)
    random_sample = [CDF_inv(x, E, M)
                     for x in [random.random() for e in range(n)]]
    zeta_list = [get_coor_ellip() for e in range(n)]
    x1, y1 = zip(*zeta_list)
    zeta_list = np.column_stack((np.array(x1), np.array(y1)))
    x = np.linspace(-3, 3, 100)
    y = np.linspace(-3, 3, 100)
    X, Y = np.meshgrid(x, y)
    print len(x)*len(y)*n,'calculations to be carried out.'
    M_list = np.array([.001 for i in range(n)])
    # split zeta_list, M_list, X, and Y
    zeta_list_split = np.array_split(zeta_list, num_processes, axis=0)
    M_list_split = np.array_split(M_list, num_processes)
    X_list = [X for e in range(num_processes)]
    Y_list = [Y for e in range(num_processes)]

    alpha_list = pool.map(
            get_alpha, zip(zeta_list_split, M_list_split, X_list, Y_list))
    alpha_x = 0  
    alpha_y = 0
    for e in alpha_list:
        alpha_x += e[0] * 4 * G / (c**2)
        alpha_y += e[1] * 4 * G / (c**2)

print("%f seconds" % (time.time() - start_time))

没有多处理，

import numpy as np


E = 0.1
M = 5
G = 1
c = 1
M_list = [.1 for i in range(n)]

#Point-Distribution Generator Function 

def CDF_inv(x, e, m):
    A = 1/(1 + np.log(m/e))
    if x == 1:
        return m
    elif 0 <= x <= A:
        return e * x / A
    elif A < x < 1:
        return e * np.exp((x / A) - 1)



n = 1000
random_sample = [CDF_inv(x, E, M)
                 for x in [random.random() for e in range(n)]]
stretch = [5, 2]

#Elliptical point distribution Generator Function

def get_coor_ellip(dist=CDF_inv, params=[E, M], stretch=stretch):
    R = dist(random.random(), *params)
    theta = random.random() * 2 * np.pi
    return (R * np.cos(theta) * stretch[0], R * np.sin(theta) * stretch[1])

#zeta_list is the list of coordinates of a distribution of points
zeta_list = [get_coor_ellip() for e in range(n)]
x1, y1 = zip(*zeta_list)
zeta_list = np.column_stack((np.array(x1), np.array(y1)))

#Creation of a X-Y Grid
x = np.linspace(-3, 3, 100)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)

def get_dist_sq(x_array, y_array):
    return x_array**2 + y_array**2


#Calculation of alpha, containing the loop that needs to be parallelised.

alpha_x = 0
alpha_y = 0
for key in range(len(M_list)):
    z_m_z_x = X - zeta_list[key][0]
    z_m_z_y = Y - zeta_list[key][1]
    dist_z_m_z = get_dist_sq(z_m_z_x, z_m_z_y)
    alpha_x += M_list[key] * z_m_z_x / dist_z_m_z
    alpha_y += M_list[key] * z_m_z_y / dist_z_m_z
alpha_x *= 4 * G / (c**2)
alpha_y *= 4 * G / (c**2)

基本上我的代码所做的是，它首先生成一个遵循特定分布的点列表。然后我应用一个方程，使用点之间的不同关系来获得数量 'alpha'。需要并行化的部分是涉及 alpha 计算的单个 for 循环。我想做的是使用 mpi4py 而不是 multiprocessing 来执行此操作，但我不确定如何进行。

Answer 1

可以使用 scatter / gather 将 multiprocessing.map 版本转换为 MPI。在您的情况下，您已经将输入列表准备为每个等级的一个块，这很有用。主要区别在于，所有代码首先由所有级别执行，因此您必须将所有应该由 master rank 0 完成的事情设置为条件。

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    if comm.rank == 0:
        random_sample = [CDF_inv(x, E, M)
                         for x in [random.random() for e in range(n)]]
        zeta_list = [get_coor_ellip() for e in range(n)]
        x1, y1 = zip(*zeta_list)
        zeta_list = np.column_stack((np.array(x1), np.array(y1)))
        x = np.linspace(-3, 3, 100)
        y = np.linspace(-3, 3, 100)
        X, Y = np.meshgrid(x, y)
        print len(x)*len(y)*n,'calculations to be carried out.'
        M_list = np.array([.001 for i in range(n)])
        # split zeta_list, M_list, X, and Y
        zeta_list_split = np.array_split(zeta_list, comm.size, axis=0)
        M_list_split = np.array_split(M_list, comm.size)
        X_list = [X for e in range(comm.size)]
        Y_list = [Y for e in range(comm.size)]
        work_list = list(zip(zeta_list_split, M_list_split, X_list, Y_list))
    else:
        work_list = None

    my_work = comm.scatter(work_list)
    my_alpha = get_alpha(my_work)

    alpha_list = comm.gather(my_alpha)
    if comm.rank == 0:
        alpha_x = 0  
        alpha_y = 0
        for e in alpha_list:
            alpha_x += e[0] * 4 * G / (c**2)
            alpha_y += e[1] * 4 * G / (c**2)

只要每个处理器获得相似的工作量，这就可以正常工作。如果通信成为问题，您可能希望在处理器之间拆分数据生成，而不是全部在主等级 0 上完成。

注意：有关代码的某些内容是伪造的，例如alpha_[xy] 最终变成 np.ndarray。串口版运行出错

Answer 2

对于仍然对类似主题感兴趣的人，我强烈建议您查看 MPIPoolExecutor() class here and the documentation is here.

用 mpi4py 替换多处理 pool.map

Replacing multiprocessing pool.map with mpi4py

python

multiprocessing

mpi4py