用 MPI_Send 和 MPI_Recv 实现 MPI_Reduce 会导致错误的结果

Implementing MPI_Reduce with MPI_Send and MPI_Recv leads to wrong results

我正在开发一个使用 MPI_Send() 和 MPI_Recv() 来替换 MPI_Reduce() 的程序。

除了代码的最后一部分(输出 PI 近似值、误差和运行时间)之外,其余部分都已经可以运行。另外,我得到的总和值也不正确。

我认为问题出在 MPI_Recv() 这一端,但我也可能是错的。目前运行时,我只使用 2 个处理器。

使用 MPI_Reduce 时,程序在没有将 PI 初始化为某个值的情况下也能正常工作。
#include "mpi.h"
#include <stdio.h>
#include <math.h>
 
/*
 * Approximates pi with the midpoint rule applied to 4/(1+x^2) on [0,1].
 * The n intervals are dealt out round-robin over the MPI ranks; rank 0
 * picks n and hands it to the other ranks with MPI_Send/MPI_Recv, and the
 * code then tries to gather the partial sums on rank 0 by hand instead of
 * calling MPI_Reduce.
 *
 * NOTE(review): the hand-written gather below is the part under question;
 * see the review notes next to the MPI_Send/MPI_Recv calls.
 */
int main( int argc, char *argv[])
{
    int n, i;
    /* Reference value of pi used to report the approximation error. */
    double PI25DT = 3.141592653589793238462643;
    double pi, h, sum, x;
 
    int size, rank;
    double startTime, endTime;
 
    /* Initialize MPI and get number of processes and my number or rank*/
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
 
    /* Processor zero sets the number of intervals and starts its clock*/
    if (rank==0)
    {
       n=600000000;
       startTime=MPI_Wtime();
       /* Send n to every rank except rank 0 itself.
          This loop-local i shadows the i declared at the top of main. */
       for (int i = 0; i < size; i++) {
           if (i != rank) {
               MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
           }
       }
    } 
    /* All other ranks receive the number of intervals from rank 0 */
    else 
    {
        MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
 
    /* Calculate the width of intervals */
    h   = 1.0 / (double) n;
 
    /* Initialize sum */
    sum = 0.0;
    /* Step over each inteval I own */
    for (i = rank+1; i <= n; i += size)
    {
        /* Calculate midpoint of interval */
        x = h * ((double)i - 0.5);
        /* Add rectangle's area = height*width = f(x)*h */
        sum += (4.0/(1.0+x*x))*h;
    }
    /* Get sum total on processor zero */
    //MPI_Reduce(&sum,&pi,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    /* NOTE(review): every rank, rank 0 included, executes both sends.
       In the second call MPI_SUM is a reduction operation (MPI_Op), not an
       MPI_Datatype, so it is not a valid third argument; pi has also not
       been assigned any value at this point. */
    MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
    
    if (rank == 0) 
    {
        double total_sum = 0;
        /* Receive from every rank in rank order, starting with rank 0
           itself, and accumulate the received partial sums. */
        for (int i = 0; i < size; i++) 
        {
            MPI_Recv(&sum, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            /* NOTE(review): same MPI_SUM-as-datatype problem as the send. */
            MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            total_sum += sum;
            /* Prints the running total after each rank's contribution. */
            printf("Total Sum is %lf\n", total_sum);
        }
        /* NOTE(review): total_sum is never copied into pi. */
    }
    
    /* Print approximate value of pi and runtime*/
    if (rank==0)
    {
       /* pi is printed here although the only writes to it are the
          MPI_Recv calls into &pi above; total_sum holds the accumulated
          result. */
       printf("pi is approximately %.16f, Error is %e\n",
                       pi, fabs(pi - PI25DT));
       endTime=MPI_Wtime();
       printf("runtime is=%.16f",endTime-startTime);
    }
    MPI_Finalize();
    return 0;
}

下面这两个调用:

MPI_Send(&pi, 1, MPI_SUM, 0, 0, MPI_COMM_WORLD);
                 ^^^^^^^

 MPI_Recv(&pi, 1, MPI_SUM, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                  ^^^^^^^

是错误的:MPI_Send 和 MPI_Recv 的第三个参数应该是 MPI_Datatype,而不是 MPI_Op(MPI_SUM)。

但是查看您的代码,您真正想要做的是将这些调用替换为:

/* Hand-written sum-to-rank-0: pi starts out as this rank's own partial sum. */
double pi = sum;
if (myid != 0) {
        /* Every non-root rank ships its partial sum to rank 0. */
        MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
}
else {
    /* Rank 0 receives one double from each other rank, in rank order, and
       adds it to pi. `value` is a receive buffer that must already be
       declared (presumably a double, to match MPI_DOUBLE). */
    for (int i = 1; i < numprocs; i++) {
        MPI_Recv(&value, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        pi += value;
        }
}

以此来模拟 MPI_Reduce 的行为。

一个可运行的示例:

#include "mpi.h"
#include <stdio.h>
#include <math.h>
 
/*
 * Estimate pi with the midpoint rule on 4/(1+x^2) over [0,1].
 *
 * The n intervals are distributed round-robin over the MPI ranks. Rank 0
 * chooses n, forwards it to the other ranks, and afterwards gathers the
 * partial sums with plain MPI_Send/MPI_Recv; this stands in for
 *   MPI_Reduce(&sum, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
 * Rank 0 finally prints the estimate, its error and the elapsed time.
 */
int main(int argc, char *argv[])
{
    /* Reference value used to report the approximation error. */
    const double PI25DT = 3.141592653589793238462643;
    int numprocs, myid;
    int n;
    double startTime, endTime;

    /* Bring up MPI and learn the communicator size and this process's rank. */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    /* Distribute the interval count: workers wait for it, rank 0 sets it,
       starts the clock, and sends it to each worker in turn. */
    if (myid != 0) {
        MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } else {
        n = 600000000;
        startTime = MPI_Wtime();
        for (int dest = 1; dest < numprocs; dest++) {
            MPI_Send(&n, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
        }
    }

    /* Width of one interval. */
    const double h = 1.0 / (double) n;

    /* Accumulate the rectangles owned by this rank: intervals
       myid+1, myid+1+numprocs, ... up to n. */
    double sum = 0.0;
    for (int k = myid + 1; k <= n; k += numprocs) {
        const double x = h * ((double)k - 0.5);   /* interval midpoint */
        sum += (4.0 / (1.0 + x * x)) * h;         /* area = f(x) * h */
    }

    /* Combine the partial sums on rank 0, seeded with rank 0's own share. */
    double pi = sum;
    if (myid == 0) {
        double value = 0;
        for (int src = 1; src < numprocs; src++) {
            MPI_Recv(&value, 1, MPI_DOUBLE, src, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            pi += value;
        }

        /* Report the estimate first, then stop the clock and report the runtime. */
        printf("pi is approximately %.16f, Error is %e\n",
               pi, fabs(pi - PI25DT));
        endTime = MPI_Wtime();
        printf("runtime is=%.16f", endTime - startTime);
    } else {
        MPI_Send(&sum, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}

输出(2个进程):

pi is approximately 3.1415926535898993, Error is 1.061373e-13
runtime is=1.3594319820404053