MPI_Gather没有收到数据

MPI_Gather doesn't receive data

该程序接受两个命令行参数:N 是每个工作进程应生成的数据个数,H 是每个工作进程生成的随机数的最大值。每个工作进程都会生成一个这样的随机值列表,而 bigList 是我想把它们全部收集到一起的地方,但 bigList 数组中什么也没有显示。例如,运行 mpirun -np 3 a.out 4 20 得到:

RANK: 1  ---  NUM: 18
RANK: 1  ---  NUM: 6
RANK: 1  ---  NUM: 12
RANK: 1  ---  NUM: 10
RANK: 2  ---  NUM: 9
RANK: 2  ---  NUM: 3
RANK: 2  ---  NUM: 6
RANK: 2  ---  NUM: 5

当我希望 BigList 由上面列出的每个数字组成时,BigList 是空的。

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>


/*
 * Program from the question: every non-zero rank fills locarr with N values
 * derived from rand() % H, then MPI_Gather collects N ints per rank into
 * bigList on rank 0, which is supposed to print them.
 *
 * Command line: argv[1] = N (values per rank), argv[2] = H (upper bound of
 * the raw random draw). Defaults are N = 10, H = 5.
 *
 * The code is kept exactly as posted; NOTE(review) comments mark the defects.
 */
int main(int argc, char* argv[]){

    /* t1/t2 only feed the commented-out timing printf near the end. */
    double t1, t2;
    MPI_Init(&argc, &argv);
    int rank;
    int wsize;
    int N = 10, H = 5;
    /* NOTE(review): num is never used. */
    int num, k, i;
    /* NOTE(review): N is never checked against these fixed capacities. */
    int locarr[25];
    int bigList[300];
    /* NOTE(review): every rank seeds from time(NULL); ranks started in the
     * same second presumably draw the same sequence. */
    srand(time(NULL));

    /* NOTE(review): argv[2] is read whenever argc > 1, so passing a single
     * argument hands atoi a null pointer. */
    if(argc > 1){
        N = atoi(argv[1]);
        H = atoi(argv[2]);
    }

    t1 = MPI_Wtime();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    /* NOTE(review): MPI_Bcast is a collective call, but only rank 0 reaches
     * it here; the other ranks take the else branch and never participate. */
    if( rank == 0){
        MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }
    else{
        for(i = 0; i < N; i++){
            /* Draw in [1, H], then integer-divide by this rank (>= 1 in this branch). */
            locarr[i] = (((rand() % H) + 1) / rank);
            printf("RANK: %d  ---  NUM: %d\n", rank, locarr[i]);
        }
    }

    /* Every rank contributes N ints; rank 0 never wrote to locarr, so its
     * contribution is whatever the uninitialised array holds. */
    MPI_Gather(&locarr, N, MPI_INT, bigList, N, MPI_INT, 0, MPI_COMM_WORLD);

    if( rank == 0){
        printf("BigList: ");
        /* NOTE(review): rank is 0 inside this branch, so the bound rank * N
         * is 0 and the loop body never runs -- nothing from bigList is printed. */
        for(k = 0; k < (rank * N); k++){
            printf(" %d", bigList[k]);
        }
        printf("\n");
    }


    t2 = MPI_Wtime();
//    printf("\nMPI_Wtime(): %f\n", t2 - t1);
    MPI_Finalize();

    return 0;
}

让我补充一下 Gilles Gouaillardet 的评论。

MPI_Gather 调用本身写得是正确的。要得到预期的结果,需要做两处修改。

MPI_Bcast 是集合(collective)操作,通信子中的所有进程都必须调用它,而不仅仅是 rank 0。所以代码应该是:

if( rank == 0){
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
}
else{
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);
    for(i = 0; i < N; i++){
        locarr[i] = (((rand() % H) + 1) / rank);
        printf("RANK: %d  ---  NUM: %d\n", rank, locarr[i]);
    }
}

此外,bigList 的内容由 rank 0 打印。但在 for 循环中,循环条件是 k < rank*N,对于 rank 0 它恒为 false(k < 0*N),因此循环体不会执行,也不会打印任何值。所以循环上界应使用进程总数 (wsize) 而不是 rank。

if (rank == 0) {
    /* Walk all wsize * N gathered slots: N per rank, rank 0's slot included. */
    const int total = wsize * N;

    fputs("BigList: ", stdout);
    k = 0;
    while (k < total) {
        printf(" %d", bigList[k]);
        k++;
    }
    putchar('\n');
}

上面的打印循环还会在开头输出一些垃圾值:rank 0 没有向 locarr 写入任何数据,但 MPI_Gather 仍会把它的 N 个未初始化元素收集到 bigList 的前 N 个位置。把打印部分替换为下面的代码(从下标 N 开始打印)即可解决这个问题:

    if (rank == 0) {
        /* Start at index N: the first N slots are rank 0's own contribution. */
        const int end = wsize * N;

        fputs("BigList: ", stdout);
        k = N;
        while (k < end) {
            printf(" %d", bigList[k]);
            k++;
        }
        putchar('\n');
    }

修正后的完整代码如下:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Parse text as a decimal integer in [1, max].
 * Returns the value, or 0 when text is not a number in that range.
 */
static int parse_positive(const char *text, long max)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);

    /* strtol saturates instead of wrapping on overflow, so a huge input can
     * never alias a small in-range value. */
    if (end == text || *end != '\0' || value < 1 || value > max)
    {
        return 0;
    }
    return (int)value;
}

/*
 * Every worker rank (rank > 0) generates N values of the form
 * ((rand() % H) + 1) / rank and prints them; MPI_Gather then collects N ints
 * from every rank into bigList on rank 0, which prints the workers' values.
 *
 * Command line: argv[1] = N (values per rank, 1..25), argv[2] = H (upper
 * bound of the raw draw, 1..RAND_MAX). Without arguments N = 10 and H = 5.
 *
 * Returns 0 on success, EXIT_FAILURE when N/H are invalid or the gathered
 * data would not fit into bigList.
 */
int main(int argc, char *argv[])
{
    enum { LOCAL_CAPACITY = 25, GATHER_CAPACITY = 300 };

    int rank;
    int wsize;
    int N = 10, H = 5;
    int k, i;
    /* Zeroed so rank 0, which generates nothing, still sends defined data. */
    int locarr[LOCAL_CAPACITY] = {0};
    int bigList[GATHER_CAPACITY];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    /* argv[2] only exists when argc > 2, so both values are parsed together. */
    if (argc > 2)
    {
        N = parse_positive(argv[1], LOCAL_CAPACITY);
        H = parse_positive(argv[2], RAND_MAX);
    }
    else if (argc == 2 && rank == 0)
    {
        fprintf(stderr, "Both N and H are required; using defaults N=%d H=%d\n", N, H);
    }

    /* MPI_Bcast is collective: every rank must call it, not just the root.
     * Afterwards all ranks agree on rank 0's N and H. */
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&H, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* All ranks see the same N, H and wsize here, so they all take the same
     * branch and nobody is left waiting inside a later collective call. */
    if (N < 1 || N > LOCAL_CAPACITY || H < 1 || wsize > GATHER_CAPACITY / N)
    {
        if (rank == 0)
        {
            fprintf(stderr,
                    "Invalid arguments: need 1 <= N <= %d, H >= 1 and N * processes <= %d\n",
                    LOCAL_CAPACITY, GATHER_CAPACITY);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* Mix the rank into the seed; with time(NULL) alone every rank started in
     * the same second would draw the identical sequence. */
    srand((unsigned)time(NULL) + (unsigned)rank);

    if (rank != 0)
    {
        for (i = 0; i < N; i++)
        {
            /* Draw in [1, H], then integer-divide by this rank (never 0 here). */
            locarr[i] = (((rand() % H) + 1) / rank);
            printf("RANK: %d  ---  NUM: %d\n", rank, locarr[i]);
        }
    }

    MPI_Gather(locarr, N, MPI_INT, bigList, N, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0)
    {
        printf("BigList: ");
        /* Start at N: the first N slots are rank 0's own (all-zero) block. */
        for (k = N; k < wsize * N; k++)
        {
            printf(" %d", bigList[k]);
        }
        printf("\n");
    }

    MPI_Finalize();

    return 0;
}