解决处理笛卡尔网格的 MPI 程序中的死锁问题

Question

我正在实现大炮的算法。我运行它使用 4 个处理器。当我进入循环时遇到死锁：

   for (i=0; i<dims[0]; i++) { 
    Multiply(nlocal, a, b, c); 
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); 
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 1, downrank, 1, comm_2d, &status); 
  }

完整代码在这里：

#include <math.h>
#include <mpi.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

void Multiply(int n, double *a, double *b, double *c);

double* readMatrix(char* filename, int* size);
void writeMatrix(double* matrix, char* filename, int size);

int main(int argc, char* argv[])
{
     MPI_Init(&argc, &argv);   

  double* a,*b,*c;
  int i, t, n;
  int nlocal; 
  int npes, dims[2], periods[2]; 
  int myrank, my2drank, mycoords[2]; 
  int uprank, downrank, leftrank, rightrank, coords[2]; 
  int shiftsource, shiftdest; 
  MPI_Status status; 
  MPI_Comm comm_2d; 


  MPI_Comm_size(MPI_COMM_WORLD, &npes); 
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 

  MPI_Barrier(MPI_COMM_WORLD);

  t = -MPI_Wtime();


  if (myrank == 0) {

    int sizeA,sizeB;
    printf("Reading %s\n", argv[1]);
    a = readMatrix(argv[1], &sizeA);
    b = readMatrix(argv[2], &sizeB);
    printf("Reading %s\n", argv[2]);
    c = calloc(sizeA*sizeB, sizeof(double));
    n = sizeA;
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (sizeA != sizeB) {
        printf("Matrix not sized n^2\n");
        MPI_Abort(MPI_COMM_WORLD, 0);
    }


  }
  else {
      a = calloc(n*n, sizeof(double));
      b = calloc(n*n, sizeof(double));
      c = calloc(n*n, sizeof(double));
      MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
      MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
      MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  }


  dims[0] = dims[1] = sqrt(npes); 




  periods[0] = periods[1] = 1; 


  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d); 


  MPI_Comm_rank(comm_2d, &my2drank); 
  MPI_Cart_coords(comm_2d, my2drank, 2, mycoords); 


  MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank); 
  MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank); 


  nlocal = n/dims[0]; 


  MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); 
  MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest,1, shiftsource, 1, comm_2d, &status); 

  MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest); 
  MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,shiftdest, 1, shiftsource, 1, comm_2d, &status); 

  printf("rank[%d] has entered loop\n", myrank);
  for (i=0; i<dims[0]; i++) { 
    Multiply(nlocal, a, b, c); 


    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); 


    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 1, downrank, 1, comm_2d, &status); 
  } 
  printf("rank[%d] has left loop\n", myrank);

  MPI_Cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest); 
  MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,shiftdest, 1, shiftsource, 1, comm_2d, &status); 

  MPI_Cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest); 
  MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,shiftdest, 1, shiftsource, 1, comm_2d, &status); 




  printf("rank[%d] has reached the barrier...\n", myrank);
  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == 0) {
      t += MPI_Wtime();
      writeMatrix(c, argv[3], n); 
      printf("%s %d second(s)\n", "Finshed in", t);
  }

  free(a); free(b); free(c);

  MPI_Comm_free(&comm_2d); 

  MPI_Finalize();
}

double* readMatrix(char* filename, int* size) {
    FILE* file_handle = fopen(filename, "r");
    int row;
    int col;
    fread(&row, sizeof(int), 1, file_handle);
    fread(&col, sizeof(int), 1, file_handle);
    if (row == col) {
            *size = row;
    }
    else {
        *size = -1;
        return NULL;
    }
    double* buffer = calloc(row*col, sizeof(double));
    for(int i = 0; i < row; i++) {
        for(int j = 0; j < col; j++) {
            double x;
            fread(&x, sizeof(double), 1, file_handle);
            buffer[row * i + j] = x;
        }
    }
    fclose(file_handle);
    printf("Buffer has size %d\n", row*col);
    return buffer;
}

void writeMatrix(double* matrix, char* filename, int size) {
    FILE* file_handle = fopen(filename, "w");
    fwrite(&size, sizeof(int), 1, file_handle);
    fwrite(&size, sizeof(int), 1, file_handle);
    for(int i = 0; i < size; i++) {
        for(int j = 0; j < size; j++) {
            double x = matrix[size * i + j];
            fwrite(&x, sizeof(double), 1, file_handle);
        }
    }
    fclose(file_handle);
}


void Multiply(int n, double *a, double *b, double *c)
{ 
  int i, j, k; 

  for (i=0; i<n; i++) 
    for (j=0; j<n; j++) 
      for (k=0; k<n; k++) 
    c[i*n+j] += a[i*n+k]*b[k*n+j]; 
}

如果代码太多，我可以轻松删除某些部分。我只是想知道是什么导致了僵局以及如何解决它。提前感谢您的宝贵时间。

重要信息：

等级0总是撞到障碍。但是由于其他 3 个陷入僵局，第 0 级被卡住，直到所有人都遇到障碍。

输出

Reading 10
Buffer has size 100
Buffer has size 100
Reading 10
rank[0] has entered loop
rank[0] has left loop
rank[0] has reached the barrier...
rank[1] has entered loop
rank[2] has entered loop
rank[3] has entered loop

Answer 1

有两个小问题可以使某些东西正常工作：

行数：

a = calloc(n*n, sizeof(double));
b = calloc(n*n, sizeof(double));
c = calloc(n*n, sizeof(double));
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

n 应在分配 a 之前广播。否则，n 未初始化且输出未定义。它可以触发分段错误。

在函数MPI_Cart_shift, the third argument is the displacement : negative for downward and positive for upward. I changed it to set the same displacement for everyone and it worked fine. Even if MPI_Sendrecv_replace()中，一个进程收到的消息数必须与发送给该进程的消息数相匹配。您致电 MPI_Sendrecv_replace() 时可能并非如此：
```
MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); 
MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest,1, shiftsource, 1, comm_2d, &status);
```

在"skew" example of open-mpi中略有不同:

 C compute shift source and destination
    CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
                        dest, ierr)
  C skew array
    CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
                              source, 0, comm, status,
                              ierr)

在这种情况下，每行中的所有进程都获得相同的位移。因此，每个进程都应该发送一条消息，每个进程都应该接收一条消息。然而，位移取决于线和矩阵是倾斜的。

这是生成的代码。它由 mpicc main.c -o main -lm -Wall 和运行由 mpirun -np 4 main 编译：

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

int main(int argc, char **argv) 
{

    MPI_Init(&argc, &argv);   

    double* a,*b,*c;
    int i, t, n;
    int nlocal; 
    int npes, dims[2], periods[2]; 
    int myrank, my2drank, mycoords[2]; 
    int uprank, downrank, leftrank, rightrank; 
    int shiftsource, shiftdest; 
    MPI_Status status; 
    MPI_Comm comm_2d; 


    MPI_Comm_size(MPI_COMM_WORLD, &npes); 
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 

    MPI_Barrier(MPI_COMM_WORLD);

    t = -MPI_Wtime();


    if (myrank == 0) {

        int sizeA,sizeB;
        printf("Reading \n");
        //  a = readMatrix(argv[1], &sizeA);
        sizeA=16;
        a=malloc(sizeA*sizeA*sizeof(double));
        //  b = readMatrix(argv[2], &sizeB);
        sizeB=16;
        b=malloc(sizeB*sizeB*sizeof(double));
        printf("Reading \n");
        c = calloc(sizeA*sizeB, sizeof(double));
        n = sizeA;
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 0);
        }


    }
    else {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);//n should be broadcast before allocation
        a = calloc(n*n, sizeof(double));
        b = calloc(n*n, sizeof(double));
        c = calloc(n*n, sizeof(double));

        MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    }


    dims[0] = dims[1] = sqrt(npes); 




    periods[0] = periods[1] = 1; 


    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d); 


    MPI_Comm_rank(comm_2d, &my2drank); 
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords); 


    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank); 
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank); 


    nlocal = n/dims[0]; 

    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest); 
    // MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest); 
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest,5, shiftsource, 5, comm_2d, &status); 

    // MPI_Cart_shift(comm_2d, 1, -mycoords[1], &shiftsource, &shiftdest);
    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest); 
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,shiftdest, 6, shiftsource, 6, comm_2d, &status); 

    printf("rank[%d] has entered loop dim %d\n", myrank,dims[0]);fflush(stdout);
    for (i=0; i<dims[0]; i++) { 
        //  Multiply(nlocal, a, b, c); 

        MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); 


        MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 2, downrank, 2, comm_2d, &status); 
    } 
    printf("rank[%d] has left loop\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    // MPI_Cart_shift(comm_2d, 0, +mycoords[0], &shiftsource, &shiftdest); 
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest); 
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,shiftdest, 3, shiftsource, 3, comm_2d, &status); 

    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest); 
    //MPI_Cart_shift(comm_2d, 1, +mycoords[1], &shiftsource, &shiftdest); 
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,shiftdest, 4, shiftsource, 4, comm_2d, &status); 




    printf("rank[%d] has reached the barrier...\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        //  writeMatrix(c, argv[3], n); 
        printf("Finshed in %d second(s)\n",t);
    }

    free(a); free(b); free(c);

    MPI_Comm_free(&comm_2d); 

    MPI_Finalize();

    return 0;
}

解决处理笛卡尔网格的 MPI 程序中的死锁问题

Trouble Resolving Deadlock in MPI Program dealing with a Cartesian mesh

c

deadlock

mpi

完整代码在这里：

重要信息：

输出