Why does MPI give segmentation fault when I allocate memory for a second time for a second array?

So here is the code, with the lines that cause the segmentation fault commented out with //:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main (int argc, char *argv[]) {
  float *delta;
  int size,rank,*arr;

  MPI_Init (&argc, &argv); //initialize MPI library
  MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get my process id
  
    if (rank==0){
        //MPI_Alloc_mem(2*sizeof(int),MPI_INFO_NULL,arr);
        MPI_Alloc_mem(3* sizeof(float),MPI_INFO_NULL,delta);
        delta[0]=1;
        delta[1]=2;
        //arr[0]=1;
        for (int i=1; i<size; i++){
            MPI_Send(delta,3,MPI_FLOAT,i,4,MPI_COMM_WORLD);
            //MPI_Send(arr,2,MPI_INT,i,4,MPI_COMM_WORLD);
        }
    }
    else{
        MPI_Recv(delta, 3, MPI_FLOAT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        //MPI_Recv(arr, 2, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  }
  printf("delta:%f from process: %d\n",delta[0],rank);
  //printf("arr:%d from process: %d\n",arr[0],rank);
  MPI_Finalize(); //MPI cleanup
  return 0;
}

And here is the code that does not work:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main (int argc, char *argv[]) {
  float *delta;
  int size,rank,*arr;

  MPI_Init (&argc, &argv); //initialize MPI library
  MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get my process id
  
    if (rank==0){
        MPI_Alloc_mem(2*sizeof(int),MPI_INFO_NULL,arr);
        MPI_Alloc_mem(3* sizeof(float),MPI_INFO_NULL,delta);
        delta[0]=1;
        delta[1]=2;
        arr[0]=1;
        for (int i=1; i<size; i++){
            MPI_Send(delta,3,MPI_FLOAT,i,4,MPI_COMM_WORLD);
            MPI_Send(arr,2,MPI_INT,i,4,MPI_COMM_WORLD);
        }
    }
    else{
        MPI_Recv(delta, 3, MPI_FLOAT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(arr, 2, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  }
  printf("delta:%f from process: %d\n",delta[0],rank);
  printf("arr:%d from process: %d\n",arr[0],rank);
  MPI_Finalize(); //MPI cleanup
  return 0;
}

Why does this happen when I allocate memory a second time? I also tried malloc instead of MPI_Alloc_mem, just in case, but that didn't really change anything.

You only allocate the buffers on process 0: on the other processes there are no buffers. Maybe you are confused about how send and receive work: they don't send buffers, they send the contents of buffers. So the receiver needs to allocate space for the incoming data to land in. The receive call does not create a buffer; it only puts data into one that already exists.
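A minimal sketch of that fix, keeping the MPI_Alloc_mem approach from the question (tag, counts, and values copied from it): every rank allocates its own buffers before communicating. Note also that MPI_Alloc_mem returns the allocated address through its last argument, so it must be passed the address of the pointer (&arr, &delta), not the uninitialized pointer itself:

#include <mpi.h>
#include <stdio.h>

int main (int argc, char *argv[]) {
  float *delta;
  int size,rank,*arr;

  MPI_Init (&argc, &argv); //initialize MPI library
  MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get my process id

  //every rank needs its own buffers, sender and receivers alike;
  //note the &: MPI_Alloc_mem writes the allocated address back
  //through its void* argument
  MPI_Alloc_mem(2*sizeof(int),MPI_INFO_NULL,&arr);
  MPI_Alloc_mem(3*sizeof(float),MPI_INFO_NULL,&delta);

  if (rank==0){
      delta[0]=1; delta[1]=2; delta[2]=0; //initialize everything that is sent
      arr[0]=1; arr[1]=0;
      for (int i=1; i<size; i++){
          MPI_Send(delta,3,MPI_FLOAT,i,4,MPI_COMM_WORLD);
          MPI_Send(arr,2,MPI_INT,i,4,MPI_COMM_WORLD);
      }
  }
  else{
      MPI_Recv(delta, 3, MPI_FLOAT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
      MPI_Recv(arr, 2, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  }
  printf("delta:%f arr:%d from process: %d\n",delta[0],arr[0],rank);

  MPI_Free_mem(arr); //MPI_Alloc_mem pairs with MPI_Free_mem
  MPI_Free_mem(delta);
  MPI_Finalize(); //MPI cleanup
  return 0;
}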

I'm not familiar with MPI_Alloc_mem, so I used malloc. As @Victor Eijkhout mentioned, you need to allocate the same amount of memory for the arrays on every process. Here is a working version of your code:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main (int argc, char *argv[]) {
  float *delta;
  int *arr;
  int size,rank;
  MPI_Init (&argc, &argv); //initialize MPI library
  MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get my process id
  
    if (rank==0){
        arr = malloc(2 * sizeof(int));
        delta = malloc(3 * sizeof(float));
        //MPI_Alloc_mem(2*sizeof(int),MPI_INFO_NULL,arr);
        //MPI_Alloc_mem(3*sizeof(float),MPI_INFO_NULL,delta);

        delta[0]=1;
        delta[1]=2;
        arr[0]=10;
        arr[1] = 20;
        for (int i=1; i<size; i++){
            MPI_Send(delta,3,MPI_FLOAT,i, 4,MPI_COMM_WORLD);
            MPI_Send(arr,2,MPI_INT,i, 4, MPI_COMM_WORLD);
        }
    }
    else{
        arr = malloc(2 * sizeof(int));
        delta = malloc(3 * sizeof(float));
        MPI_Recv(delta, 3, MPI_FLOAT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(arr, 2, MPI_INT, 0, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
  }
  printf("Process %d: arr:[%d, %d] | delta:[%f, %f]\n", rank, arr[0], arr[1], delta[0], delta[1]);

  free(arr); //each rank allocated its own buffers, so each rank frees them
  free(delta);
  MPI_Finalize(); //MPI cleanup
  return 0;
}
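For reference, this can be compiled and launched in the usual way, e.g. mpicc main.c -o main followed by mpirun -np 4 ./main (the source file name and the process count of 4 are just the ones assumed here, matching the output below).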

Output:

Process 3: arr:[10, 20] | delta:[1.000000, 2.000000]
Process 0: arr:[10, 20] | delta:[1.000000, 2.000000]
Process 2: arr:[10, 20] | delta:[1.000000, 2.000000]
Process 1: arr:[10, 20] | delta:[1.000000, 2.000000]
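As a side note, the send loop on rank 0 distributes the same data to every other rank, which is exactly what the collective MPI_Bcast does in a single call. A minimal sketch of the same program using it (buffer sizes and values as above, keeping the malloc version):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main (int argc, char *argv[]) {
  float *delta;
  int *arr;
  int size,rank;

  MPI_Init (&argc, &argv); //initialize MPI library
  MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get my process id

  //every rank allocates its own buffers, as before
  arr = malloc(2 * sizeof(int));
  delta = malloc(3 * sizeof(float));

  if (rank==0){ //only the root fills them
      delta[0]=1;
      delta[1]=2;
      delta[2]=0;
      arr[0]=10;
      arr[1]=20;
  }

  //collective call: the contents of the root's buffers are copied
  //into the matching buffer on every other rank; all ranks call it
  MPI_Bcast(delta, 3, MPI_FLOAT, 0, MPI_COMM_WORLD);
  MPI_Bcast(arr, 2, MPI_INT, 0, MPI_COMM_WORLD);

  printf("Process %d: arr:[%d, %d] | delta:[%f, %f]\n", rank, arr[0], arr[1], delta[0], delta[1]);

  free(arr);
  free(delta);
  MPI_Finalize(); //MPI cleanup
  return 0;
}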