使用 MPI_Gather 收集字符数组时出现分段错误
Segmentation Fault when using MPI_Gather to gather char arrays
我正在尝试使用 MPI 并行化一个简单的 mandelbrot 集算法:
#include <iostream>
#include <cstdlib>
#include <mpi.h>
using namespace std;
int main(int argc, char **argv){
int max_row, max_column, max_n, myrank, procs, each_row;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &procs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
if(argc!=4){
cout << "Invalid number of arguments!\n";
}
max_row=atoi(argv[1]);
max_column=atoi(argv[2]);
max_n=atoi(argv[3]);
MPI_Barrier(MPI_COMM_WORLD);
each_row=max_row/procs;
char* mat = (char*)malloc(sizeof(char*) * each_row * max_column);
for(int r = each_row*myrank; r < each_row*(myrank+1); ++r){
for(int c = 0; c < max_column; ++c){
int n = 0;
float x=0, y=0, tmp;
while((x*x + y*y) < 4 && ++n < max_n) {
tmp = x*x - y*y + ((float) c * 2 / max_column - 1.5);
y = 2*x*y + ((float) r * 2 / max_row - 1);
x = tmp;
}
mat[(r-(each_row*myrank))*max_column+c]=(n == max_n ? '#' : '.');
}
}
MPI_Barrier(MPI_COMM_WORLD);
char* vfinal;
if(myrank==0){
char *vfinal = (char*)malloc(sizeof(char*) * max_row * max_column);
}
MPI_Gather(mat, each_row*max_column, MPI_CHAR, vfinal, each_row*max_column, MPI_CHAR, 0, MPI_COMM_WORLD);
/*
for(int r = 0; r < max_row; ++r){
for(int c = 0; c < max_column; ++c)
std::cout << vfinal[r*max_column+c];
cout << '\n';
}
*/
MPI_Finalize();
return 0;
}
尝试使用 MPI_Gather 收集字符数组时,等级为 0 的进程(MPI_Gather 的根进程)发生了分段错误:
[ricardo@ricardo-desktop] ~ mpirun -np 4 a.out 1024 768 18000 > outMPI
[ricardo-desktop:42845] Read -1, expected 32768, errno = 14
[ricardo-desktop:42845] *** Process received signal ***
[ricardo-desktop:42845] Signal: Segmentation fault (11)
[ricardo-desktop:42845] Signal code: Address not mapped (1)
[ricardo-desktop:42845] Failing at address: 0x5591cc076b20
[ricardo-desktop:42845] [ 0] /usr/lib/libpthread.so.0(+0x14800)[0x7fe12151d800]
[ricardo-desktop:42845] [ 1] /usr/lib/libc.so.6(+0x1643b6)[0x7fe1214a53b6]
[ricardo-desktop:42845] [ 2] /usr/lib/openmpi/libopen-pal.so.40(opal_convertor_unpack+0x86)[0x7fe1212034a6]
[ricardo-desktop:42845] [ 3] /usr/lib/openmpi/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress_frag+0x311)[0x7fe1200bc961]
[ricardo-desktop:42845] [ 4] /usr/lib/openmpi/openmpi/mca_btl_vader.so(mca_btl_vader_poll_handle_frag+0x91)[0x7fe12026f951]
[ricardo-desktop:42845] [ 5] /usr/lib/openmpi/openmpi/mca_btl_vader.so(+0x4c15)[0x7fe12026fc15]
[ricardo-desktop:42845] [ 6] /usr/lib/openmpi/libopen-pal.so.40(opal_progress+0x2c)[0x7fe1211f1a8c]
[ricardo-desktop:42845] [ 7] /usr/lib/openmpi/libmpi.so.40(ompi_request_default_wait+0x146)[0x7fe1218ff696]
[ricardo-desktop:42845] [ 8] /usr/lib/openmpi/libmpi.so.40(ompi_coll_base_gather_intra_linear_sync+0x301)[0x7fe121967541]
[ricardo-desktop:42845] [ 9] /usr/lib/openmpi/openmpi/mca_coll_tuned.so(ompi_coll_tuned_gather_intra_dec_fixed+0xb8)[0x7fe120041a88]
[ricardo-desktop:42845] [10] /usr/lib/openmpi/libmpi.so.40(PMPI_Gather+0x3dd)[0x7fe121928b6d]
[ricardo-desktop:42845] [11] a.out(+0xbecc)[0x5591cc041ecc]
[ricardo-desktop:42845] [12] /usr/lib/libc.so.6(__libc_start_main+0xf3)[0x7fe121368023]
[ricardo-desktop:42845] [13] a.out(+0xbb0e)[0x5591cc041b0e]
[ricardo-desktop:42845] *** End of error message ***
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node ricardo-desktop exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
我感觉问题在于我的 MPI_Gather 是如何设置的。
有谁知道是什么导致了这段代码中的分段错误?
首先,你的 malloc 调用不正确:应该使用 sizeof(char),而不是 sizeof(char *)。
根本原因是您在 if 块内重新声明了 vfinal(遮蔽了外层的同名变量),因此在等级 0 上从未真正为 MPI_Gather() 分配接收缓冲区。
FWIW,使用 -Wall 编译会发出一些警告,这些警告可能会帮您发现此错误。
我正在尝试使用 MPI 并行化一个简单的 mandelbrot 集算法:
#include <iostream>
#include <cstdlib>
#include <mpi.h>
using namespace std;
int main(int argc, char **argv){
int max_row, max_column, max_n, myrank, procs, each_row;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &procs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
if(argc!=4){
cout << "Invalid number of arguments!\n";
}
max_row=atoi(argv[1]);
max_column=atoi(argv[2]);
max_n=atoi(argv[3]);
MPI_Barrier(MPI_COMM_WORLD);
each_row=max_row/procs;
char* mat = (char*)malloc(sizeof(char*) * each_row * max_column);
for(int r = each_row*myrank; r < each_row*(myrank+1); ++r){
for(int c = 0; c < max_column; ++c){
int n = 0;
float x=0, y=0, tmp;
while((x*x + y*y) < 4 && ++n < max_n) {
tmp = x*x - y*y + ((float) c * 2 / max_column - 1.5);
y = 2*x*y + ((float) r * 2 / max_row - 1);
x = tmp;
}
mat[(r-(each_row*myrank))*max_column+c]=(n == max_n ? '#' : '.');
}
}
MPI_Barrier(MPI_COMM_WORLD);
char* vfinal;
if(myrank==0){
char *vfinal = (char*)malloc(sizeof(char*) * max_row * max_column);
}
MPI_Gather(mat, each_row*max_column, MPI_CHAR, vfinal, each_row*max_column, MPI_CHAR, 0, MPI_COMM_WORLD);
/*
for(int r = 0; r < max_row; ++r){
for(int c = 0; c < max_column; ++c)
std::cout << vfinal[r*max_column+c];
cout << '\n';
}
*/
MPI_Finalize();
return 0;
}
尝试使用 MPI_Gather 收集字符数组时,等级为 0 的进程(MPI_Gather 的根进程)发生了分段错误:
[ricardo@ricardo-desktop] ~ mpirun -np 4 a.out 1024 768 18000 > outMPI
[ricardo-desktop:42845] Read -1, expected 32768, errno = 14
[ricardo-desktop:42845] *** Process received signal ***
[ricardo-desktop:42845] Signal: Segmentation fault (11)
[ricardo-desktop:42845] Signal code: Address not mapped (1)
[ricardo-desktop:42845] Failing at address: 0x5591cc076b20
[ricardo-desktop:42845] [ 0] /usr/lib/libpthread.so.0(+0x14800)[0x7fe12151d800]
[ricardo-desktop:42845] [ 1] /usr/lib/libc.so.6(+0x1643b6)[0x7fe1214a53b6]
[ricardo-desktop:42845] [ 2] /usr/lib/openmpi/libopen-pal.so.40(opal_convertor_unpack+0x86)[0x7fe1212034a6]
[ricardo-desktop:42845] [ 3] /usr/lib/openmpi/openmpi/mca_pml_ob1.so(mca_pml_ob1_recv_request_progress_frag+0x311)[0x7fe1200bc961]
[ricardo-desktop:42845] [ 4] /usr/lib/openmpi/openmpi/mca_btl_vader.so(mca_btl_vader_poll_handle_frag+0x91)[0x7fe12026f951]
[ricardo-desktop:42845] [ 5] /usr/lib/openmpi/openmpi/mca_btl_vader.so(+0x4c15)[0x7fe12026fc15]
[ricardo-desktop:42845] [ 6] /usr/lib/openmpi/libopen-pal.so.40(opal_progress+0x2c)[0x7fe1211f1a8c]
[ricardo-desktop:42845] [ 7] /usr/lib/openmpi/libmpi.so.40(ompi_request_default_wait+0x146)[0x7fe1218ff696]
[ricardo-desktop:42845] [ 8] /usr/lib/openmpi/libmpi.so.40(ompi_coll_base_gather_intra_linear_sync+0x301)[0x7fe121967541]
[ricardo-desktop:42845] [ 9] /usr/lib/openmpi/openmpi/mca_coll_tuned.so(ompi_coll_tuned_gather_intra_dec_fixed+0xb8)[0x7fe120041a88]
[ricardo-desktop:42845] [10] /usr/lib/openmpi/libmpi.so.40(PMPI_Gather+0x3dd)[0x7fe121928b6d]
[ricardo-desktop:42845] [11] a.out(+0xbecc)[0x5591cc041ecc]
[ricardo-desktop:42845] [12] /usr/lib/libc.so.6(__libc_start_main+0xf3)[0x7fe121368023]
[ricardo-desktop:42845] [13] a.out(+0xbb0e)[0x5591cc041b0e]
[ricardo-desktop:42845] *** End of error message ***
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node ricardo-desktop exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
我感觉问题在于我的 MPI_Gather 是如何设置的。
有谁知道是什么导致了这段代码中的分段错误?
首先,你的 malloc 调用不正确:应该使用 sizeof(char),而不是 sizeof(char *)。
根本原因是您在 if 块内重新声明了 vfinal(遮蔽了外层的同名变量),因此在等级 0 上从未真正为 MPI_Gather() 分配接收缓冲区。
FWIW,使用 -Wall 编译会发出一些警告,这些警告可能会帮您发现此错误。