MPI: Sending and receiving a dynamically allocated sub-matrix
I am having trouble sending dynamically allocated sub-matrices to the worker tasks. I don't understand how to do this correctly (or what exactly I should be sending).
Here is the sending part:
MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, dest, FROM_MASTER + 2, MPI_COMM_WORLD);
MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
Here is the receiving part:
MPI_Recv(&(a[0][0]), rows * NCA, MPI_DOUBLE, MASTER, FROM_MASTER + 2, MPI_COMM_WORLD, &status);
MPI_Recv(&(b[0][0]), NCA * NCB, MPI_DOUBLE, MASTER, FROM_MASTER + 3, MPI_COMM_WORLD, &status);
Here is the error message:
[pop-os:29368] Read -1, expected 80000, errno = 14
[pop-os:29367] *** Process received signal ***
[pop-os:29367] Signal: Segmentation fault (11)
[pop-os:29367] Signal code: Address not mapped (1)
[pop-os:29367] Failing at address: 0x7fffc2ae8000
Here is the full code:
#include <cstdio>
#include <cstdlib>
#include "mpi.h"

#define NRA 100        /* number of rows in matrix A */
#define NCA 100        /* number of columns in matrix A */
#define NCB 100        /* number of columns in matrix B */
#define MASTER 0       /* taskid of first task */
#define FROM_MASTER 1  /* setting a message type */
#define FROM_WORKER 10 /* setting a message type */

double **alloc_2d_int(int rows, int cols) {
  double **array = (double **)malloc(rows * sizeof(double *));
  for (int i = 0; i < rows; i++)
    array[i] = (double *)malloc(rows * cols * sizeof(double));
  return array;
}

int main(int argc, char *argv[]) {
  int numtasks, taskid, numworkers, source, dest,
      rows, /* rows of matrix A sent to each worker */
      averow, extra, offset, i, j, k;
  double **a = alloc_2d_int(NRA, NCA);
  double **b = alloc_2d_int(NCA, NCB);
  double **c = alloc_2d_int(NRA, NCB);

  MPI_Init(&argc, &argv);
  MPI_Status status;
  MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
  if (numtasks < 2) {
    printf("Need at least two MPI tasks. Quitting...\n");
    MPI_Abort(MPI_COMM_WORLD, -1);
    exit(1);
  }
  numworkers = numtasks - 1;

  if (taskid == MASTER) {
    printf("mpi_mm has started with %d tasks (task1).\n", numtasks);
    for (i = 0; i < NRA; i++)
      for (j = 0; j < NCA; j++) a[i][j] = 10;
    for (i = 0; i < NCA; i++)
      for (j = 0; j < NCB; j++) b[i][j] = 10;

    double t1 = MPI_Wtime();
    averow = NRA / numworkers;
    extra = NRA % numworkers;
    offset = 0;
    for (dest = 1; dest <= numworkers; dest++) {
      rows = (dest <= extra) ? averow + 1 : averow;
      printf("Sending %d rows to task %d offset=%d\n", rows, dest, offset);
      MPI_Send(&offset, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, FROM_MASTER + 1, MPI_COMM_WORLD);
      MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, dest, FROM_MASTER + 2,
               MPI_COMM_WORLD);
      MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3,
               MPI_COMM_WORLD);
      offset = offset + rows;
    }

    /* Receive results from worker tasks */
    for (source = 1; source <= numworkers; source++) {
      MPI_Recv(&offset, 1, MPI_INT, source, FROM_WORKER, MPI_COMM_WORLD,
               &status);
      MPI_Recv(&rows, 1, MPI_INT, source, FROM_WORKER + 1, MPI_COMM_WORLD,
               &status);
      MPI_Recv(&(c[offset][0]), rows * NCB, MPI_DOUBLE, source, FROM_WORKER + 2,
               MPI_COMM_WORLD, &status);
      printf("Received results from task %d\n", source);
    }

    /* Print results */
    /*
    printf("****\n");
    printf("Result Matrix:\n");
    for (i = 0; i < NRA; i++)
    {
      printf("\n");
      for (j = 0; j < NCB; j++) printf("%6.2f ", c[i][j]);
    }*/
    printf("\n********\n");
    printf("Done.\n");
    t1 = MPI_Wtime() - t1;
    printf("\nExecution time: %.2f\n", t1);
  }
  /******** worker task *****************/
  else { /* if (taskid > MASTER) */
    MPI_Recv(&offset, 1, MPI_INT, MASTER, FROM_MASTER, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, MASTER, FROM_MASTER + 1, MPI_COMM_WORLD,
             &status);
    MPI_Recv(&(a[0][0]), rows * NCA, MPI_DOUBLE, MASTER, FROM_MASTER + 2,
             MPI_COMM_WORLD, &status);
    MPI_Recv(&(b[0][0]), NCA * NCB, MPI_DOUBLE, MASTER, FROM_MASTER + 3,
             MPI_COMM_WORLD, &status);
    for (k = 0; k < NCB; k++)
      for (i = 0; i < rows; i++) {
        c[i][k] = 0.0;
        for (j = 0; j < NCA; j++) c[i][k] = c[i][k] + a[i][j] * b[j][k];
      }
    MPI_Send(&offset, 1, MPI_INT, MASTER, FROM_WORKER, MPI_COMM_WORLD);
    MPI_Send(&rows, 1, MPI_INT, MASTER, FROM_WORKER + 1, MPI_COMM_WORLD);
    MPI_Send(&c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2,
             MPI_COMM_WORLD);
  }

  for (i = 0; i < NRA; i++) free(a[i]);
  free(a);
  for (i = 0; i < NCA; i++) free(b[i]);
  free(b);
  for (i = 0; i < NRA; i++) free(c[i]);
  free(c);
  MPI_Finalize();
}
Solution: link to GitHub with the corrected code
The first parameter of MPI_Send (const void *) and MPI_Recv (void *) is the address of the data buffer itself, not the address of the pointer variable. So you need to change:
MPI_Send(&b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
to
MPI_Send(b, NCA * NCB, MPI_DOUBLE, dest, FROM_MASTER + 3, MPI_COMM_WORLD);
and
MPI_Send(&c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2, MPI_COMM_WORLD);
to
MPI_Send(c, rows * NCB, MPI_DOUBLE, MASTER, FROM_WORKER + 2, MPI_COMM_WORLD);
The other problem you have is that you are allocating an array of pointers:
double **alloc_2d_int(int rows, int cols) {
  double **array = (double **)malloc(rows * sizeof(double *));
  for (int i = 0; i < rows; i++)
    array[i] = (double *)malloc(rows * cols * sizeof(double));
  return array;
}
but MPI_Send and MPI_Recv assume that the data in the buffer is contiguous, and an array of separately malloc'ed rows is not. To solve this you can allocate a contiguous 2D array, simply represent the matrix as a flat 1D array, create a custom MPI datatype, and so on; a sketch of the contiguous-allocation approach is shown below.
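For illustration, here is a minimal sketch of the contiguous-allocation idea. The helper names alloc_2d_double and free_2d_double are hypothetical (not from the original post); the point is that the payload is one single malloc'ed block, with the row pointers merely indexing into it.

#include <cstdlib>

/* Hypothetical helper (assumption, not the original code): allocate a
 * rows x cols matrix whose payload is one contiguous block of doubles,
 * plus an array of row pointers into that block. */
double **alloc_2d_double(int rows, int cols) {
  double *data = (double *)malloc((size_t)rows * cols * sizeof(double));
  double **array = (double **)malloc((size_t)rows * sizeof(double *));
  for (int i = 0; i < rows; i++)
    array[i] = data + (size_t)i * cols; /* row i starts at offset i*cols */
  return array;
}

/* Matching deallocation: one free for the payload, one for the pointers. */
void free_2d_double(double **array) {
  free(array[0]); /* the contiguous payload */
  free(array);    /* the row-pointer array */
}

With this layout, MPI_Send(&(a[offset][0]), rows * NCA, MPI_DOUBLE, ...) really does send `rows` consecutive rows of A, and the whole of B can be sent with MPI_Send(&(b[0][0]), NCA * NCB, MPI_DOUBLE, ...) (equivalently, passing b[0] as the buffer), because all elements sit back-to-back in memory.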