MPI_Allreduce 对派生数据类型(向量)求和
mpi_allreduce sum over a derived datatype vector
我正在尝试对由 MPI_Type_vector 创建的派生数据类型进行归约(求和)。运行时,代码崩溃并报错:归约操作 MPI_SUM 没有为非内置(non-intrinsic)数据类型定义。
我写了一段简单的代码来说明我的问题。该代码尝试对 3×3 矩阵的对角线元素进行归约求和:
#include "mpi.h"
#include <stdio.h>
/*
 * Reproducer from the question. Rank 0 fills a 3x3 matrix with 1 and rank 1
 * fills it with -1; the diagonal, described by a strided MPI_Type_vector, is
 * then handed to MPI_Allreduce together with the built-in MPI_SUM operation.
 */
int main(int argc, char *argv[]) {
    int rank, size;
    double a[3][3];
    MPI_Datatype diag3;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* 3 blocks of 1 double, 4 doubles apart: a[0][0], a[1][1], a[2][2]. */
    MPI_Type_vector(3, 1, 4, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    /* Only ranks 0 and 1 write the matrix; other ranks leave it untouched. */
    if (rank == 0 || rank == 1) {
        const double fill = (rank == 0) ? 1 : -1;
        for (int row = 0; row < 3; row++) {
            for (int col = 0; col < 3; col++) {
                a[row][col] = fill;
            }
        }
    }

    /* Built-in MPI_SUM combined with the derived type diag3: this is the
     * MPI_Allreduce call the reported MPI_ERR_OP refers to. */
    MPI_Allreduce(MPI_IN_PLACE, &a[0][0], 1, diag3, MPI_SUM, MPI_COMM_WORLD);

    for (int row = 0; row < 3; row++) {
        for (int col = 0; col < 3; col++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, row, col, a[row][col]);
        }
    }

    MPI_Finalize();
}
运行之后的错误是这样的:
*** An error occurred in MPI_Allreduce: the reduction operation MPI_SUM is not defined for non-intrinsic datatypes
*** reported by process [140130307538945,1]
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_OP: invalid reduce operation
我认为 Reduce 和 MPI_SUM 可以像 MPI 文档所说的那样在派生数据类型上执行。那么,代码中存在什么问题?
我认为出错的原因是:对于您创建的向量类型,没有已定义的相加方法。您可以定义自己的求和运算:
#include "mpi.h"
#include <stdio.h>
/*
 * User-defined reduction callback: adds the first *len entries of invec into
 * inoutvec, treating both buffers as contiguous arrays of int.
 * The dtype argument is not inspected.
 *
 * NOTE(review): the main() that follows registers this callback for a strided
 * vector of MPI_DOUBLE and reduces with count 1. Presumably *len is then 1, so
 * only element 0 would be touched, and it is added as an int rather than as a
 * double -- the strided diagonal itself is not summed. Confirm before reuse.
 */
void mySum ( int *, int *, int *, MPI_Datatype * );
void mySum(int *invec, int *inoutvec, int *len, MPI_Datatype *dtype)
{
int i;
/* element-wise accumulate over a contiguous int range */
for ( i=0; i<*len; i++ )
inoutvec[i] += invec[i];
}
/*
 * Driver for the user-defined reduction mySum: builds a 3x3 matrix per rank,
 * reduces its diagonal (strided vector type) in place across all ranks with
 * the custom operation, and prints the resulting matrix on every rank.
 *
 * Rank 0 contributes a[i][j] = i + j, rank 1 contributes -1 everywhere, and
 * any further rank contributes 0 so that no rank passes uninitialised data to
 * MPI_Allreduce.
 */
int main(int argc, char *argv[]) {
    int rank, size, i, j;
    double a[3][3];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* 3 blocks of 1 double, 4 doubles apart: a[0][0], a[1][1], a[2][2]. */
    MPI_Datatype diag3;
    MPI_Type_vector(3, 1, 4, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    /* The cast is required because mySum is declared with int* buffers
     * instead of the void* buffers of MPI_User_function. */
    MPI_Op diagSum;
    MPI_Op_create((MPI_User_function *)mySum, 1, &diagSum);

    for (i = 0; i < 3; i++) {
        for (j = 0; j < 3; j++) {
            if (rank == 0) {
                a[i][j] = i + j;
            } else if (rank == 1) {
                a[i][j] = -1;
            } else {
                a[i][j] = 0; /* neutral contribution from extra ranks */
            }
        }
    }

    MPI_Allreduce(MPI_IN_PLACE, &a[0][0], 1, diag3, diagSum, MPI_COMM_WORLD);

    for (i = 0; i < 3; i++) {
        for (j = 0; j < 3; j++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, i, j, a[i][j]);
        }
    }

    /* Release every MPI object created above before finalizing. */
    MPI_Op_free(&diagSum);
    MPI_Type_free(&diag3);
    MPI_Finalize();
    return 0;
}
Ed Smith 是对的,您需要定义自己的归约操作;但对于非连续类型,它需要比他给出的版本更复杂一些。下面的 add_double_vector 函数
会解码任意由 double 构成的向量类型并对其进行求和;将它扩展到 len > 1 的情况也相对简单。
#include "mpi.h"
#include <stdio.h>
/*
 * User-defined reduction for MPI_Op_create: element-wise sum of ONE item of a
 * datatype built with MPI_Type_vector over MPI_DOUBLE.
 *
 *   in     incoming contribution (only read)
 *   inout  accumulator, updated in place
 *   len    number of datatype items; only *len == 1 is supported
 *   dtype  datatype handle; its combiner must be MPI_COMBINER_VECTOR and its
 *          base type must be MPI_DOUBLE
 *
 * For any unsupported input a message is written to stderr and the function
 * returns without modifying inout.
 */
void add_double_vector(void *in, void *inout, int *len, MPI_Datatype *dtype)
{
    double *invec = in;
    double *inoutvec = inout;
    int nints, naddresses, ntypes;
    int combiner;

    if (*len != 1) {
        fprintf(stderr,"my_add: len>1 not implemented.\n");
        return;
    }

    /* Ask MPI how the datatype was constructed. */
    MPI_Type_get_envelope(*dtype, &nints, &naddresses, &ntypes, &combiner);
    if (combiner != MPI_COMBINER_VECTOR) {
        fprintf(stderr,"my_add: do not understand composite datatype.\n");
        return;
    }

    /* A vector type carries no address arguments, and a zero-length
     * variable-length array is undefined behaviour, so every scratch array
     * is given at least one element. */
    int          vecargs [nints      > 0 ? nints      : 1];
    MPI_Aint     vecaddrs[naddresses > 0 ? naddresses : 1];
    MPI_Datatype vectypes[ntypes     > 0 ? ntypes     : 1];
    MPI_Type_get_contents(*dtype, nints, naddresses, ntypes,
                          vecargs, vecaddrs, vectypes);

    if (vectypes[0] != MPI_DOUBLE) {
        int base_ints, base_addrs, base_types, base_combiner;

        fprintf(stderr,"my_add: not a vector of DOUBLEs.\n");
        /* A derived base type is returned as a new handle that the caller
         * owns; predefined (named) types must not be freed. */
        MPI_Type_get_envelope(vectypes[0], &base_ints, &base_addrs,
                              &base_types, &base_combiner);
        if (base_combiner != MPI_COMBINER_NAMED) {
            MPI_Type_free(&vectypes[0]);
        }
        /* Do not add data of another element type as if it were double. */
        return;
    }

    /* Constructor arguments of MPI_Type_vector, in call order. */
    int count    = vecargs[0];
    int blocklen = vecargs[1];
    int stride   = vecargs[2];

    /* Visit only the elements the vector type selects: count blocks of
     * blocklen doubles whose starts are stride doubles apart. */
    for ( int i=0; i<count; i++ ) {
        for ( int j=0; j<blocklen; j++) {
            inoutvec[i*stride+j] += invec[i*stride+j];
        }
    }
}
/*
 * Driver for add_double_vector: every rank builds an n x n matrix, the
 * diagonal (strided vector type) is summed in place across all ranks with the
 * user-defined operation, and each rank prints the resulting matrix.
 *
 * Rank 0 contributes 1 everywhere, rank 1 contributes -1, and any further
 * rank contributes 0 so that no rank passes uninitialised data to
 * MPI_Allreduce.
 */
int main(int argc, char *argv[]) {
    int rank, size, i, j;
    const int n=3;
    double a[n][n];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* n blocks of 1 double, n+1 doubles apart: the main diagonal of a. */
    MPI_Datatype diag3;
    MPI_Type_vector(n, 1, n+1, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    double fill = 0.0; /* neutral contribution for ranks other than 0 and 1 */
    if (rank == 0) {
        fill = 1;
    } else if (rank == 1) {
        fill = -1;
    }
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            a[i][j] = fill;
        }
    }

    MPI_Op vector_add;
    MPI_Op_create( add_double_vector, 1, &vector_add );
    MPI_Allreduce( MPI_IN_PLACE, &a[0][0], 1, diag3, vector_add, MPI_COMM_WORLD );

    /* Release the MPI objects created above; neither is used again. */
    MPI_Op_free( &vector_add );
    MPI_Type_free( &diag3 );

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, i, j, a[i][j]);
        }
    }

    MPI_Finalize();
    return 0;
}
编译并运行给出正确答案:
$ mpicc -o foo foo.c -std=c99
$ mpirun -np 2 ./foo
rank=1 a[0][0]=0.000000
rank=1 a[0][1]=-1.000000
rank=1 a[0][2]=-1.000000
rank=1 a[1][0]=-1.000000
rank=1 a[1][1]=0.000000
rank=1 a[1][2]=-1.000000
rank=1 a[2][0]=-1.000000
rank=1 a[2][1]=-1.000000
rank=1 a[2][2]=0.000000
rank=0 a[0][0]=0.000000
rank=0 a[0][1]=1.000000
rank=0 a[0][2]=1.000000
rank=0 a[1][0]=1.000000
rank=0 a[1][1]=0.000000
rank=0 a[1][2]=1.000000
rank=0 a[2][0]=1.000000
rank=0 a[2][1]=1.000000
rank=0 a[2][2]=0.000000
我正在尝试对由 MPI_Type_vector 创建的派生数据类型进行归约(求和)。运行时,代码崩溃并报错:归约操作 MPI_SUM 没有为非内置(non-intrinsic)数据类型定义。我写了一段简单的代码来说明我的问题。该代码尝试对 3×3 矩阵的对角线元素进行归约求和:
#include "mpi.h"
#include <stdio.h>
/*
 * Reproducer from the question. Rank 0 fills a 3x3 matrix with 1 and rank 1
 * fills it with -1; the diagonal, described by a strided MPI_Type_vector, is
 * then handed to MPI_Allreduce together with the built-in MPI_SUM operation.
 */
int main(int argc, char *argv[]) {
    int rank, size;
    double a[3][3];
    MPI_Datatype diag3;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* 3 blocks of 1 double, 4 doubles apart: a[0][0], a[1][1], a[2][2]. */
    MPI_Type_vector(3, 1, 4, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    /* Only ranks 0 and 1 write the matrix; other ranks leave it untouched. */
    if (rank == 0 || rank == 1) {
        const double fill = (rank == 0) ? 1 : -1;
        for (int row = 0; row < 3; row++) {
            for (int col = 0; col < 3; col++) {
                a[row][col] = fill;
            }
        }
    }

    /* Built-in MPI_SUM combined with the derived type diag3: this is the
     * MPI_Allreduce call the reported MPI_ERR_OP refers to. */
    MPI_Allreduce(MPI_IN_PLACE, &a[0][0], 1, diag3, MPI_SUM, MPI_COMM_WORLD);

    for (int row = 0; row < 3; row++) {
        for (int col = 0; col < 3; col++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, row, col, a[row][col]);
        }
    }

    MPI_Finalize();
}
运行之后的错误是这样的:
*** An error occurred in MPI_Allreduce: the reduction operation MPI_SUM is not defined for non-intrinsic datatypes
*** reported by process [140130307538945,1]
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_OP: invalid reduce operation
我认为 Reduce 和 MPI_SUM 可以像 MPI 文档所说的那样在派生数据类型上执行。那么,代码中存在什么问题?
我认为出错的原因是:对于您创建的向量类型,没有已定义的相加方法。您可以定义自己的求和运算:
#include "mpi.h"
#include <stdio.h>
/*
 * User-defined reduction callback: adds the first *len entries of invec into
 * inoutvec, treating both buffers as contiguous arrays of int.
 * The dtype argument is not inspected.
 *
 * NOTE(review): the main() that follows registers this callback for a strided
 * vector of MPI_DOUBLE and reduces with count 1. Presumably *len is then 1, so
 * only element 0 would be touched, and it is added as an int rather than as a
 * double -- the strided diagonal itself is not summed. Confirm before reuse.
 */
void mySum ( int *, int *, int *, MPI_Datatype * );
void mySum(int *invec, int *inoutvec, int *len, MPI_Datatype *dtype)
{
int i;
/* element-wise accumulate over a contiguous int range */
for ( i=0; i<*len; i++ )
inoutvec[i] += invec[i];
}
/*
 * Driver for the user-defined reduction mySum: builds a 3x3 matrix per rank,
 * reduces its diagonal (strided vector type) in place across all ranks with
 * the custom operation, and prints the resulting matrix on every rank.
 *
 * Rank 0 contributes a[i][j] = i + j, rank 1 contributes -1 everywhere, and
 * any further rank contributes 0 so that no rank passes uninitialised data to
 * MPI_Allreduce.
 */
int main(int argc, char *argv[]) {
    int rank, size, i, j;
    double a[3][3];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* 3 blocks of 1 double, 4 doubles apart: a[0][0], a[1][1], a[2][2]. */
    MPI_Datatype diag3;
    MPI_Type_vector(3, 1, 4, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    /* The cast is required because mySum is declared with int* buffers
     * instead of the void* buffers of MPI_User_function. */
    MPI_Op diagSum;
    MPI_Op_create((MPI_User_function *)mySum, 1, &diagSum);

    for (i = 0; i < 3; i++) {
        for (j = 0; j < 3; j++) {
            if (rank == 0) {
                a[i][j] = i + j;
            } else if (rank == 1) {
                a[i][j] = -1;
            } else {
                a[i][j] = 0; /* neutral contribution from extra ranks */
            }
        }
    }

    MPI_Allreduce(MPI_IN_PLACE, &a[0][0], 1, diag3, diagSum, MPI_COMM_WORLD);

    for (i = 0; i < 3; i++) {
        for (j = 0; j < 3; j++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, i, j, a[i][j]);
        }
    }

    /* Release every MPI object created above before finalizing. */
    MPI_Op_free(&diagSum);
    MPI_Type_free(&diag3);
    MPI_Finalize();
    return 0;
}
Ed Smith 是对的,您需要定义自己的归约操作;但对于非连续类型,它需要比他给出的版本更复杂一些。下面的 add_double_vector 函数
会解码任意由 double 构成的向量类型并对其进行求和;将它扩展到 len > 1 的情况也相对简单。
#include "mpi.h"
#include <stdio.h>
/*
 * User-defined reduction for MPI_Op_create: element-wise sum of ONE item of a
 * datatype built with MPI_Type_vector over MPI_DOUBLE.
 *
 *   in     incoming contribution (only read)
 *   inout  accumulator, updated in place
 *   len    number of datatype items; only *len == 1 is supported
 *   dtype  datatype handle; its combiner must be MPI_COMBINER_VECTOR and its
 *          base type must be MPI_DOUBLE
 *
 * For any unsupported input a message is written to stderr and the function
 * returns without modifying inout.
 */
void add_double_vector(void *in, void *inout, int *len, MPI_Datatype *dtype)
{
    double *invec = in;
    double *inoutvec = inout;
    int nints, naddresses, ntypes;
    int combiner;

    if (*len != 1) {
        fprintf(stderr,"my_add: len>1 not implemented.\n");
        return;
    }

    /* Ask MPI how the datatype was constructed. */
    MPI_Type_get_envelope(*dtype, &nints, &naddresses, &ntypes, &combiner);
    if (combiner != MPI_COMBINER_VECTOR) {
        fprintf(stderr,"my_add: do not understand composite datatype.\n");
        return;
    }

    /* A vector type carries no address arguments, and a zero-length
     * variable-length array is undefined behaviour, so every scratch array
     * is given at least one element. */
    int          vecargs [nints      > 0 ? nints      : 1];
    MPI_Aint     vecaddrs[naddresses > 0 ? naddresses : 1];
    MPI_Datatype vectypes[ntypes     > 0 ? ntypes     : 1];
    MPI_Type_get_contents(*dtype, nints, naddresses, ntypes,
                          vecargs, vecaddrs, vectypes);

    if (vectypes[0] != MPI_DOUBLE) {
        int base_ints, base_addrs, base_types, base_combiner;

        fprintf(stderr,"my_add: not a vector of DOUBLEs.\n");
        /* A derived base type is returned as a new handle that the caller
         * owns; predefined (named) types must not be freed. */
        MPI_Type_get_envelope(vectypes[0], &base_ints, &base_addrs,
                              &base_types, &base_combiner);
        if (base_combiner != MPI_COMBINER_NAMED) {
            MPI_Type_free(&vectypes[0]);
        }
        /* Do not add data of another element type as if it were double. */
        return;
    }

    /* Constructor arguments of MPI_Type_vector, in call order. */
    int count    = vecargs[0];
    int blocklen = vecargs[1];
    int stride   = vecargs[2];

    /* Visit only the elements the vector type selects: count blocks of
     * blocklen doubles whose starts are stride doubles apart. */
    for ( int i=0; i<count; i++ ) {
        for ( int j=0; j<blocklen; j++) {
            inoutvec[i*stride+j] += invec[i*stride+j];
        }
    }
}
/*
 * Driver for add_double_vector: every rank builds an n x n matrix, the
 * diagonal (strided vector type) is summed in place across all ranks with the
 * user-defined operation, and each rank prints the resulting matrix.
 *
 * Rank 0 contributes 1 everywhere, rank 1 contributes -1, and any further
 * rank contributes 0 so that no rank passes uninitialised data to
 * MPI_Allreduce.
 */
int main(int argc, char *argv[]) {
    int rank, size, i, j;
    const int n=3;
    double a[n][n];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* n blocks of 1 double, n+1 doubles apart: the main diagonal of a. */
    MPI_Datatype diag3;
    MPI_Type_vector(n, 1, n+1, MPI_DOUBLE, &diag3);
    MPI_Type_commit(&diag3);

    double fill = 0.0; /* neutral contribution for ranks other than 0 and 1 */
    if (rank == 0) {
        fill = 1;
    } else if (rank == 1) {
        fill = -1;
    }
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            a[i][j] = fill;
        }
    }

    MPI_Op vector_add;
    MPI_Op_create( add_double_vector, 1, &vector_add );
    MPI_Allreduce( MPI_IN_PLACE, &a[0][0], 1, diag3, vector_add, MPI_COMM_WORLD );

    /* Release the MPI objects created above; neither is used again. */
    MPI_Op_free( &vector_add );
    MPI_Type_free( &diag3 );

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            printf("rank=%d\ta[%d][%d]=%f\n", rank, i, j, a[i][j]);
        }
    }

    MPI_Finalize();
    return 0;
}
编译并运行给出正确答案:
$ mpicc -o foo foo.c -std=c99
$ mpirun -np 2 ./foo
rank=1 a[0][0]=0.000000
rank=1 a[0][1]=-1.000000
rank=1 a[0][2]=-1.000000
rank=1 a[1][0]=-1.000000
rank=1 a[1][1]=0.000000
rank=1 a[1][2]=-1.000000
rank=1 a[2][0]=-1.000000
rank=1 a[2][1]=-1.000000
rank=1 a[2][2]=0.000000
rank=0 a[0][0]=0.000000
rank=0 a[0][1]=1.000000
rank=0 a[0][2]=1.000000
rank=0 a[1][0]=1.000000
rank=0 a[1][1]=0.000000
rank=0 a[1][2]=1.000000
rank=0 a[2][0]=1.000000
rank=0 a[2][1]=1.000000
rank=0 a[2][2]=0.000000