Parallel calculation of the sum of an array with OpenMPI & Debugging tips
I am trying to convert a serial program to a parallel one using OpenMPI. I use the simple code below to calculate the sum of an array, and I tried to make it run on multiple nodes, but during the run I get an MPI_ERROR that I don't know how to debug.
This is the simple code I wrote for the serial calculation.
//array serial
#include <stdio.h>
#define SIZE 50000
int main(int argc, char *argv[])
{
    int i, sum, Tsum, data[SIZE];
    for (int i = 0; i < SIZE; ++i)
    {
        data[i] = i + 1;
    }
    Tsum = 0;
    for (int i = 0; i < SIZE; ++i)
    {
        Tsum = Tsum + data[i];
    }
    printf("Total Sum: %d \n", Tsum);
    return 0;
}
This is how I converted it to a parallel version with OpenMPI:
//array parallel
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <mpi.h>
#define SIZE 50000
MPI_Status status;
int main(int argc, char *argv[])
{
    int size, proc, rank, dest, index, i, source, chunksize, sum, Tsum;
    int data1[SIZE], data2[SIZE];
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    proc = size;
    chunksize = SIZE / proc;
    if (rank == 0)
    {
        for (int i = 0; i < SIZE; ++i)
        {
            data1[i] = i+1;
        }
        index = 0;
        for (dest = 1; dest <= proc; ++dest)
        {
            MPI_Send(&index, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
            MPI_Send(&data1[index], chunksize, MPI_INT, dest, 0, MPI_COMM_WORLD);
            index = index + chunksize;
        }
        Tsum = 0;
        for (int i = 0; i < proc; ++i)
        {
            source = 1;
            MPI_Recv(&sum, 1, MPI_INT, source, 0, MPI_COMM_WORLD, &status);
            Tsum = Tsum + sum;
            printf("- - - - - - - - - - - - - -\n");
            printf("Received from process %d, Sum: %d \n", source, sum);
        }
        printf("Total Sum received: %d \n All done \n", Tsum);
    }
    else if (rank > 0)
    {
        MPI_Recv(&index, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(&data2[index], chunksize, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        sum = 0;
        for (int i = index; i < index + chunksize; ++i)
        {
            sum = sum + data2[i];
        }
        MPI_Send(&sum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}
This is the error I get when I try to run it:
$ mpirun -np 2 /lab/ap709
[andrew709:4092] *** An error occurred in MPI_Send
[andrew709:4092] *** reported by process [512884737,0]
[andrew709:4092] *** on communicator MPI_COMM_WORLD
[andrew709:4092] *** MPI_ERR_RANK: invalid rank
[andrew709:4092] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[andrew709:4092] *** and potentially your MPI job)
Also, I would like to ask whether there is a way to debug parallel applications. At school, even though we took classes in C, C++ and Java for whole semesters, we were never taught how to debug. When an error occurs, we just search through the source code trying to find the problem. In my free time I have gained very limited experience with GDB (a few hours at most...), but I don't know how it can be useful for parallel applications.
The line
for (dest = 1; dest <= proc; ++dest)
should be
for (dest = 1; dest < proc; ++dest)
You are trying to send to rank 2, but with only 2 ranks you only have ranks 0 and 1.
Also, when you receive on rank 0, you have:
for (int i = 0; i < proc; ++i)
which means you intend to receive from yourself, but you have no corresponding send.
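Putting both points together, here is a minimal sketch of one way the whole program could look (not the only way; here the root also sums the leftover tail of the array, so nothing is lost even when SIZE does not divide evenly by the number of ranks):
//array parallel -- a minimal corrected sketch: workers are ranks 1 .. size-1,
//the root keeps the leftover tail of the array for itself, and the root posts
//exactly one receive per worker, from that worker's rank
#include <stdio.h>
#include <mpi.h>
#define SIZE 50000
int main(int argc, char *argv[])
{
    int size, rank, dest, source, index, chunksize, sum, Tsum;
    static int data1[SIZE], data2[SIZE];   // static: keep the big arrays off the stack
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    chunksize = SIZE / size;
    if (rank == 0)
    {
        for (int i = 0; i < SIZE; ++i)
            data1[i] = i + 1;
        // send one chunk to each worker rank (1 .. size-1) -- never to rank "size"
        index = 0;
        for (dest = 1; dest < size; ++dest)
        {
            MPI_Send(&index, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
            MPI_Send(&data1[index], chunksize, MPI_INT, dest, 0, MPI_COMM_WORLD);
            index = index + chunksize;
        }
        // the root sums whatever is left over
        Tsum = 0;
        for (int i = index; i < SIZE; ++i)
            Tsum = Tsum + data1[i];
        // one matching receive per worker, from that worker's rank
        for (source = 1; source < size; ++source)
        {
            MPI_Recv(&sum, 1, MPI_INT, source, 0, MPI_COMM_WORLD, &status);
            Tsum = Tsum + sum;
            printf("Received from process %d, Sum: %d \n", source, sum);
        }
        printf("Total Sum received: %d \n", Tsum);
    }
    else
    {
        MPI_Recv(&index, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(&data2[index], chunksize, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        sum = 0;
        for (int i = index; i < index + chunksize; ++i)
            sum = sum + data2[i];
        MPI_Send(&sum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
    }
    MPI_Finalize();
    return 0;
}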
You have several bugs.
The first root loop is wrong, so with -np 2 you will send to rank 2.
The second root loop is wrong -- it should match the corrected first loop.
The third bug, which only shows up when -np is greater than 2, is that the root MPI_Recv is wrong. The program will hang.
I have corrected the program and annotated the source. I moved the basic code into a function. Each time I made a fix, I added a new function containing that fix, so you can see the history of the debugging process.
Since you also need help with debugging, I added the debug code I used to find the problems. I have used this kind of code many times before.
I have also added the output trace files at the bottom.
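Since you asked about gdb: trace prints like the ones in the code below are one approach. Another common trick with MPI programs is to have every rank print its pid and, when you want a debugger, spin on a flag until you attach gdb to the rank you care about and clear it. A minimal stand-alone sketch, separate from the program below:
// debugger-attach sketch: each rank prints its pid; pass any extra argument
// (e.g. "mpirun -np 2 ./prog hold") to make the ranks wait for gdb
#include <stdio.h>
#include <unistd.h>
#include <mpi.h>
volatile int hold = 0;      // global, so "set var hold = 0" works from any gdb frame
int main(int argc, char **argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    fprintf(stderr, "rank %d pid %d\n", rank, (int) getpid());
    if (argc > 1) {
        hold = 1;
        while (hold)        // from another terminal: gdb -p <pid>
            sleep(1);       // then in gdb: set var hold = 0, then continue
    }
    // ... the real work goes here ...
    MPI_Finalize();
    return 0;
}
Because mpirun starts one process per rank, you can attach to just the rank that misbehaves; the PID line printed by the program below serves the same purpose.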
UPDATE: There is also a fourth bug. If SIZE is not evenly divisible by the -np value minus 1, the last few elements will not be summed (e.g. with -np 7 there are 6 workers, chunksize = 50000 / 6 = 8333, and 6 * 8333 = 49998, so the last two elements are dropped -- exactly the shortfall in the FAIL trace below). I added more comments and an additional function that fixes this.
Anyway, here is the code [please forgive the gratuitous style cleanup]:
//array parallel
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <mpi.h>
typedef long long s64;
double tvzero; // initial start time
int opt_debug; // 1=trace
int opt_run; // version to run
int size; // MPI size
int rank; // MPI rank of current process
int proc; // number of child processes
int chunksize; // size of chunk
MPI_Status status;
#define TVSEC 1000000000
// tvget -- get time in nanoseconds
s64
tvget(void)
{
struct timespec ts;
s64 nsec;
clock_gettime(CLOCK_REALTIME,&ts);
nsec = ts.tv_sec;
nsec *= TVSEC;
nsec += ts.tv_nsec;
return nsec;
}
// tvgetf -- get time in fractional seconds
double
tvgetf(void)
{
struct timespec ts;
double sec;
clock_gettime(CLOCK_REALTIME,&ts);
sec = ts.tv_nsec;
sec /= TVSEC;
sec += ts.tv_sec;
return sec;
}
void
_dbgprt(int lno,const char *fmt,...)
__attribute__((__format__(__printf__,2,3)));
#define dbgprt(_lvl,_fmt...) \
do { \
if (opt_debug >= _lvl) \
_dbgprt(__LINE__,_fmt); \
} while (0)
void
_dbgprt(int lno,const char *fmt,...)
{
va_list ap;
double tvnow;
char *bp;
char bf[1000];
bp = bf;
tvnow = tvgetf();
tvnow -= tvzero;
bp += sprintf(bp,"[%.9f/R:%d/L:%d] ",tvnow,rank,lno);
va_start(ap,fmt);
bp += vsprintf(bp,fmt,ap);
va_end(ap);
fputs(bf,stdout);
}
// _dbgptr -- interpret pointer
char *
_dbgptr(const void *ptr,const char *sym,MPI_Datatype typ)
{
char *bp;
static char bf[100];
bp = bf;
*bp = 0;
do {
bp += sprintf(bp,"%s=",sym);
if (typ == MPI_INT) {
bp += sprintf(bp,"%d",*(int *) ptr);
break;
}
bp += sprintf(bp,"%p",ptr);
} while (0);
return bf;
}
#define xMPI_Send(_ptr,_cnt,_typ,_torank,_tag,_comm) \
do { \
dbgprt(2,"MPI_Send ptr=[%s] cnt=%d typ=%s torank=%d tag=%d comm=%s\n", \
_dbgptr(_ptr,#_ptr,_typ),_cnt,#_typ,_torank,_tag,#_comm); \
MPI_Send(_ptr,_cnt,_typ,_torank,_tag,_comm); \
} while (0)
#define xMPI_Recv(_ptr,_cnt,_typ,_fmrank,_tag,_comm,_status) \
do { \
dbgprt(2,"MPI_Recv ptr=%p cnt=%d typ=%s fmrank=%d tag=%d comm=%s\n", \
_ptr,_cnt,#_typ,_fmrank,_tag,#_comm); \
MPI_Recv(_ptr,_cnt,_typ,_fmrank,_tag,_comm,_status); \
} while (0)
#define xMPI_Finalize() \
do { \
dbgprt(1,"MPI_Finalize\n"); \
MPI_Finalize(); \
} while (0)
#define SIZE 50000
int data1[SIZE];
int data2[SIZE];
// init -- initialize array and return sum
int
init(void)
{
int i;
int Tsum;
for (i = 0; i < SIZE; ++i)
data1[i] = i + 1;
Tsum = 0;
for (i = 0; i < SIZE; ++i)
Tsum += data1[i];
printf("Total Sum: %d \n",Tsum);
return Tsum;
}
// run_orig -- original code with all bugs noted
void
run_orig(void)
{
int i;
int dest;
int source;
int Tsum;
int Rsum;
int sum;
int index;
// BUG(1a): this should be one less (e.g. for -np 2, root=0, child=1)
proc = size;
// BUG(4): if SIZE is not a multiple of proc, the last few elements will
// _not_ be processed -- the last child must get a larger chunk size, so in
// addition to sending the index to each child, we must send it a custom
// chunk size
chunksize = SIZE / proc;
if (rank == 0) {
Tsum = init();
// send split of array to children
// BUG(1b): this loop goes one beyond the last child and caused the send
// to blow up
index = 0;
for (dest = 1; dest <= proc; ++dest) {
xMPI_Send(&index,1,MPI_INT,dest,0,MPI_COMM_WORLD);
xMPI_Send(&data1[index],chunksize,MPI_INT,dest,0,MPI_COMM_WORLD);
index = index + chunksize;
}
// collect sum of children
// BUG(2): this for loop is wrong -- it should match the one above
Rsum = 0;
for (i = 0; i < proc; ++i) {
// BUG(3): source should be "i"
source = 1;
xMPI_Recv(&sum,1,MPI_INT,source,0,MPI_COMM_WORLD,&status);
Rsum = Rsum + sum;
printf("- - - - - - - - - - - - - -\n");
printf("Received from process %d, Sum: %d \n",source,sum);
}
printf("Total Sum received: %d -- %s\n",
Rsum,(Rsum == Tsum) ? "PASS" : "FAIL");
}
else {
xMPI_Recv(&index,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&data2[index],chunksize,MPI_INT,0,0,MPI_COMM_WORLD,&status);
sum = 0;
for (i = index; i < index + chunksize; ++i)
sum += data2[i];
xMPI_Send(&sum,1,MPI_INT,0,0,MPI_COMM_WORLD);
}
}
// run_edit1 -- no changes to original
void
run_edit1(void)
{
int i;
int cldno;
int Tsum;
int Rsum;
int sum;
int index;
int source;
proc = size;
chunksize = SIZE / proc;
if (rank == 0) {
Tsum = init();
// send split of array to children
index = 0;
for (cldno = 1; cldno <= proc; ++cldno) {
xMPI_Send(&index,1,MPI_INT,cldno,0,MPI_COMM_WORLD);
xMPI_Send(&data1[index],chunksize,MPI_INT,cldno,0,MPI_COMM_WORLD);
index = index + chunksize;
}
// collect sum of children
Rsum = 0;
for (cldno = 0; cldno < proc; ++cldno) {
source = 1;
xMPI_Recv(&sum,1,MPI_INT,source,0,MPI_COMM_WORLD,&status);
Rsum += sum;
printf("- - - - - - - - - - - - - -\n");
printf("Received from process %d, Sum: %d \n",cldno,sum);
}
printf("Total Sum received: %d -- %s\n",
Rsum,(Rsum == Tsum) ? "PASS" : "FAIL");
}
else {
xMPI_Recv(&index,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&data2[index],chunksize,MPI_INT,0,0,MPI_COMM_WORLD,&status);
sum = 0;
for (i = index; i < index + chunksize; ++i)
sum += data2[i];
xMPI_Send(&sum,1,MPI_INT,0,0,MPI_COMM_WORLD);
}
}
// run_edit2 -- fixed child count
void
run_edit2(void)
{
int i;
int cldno;
int Tsum;
int Rsum;
int sum;
int index;
int source;
// BUGFIX #1 -- child count must be one less than size
proc = size - 1;
chunksize = SIZE / proc;
if (rank == 0) {
Tsum = init();
// send split of array to children
index = 0;
for (cldno = 1; cldno <= proc; ++cldno) {
xMPI_Send(&index,1,MPI_INT,cldno,0,MPI_COMM_WORLD);
xMPI_Send(&data1[index],chunksize,MPI_INT,cldno,0,MPI_COMM_WORLD);
index = index + chunksize;
}
// collect sum of children
Rsum = 0;
for (cldno = 0; cldno < proc; ++cldno) {
source = 1;
xMPI_Recv(&sum,1,MPI_INT,source,0,MPI_COMM_WORLD,&status);
Rsum += sum;
printf("- - - - - - - - - - - - - -\n");
printf("Received from process %d, Sum: %d \n",cldno,sum);
}
printf("Total Sum received: %d -- %s\n",
Rsum,(Rsum == Tsum) ? "PASS" : "FAIL");
}
else {
xMPI_Recv(&index,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&data2[index],chunksize,MPI_INT,0,0,MPI_COMM_WORLD,&status);
sum = 0;
for (i = index; i < index + chunksize; ++i)
sum += data2[i];
xMPI_Send(&sum,1,MPI_INT,0,0,MPI_COMM_WORLD);
}
}
// run_edit3 -- fixed second root loop and MPI_Recv
void
run_edit3(void)
{
int i;
int cldno;
int Tsum;
int Rsum;
int sum;
int index;
// BUGFIX #1 -- child count must be one less than size
proc = size - 1;
chunksize = SIZE / proc;
if (rank == 0) {
Tsum = init();
// send split of array to children
index = 0;
for (cldno = 1; cldno <= proc; ++cldno) {
xMPI_Send(&index,1,MPI_INT,cldno,0,MPI_COMM_WORLD);
xMPI_Send(&data1[index],chunksize,MPI_INT,cldno,0,MPI_COMM_WORLD);
index = index + chunksize;
}
// collect sum of children
// BUGFIX #2 -- loop was wrong and the source arg must be the child rank
Rsum = 0;
for (cldno = 1; cldno <= proc; ++cldno) {
xMPI_Recv(&sum,1,MPI_INT,cldno,0,MPI_COMM_WORLD,&status);
Rsum += sum;
printf("- - - - - - - - - - - - - -\n");
printf("Received from process %d, Sum: %d \n",cldno,sum);
}
printf("Total Sum received: %d -- %s\n",
Rsum,(Rsum == Tsum) ? "PASS" : "FAIL");
}
else {
xMPI_Recv(&index,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&data2[index],chunksize,MPI_INT,0,0,MPI_COMM_WORLD,&status);
sum = 0;
for (i = index; i < index + chunksize; ++i)
sum += data2[i];
xMPI_Send(&sum,1,MPI_INT,0,0,MPI_COMM_WORLD);
}
}
// run_edit4 -- fixed chunk size allocation
void
run_edit4(void)
{
int i;
int cldno;
int Tsum;
int Rsum;
int sum;
int sendsize;
int totsize;
int index;
// BUGFIX #1 -- child count must be one less than size
proc = size - 1;
totsize = SIZE;
chunksize = totsize / proc;
if (rank == 0) {
Tsum = init();
// send split of array to children
index = 0;
for (cldno = 1; cldno <= proc; ++cldno, totsize -= sendsize) {
// BUGFIX #3a -- not every child node gets the same amount
if (cldno < proc)
sendsize = chunksize;
else
sendsize = totsize;
xMPI_Send(&index,1,MPI_INT,cldno,0,MPI_COMM_WORLD);
xMPI_Send(&sendsize,1,MPI_INT,cldno,0,MPI_COMM_WORLD);
xMPI_Send(&data1[index],sendsize,MPI_INT,cldno,0,MPI_COMM_WORLD);
index = index + sendsize;
}
// collect sum of children
// BUGFIX #2 -- loop was wrong and the source arg must be the child rank
Rsum = 0;
for (cldno = 1; cldno <= proc; ++cldno) {
xMPI_Recv(&sum,1,MPI_INT,cldno,0,MPI_COMM_WORLD,&status);
Rsum += sum;
printf("- - - - - - - - - - - - - -\n");
printf("Received from process %d, Sum: %d \n",cldno,sum);
}
printf("Total Sum received: %d -- %s\n",
Rsum,(Rsum == Tsum) ? "PASS" : "FAIL");
}
else {
// BUGFIX #3b -- not every child node gets the same amount
xMPI_Recv(&index,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&chunksize,1,MPI_INT,0,0,MPI_COMM_WORLD,&status);
xMPI_Recv(&data2[index],chunksize,MPI_INT,0,0,MPI_COMM_WORLD,&status);
sum = 0;
for (i = index; i < index + chunksize; ++i)
sum += data2[i];
xMPI_Send(&sum,1,MPI_INT,0,0,MPI_COMM_WORLD);
}
}
int
main(int argc,char **argv)
{
char *cp;
// NOTE: this will vary a bit with rank
// to do the job properly we'd need the root to bcast this -- see below
tvzero = tvgetf();
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
MPI_Comm_size(MPI_COMM_WORLD,&size);
--argc;
++argv;
for (; argc > 0; --argc, ++argv) {
cp = *argv;
if (*cp != '-')
break;
switch (cp[1]) {
case 'R':
cp += 2;
opt_run = (*cp != 0) ? atoi(cp) : 1;
break;
case 'T':
cp += 2;
opt_debug = (*cp != 0) ? atoi(cp) : 1;
break;
}
}
// root send to all children
if (opt_debug)
MPI_Bcast(&tvzero,1,MPI_DOUBLE,0,MPI_COMM_WORLD);
// show our pid so we can map it to rank [which is more meaningful]
dbgprt(1,"PID pid=%d\n",getpid());
switch (opt_run) {
case 1:
run_edit1();
break;
case 2:
run_edit2();
break;
case 3:
run_edit3();
break;
case 4:
run_edit4();
break;
default:
break;
}
xMPI_Finalize();
return 0;
}
Here are the logs [combined into one to save space]:
# test #1 edit #1 with -np=2 -T9
[manderly:6620] *** An error occurred in MPI_Send
[manderly:6620] *** reported by process [3255828481,0]
[manderly:6620] *** on communicator MPI_COMM_WORLD
[manderly:6620] *** MPI_ERR_RANK: invalid rank
[manderly:6620] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[manderly:6620] *** and potentially your MPI job)
[0.014549255/R:0/L:480] PID pid=6620
[0.014554262/R:1/L:480] PID pid=6621
[0.014572620/R:1/L:259] MPI_Recv ptr=0x7ffc98d18a6c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
Total Sum: 1250025000
[0.014715672/R:0/L:239] MPI_Send ptr=[&index=0] cnt=1 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.014730692/R:0/L:240] MPI_Send ptr=[&data1[index]=1] cnt=25000 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.014730930/R:1/L:260] MPI_Recv ptr=0x603960 cnt=25000 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.014817476/R:0/L:239] MPI_Send ptr=[&index=25000] cnt=1 typ=MPI_INT torank=2 tag=0 comm=MPI_COMM_WORLD
[0.014842749/R:1/L:266] MPI_Send ptr=[&sum=312512500] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.014861822/R:1/L:499] MPI_Finalize
# test #2 edit #2 with -np=2 -T0
Total Sum: 1250025000
- - - - - - - - - - - - - -
Received from process 0, Sum: 1250025000
Total Sum received: 1250025000 -- PASS
# test #3 edit #2 with -np=5 -T9
[0.028285980/R:0/L:480] PID pid=6632
[0.028294086/R:1/L:480] PID pid=6633
[0.028315544/R:1/L:313] MPI_Recv ptr=0x7ffe22a554ec cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028294325/R:4/L:480] PID pid=6637
[0.028332472/R:4/L:313] MPI_Recv ptr=0x7ffd5ed6a77c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028387308/R:2/L:480] PID pid=6634
[0.028410435/R:2/L:313] MPI_Recv ptr=0x7fff6f23896c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028449297/R:3/L:480] PID pid=6635
[0.028471947/R:3/L:313] MPI_Recv ptr=0x7ffd23af2ecc cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
Total Sum: 1250025000
[0.028527975/R:0/L:293] MPI_Send ptr=[&index=0] cnt=1 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.028543711/R:0/L:294] MPI_Send ptr=[&data1[index]=1] cnt=12500 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.028544903/R:1/L:314] MPI_Recv ptr=0x603960 cnt=12500 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028627157/R:0/L:293] MPI_Send ptr=[&index=12500] cnt=1 typ=MPI_INT torank=2 tag=0 comm=MPI_COMM_WORLD
[0.028636694/R:0/L:294] MPI_Send ptr=[&data1[index]=12501] cnt=12500 typ=MPI_INT torank=2 tag=0 comm=MPI_COMM_WORLD
[0.028637648/R:2/L:314] MPI_Recv ptr=0x60fcb0 cnt=12500 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028641224/R:1/L:320] MPI_Send ptr=[&sum=78131250] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.028657198/R:1/L:499] MPI_Finalize
[0.028701305/R:0/L:293] MPI_Send ptr=[&index=25000] cnt=1 typ=MPI_INT torank=3 tag=0 comm=MPI_COMM_WORLD
[0.028716326/R:0/L:294] MPI_Send ptr=[&data1[index]=25001] cnt=12500 typ=MPI_INT torank=3 tag=0 comm=MPI_COMM_WORLD
[0.028719425/R:2/L:320] MPI_Send ptr=[&sum=234381250] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.028721094/R:3/L:314] MPI_Recv ptr=0x61c000 cnt=12500 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028738737/R:2/L:499] MPI_Finalize
[0.028776169/R:0/L:293] MPI_Send ptr=[&index=37500] cnt=1 typ=MPI_INT torank=4 tag=0 comm=MPI_COMM_WORLD
[0.028784752/R:0/L:294] MPI_Send ptr=[&data1[index]=37501] cnt=12500 typ=MPI_INT torank=4 tag=0 comm=MPI_COMM_WORLD
[0.028797865/R:3/L:320] MPI_Send ptr=[&sum=390631250] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.028819084/R:3/L:499] MPI_Finalize
[0.028877974/R:4/L:314] MPI_Recv ptr=0x628350 cnt=12500 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.028944492/R:0/L:302] MPI_Recv ptr=0x7ffe7db6e4b8 cnt=1 typ=MPI_INT fmrank=1 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 0, Sum: 78131250
[0.028969765/R:0/L:302] MPI_Recv ptr=0x7ffe7db6e4b8 cnt=1 typ=MPI_INT fmrank=1 tag=0 comm=MPI_COMM_WORLD
[0.028957367/R:4/L:320] MPI_Send ptr=[&sum=546881250] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.028982162/R:4/L:499] MPI_Finalize
TIMEOUT!!!
# test #4 edit #3 with -np=5 -T0
Total Sum: 1250025000
- - - - - - - - - - - - - -
Received from process 1, Sum: 78131250
- - - - - - - - - - - - - -
Received from process 2, Sum: 234381250
- - - - - - - - - - - - - -
Received from process 3, Sum: 390631250
- - - - - - - - - - - - - -
Received from process 4, Sum: 546881250
Total Sum received: 1250025000 -- PASS
# test #5 edit #3 with -np=7 -T9
[0.043676138/R:0/L:480] PID pid=6662
[0.043689251/R:1/L:480] PID pid=6663
[0.043709040/R:1/L:366] MPI_Recv ptr=0x7ffc571a085c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.043683767/R:4/L:480] PID pid=6666
[0.043709040/R:4/L:366] MPI_Recv ptr=0x7ffd9e8ea57c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.043860435/R:3/L:480] PID pid=6665
[0.043884993/R:3/L:366] MPI_Recv ptr=0x7ffd1f26c0fc cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.043950558/R:5/L:480] PID pid=6669
[0.043974400/R:5/L:366] MPI_Recv ptr=0x7ffc171ddf2c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.043961048/R:2/L:480] PID pid=6664
[0.043980122/R:2/L:366] MPI_Recv ptr=0x7ffde0a06dec cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.043968439/R:6/L:480] PID pid=6670
[0.043987513/R:6/L:366] MPI_Recv ptr=0x7ffe4afba88c cnt=1 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
Total Sum: 1250025000
[0.044004679/R:0/L:346] MPI_Send ptr=[&index=0] cnt=1 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.044021130/R:0/L:347] MPI_Send ptr=[&data1[index]=1] cnt=8333 typ=MPI_INT torank=1 tag=0 comm=MPI_COMM_WORLD
[0.044066429/R:1/L:367] MPI_Recv ptr=0x603960 cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044160843/R:0/L:346] MPI_Send ptr=[&index=8333] cnt=1 typ=MPI_INT torank=2 tag=0 comm=MPI_COMM_WORLD
[0.044172764/R:0/L:347] MPI_Send ptr=[&data1[index]=8334] cnt=8333 typ=MPI_INT torank=2 tag=0 comm=MPI_COMM_WORLD
[0.044173002/R:2/L:367] MPI_Recv ptr=0x60bb94 cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044213533/R:1/L:373] MPI_Send ptr=[&sum=34723611] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044217110/R:0/L:346] MPI_Send ptr=[&index=16666] cnt=1 typ=MPI_INT torank=3 tag=0 comm=MPI_COMM_WORLD
[0.044228792/R:0/L:347] MPI_Send ptr=[&data1[index]=16667] cnt=8333 typ=MPI_INT torank=3 tag=0 comm=MPI_COMM_WORLD
[0.044233561/R:2/L:373] MPI_Send ptr=[&sum=104162500] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044246435/R:2/L:499] MPI_Finalize
[0.044233799/R:3/L:367] MPI_Recv ptr=0x613dc8 cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044275522/R:0/L:346] MPI_Send ptr=[&index=24999] cnt=1 typ=MPI_INT torank=4 tag=0 comm=MPI_COMM_WORLD
[0.044286489/R:4/L:367] MPI_Recv ptr=0x61bffc cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044291496/R:3/L:373] MPI_Send ptr=[&sum=173601389] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044314146/R:3/L:499] MPI_Finalize
[0.044286251/R:0/L:347] MPI_Send ptr=[&data1[index]=25000] cnt=8333 typ=MPI_INT torank=4 tag=0 comm=MPI_COMM_WORLD
[0.044338703/R:0/L:346] MPI_Send ptr=[&index=33332] cnt=1 typ=MPI_INT torank=5 tag=0 comm=MPI_COMM_WORLD
[0.044353485/R:0/L:347] MPI_Send ptr=[&data1[index]=33333] cnt=8333 typ=MPI_INT torank=5 tag=0 comm=MPI_COMM_WORLD
[0.044402599/R:0/L:346] MPI_Send ptr=[&index=41665] cnt=1 typ=MPI_INT torank=6 tag=0 comm=MPI_COMM_WORLD
[0.044354916/R:4/L:373] MPI_Send ptr=[&sum=243040278] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044372797/R:4/L:499] MPI_Finalize
[0.044359684/R:5/L:367] MPI_Recv ptr=0x624230 cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044411659/R:5/L:373] MPI_Send ptr=[&sum=312479167] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044427156/R:5/L:499] MPI_Finalize
[0.044423819/R:6/L:367] MPI_Recv ptr=0x62c464 cnt=8333 typ=MPI_INT fmrank=0 tag=0 comm=MPI_COMM_WORLD
[0.044469357/R:6/L:373] MPI_Send ptr=[&sum=381918056] cnt=1 typ=MPI_INT torank=0 tag=0 comm=MPI_COMM_WORLD
[0.044484377/R:6/L:499] MPI_Finalize
[0.044419050/R:0/L:347] MPI_Send ptr=[&data1[index]=41666] cnt=8333 typ=MPI_INT torank=6 tag=0 comm=MPI_COMM_WORLD
[0.044459820/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=1 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 1, Sum: 34723611
[0.044672012/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=2 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 2, Sum: 104162500
[0.044713497/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=3 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 3, Sum: 173601389
[0.044723034/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=4 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 4, Sum: 243040278
[0.044659376/R:1/L:499] MPI_Finalize
[0.044735193/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=5 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 5, Sum: 312479167
[0.044743538/R:0/L:355] MPI_Recv ptr=0x7fffd9e14708 cnt=1 typ=MPI_INT fmrank=6 tag=0 comm=MPI_COMM_WORLD
- - - - - - - - - - - - - -
Received from process 6, Sum: 381918056
Total Sum received: 1249925001 -- FAIL
[0.044760466/R:0/L:499] MPI_Finalize
# test #6 edit #4 with -np=7 -T0
Total Sum: 1250025000
- - - - - - - - - - - - - -
Received from process 1, Sum: 34723611
- - - - - - - - - - - - - -
Received from process 2, Sum: 104162500
- - - - - - - - - - - - - -
Received from process 3, Sum: 173601389
- - - - - - - - - - - - - -
Received from process 4, Sum: 243040278
- - - - - - - - - - - - - -
Received from process 5, Sum: 312479167
- - - - - - - - - - - - - -
Received from process 6, Sum: 382018055
Total Sum received: 1250025000 -- PASS
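One last aside, not part of the fixes above: once the point-to-point version works, the same sum can be written far more compactly with MPI collectives. A minimal sketch, assuming SIZE divides evenly by the number of ranks and letting every rank (including the root) sum one chunk:
//array parallel, collective version (sketch)
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define SIZE 50000
int main(int argc, char *argv[])
{
    int rank, size, chunk, lsum = 0, gsum = 0;
    static int data[SIZE];      // only the root fills this in
    int *part;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    chunk = SIZE / size;        // sketch assumes SIZE % size == 0
    part = malloc(chunk * sizeof(int));
    if (rank == 0)
        for (int i = 0; i < SIZE; ++i)
            data[i] = i + 1;
    // hand each rank (root included) its slice of the root's array
    MPI_Scatter(data, chunk, MPI_INT, part, chunk, MPI_INT, 0, MPI_COMM_WORLD);
    for (int i = 0; i < chunk; ++i)
        lsum += part[i];
    // combine the partial sums on the root
    MPI_Reduce(&lsum, &gsum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
        printf("Total Sum: %d \n", gsum);
    free(part);
    MPI_Finalize();
    return 0;
}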