程序停在 MPI_Send
Program stops at MPI_Send
当我用超过 1 个处理器执行程序时,程序停止工作。
程序首先卡在 MPI_Send 调用处。
我做错了什么?
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 200000
#define SIZE2 256
#define VYVOD 1
int main(int argc, char *argv[])
{
int NX, NT;
double TK, UM, DX, DY, DT;
double starttime, endtime;
int numnode, rank, delta=0, ierr, NXnode;
double **U;
double **U1;
double *sosed1;
double *sosed2;
int i, j, k;
MPI_Status stats;
NX = 1*(SIZE2+1);
TK = 20.00;
UM = 10.0;
DX = 0.1;
DY = DX;
DT = 0.1;
NT = (TK/DT);
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numnode);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if(rank == 0)
printf("\nTotal nodes: %d\n", numnode);
NX = NX - 2;
NXnode = (NX-(NX%numnode))/numnode;
if (rank < (NX%numnode))
{
delta = rank * NXnode + rank + 1;
NXnode++;
}
else
{
delta = rank * NXnode + (NX%numnode) + 1;
}
if(rank == 0){
printf("Order counting complete, NXnode = %d\n", NXnode);
}
U = (double**)malloc(NXnode*sizeof(double*));
U1 = (double**)malloc(NXnode*sizeof(double*));
sosed1 = (double*)malloc(SIZE*sizeof(double));
sosed2 = (double*)malloc(SIZE*sizeof(double));
for (i=0; i < NXnode; i++)
{
U[i] = (double*)malloc(SIZE*sizeof(double));
U[i][0]=0;
U[i][SIZE-1]=0;
U1[i] = (double*)malloc(SIZE*sizeof(double));
U1[i][0]=0;
U1[i][SIZE-1]=0;
if (U[i]==NULL || U1[i]==NULL)
{
printf("Error at memory allocation!");
return 1;
}
}
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
starttime = MPI_Wtime();
printf("Array allocation complete\n");
}
for (i = 0; i < NXnode; i++)
{
for (j = 1; j < SIZE-1; j++)
{
if ((delta)<=(NXnode/2))
{
U1[i][j]=2*(UM/NXnode)*(delta+i);
}
else
{
U1[i][j]=-2*(UM/NXnode) + 2*UM;
}
}
}
printf("Array init 1 complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
if (rank > 0)
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
if (rank < (numnode-1))
{
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
printf("Send complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
printf("Array init complete, rank %d\n", rank);
for (k = 1; k <= NT; k++)
{
int cycle = 0;
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[0][cycle] = U1[0][cycle] + DT/(DX*DX)*(U1[1][cycle]-2*U1[0][cycle]+sosed1[cycle])+DT/(DY*DY)*(U1[0][cycle+1]+U1[0][cycle-1]-(U1[0][cycle]*2));
}
for (i=1; i<NXnode-1; i++)
{
for(j=1; j<SIZE-1; j++)
{
U[i][j] = U1[i][j] + DT/(DX*DX)*(U1[i+1][j]-2*U1[i][j]+U[i-1][j])+DT/(DY*DY)*(U1[i][j+1]+U1[i][j-1]-(U1[i][j]*2));
}
}
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[NXnode-1][cycle]=U1[NXnode-1][cycle]+DT/(DX*DX)*(sosed2[cycle]-2*U1[NXnode-1][cycle]+U1[NXnode-2][cycle])+DT/(DY*DY)*(U1[NXnode-1][cycle+1]+U1[NXnode-1][cycle-1]-(U1[NXnode-1][cycle]*2));
}
/*U[0] = U1[0]+DT/(DX*DX)*(U1[0+1]-2*U1[0]+sosed1);
for (j = 0; j<NXnode; j++)
{
U[j]=U1[j]+DT/(DX*DX)*(U1[j+1]-2*U1[j]+U1[j-1]);
}
U[NXnode-1]=U1[NXnode-1]+DT/(DX*DX)*(sosed2-2*U1[NXnode-1]+U1[(NXnode-1)-1]);*/
if (rank > 0)
{
MPI_Send(&(U[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
}
if (rank < (numnode-1))
{
MPI_Send(&(U[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD);
}
if (rank > 0)
{
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
}
if (rank < (numnode-1))
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
for (i = 0; i<NXnode; i++)
{
for (j=0; j<SIZE; j++)
{
U1[i][j]=U[i][j];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Array count complete, rank %d\n", rank);
if (rank == 0)
{
endtime=MPI_Wtime();
printf("\n## TIME: %f\n", endtime-starttime);
}
MPI_Finalize();
}
更新#1
我试过让 rank 0 先接收再发送,但仍然不行:
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0 && numnode > 1)
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
else if (rank == 0)
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
sosed1[initInd]=0;
}
}
else if (rank < (numnode-1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
}
else if (rank == (numnode - 1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
更新#2
已解决:对所有 Send/Recv 使用相同的标签。
MPI_Send 会阻塞执行,直到相应的 MPI_Recv 被调用(通常是在另一个进程中)。
在你的程序中,除 rank=0 之外的所有进程都在第一个屏障之后立即调用 MPI_Send,而此时没有任何进程准备好用 Recv 接收消息,因此 MPI_Send 会无限期阻塞。本质上,每个进程都在等待编号更低的进程接收它的消息(rank 2 等待 rank 1,rank 1 等待 rank 0),而 rank 0 根本不接收任何消息(它直接进入下一个代码块,同样调用 MPI_Send),所以所有进程都挂起。
您似乎缺少 rank=0 进程的通信部分(它应该执行类似 MPI_Recv(from rank 1); ...; MPI_Send(to rank 1); 的操作)。
另外,您发送时(MPI_Send)使用标签 1,而接收时(MPI_Recv)使用标签 0,二者无法匹配。您需要使用相同的标签,或者在接收操作中指定 MPI_ANY_TAG。
当我用超过 1 个处理器执行程序时,程序停止工作。
程序首先卡在 MPI_Send 调用处。
我做错了什么?
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 200000
#define SIZE2 256
#define VYVOD 1
int main(int argc, char *argv[])
{
int NX, NT;
double TK, UM, DX, DY, DT;
double starttime, endtime;
int numnode, rank, delta=0, ierr, NXnode;
double **U;
double **U1;
double *sosed1;
double *sosed2;
int i, j, k;
MPI_Status stats;
NX = 1*(SIZE2+1);
TK = 20.00;
UM = 10.0;
DX = 0.1;
DY = DX;
DT = 0.1;
NT = (TK/DT);
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numnode);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if(rank == 0)
printf("\nTotal nodes: %d\n", numnode);
NX = NX - 2;
NXnode = (NX-(NX%numnode))/numnode;
if (rank < (NX%numnode))
{
delta = rank * NXnode + rank + 1;
NXnode++;
}
else
{
delta = rank * NXnode + (NX%numnode) + 1;
}
if(rank == 0){
printf("Order counting complete, NXnode = %d\n", NXnode);
}
U = (double**)malloc(NXnode*sizeof(double*));
U1 = (double**)malloc(NXnode*sizeof(double*));
sosed1 = (double*)malloc(SIZE*sizeof(double));
sosed2 = (double*)malloc(SIZE*sizeof(double));
for (i=0; i < NXnode; i++)
{
U[i] = (double*)malloc(SIZE*sizeof(double));
U[i][0]=0;
U[i][SIZE-1]=0;
U1[i] = (double*)malloc(SIZE*sizeof(double));
U1[i][0]=0;
U1[i][SIZE-1]=0;
if (U[i]==NULL || U1[i]==NULL)
{
printf("Error at memory allocation!");
return 1;
}
}
MPI_Barrier(MPI_COMM_WORLD);
if(rank == 0){
starttime = MPI_Wtime();
printf("Array allocation complete\n");
}
for (i = 0; i < NXnode; i++)
{
for (j = 1; j < SIZE-1; j++)
{
if ((delta)<=(NXnode/2))
{
U1[i][j]=2*(UM/NXnode)*(delta+i);
}
else
{
U1[i][j]=-2*(UM/NXnode) + 2*UM;
}
}
}
printf("Array init 1 complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
if (rank > 0)
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
if (rank < (numnode-1))
{
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
else
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
printf("Send complete, rank %d\n", rank);
MPI_Barrier(MPI_COMM_WORLD);
printf("Array init complete, rank %d\n", rank);
for (k = 1; k <= NT; k++)
{
int cycle = 0;
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[0][cycle] = U1[0][cycle] + DT/(DX*DX)*(U1[1][cycle]-2*U1[0][cycle]+sosed1[cycle])+DT/(DY*DY)*(U1[0][cycle+1]+U1[0][cycle-1]-(U1[0][cycle]*2));
}
for (i=1; i<NXnode-1; i++)
{
for(j=1; j<SIZE-1; j++)
{
U[i][j] = U1[i][j] + DT/(DX*DX)*(U1[i+1][j]-2*U1[i][j]+U[i-1][j])+DT/(DY*DY)*(U1[i][j+1]+U1[i][j-1]-(U1[i][j]*2));
}
}
for (cycle=1; cycle < SIZE-1; cycle++)
{
U[NXnode-1][cycle]=U1[NXnode-1][cycle]+DT/(DX*DX)*(sosed2[cycle]-2*U1[NXnode-1][cycle]+U1[NXnode-2][cycle])+DT/(DY*DY)*(U1[NXnode-1][cycle+1]+U1[NXnode-1][cycle-1]-(U1[NXnode-1][cycle]*2));
}
/*U[0] = U1[0]+DT/(DX*DX)*(U1[0+1]-2*U1[0]+sosed1);
for (j = 0; j<NXnode; j++)
{
U[j]=U1[j]+DT/(DX*DX)*(U1[j+1]-2*U1[j]+U1[j-1]);
}
U[NXnode-1]=U1[NXnode-1]+DT/(DX*DX)*(sosed2-2*U1[NXnode-1]+U1[(NXnode-1)-1]);*/
if (rank > 0)
{
MPI_Send(&(U[0][0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD);
}
if (rank < (numnode-1))
{
MPI_Send(&(U[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD);
}
if (rank > 0)
{
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
}
if (rank < (numnode-1))
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
}
for (i = 0; i<NXnode; i++)
{
for (j=0; j<SIZE; j++)
{
U1[i][j]=U[i][j];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
printf("Array count complete, rank %d\n", rank);
if (rank == 0)
{
endtime=MPI_Wtime();
printf("\n## TIME: %f\n", endtime-starttime);
}
MPI_Finalize();
}
更新#1 我试过让 rank 0 先接收再发送,但仍然不行:
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0 && numnode > 1)
{
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed1[initInd]=0;
}
}
else if (rank == 0)
{
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
sosed1[initInd]=0;
}
}
else if (rank < (numnode-1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
MPI_Recv(&(sosed2[0]), SIZE, MPI_DOUBLE , rank+1, 0, MPI_COMM_WORLD, &stats);
MPI_Send(&(U1[NXnode-1][0]), SIZE, MPI_DOUBLE , rank+1, 1, MPI_COMM_WORLD);
}
else if (rank == (numnode - 1))
{
MPI_Send(&(U1[0][0]), SIZE, MPI_DOUBLE , rank-1, 1, MPI_COMM_WORLD);
MPI_Recv(&(sosed1[0]), SIZE, MPI_DOUBLE , rank-1, 0, MPI_COMM_WORLD, &stats);
int initInd = 0;
for (initInd = 0; initInd < SIZE; initInd++)
{
sosed2[initInd]=0;
}
}
更新#2 已解决:对所有 Send/Recv 使用相同的标签。
MPI_Send 会阻塞执行,直到相应的 MPI_Recv 被调用(通常是在另一个进程中)。
在你的程序中,除 rank=0 之外的所有进程都在第一个屏障之后立即调用 MPI_Send,而此时没有任何进程准备好用 Recv 接收消息,因此 MPI_Send 会无限期阻塞。本质上,每个进程都在等待编号更低的进程接收它的消息(rank 2 等待 rank 1,rank 1 等待 rank 0),而 rank 0 根本不接收任何消息(它直接进入下一个代码块,同样调用 MPI_Send),所以所有进程都挂起。
您似乎缺少 rank=0 进程的通信部分(它应该执行类似 MPI_Recv(from rank 1); ...; MPI_Send(to rank 1); 的操作)。
另外,您发送时(MPI_Send)使用标签 1,而接收时(MPI_Recv)使用标签 0,二者无法匹配。您需要使用相同的标签,或者在接收操作中指定 MPI_ANY_TAG。