MPI Reduce 和 Broadcast 工作,但导致未来 return 值失败
MPI Reduce and Broadcast work, but cause a future return value to fail
我正在使用 MPI 为 class 实施 Dijkstras 算法。我的老师也不知道为什么这会被破坏,并允许我在这里 post。
我的问题出在 chooseVertex 函数中。该程序适用于 1 个处理器,但是当我运行 2 个处理器时,处理器 0 无法 return leastPosition,即使我能够在 return 之前的那一行打印 leastPosition 的内容。
我的代码:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#define min(x,y) ((x) > (y) ? (y) : (x))
#define MASTER 0
#define INFINTY 100000
void dijkstra(int, int, int **, int *, int, int);
int chooseVertex(int *, int, int *, int, int);
int main(int argc, char* argv[])
{
    int rank, size, i, j;
    //Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    //Initialize graph: n x n adjacency matrix of edge weights
    int src = 0;
    int n = 12;
    int **edge = (int **)malloc(n * sizeof(int *));
    if (edge == NULL) {  //fix: allocation was unchecked
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i < n; i++) {
        edge[i] = (int *)malloc(n * sizeof(int));
        if (edge[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    int dist[12];
    //Set all graph lengths to infinity (0 on the diagonal: no self-loop cost)
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            if (i == j) { edge[i][j] = 0; }
            else { edge[i][j] = INFINTY; }
        }
    }
    //set graph edge lengths
    edge[0][3] = 5;
    edge[0][6] = 13;
    edge[1][5] = 12;
    edge[2][1] = 7;
    edge[3][2] = 9;
    edge[3][4] = 2;
    edge[4][7] = 3;
    edge[5][10] = 1;
    edge[5][11] = 4;
    edge[6][9] = 9;
    edge[7][8] = 4;
    edge[8][9] = 10;
    edge[8][10] = 7;
    edge[9][10] = 6;
    edge[10][11] = 1;
    dijkstra(src, n, edge, dist, rank, size);
    if(rank == MASTER){ printf("The distance is %d", dist[n - 1]); }
    //fix: the adjacency matrix was leaked; also dropped the unused MPI_Status
    for (i = 0; i < n; i++) free(edge[i]);
    free(edge);
    MPI_Finalize();
    return 0;
}
//called by dijkstras function below
int chooseVertex(int *dist, int n, int *found, int rank, int size) {
int i, tmp, partition, lower, upper, leastPosition;
int least = INFINTY;
//set the number of nodes wach processor will work with
partition = n / size;
lower = rank * partition;
upper = lower + partition;
//used for MPI_Reduce
struct {
int pos;
int val;
} sendBuffr, recvBuffr;
//calculate least position
for (i = lower; i < upper; i++) {
tmp = dist[i];
if ((!found[i]) && (tmp < least)) {
least = tmp;
leastPosition = i;
}
}
//if all nodes checked are INFINITY, go with last node checked
if (least == INFINTY) leastPosition = i;
//set the send buffer for MPI_Reduce
sendBuffr.val = least;
sendBuffr.pos = leastPosition;
//Rank 0 processor has correct least position and value
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
if (rank == MASTER) leastPosition = recvBuffr.pos;
//Update all processors to have correct position
MPI_Bcast(&leastPosition, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
//Print the contents of leastPosition on rank 0 for debugging
if(rank == MASTER) printf("LeastPosition for rank %d is: %d\n", rank, leastPosition);
fflush(stdout);
return leastPosition;
}
void dijkstra(int SOURCE, int n, int **edge, int *dist, int rank, int size)
{
    /*
     * Distributed Dijkstra: each rank relaxes edges for its own
     * partition of vertices; a MIN-allreduce merges the partial
     * distance arrays every round. On return, dist[] holds the
     * shortest distances from SOURCE on every rank.
     */
    int i, j, count, partition, lower, upper, *found, *sendBuffer;
    sendBuffer = (int *)malloc(n * sizeof(int));
    found = (int *)calloc(n, sizeof(int));
    if (sendBuffer == NULL || found == NULL) {  //fix: allocations were unchecked
        fprintf(stderr, "dijkstra: out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    partition = n / size;
    lower = rank * partition;
    upper = lower + partition;
    //seed the distance array with the direct edges out of SOURCE
    for (i = 0; i < n; i++) {
        found[i] = 0;
        dist[i] = edge[SOURCE][i];
        sendBuffer[i] = dist[i];
    }
    found[SOURCE] = 1;
    count = 1;
    //Dijkstra loop: n-1 rounds, each settling one vertex
    while (count < n) {
        printf("before ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        j = chooseVertex(dist, n, found, rank, size);
        printf("after ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        count++;
        found[j] = 1;
        //relax edges out of j, but only for this rank's partition
        for (i = lower; i < upper; i++) {
            if (!found[i])
            {
                dist[i] = min(dist[i], dist[j] + edge[j][i]);
                sendBuffer[i] = dist[i];
            }
        }
        //Allreduce = the original Reduce-to-master + Bcast in one step:
        //every rank receives the element-wise minimum of all partitions.
        MPI_Allreduce(sendBuffer, dist, n, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    }
    //fix: both work arrays were leaked
    free(sendBuffer);
    free(found);
}
示例错误消息:
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
LeastPosition for rank 0 is: 3
after ChooseVertex: rank 1 reporting
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
after ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 4
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 7
after ChooseVertex: rank 1 reporting
job aborted:
[ranks] message
[0] process exited without calling finalize
[1] terminated
---- error analysis -----
[0] on My-ComputerName
Assignmet3PP ended prematurely and may have crashed. exit code 3
---- error analysis -----
你的减少命令是:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
通过使用 MPI_DOUBLE_INT
,您表示您正在发送一个包含两个变量的结构:一个 double
后跟一个 int
。然而,这不是您的结构:您只有 2 个整数。因此你应该使用 MPI_2INT
。这些类型源自 this source。或者,您可以使用向量创建自己的类型。
示例修复是:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_2INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
此外,可以通过 MPI_Allreduce() 轻松地将缩减(reduce)和广播(broadcast)合并为一个步骤。
我正在使用 MPI 为 class 实施 Dijkstras 算法。我的老师也不知道为什么这会被破坏,并允许我在这里 post。
我的问题出在 chooseVertex 函数中。该程序适用于 1 个处理器,但是当我运行 2 个处理器时,处理器 0 无法 return leastPosition,即使我能够在 return 之前的那一行打印 leastPosition 的内容。 我的代码:
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#define min(x,y) ((x) > (y) ? (y) : (x))
#define MASTER 0
#define INFINTY 100000
void dijkstra(int, int, int **, int *, int, int);
int chooseVertex(int *, int, int *, int, int);
int main(int argc, char* argv[])
{
    int rank, size, i, j;
    //Initialize MPI
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    //Initialize graph: n x n adjacency matrix of edge weights
    int src = 0;
    int n = 12;
    int **edge = (int **)malloc(n * sizeof(int *));
    if (edge == NULL) {  //fix: allocation was unchecked
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i < n; i++) {
        edge[i] = (int *)malloc(n * sizeof(int));
        if (edge[i] == NULL) {
            fprintf(stderr, "out of memory\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    int dist[12];
    //Set all graph lengths to infinity (0 on the diagonal: no self-loop cost)
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            if (i == j) { edge[i][j] = 0; }
            else { edge[i][j] = INFINTY; }
        }
    }
    //set graph edge lengths
    edge[0][3] = 5;
    edge[0][6] = 13;
    edge[1][5] = 12;
    edge[2][1] = 7;
    edge[3][2] = 9;
    edge[3][4] = 2;
    edge[4][7] = 3;
    edge[5][10] = 1;
    edge[5][11] = 4;
    edge[6][9] = 9;
    edge[7][8] = 4;
    edge[8][9] = 10;
    edge[8][10] = 7;
    edge[9][10] = 6;
    edge[10][11] = 1;
    dijkstra(src, n, edge, dist, rank, size);
    if(rank == MASTER){ printf("The distance is %d", dist[n - 1]); }
    //fix: the adjacency matrix was leaked; also dropped the unused MPI_Status
    for (i = 0; i < n; i++) free(edge[i]);
    free(edge);
    MPI_Finalize();
    return 0;
}
//called by dijkstras function below
int chooseVertex(int *dist, int n, int *found, int rank, int size) {
int i, tmp, partition, lower, upper, leastPosition;
int least = INFINTY;
//set the number of nodes wach processor will work with
partition = n / size;
lower = rank * partition;
upper = lower + partition;
//used for MPI_Reduce
struct {
int pos;
int val;
} sendBuffr, recvBuffr;
//calculate least position
for (i = lower; i < upper; i++) {
tmp = dist[i];
if ((!found[i]) && (tmp < least)) {
least = tmp;
leastPosition = i;
}
}
//if all nodes checked are INFINITY, go with last node checked
if (least == INFINTY) leastPosition = i;
//set the send buffer for MPI_Reduce
sendBuffr.val = least;
sendBuffr.pos = leastPosition;
//Rank 0 processor has correct least position and value
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
if (rank == MASTER) leastPosition = recvBuffr.pos;
//Update all processors to have correct position
MPI_Bcast(&leastPosition, 1, MPI_INT, MASTER, MPI_COMM_WORLD);
//Print the contents of leastPosition on rank 0 for debugging
if(rank == MASTER) printf("LeastPosition for rank %d is: %d\n", rank, leastPosition);
fflush(stdout);
return leastPosition;
}
void dijkstra(int SOURCE, int n, int **edge, int *dist, int rank, int size)
{
    /*
     * Distributed Dijkstra: each rank relaxes edges for its own
     * partition of vertices; a MIN-allreduce merges the partial
     * distance arrays every round. On return, dist[] holds the
     * shortest distances from SOURCE on every rank.
     */
    int i, j, count, partition, lower, upper, *found, *sendBuffer;
    sendBuffer = (int *)malloc(n * sizeof(int));
    found = (int *)calloc(n, sizeof(int));
    if (sendBuffer == NULL || found == NULL) {  //fix: allocations were unchecked
        fprintf(stderr, "dijkstra: out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    partition = n / size;
    lower = rank * partition;
    upper = lower + partition;
    //seed the distance array with the direct edges out of SOURCE
    for (i = 0; i < n; i++) {
        found[i] = 0;
        dist[i] = edge[SOURCE][i];
        sendBuffer[i] = dist[i];
    }
    found[SOURCE] = 1;
    count = 1;
    //Dijkstra loop: n-1 rounds, each settling one vertex
    while (count < n) {
        printf("before ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        j = chooseVertex(dist, n, found, rank, size);
        printf("after ChooseVertex: rank %d reporting\n", rank);
        fflush(stdout);
        count++;
        found[j] = 1;
        //relax edges out of j, but only for this rank's partition
        for (i = lower; i < upper; i++) {
            if (!found[i])
            {
                dist[i] = min(dist[i], dist[j] + edge[j][i]);
                sendBuffer[i] = dist[i];
            }
        }
        //Allreduce = the original Reduce-to-master + Bcast in one step:
        //every rank receives the element-wise minimum of all partitions.
        MPI_Allreduce(sendBuffer, dist, n, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    }
    //fix: both work arrays were leaked
    free(sendBuffer);
    free(found);
}
示例错误消息:
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
LeastPosition for rank 0 is: 3
after ChooseVertex: rank 1 reporting
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
before ChooseVertex: rank 0 reporting
after ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 4
after ChooseVertex: rank 0 reporting
before ChooseVertex: rank 0 reporting
before ChooseVertex: rank 1 reporting
LeastPosition for rank 0 is: 7
after ChooseVertex: rank 1 reporting
job aborted:
[ranks] message
[0] process exited without calling finalize
[1] terminated
---- error analysis -----
[0] on My-ComputerName
Assignmet3PP ended prematurely and may have crashed. exit code 3
---- error analysis -----
你的减少命令是:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_DOUBLE_INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
通过使用 MPI_DOUBLE_INT
,您表示您正在发送一个包含两个变量的结构:一个 double
后跟一个 int
。然而,这不是您的结构:您只有 2 个整数。因此你应该使用 MPI_2INT
。这些类型源自 this source。或者,您可以使用向量创建自己的类型。
示例修复是:
MPI_Reduce(&sendBuffr, &recvBuffr, 1, MPI_2INT, MPI_MINLOC, MASTER, MPI_COMM_WORLD);
此外,可以通过 MPI_Allreduce() 轻松地将缩减(reduce)和广播(broadcast)合并为一个步骤。