将 MPI_Scatter 与 MPI_Type_vector 结合使用时列的意外分布
Unexpected distribution of columns when using MPI_Scatter with MPI_Type_vector
我正在尝试使用 MPI 在 N 个进程中划分二维矩阵的列。作为模板,我使用了“MPI_Scatter - sending columns of 2D array”中的示例。
我的代码:
//HEADERS
/*
 * Allocate an n-by-m board of chars in one contiguous buffer and fill it
 * with random 0/1 cells.
 *
 * The cells live in a single row-major allocation; the returned array holds
 * the row pointers into that buffer, so b[0] is also the address of the
 * whole contiguous board. Each cell is 1 when rand() < 0.25 * RAND_MAX and
 * 0 otherwise; exactly one rand() call is made per cell, in row-major order.
 *
 * Returns the row-pointer array, or NULL when n or m is negative, when the
 * size computation would overflow, or when an allocation fails. The caller
 * releases the board with free(b[0]) followed by free(b).
 */
char** board_initialize(int n, int m)
{
    if (n < 0 || m < 0)
        return NULL;

    size_t rows = (size_t)n;
    size_t cols = (size_t)m;
    if (cols != 0 && rows > (size_t)-1 / cols)
        return NULL;
    if (rows > (size_t)-1 / sizeof(char *))
        return NULL;

    /* Always request at least one byte / one pointer so that a NULL result
       unambiguously means failure, even for an empty board. */
    size_t cell_count = rows * cols;
    char *cells = malloc(cell_count ? cell_count : 1);
    char **row_ptrs = malloc((rows ? rows : 1) * sizeof *row_ptrs);
    if (cells == NULL || row_ptrs == NULL) {
        free(cells);
        free(row_ptrs);
        return NULL;
    }

    /* Slot 0 is valid even when n == 0, so free(b[0]) always works. */
    row_ptrs[0] = cells;
    for (size_t k = 0; k < rows; k++) {
        row_ptrs[k] = cells + k * cols;
        for (size_t l = 0; l < cols; l++)
            row_ptrs[k][l] = rand() < 0.25 * RAND_MAX;
    }
    return row_ptrs;
}
/*
 * Print an n-by-m board to stdout, one row per line.
 * Every cell is written as a decimal integer with no separator between
 * cells, and an empty line is printed after the last row.
 */
void board_print(char** b, int n, int m)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < m; ++col) {
            printf("%d", b[row][col]);
        }
        printf("\n");
    }
    printf("\n");
}
// Builds an N x N board on rank 0, prints it, and tries to scatter blocks of
// mycols = N / procs columns to every rank; each rank then prints the
// N x mycols board it holds.
int main(int argc, char* argv[])
{
int N = 10;
int i, j; // declared but never used in this function
char * boardptr = NULL; // pointer to the first cell of the global board (set on rank 0 only)
char ** board; // global board: row pointers into one contiguous allocation (assigned on rank 0 only)
int procs, myid;
int mycols;
char ** myboard; // part of the board that belongs to this process
MPI_Init(&argc, &argv); // initialization
MPI_Comm_rank(MPI_COMM_WORLD, &myid); // process ID
MPI_Comm_size(MPI_COMM_WORLD, &procs); // number of processes
// initialize and print the global board on rank 0
if (myid == 0)
{
srand(1573949136);
board = board_initialize(N, N);
boardptr = *board;
board_print(board, N, N);
}
// divide work: integer division, so N % procs trailing columns are not assigned to any rank
mycols = N / procs;
// allocate the local N x mycols board; board_initialize also fills it with
// random 0/1 values, which the scatter below is meant to overwrite
myboard = board_initialize(N,mycols);
MPI_Datatype column_not_resized, column_resized;
// one column of the N x N board: N blocks of 1 char, block starts N chars apart
MPI_Type_vector(N, 1, N, MPI_CHAR, &column_not_resized);
MPI_Type_commit(&column_not_resized);
// shrink the extent to one char so that consecutive columns start one char apart
MPI_Type_create_resized(column_not_resized, 0, 1*sizeof(char), &column_resized);
MPI_Type_commit(&column_resized);
// scatter initial matrix
// NOTE(review): column_resized (stride N) is also used as the receive type,
// but the rows of myboard are only mycols chars long (see the allocation
// above), so the receive layout does not match the receive buffer whenever
// mycols != N.
MPI_Scatter(boardptr, mycols, column_resized, *myboard, mycols, column_resized, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
board_print(myboard, N, mycols);
MPI_Finalize(); // finalize MPI
return 0;
}
整个板看起来像:
0000010010
0100000000
0000101100
0101000010
1000000100
0000010010
0000001110
0110000100
0000100000
0100010010
如果我使用 2 个进程,我希望每个进程都能得到一半(第一个进程得到第 1-5 列,第二个进程得到第 6-10 列)。但是,当我打印这两个进程的 myboard 时,却得到了一些奇怪的结果:
proc0: proc1:
0 0 0 0 0 1 0 0 1 0
0 0 1 0 0 0 0 0 1 1
0 1 0 0 0 0 0 0 0 0
0 1 0 0 0 0 1 0 1 0
0 0 0 0 1 0 1 1 0 0
0 1 1 0 0 0 0 1 0 0
0 1 0 1 0 0 0 0 1 0
0 0 0 0 1 0 0 1 0 0
1 0 0 0 0 0 0 1 0 0
0 1 0 0 0 0 1 0 0 0
这可能是一些愚蠢的错误,但我似乎找不到它。任何帮助将非常感激。 <3
注意:proc1 的输出可能只是一些垃圾数据,因为我每次运行得到的输出都不同。
您忘记区分发送类型和接收类型:MPI_Type_vector 的参数取决于该类型描述的是发送缓冲区还是接收缓冲区。您需要执行以下操作:
MPI_Datatype acol, acoltype, bcol, bcoltype;
if (myid == 0) {
    // Send side (root only): one column of the N x N board.
    MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
    MPI_Type_commit(&acol);
    MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
    // The resized type is the one handed to MPI_Scatter, so it must be
    // committed before it is used for communication.
    MPI_Type_commit(&acoltype);
}
// Receive side (every rank): one column of the local N x mycols board.
MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
MPI_Type_commit(&bcol);
MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
MPI_Type_commit(&bcoltype);
MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
从发送方的角度来看,您要创建 N 个块(即 N 行),每块大小为 1,步幅为 N(即 N 列)。因此:
// count = N blocks, blocklength = 1 char, stride = N chars between block starts
MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
从接收方的角度来看,您想要创建 N 个块(即 N 行),每块大小为 1,步幅为 mycols(即 mycols 列)。因此:
// count = N blocks, blocklength = 1 char, stride = mycols chars between block starts
MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
旁注:您其实不需要 MPI_Scatter 之后的 MPI_Barrier,因为 MPI_Scatter 本身已经是一个阻塞调用。
最终代码如下所示:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Allocate an n-by-m board of chars in one contiguous buffer and fill it
 * with random 0/1 cells.
 *
 * The cells live in a single row-major allocation; the returned array holds
 * the row pointers into that buffer, so b[0] is also the address of the
 * whole contiguous board. Each cell is 1 when rand() < 0.25 * RAND_MAX and
 * 0 otherwise; exactly one rand() call is made per cell, in row-major order.
 *
 * Returns the row-pointer array, or NULL when n or m is negative, when the
 * size computation would overflow, or when an allocation fails. The caller
 * releases the board with free(b[0]) followed by free(b).
 */
char** board_initialize(int n, int m)
{
    if (n < 0 || m < 0)
        return NULL;

    size_t rows = (size_t)n;
    size_t cols = (size_t)m;
    if (cols != 0 && rows > (size_t)-1 / cols)
        return NULL;
    if (rows > (size_t)-1 / sizeof(char *))
        return NULL;

    /* Always request at least one byte / one pointer so that a NULL result
       unambiguously means failure, even for an empty board. */
    size_t cell_count = rows * cols;
    char *cells = malloc(cell_count ? cell_count : 1);
    char **row_ptrs = malloc((rows ? rows : 1) * sizeof *row_ptrs);
    if (cells == NULL || row_ptrs == NULL) {
        free(cells);
        free(row_ptrs);
        return NULL;
    }

    /* Slot 0 is valid even when n == 0, so free(b[0]) always works. */
    row_ptrs[0] = cells;
    for (size_t k = 0; k < rows; k++) {
        row_ptrs[k] = cells + k * cols;
        for (size_t l = 0; l < cols; l++)
            row_ptrs[k][l] = rand() < 0.25 * RAND_MAX;
    }
    return row_ptrs;
}
/*
 * Print an n-by-m board to stdout, one row per line.
 * Every cell is written as a decimal integer with no separator between
 * cells, and an empty line is printed after the last row.
 */
void board_print(char** b, int n, int m)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < m; ++col) {
            printf("%d", b[row][col]);
        }
        printf("\n");
    }
    printf("\n");
}
int main(int argc, char* argv[])
{
int N = 10;
int i, j;
char * boardptr = NULL; // ptr to board
char ** board; // board, 2D matrix, contignous memory allocation!
int procs, myid;
int mycols;
char ** myboard; // part of board that belongs to a process
MPI_Init(&argc, &argv); // initiailzation
MPI_Comm_rank(MPI_COMM_WORLD, &myid); // process ID
MPI_Comm_size(MPI_COMM_WORLD, &procs); // number of processes
// initialize global board
if (myid == 0)
{
srand(1573949136);
board = board_initialize(N, N);
boardptr = *board;
board_print(board, N, N);
}
// divide work
mycols = N / procs;
// initialize my structures
myboard = board_initialize(N,mycols);
MPI_Datatype acol, acoltype, bcol, bcoltype;
if (myid == 0) {
MPI_Type_vector(N,
1,
N,
MPI_CHAR,
&acol);
MPI_Type_commit(&acol);
MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
}
MPI_Type_vector(N,
1,
mycols,
MPI_CHAR,
&bcol);
MPI_Type_commit(&bcol);
MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
MPI_Type_commit(&bcoltype);
MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
board_print(myboard, N, mycols);
MPI_Finalize(); // finalize MPI
return 0;
}
或者,我认为更简单的做法是按行而不是按列划分,代码如下所示:
int myrows = N / procs;
myboard = board_initialize(myrows, N);
// Each process owns myrows complete rows, i.e. myrows * N contiguous chars,
// so that is the element count to send to (and receive on) every rank.
MPI_Scatter (boardptr, myrows * N, MPI_CHAR, *myboard, myrows * N, MPI_CHAR, 0, MPI_COMM_WORLD);
board_print(myboard, myrows, N);
我正在尝试使用 MPI 在 N 个进程中划分二维矩阵的列。作为模板,我使用了“MPI_Scatter - sending columns of 2D array”中的示例。
我的代码:
//HEADERS
/*
 * Allocate an n-by-m board of chars in one contiguous buffer and fill it
 * with random 0/1 cells.
 *
 * The cells live in a single row-major allocation; the returned array holds
 * the row pointers into that buffer, so b[0] is also the address of the
 * whole contiguous board. Each cell is 1 when rand() < 0.25 * RAND_MAX and
 * 0 otherwise; exactly one rand() call is made per cell, in row-major order.
 *
 * Returns the row-pointer array, or NULL when n or m is negative, when the
 * size computation would overflow, or when an allocation fails. The caller
 * releases the board with free(b[0]) followed by free(b).
 */
char** board_initialize(int n, int m)
{
    if (n < 0 || m < 0)
        return NULL;

    size_t rows = (size_t)n;
    size_t cols = (size_t)m;
    if (cols != 0 && rows > (size_t)-1 / cols)
        return NULL;
    if (rows > (size_t)-1 / sizeof(char *))
        return NULL;

    /* Always request at least one byte / one pointer so that a NULL result
       unambiguously means failure, even for an empty board. */
    size_t cell_count = rows * cols;
    char *cells = malloc(cell_count ? cell_count : 1);
    char **row_ptrs = malloc((rows ? rows : 1) * sizeof *row_ptrs);
    if (cells == NULL || row_ptrs == NULL) {
        free(cells);
        free(row_ptrs);
        return NULL;
    }

    /* Slot 0 is valid even when n == 0, so free(b[0]) always works. */
    row_ptrs[0] = cells;
    for (size_t k = 0; k < rows; k++) {
        row_ptrs[k] = cells + k * cols;
        for (size_t l = 0; l < cols; l++)
            row_ptrs[k][l] = rand() < 0.25 * RAND_MAX;
    }
    return row_ptrs;
}
/*
 * Print an n-by-m board to stdout, one row per line.
 * Every cell is written as a decimal integer with no separator between
 * cells, and an empty line is printed after the last row.
 */
void board_print(char** b, int n, int m)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < m; ++col) {
            printf("%d", b[row][col]);
        }
        printf("\n");
    }
    printf("\n");
}
// Builds an N x N board on rank 0, prints it, and tries to scatter blocks of
// mycols = N / procs columns to every rank; each rank then prints the
// N x mycols board it holds.
int main(int argc, char* argv[])
{
int N = 10;
int i, j; // declared but never used in this function
char * boardptr = NULL; // pointer to the first cell of the global board (set on rank 0 only)
char ** board; // global board: row pointers into one contiguous allocation (assigned on rank 0 only)
int procs, myid;
int mycols;
char ** myboard; // part of the board that belongs to this process
MPI_Init(&argc, &argv); // initialization
MPI_Comm_rank(MPI_COMM_WORLD, &myid); // process ID
MPI_Comm_size(MPI_COMM_WORLD, &procs); // number of processes
// initialize and print the global board on rank 0
if (myid == 0)
{
srand(1573949136);
board = board_initialize(N, N);
boardptr = *board;
board_print(board, N, N);
}
// divide work: integer division, so N % procs trailing columns are not assigned to any rank
mycols = N / procs;
// allocate the local N x mycols board; board_initialize also fills it with
// random 0/1 values, which the scatter below is meant to overwrite
myboard = board_initialize(N,mycols);
MPI_Datatype column_not_resized, column_resized;
// one column of the N x N board: N blocks of 1 char, block starts N chars apart
MPI_Type_vector(N, 1, N, MPI_CHAR, &column_not_resized);
MPI_Type_commit(&column_not_resized);
// shrink the extent to one char so that consecutive columns start one char apart
MPI_Type_create_resized(column_not_resized, 0, 1*sizeof(char), &column_resized);
MPI_Type_commit(&column_resized);
// scatter initial matrix
// NOTE(review): column_resized (stride N) is also used as the receive type,
// but the rows of myboard are only mycols chars long (see the allocation
// above), so the receive layout does not match the receive buffer whenever
// mycols != N.
MPI_Scatter(boardptr, mycols, column_resized, *myboard, mycols, column_resized, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
board_print(myboard, N, mycols);
MPI_Finalize(); // finalize MPI
return 0;
}
整个板看起来像:
0000010010
0100000000
0000101100
0101000010
1000000100
0000010010
0000001110
0110000100
0000100000
0100010010
如果我使用 2 个进程,我希望每个进程都能得到一半(第一个进程得到第 1-5 列,第二个进程得到第 6-10 列)。但是,当我打印这两个进程的 myboard 时,却得到了一些奇怪的结果:
proc0: proc1:
0 0 0 0 0 1 0 0 1 0
0 0 1 0 0 0 0 0 1 1
0 1 0 0 0 0 0 0 0 0
0 1 0 0 0 0 1 0 1 0
0 0 0 0 1 0 1 1 0 0
0 1 1 0 0 0 0 1 0 0
0 1 0 1 0 0 0 0 1 0
0 0 0 0 1 0 0 1 0 0
1 0 0 0 0 0 0 1 0 0
0 1 0 0 0 0 1 0 0 0
这可能是一些愚蠢的错误,但我似乎找不到它。任何帮助将非常感激。 <3
注意:proc1 的输出可能只是一些垃圾数据,因为我每次运行得到的输出都不同。
您忘记区分发送类型和接收类型:MPI_Type_vector 的参数取决于该类型描述的是发送缓冲区还是接收缓冲区。您需要执行以下操作:
MPI_Datatype acol, acoltype, bcol, bcoltype;
if (myid == 0) {
    // Send side (root only): one column of the N x N board.
    MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
    MPI_Type_commit(&acol);
    MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
    // The resized type is the one handed to MPI_Scatter, so it must be
    // committed before it is used for communication.
    MPI_Type_commit(&acoltype);
}
// Receive side (every rank): one column of the local N x mycols board.
MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
MPI_Type_commit(&bcol);
MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
MPI_Type_commit(&bcoltype);
MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
从发送方的角度来看,您要创建 N 个块(即 N 行),每块大小为 1,步幅为 N(即 N 列)。因此:
// count = N blocks, blocklength = 1 char, stride = N chars between block starts
MPI_Type_vector(N, 1, N, MPI_CHAR, &acol);
从接收方的角度来看,您想要创建 N 个块(即 N 行),每块大小为 1,步幅为 mycols(即 mycols 列)。因此:
// count = N blocks, blocklength = 1 char, stride = mycols chars between block starts
MPI_Type_vector(N, 1, mycols, MPI_CHAR, &bcol);
旁注:您其实不需要 MPI_Scatter 之后的 MPI_Barrier,因为 MPI_Scatter 本身已经是一个阻塞调用。
最终代码如下所示:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Allocate an n-by-m board of chars in one contiguous buffer and fill it
 * with random 0/1 cells.
 *
 * The cells live in a single row-major allocation; the returned array holds
 * the row pointers into that buffer, so b[0] is also the address of the
 * whole contiguous board. Each cell is 1 when rand() < 0.25 * RAND_MAX and
 * 0 otherwise; exactly one rand() call is made per cell, in row-major order.
 *
 * Returns the row-pointer array, or NULL when n or m is negative, when the
 * size computation would overflow, or when an allocation fails. The caller
 * releases the board with free(b[0]) followed by free(b).
 */
char** board_initialize(int n, int m)
{
    if (n < 0 || m < 0)
        return NULL;

    size_t rows = (size_t)n;
    size_t cols = (size_t)m;
    if (cols != 0 && rows > (size_t)-1 / cols)
        return NULL;
    if (rows > (size_t)-1 / sizeof(char *))
        return NULL;

    /* Always request at least one byte / one pointer so that a NULL result
       unambiguously means failure, even for an empty board. */
    size_t cell_count = rows * cols;
    char *cells = malloc(cell_count ? cell_count : 1);
    char **row_ptrs = malloc((rows ? rows : 1) * sizeof *row_ptrs);
    if (cells == NULL || row_ptrs == NULL) {
        free(cells);
        free(row_ptrs);
        return NULL;
    }

    /* Slot 0 is valid even when n == 0, so free(b[0]) always works. */
    row_ptrs[0] = cells;
    for (size_t k = 0; k < rows; k++) {
        row_ptrs[k] = cells + k * cols;
        for (size_t l = 0; l < cols; l++)
            row_ptrs[k][l] = rand() < 0.25 * RAND_MAX;
    }
    return row_ptrs;
}
/*
 * Print an n-by-m board to stdout, one row per line.
 * Every cell is written as a decimal integer with no separator between
 * cells, and an empty line is printed after the last row.
 */
void board_print(char** b, int n, int m)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < m; ++col) {
            printf("%d", b[row][col]);
        }
        printf("\n");
    }
    printf("\n");
}
int main(int argc, char* argv[])
{
int N = 10;
int i, j;
char * boardptr = NULL; // ptr to board
char ** board; // board, 2D matrix, contignous memory allocation!
int procs, myid;
int mycols;
char ** myboard; // part of board that belongs to a process
MPI_Init(&argc, &argv); // initiailzation
MPI_Comm_rank(MPI_COMM_WORLD, &myid); // process ID
MPI_Comm_size(MPI_COMM_WORLD, &procs); // number of processes
// initialize global board
if (myid == 0)
{
srand(1573949136);
board = board_initialize(N, N);
boardptr = *board;
board_print(board, N, N);
}
// divide work
mycols = N / procs;
// initialize my structures
myboard = board_initialize(N,mycols);
MPI_Datatype acol, acoltype, bcol, bcoltype;
if (myid == 0) {
MPI_Type_vector(N,
1,
N,
MPI_CHAR,
&acol);
MPI_Type_commit(&acol);
MPI_Type_create_resized(acol, 0, 1*sizeof(char), &acoltype);
}
MPI_Type_vector(N,
1,
mycols,
MPI_CHAR,
&bcol);
MPI_Type_commit(&bcol);
MPI_Type_create_resized(bcol, 0, 1*sizeof(char), &bcoltype);
MPI_Type_commit(&bcoltype);
MPI_Scatter (boardptr, mycols, acoltype, *myboard, mycols, bcoltype, 0, MPI_COMM_WORLD);
board_print(myboard, N, mycols);
MPI_Finalize(); // finalize MPI
return 0;
}
或者,我认为更简单的做法是按行而不是按列划分,代码如下所示:
int myrows = N / procs;
myboard = board_initialize(myrows, N);
// Each process owns myrows complete rows, i.e. myrows * N contiguous chars,
// so that is the element count to send to (and receive on) every rank.
MPI_Scatter (boardptr, myrows * N, MPI_CHAR, *myboard, myrows * N, MPI_CHAR, 0, MPI_COMM_WORLD);
board_print(myboard, myrows, N);