尝试分发数组时 MPI_Scatterv 发生错误(消息被截断)
An error occurred in MPI_Scatterv (message truncated) while trying to distribute an array
1。目标
假设我们想要在 4
个处理器中分配一个 square mxn=4x4=16
数组,这些处理器位于一个名为 mesh
的拓扑 mesh/grid 中]:
P0-P1
| |
P2-P3
其中-
代表mesh_r
(mesh_rows)通讯器,|
代表mesh_c
(mesh_columns)通讯器,通过[=构建22=] 程序。假设 m
是处理器数量 world_size
的倍数。如果
- 变量
A
是一个作为一维数组实现的“矩阵”,其值为{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}
然后
我预计:
- P0 从
A
接收值 {1,2,5,6}
- P1 从
A
接收值 {3,4,7,8}
- P2 从
A
接收值 {9,10,13,14}
- P3 从
A
接收值 {11,12,15,16}
这里清楚地显示:
2。我的代码
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <mpi.h>
bool is_divisible(int, int);
void build_mesh(MPI_Comm*, MPI_Comm*, MPI_Comm*, int, int, int, int, int*);
int *fill_matrix(int*, int, int);
int *fill_array(int*, int);
void print_matrix(int*, int, int, int, int);
int main(int argc, char *argv[])
{
int process_rank, world_size;
int mesh_rows, mesh_columns;
int mesh_dimension = 2;
int *process_coordinates;
MPI_Comm mesh, mesh_r, mesh_c;
int process_rank_mesh;
int *A, *A_loc;
int m, n; // input A square shape
int mloc, nloc; // local A square shape
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &process_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
if (process_rank == 0) {
m = n = 8; // multiple of world_size = 4
}
MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
A = fill_matrix(A, m, n);
if (process_rank == 0)
mesh_rows = 2;
MPI_Bcast(&mesh_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (is_divisible(world_size, mesh_rows))
mesh_columns = world_size / mesh_rows;
else {
mesh_rows = 1;
mesh_columns = world_size / mesh_rows;
}
MPI_Bcast(&mesh_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&mesh_columns, 1, MPI_INT, 0, MPI_COMM_WORLD);
process_coordinates = (int*) calloc(mesh_dimension, sizeof(int));
build_mesh(&mesh, &mesh_r, &mesh_c, process_rank, world_size, mesh_rows, mesh_columns, process_coordinates);
MPI_Comm_rank(mesh, &process_rank_mesh);
mloc = m / mesh_rows;
nloc = m / mesh_columns;
// Everything is correct up to now - The problem must be here:
// Distribution
A_loc = (int*) calloc(mloc * nloc, sizeof(int));
A_loc = (int*) calloc(mloc * nloc, sizeof(int));
MPI_Datatype square_block;
int stride = n;
int count = mloc;
int block_length = nloc;
MPI_Type_vector(count, block_length, stride, MPI_INT, &square_block);
MPI_Datatype square_block_resized;
MPI_Type_create_resized(square_block, 0, sizeof(int), &square_block_resized);
MPI_Type_commit(&square_block_resized);
int *send_counts = (int*) calloc(world_size, sizeof(int));
int *displs = (int*) calloc(world_size, sizeof(int));
for (int i = 0; i < mesh_rows; i++) {
for (int j = 0; j < mesh_columns; j++) {
send_counts[i * mesh_columns + j] = 1;
displs[i * mesh_columns + j] = i * n * block_length + j * block_length;
}
}
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, 4, MPI_INT, 0, MPI_COMM_WORLD);
// dbg distribution
print_matrix(A, m, n, process_rank, -1); // original matrix
for (int i = 0; i < world_size; i++) {
MPI_Barrier(MPI_COMM_WORLD);
print_matrix(A_loc, mloc, nloc, process_rank, i);
}
MPI_Finalize();
return 0;
}
// 100% working - no need to check
bool is_divisible(int dividend, int divisor)
{
return dividend % divisor == 0;
}
// 100% working - no need to check
void build_mesh(MPI_Comm *mesh, MPI_Comm *mesh_r, MPI_Comm *mesh_c, int process_rank, int world_size,
int mesh_rows, int mesh_columns, int *process_coordinates)
{
int mesh_dimension = 2;
int *mesh_n_dimension;
int mesh_reorder = 0;
int *mesh_period;
int *remain_dims = (int*) calloc(mesh_dimension, sizeof(int));
mesh_n_dimension = (int*) calloc(mesh_dimension, sizeof(int));
mesh_n_dimension[0] = mesh_rows;
mesh_n_dimension[1] = mesh_columns;
mesh_period = (int*) calloc(mesh_dimension, sizeof(int));
mesh_period[0] = mesh_period[1] = 0;
MPI_Cart_create(MPI_COMM_WORLD, mesh_dimension, mesh_n_dimension, mesh_period, mesh_reorder, mesh);
MPI_Cart_coords(*mesh, process_rank, mesh_dimension, process_coordinates);
remain_dims[0] = 0;
remain_dims[1] = 1;
MPI_Cart_sub(*mesh, remain_dims, mesh_r);
remain_dims[0] = 1;
remain_dims[1] = 0;
MPI_Cart_sub(*mesh, remain_dims, mesh_c);
}
// 100% working - no need to check
int *fill_matrix(int *A, int m, int n)
{
int k = 0;
A = (int*) calloc(m * n, sizeof(int));
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
A[i * n + j] = ++k;
}
}
return A;
}
// 100% working - no need to check
void print_matrix(int *A, int m, int n, int process_rank, int id)
{
if (id == -1) {
if (process_rank == 0) {
printf("Original matrix:\n");
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
printf("%d\t", A[i * n + j]);
}
printf("\n");
}
printf("\n");
}
}
if (process_rank == id) {
printf("P%d local matrix:\n", id);
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
printf("%d\t", A[i * n + j]);
}
printf("\n");
}
printf("\n");
}
}
3。结果
如果 m = 4
:
Original matrix:
1 2 3 4
5 6 7 8
9 10 11 12
13 14 15 16
P0 local matrix:
1 2
5 6
P1 local matrix:
3 4
7 8
P2 local matrix:
9 10
13 14
P3 local matrix:
11 12
15 16
但不幸的是,如果 m != 4
但它是 world_size
的倍数,如您在此处所见(例如 m = 8
、12
...):
[Air:01914] *** An error occurred in MPI_Scatterv
[Air:01914] *** reported by process [138870785,2]
[Air:01914] *** on communicator MPI_COMM_WORLD
[Air:01914] *** MPI_ERR_TRUNCATE: message truncated
[Air:01914] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[Air:01914] *** and potentially your MPI job)
[Air:01911] PMIX ERROR: UNREACHABLE in file server/pmix_server.c at line 2198
[Air:01911] PMIX ERROR: UNREACHABLE in file server/pmix_server.c at line 2198
[Air:01911] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[Air:01911] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
- 你能帮帮我吗?
displs[]
值正确...
我简单地解决了替换
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, 4, MPI_INT, 0, MPI_COMM_WORLD);
与
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, mloc * nloc, MPI_INT, 0, MPI_COMM_WORLD);
即将硬编码的 4
替换为 mloc * nloc
1。目标
假设我们想要在 4
个处理器中分配一个 square mxn=4x4=16
数组,这些处理器位于一个名为 mesh
的拓扑 mesh/grid 中]:
P0-P1
| |
P2-P3
其中-
代表mesh_r
(mesh_rows)通讯器,|
代表mesh_c
(mesh_columns)通讯器,通过[=构建22=] 程序。假设 m
是处理器数量 world_size
的倍数。如果
- 变量
A
是一个作为一维数组实现的“矩阵”,其值为{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}
然后
我预计:
- P0 从
A
接收值{1,2,5,6}
- P1 从
A
接收值{3,4,7,8}
- P2 从
A
接收值{9,10,13,14}
- P3 从
A
接收值{11,12,15,16}
这里清楚地显示:
2。我的代码
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <mpi.h>
bool is_divisible(int, int);
void build_mesh(MPI_Comm*, MPI_Comm*, MPI_Comm*, int, int, int, int, int*);
int *fill_matrix(int*, int, int);
int *fill_array(int*, int);
void print_matrix(int*, int, int, int, int);
int main(int argc, char *argv[])
{
int process_rank, world_size;
int mesh_rows, mesh_columns;
int mesh_dimension = 2;
int *process_coordinates;
MPI_Comm mesh, mesh_r, mesh_c;
int process_rank_mesh;
int *A, *A_loc;
int m, n; // input A square shape
int mloc, nloc; // local A square shape
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &process_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
if (process_rank == 0) {
m = n = 8; // multiple of world_size = 4
}
MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
A = fill_matrix(A, m, n);
if (process_rank == 0)
mesh_rows = 2;
MPI_Bcast(&mesh_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (is_divisible(world_size, mesh_rows))
mesh_columns = world_size / mesh_rows;
else {
mesh_rows = 1;
mesh_columns = world_size / mesh_rows;
}
MPI_Bcast(&mesh_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&mesh_columns, 1, MPI_INT, 0, MPI_COMM_WORLD);
process_coordinates = (int*) calloc(mesh_dimension, sizeof(int));
build_mesh(&mesh, &mesh_r, &mesh_c, process_rank, world_size, mesh_rows, mesh_columns, process_coordinates);
MPI_Comm_rank(mesh, &process_rank_mesh);
mloc = m / mesh_rows;
nloc = m / mesh_columns;
// Everything is correct up to now - The problem must be here:
// Distribution
A_loc = (int*) calloc(mloc * nloc, sizeof(int));
A_loc = (int*) calloc(mloc * nloc, sizeof(int));
MPI_Datatype square_block;
int stride = n;
int count = mloc;
int block_length = nloc;
MPI_Type_vector(count, block_length, stride, MPI_INT, &square_block);
MPI_Datatype square_block_resized;
MPI_Type_create_resized(square_block, 0, sizeof(int), &square_block_resized);
MPI_Type_commit(&square_block_resized);
int *send_counts = (int*) calloc(world_size, sizeof(int));
int *displs = (int*) calloc(world_size, sizeof(int));
for (int i = 0; i < mesh_rows; i++) {
for (int j = 0; j < mesh_columns; j++) {
send_counts[i * mesh_columns + j] = 1;
displs[i * mesh_columns + j] = i * n * block_length + j * block_length;
}
}
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, 4, MPI_INT, 0, MPI_COMM_WORLD);
// dbg distribution
print_matrix(A, m, n, process_rank, -1); // original matrix
for (int i = 0; i < world_size; i++) {
MPI_Barrier(MPI_COMM_WORLD);
print_matrix(A_loc, mloc, nloc, process_rank, i);
}
MPI_Finalize();
return 0;
}
// 100% working - no need to check
bool is_divisible(int dividend, int divisor)
{
return dividend % divisor == 0;
}
// 100% working - no need to check
void build_mesh(MPI_Comm *mesh, MPI_Comm *mesh_r, MPI_Comm *mesh_c, int process_rank, int world_size,
int mesh_rows, int mesh_columns, int *process_coordinates)
{
int mesh_dimension = 2;
int *mesh_n_dimension;
int mesh_reorder = 0;
int *mesh_period;
int *remain_dims = (int*) calloc(mesh_dimension, sizeof(int));
mesh_n_dimension = (int*) calloc(mesh_dimension, sizeof(int));
mesh_n_dimension[0] = mesh_rows;
mesh_n_dimension[1] = mesh_columns;
mesh_period = (int*) calloc(mesh_dimension, sizeof(int));
mesh_period[0] = mesh_period[1] = 0;
MPI_Cart_create(MPI_COMM_WORLD, mesh_dimension, mesh_n_dimension, mesh_period, mesh_reorder, mesh);
MPI_Cart_coords(*mesh, process_rank, mesh_dimension, process_coordinates);
remain_dims[0] = 0;
remain_dims[1] = 1;
MPI_Cart_sub(*mesh, remain_dims, mesh_r);
remain_dims[0] = 1;
remain_dims[1] = 0;
MPI_Cart_sub(*mesh, remain_dims, mesh_c);
}
// 100% working - no need to check
int *fill_matrix(int *A, int m, int n)
{
int k = 0;
A = (int*) calloc(m * n, sizeof(int));
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
A[i * n + j] = ++k;
}
}
return A;
}
// 100% working - no need to check
void print_matrix(int *A, int m, int n, int process_rank, int id)
{
if (id == -1) {
if (process_rank == 0) {
printf("Original matrix:\n");
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
printf("%d\t", A[i * n + j]);
}
printf("\n");
}
printf("\n");
}
}
if (process_rank == id) {
printf("P%d local matrix:\n", id);
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
printf("%d\t", A[i * n + j]);
}
printf("\n");
}
printf("\n");
}
}
3。结果
如果 m = 4
:
Original matrix:
1 2 3 4
5 6 7 8
9 10 11 12
13 14 15 16
P0 local matrix:
1 2
5 6
P1 local matrix:
3 4
7 8
P2 local matrix:
9 10
13 14
P3 local matrix:
11 12
15 16
但不幸的是,如果 m != 4
但它是 world_size
的倍数,如您在此处所见(例如 m = 8
、12
...):
[Air:01914] *** An error occurred in MPI_Scatterv
[Air:01914] *** reported by process [138870785,2]
[Air:01914] *** on communicator MPI_COMM_WORLD
[Air:01914] *** MPI_ERR_TRUNCATE: message truncated
[Air:01914] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[Air:01914] *** and potentially your MPI job)
[Air:01911] PMIX ERROR: UNREACHABLE in file server/pmix_server.c at line 2198
[Air:01911] PMIX ERROR: UNREACHABLE in file server/pmix_server.c at line 2198
[Air:01911] 2 more processes have sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[Air:01911] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
- 你能帮帮我吗?
displs[]
值正确...
我简单地解决了替换
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, 4, MPI_INT, 0, MPI_COMM_WORLD);
与
MPI_Scatterv(A, send_counts, displs, square_block_resized, A_loc, mloc * nloc, MPI_INT, 0, MPI_COMM_WORLD);
即将硬编码的 4
替换为 mloc * nloc