MPI Scatter Array of Matrices Struct
I have an array of a Matrix struct type that the program fills from user input. I need to distribute the matrices to the processes using OpenMPI. I tried using Scatter, but I'm confused about which arguments the call needs (and about how to receive the data in each local array). This is my current code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

#define nil NULL
#define NMAX 100
#define DATAMAX 1000
#define DATAMIN -1000

typedef struct Matrix
{
    int mat[NMAX][NMAX]; // Matrix cells
    int row_eff;         // Matrix effective row
    int col_eff;         // Matrix effective column
} Matrix;

void init_matrix(Matrix *m, int nrow, int ncol)
{
    m->row_eff = nrow;
    m->col_eff = ncol;

    for (int i = 0; i < m->row_eff; i++)
    {
        for (int j = 0; j < m->col_eff; j++)
        {
            m->mat[i][j] = 0;
        }
    }
}

Matrix input_matrix(int nrow, int ncol)
{
    Matrix input;
    init_matrix(&input, nrow, ncol);

    for (int i = 0; i < nrow; i++)
    {
        for (int j = 0; j < ncol; j++)
        {
            scanf("%d", &input.mat[i][j]);
        }
    }

    return input;
}

void print_matrix(Matrix *m)
{
    for (int i = 0; i < m->row_eff; i++)
    {
        for (int j = 0; j < m->col_eff; j++)
        {
            printf("%d ", m->mat[i][j]);
        }
        printf("\n");
    }
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    // Get number of processes
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Get process rank
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get matrices from user inputs
    int kernel_row, kernel_col, num_targets, target_row, target_col;

    // reads kernel's row and column and initializes kernel matrix from input
    scanf("%d %d", &kernel_row, &kernel_col);
    Matrix kernel = input_matrix(kernel_row, kernel_col);

    // reads number of target matrices and their dimensions,
    // initializes array of matrices and array of data ranges (int)
    scanf("%d %d %d", &num_targets, &target_row, &target_col);
    Matrix *arr_mat = (Matrix *)malloc(num_targets * sizeof(Matrix));
    for (int i = 0; i < num_targets; i++)
    {
        arr_mat[i] = input_matrix(target_row, target_col);
    }

    // Get number of matrices per process
    int num_mat_per_proc = ceil(num_targets / size);

    // Init local matrices and scatter the global matrices
    Matrix *local_mats = (Matrix *)malloc(num_mat_per_proc * sizeof(Matrix));
    MPI_Scatter(arr_mat, sizeof(local_mats), MPI_BYTE, &local_mats, sizeof(local_mats), MPI_BYTE, 0, MPI_COMM_WORLD);

    if (rank == 0)
    {
        // Range arrays -> array of convolution results
        int arr_range[num_targets];
        printf("From master \n");
        for (int i = 0; i < 3; i++)
        {
            print_matrix(&arr_mat[i]);
        }
    }
    else
    {
        printf("From slave %d = \n", rank);
        print_matrix(&local_mats[0]);
    }

    MPI_Finalize();
}
So I have a couple of questions about my current implementation:
- Can I accept the input just like that, or should I make it happen only in rank 0?
- How do I implement the scatter part, possibly using Scatterv, since the number of matrices might not be divisible by the number of processes?
Can I accept the input just like that or should I make it so that it only happens in rank 0?
No; as a best practice you should take the input from command-line arguments or read it from a file.
If you want to use scanf, then use it only inside rank 0. STDIN is forwarded to rank 0 (as far as I know this is not guaranteed by the standard, but I expect it to work; it will depend on the implementation).
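A minimal sketch of that pattern, reusing the Matrix struct and input_matrix from the question (the dims array is a name made up for this example): only rank 0 touches stdin, and the dimensions every rank needs are forwarded with MPI_Bcast. This fragment would sit inside main after MPI_Comm_rank.
// Sketch: only rank 0 reads, then the dimensions are broadcast to everyone
int dims[3]; // num_targets, target_row, target_col
if (rank == 0)
{
    scanf("%d %d %d", &dims[0], &dims[1], &dims[2]);
}
// every rank needs the dimensions to size its local buffers
MPI_Bcast(dims, 3, MPI_INT, 0, MPI_COMM_WORLD);

Matrix *arr_mat = NULL;
if (rank == 0)
{
    // only the root reads the matrix contents from stdin
    arr_mat = malloc(dims[0] * sizeof(Matrix));
    for (int i = 0; i < dims[0]; i++)
    {
        arr_mat[i] = input_matrix(dims[1], dims[2]);
    }
}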
How do I implement the scatter part and possibly using Scatterv because the amount of arrays might not be divisible to the number of processes?
If you are sending different sizes to different processes, then you should use Scatterv.

Scatter syntax:
MPI_Scatter(
void* send_data,
int send_count,
MPI_Datatype send_datatype,
void* recv_data,
int recv_count,
MPI_Datatype recv_datatype,
int root,
MPI_Comm communicator)
Your usage:
MPI_Scatter(arr_mat, sizeof(local_mats), MPI_BYTE, &local_mats, sizeof(local_mats), MPI_BYTE, 0, MPI_COMM_WORLD);
Potential error points:
- send_count: this is the amount to send (as Gilles Gouaillardet pointed out in the comments). sizeof(local_mats) is only the size of a pointer; it should be num_mat_per_proc * sizeof(Matrix).
- recv_count: I don't think the received size should be sizeof(local_mats) either.
- Since you use the same datatype (MPI_BYTE) for both SEND and RECV, your send_count == recv_count.
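As a rough sketch under those corrections (not a verified drop-in fix): the counts become bytes per rank, and the receive buffer should be local_mats itself rather than &local_mats, because local_mats is already a pointer to the malloc'd block. For the non-divisible case the same idea carries over to MPI_Scatterv with per-rank byte counts and byte displacements; sendcounts, displs and my_mats are names made up for this example.
// Even split (num_targets divisible by size): counts are bytes per rank
int bytes_per_proc = num_mat_per_proc * (int)sizeof(Matrix);
MPI_Scatter(arr_mat, bytes_per_proc, MPI_BYTE,
            local_mats, bytes_per_proc, MPI_BYTE,
            0, MPI_COMM_WORLD);

// Uneven split: the first (num_targets % size) ranks get one extra matrix
int *sendcounts = malloc(size * sizeof(int));
int *displs = malloc(size * sizeof(int));
int offset = 0;
for (int r = 0; r < size; r++)
{
    int nmat = num_targets / size + (r < num_targets % size ? 1 : 0);
    sendcounts[r] = nmat * (int)sizeof(Matrix); // bytes sent to rank r
    displs[r] = offset;                         // byte offset into arr_mat
    offset += sendcounts[r];
}
int my_bytes = sendcounts[rank];
Matrix *my_mats = malloc(my_bytes > 0 ? my_bytes : sizeof(Matrix));
MPI_Scatterv(arr_mat, sendcounts, displs, MPI_BYTE,
             my_mats, my_bytes, MPI_BYTE,
             0, MPI_COMM_WORLD);
int my_num_mats = my_bytes / (int)sizeof(Matrix); // matrices this rank owns
Sending the struct as MPI_BYTE works here because Matrix only holds ints and all ranks run on the same architecture; a dedicated datatype (for example one built with MPI_Type_contiguous) would be the more portable route, but the byte-count version stays closest to the question's approach.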