Writing distributed arrays using MPI-IO and Cartesian topology
I have an MPI code that implements a 2D domain decomposition to compute numerical solutions to a PDE. Currently I write certain 2D distributed arrays out for every process (e.g. array_x --> proc000x.bin). I want to reduce this to a single binary file.
array_0 | array_1
array_2 | array_3
Suppose the above illustrates a Cartesian topology with 4 processes (2x2). Each 2D array has dimensions (nx + 2, nz + 2), where the +2 indicates "ghost" layers added on all sides for communication.
I want to extract the main arrays (omitting the ghost layers) and write them to a single binary file in an order like
array_0, array_1, array_2, array_3 --> output.bin
Ideally, if possible, it would be preferable to write it as if I had access to the global grid and were writing it row by row, i.e.
row 0 of array_0, row 0 of array_1, row 1 of array_0, row 1 of array_1, ...
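For concreteness, with the toy 2x2 example below (each local interior is 5x5 once the ghost layers are dropped), the two candidate layouts of output.bin would be, as a rough sketch of the intent:
Layout 1 (block order):
    output.bin = [ array_0 rows 0..4 | array_1 rows 0..4 | array_2 rows 0..4 | array_3 rows 0..4 ]
Layout 2 (global row order):
    output.bin = [ array_0 row 0 | array_1 row 0 | array_0 row 1 | array_1 row 1 | ... | array_0 row 4 | array_1 row 4 |
                   array_2 row 0 | array_3 row 0 | ... | array_2 row 4 | array_3 row 4 ]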
The attempt below, in the file array_test.c, tries the former of the two output formats.
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>

/* 2D array allocation */
float **alloc2D(int rows, int cols);

float **alloc2D(int rows, int cols) {
    int i, j;
    float *data = malloc(rows * cols * sizeof(float));
    float **arr2D = malloc(rows * sizeof(float *));
    for (i = 0; i < rows; i++) {
        arr2D[i] = &(data[i * cols]);
    }
    /* Initialize to zero */
    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++) {
            arr2D[i][j] = 0.0;
        }
    }
    return arr2D;
}

int main(void) {
    /* Creates 5x5 array of floats with padding layers and
     * attempts to write distributed arrays */
    /* Run toy example with 4 processes */
    int i, j, row, col;
    int nx = 5, ny = 5, npad = 1;
    int my_rank, nproc = 4;
    int dim[2] = {2, 2};    /* 2x2 cartesian grid */
    int period[2] = {0, 0};
    int coord[2];
    int reorder = 1;
    float **A = NULL;
    MPI_Comm grid_Comm;

    /* Initialize MPI */
    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Establish cartesian topology */
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &grid_Comm);

    /* Get cartesian grid indices of processes */
    MPI_Cart_coords(grid_Comm, my_rank, 2, coord);
    row = coord[1];
    col = coord[0];

    /* Add ghost layers */
    nx += 2 * npad;
    ny += 2 * npad;
    A = alloc2D(nx, ny);

    /* Create derived datatype for interior grid (output grid) */
    MPI_Datatype grid;
    int start[2] = {npad, npad};
    int arrsize[2] = {nx, ny};
    int gridsize[2] = {nx - 2 * npad, ny - 2 * npad};
    MPI_Type_create_subarray(2, arrsize, gridsize,
                             start, MPI_ORDER_C, MPI_FLOAT, &grid);
    MPI_Type_commit(&grid);

    /* Fill interior grid */
    for (i = npad; i < nx - npad; i++) {
        for (j = npad; j < ny - npad; j++) {
            A[i][j] = my_rank + i;
        }
    }

    /* MPI IO */
    MPI_File fh;
    MPI_Status status;
    char file_name[100];
    int N, offset;

    sprintf(file_name, "output.bin");
    MPI_File_open(grid_Comm, file_name, MPI_MODE_CREATE | MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &fh);
    N = (nx - 2 * npad) * (ny - 2 * npad);
    offset = (row * 2 + col) * N * sizeof(float);
    MPI_File_set_view(fh, offset, MPI_FLOAT, grid, "native",
                      MPI_INFO_NULL);
    MPI_File_write_all(fh, &A[0][0], N, MPI_FLOAT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    /* Cleanup */
    free(A[0]);
    free(A);
    MPI_Type_free(&grid);
    MPI_Finalize();

    return 0;
}
Compiled with
mpicc -o array_test array_test.c
and run with
mpiexec -n 4 array_test
The code compiles and runs, but the output is incorrect. I assume I am misunderstanding the use of derived datatypes and file writing in this case, so I would appreciate some help in finding my mistake.
The mistake you make here is that your file view is wrong. Instead of creating a type representing the share of the file the current processor is responsible for, you use the mask corresponding to the local data you want to write.
There are actually two very distinct masks to consider:
- the mask for the local data, excluding the halo layers; and
- the mask for the global data, as it should be collated into the file.
The former corresponds to this layout:
Here, for a given process, the data you want to output to the file is in dark blue, while the halo layers that should not be written to the file are in light blue.
The latter corresponds to this layout:
Here, each colour corresponds to the local data coming from a different process, laid out on the 2D Cartesian grid.
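In code, these two masks become two different subarray types. The following is only a condensed sketch, not a complete program; it reuses nx, ny, npad, dim and coord from the listings (nx and ny include the halos) and writes nnx, nny for the interior sizes, as in the full code further down:
MPI_Datatype grid, view;                    /* mask 1 and mask 2; both still need MPI_Type_commit() */
int nnx = nx - 2 * npad, nny = ny - 2 * npad;

/* Mask 1: the interior of the local, halo-padded array -> what to take from memory */
int arrsize[2]  = { nx, ny };               /* full local array, halos included */
int gridsize[2] = { nnx, nny };             /* interior only */
int start[2]    = { npad, npad };           /* skip the halo layer */
MPI_Type_create_subarray(2, arrsize, gridsize, start,
                         MPI_ORDER_C, MPI_FLOAT, &grid);

/* Mask 2: this process's tile inside the global, halo-free grid -> where it goes in the file */
int arrsizeV[2]  = { dim[0] * nnx, dim[1] * nny };      /* global grid */
int gridsizeV[2] = { nnx, nny };                        /* one tile */
int startV[2]    = { coord[0] * nnx, coord[1] * nny };  /* tile origin */
MPI_Type_create_subarray(2, arrsizeV, gridsizeV, startV,
                         MPI_ORDER_C, MPI_FLOAT, &view);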
To understand what you need to create to reach this final result, you have to think backwards:
- Your final call to the IO routine should be MPI_File_write_all(fh, &A[0][0], 1, interior, MPI_STATUS_IGNORE);. So you have to have your interior type defined such that it excludes the halo boundary. Fortunately, the type grid you already created does exactly that, so we will use it.
- But now, for this MPI_File_write_all() call to produce the layout shown in the second picture, you must have the view on the file set accordingly. So we will create a new MPI type representing that view, and MPI_Type_create_subarray() is exactly what we need for it.
Here is the synopsis of this function:
int MPI_Type_create_subarray(int ndims,
                             const int array_of_sizes[],
                             const int array_of_subsizes[],
                             const int array_of_starts[],
                             int order,
                             MPI_Datatype oldtype,
                             MPI_Datatype *newtype)

Create a datatype for a subarray of a regular, multidimensional array

INPUT PARAMETERS
    ndims             - number of array dimensions (positive integer)
    array_of_sizes    - number of elements of type oldtype in each
                        dimension of the full array (array of positive integers)
    array_of_subsizes - number of elements of type oldtype in each dimension of
                        the subarray (array of positive integers)
    array_of_starts   - starting coordinates of the subarray in each dimension
                        (array of nonnegative integers)
    order             - array storage order flag (state)
    oldtype           - array element datatype (handle)

OUTPUT PARAMETERS
    newtype           - new datatype (handle)
For our 2D Cartesian file view, here are the input parameters we need:
- ndims: 2, since the grid is 2-dimensional
- array_of_sizes: the dimensions of the global array to output, namely { nnx*dim[0], nny*dim[1] }
- array_of_subsizes: the dimensions of the local share of the data to output, namely { nnx, nny }
- array_of_starts: the x,y starting coordinates of the local share within the global grid, namely { nnx*coord[0], nny*coord[1] }
- order: the ordering is C, so this must be MPI_ORDER_C
- oldtype: the data are floats, so this must be MPI_FLOAT
Now that we have our type for the file view, we just apply it with MPI_File_set_view(fh, 0, MPI_FLOAT, view, "native", MPI_INFO_NULL); and the magic is done.
Your full code then becomes:
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>

/* 2D array allocation */
float **alloc2D(int rows, int cols);

float **alloc2D(int rows, int cols) {
    int i, j;
    float *data = malloc(rows * cols * sizeof(float));
    float **arr2D = malloc(rows * sizeof(float *));
    for (i = 0; i < rows; i++) {
        arr2D[i] = &(data[i * cols]);
    }
    /* Initialize to zero */
    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++) {
            arr2D[i][j] = 0.0;
        }
    }
    return arr2D;
}

int main(void) {
    /* Creates 5x5 array of floats with padding layers and
     * attempts to write distributed arrays */
    /* Run toy example with 4 processes */
    int i, j, row, col;
    int nx = 5, ny = 5, npad = 1;
    int my_rank, nproc = 4;
    int dim[2] = {2, 2};    /* 2x2 cartesian grid */
    int period[2] = {0, 0};
    int coord[2];
    int reorder = 1;
    float **A = NULL;
    MPI_Comm grid_Comm;

    /* Initialize MPI */
    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* Establish cartesian topology */
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &grid_Comm);

    /* Get cartesian grid indices of processes */
    MPI_Cart_coords(grid_Comm, my_rank, 2, coord);
    row = coord[1];
    col = coord[0];

    /* Add ghost layers */
    nx += 2 * npad;
    ny += 2 * npad;
    A = alloc2D(nx, ny);

    /* Create derived datatype for interior grid (output grid) */
    MPI_Datatype grid;
    int start[2] = {npad, npad};
    int arrsize[2] = {nx, ny};
    int gridsize[2] = {nx - 2 * npad, ny - 2 * npad};
    MPI_Type_create_subarray(2, arrsize, gridsize,
                             start, MPI_ORDER_C, MPI_FLOAT, &grid);
    MPI_Type_commit(&grid);

    /* Fill interior grid */
    for (i = npad; i < nx - npad; i++) {
        for (j = npad; j < ny - npad; j++) {
            A[i][j] = my_rank + i;
        }
    }

    /* Create derived type for file view */
    MPI_Datatype view;
    int nnx = nx - 2 * npad, nny = ny - 2 * npad;
    int startV[2] = { coord[0] * nnx, coord[1] * nny };
    int arrsizeV[2] = { dim[0] * nnx, dim[1] * nny };
    int gridsizeV[2] = { nnx, nny };
    MPI_Type_create_subarray(2, arrsizeV, gridsizeV,
                             startV, MPI_ORDER_C, MPI_FLOAT, &view);
    MPI_Type_commit(&view);

    /* MPI IO */
    MPI_File fh;
    MPI_File_open(grid_Comm, "output.bin", MPI_MODE_CREATE | MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_FLOAT, view, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, &A[0][0], 1, grid, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    /* Cleanup */
    free(A[0]);
    free(A);
    MPI_Type_free(&view);
    MPI_Type_free(&grid);
    MPI_Finalize();

    return 0;
}
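To sanity-check the result, a small serial reader can dump the file. This is just a sketch, not part of the original answer; it assumes the toy setup above, i.e. a 2x2 process grid with a 5x5 interior per process, giving a 10x10 global grid of native floats:
/* check_output.c: hypothetical serial checker for the toy example.
 * Assumes output.bin holds the 10x10 global interior written row by row. */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    int i, j;
    int rows = 10, cols = 10;   /* dim[0]*nnx and dim[1]*nny for the toy case */
    float *buf = malloc(rows * cols * sizeof(float));
    FILE *fp = fopen("output.bin", "rb");
    if (fp == NULL) {
        fprintf(stderr, "cannot open output.bin\n");
        return 1;
    }
    if (fread(buf, sizeof(float), rows * cols, fp) != (size_t)(rows * cols)) {
        fprintf(stderr, "short read from output.bin\n");
        fclose(fp);
        return 1;
    }
    fclose(fp);
    /* Print the global grid row by row */
    for (i = 0; i < rows; i++) {
        for (j = 0; j < cols; j++) {
            printf("%5.1f ", buf[i * cols + j]);
        }
        printf("\n");
    }
    free(buf);
    return 0;
}
With the interiors filled as A[i][j] = my_rank + i, each printed row should show two constant blocks of five values, one per process column of the Cartesian grid; if instead whole 5x5 tiles appear one after another, the file view is not being applied as intended.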