我自己定义的 Class 类型无法与 MPI_Scatterv 和 MPI_Gatherv 正常配合使用

My own class type is not working well with MPI_Scatterv and MPI_Gatherv

我在一个 .h 文件中定义了自己的类 Class2,并用它来构建如下代码所示的结构体。(所有代码均已验证完整)

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <sys/time.h>
#include <string.h>
#include <numeric>
#include <iterator> 
#include "class2.h"
#define MPI_CLASS2 MPI_FLOAT
using namespace std;


// Record that is scattered/gathered between ranks: two Class2 members
// followed by one float, i.e. five floats in total.
struct Pcle
{
    Class2 Pot;
    Class2 Vy;
    float  ss;

    // Default constructor: zero every field so that elements which have not
    // (yet) been filled by a receive never expose indeterminate values.
    Pcle()
        : Pot(0.f, 0.f)
        , Vy(0.f, 0.f)
        , ss(0.f)
    {}

    // Builds a record with ss = M, Pot = (Px, Py) and Vy = (0, 0).
    // NOTE: Px and Py are declared int, so a fractional argument is
    // truncated at the call site before it is converted to float.
    Pcle(float M, int Px, int Py)
        : Pot(static_cast<float>(Px), static_cast<float>(Py))
        , Vy(0.f, 0.f)
        , ss(M)
    {}
};

如有必要,我也会提供 .h 文件。问题是:通过 MPI_Scatterv 和 MPI_Gatherv 传递的值从逻辑上讲不应该改变,但最后却发生了变化(至少其中一部分是这样)。当我把 Class2 换成简单的 int(显然这也改变了整个设计思路)时,代码工作正常。为了便于理解,下面是我的 main 文件;把 Class2 换成 int 之后,它能产生正确的结果。
 int main(int argc, char *argv[]){

    int size, rank, chunk,j=0;
    int recvcount, part;

    vector<Pcle> in;
    vector<Pcle> c;
    vector<Pcle> f;

     MPI_Init(&argc, &argv);
     MPI_Comm_rank(MPI_COMM_WORLD,&rank);
     MPI_Comm_size(MPI_COMM_WORLD,&size);

        MPI_Datatype MPI_PART, oldtypes[2];
        int blockcounts[2];

       // MPI_Aint type used to be consistent with syntax of
       // MPI_Type_extent routine
        MPI_Aint offsets[2], extent;

        MPI_Status stat;

        offsets[0] = 0;
        oldtypes[0] = MPI_CLASS2;
        blockcounts[0] = 2;

        // Setup description of the 1 MPI_FLOAT field, Mass
        // Need to first figure offset by getting size of MPI_Vector2
       MPI_Type_extent(MPI_CLASS2, &extent);
        offsets[1] = 2*extent;
       oldtypes[1] = MPI_FLOAT;
       blockcounts[1] = 1;

        // Define structured type and commit it
        MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MPI_PART);
        MPI_Type_commit(&MPI_PART);

    if (rank ==0){ //initalizing stuff in rank ==0
        part = 64;
        chunk = floorf(part/size);

        for (int i=0; i< part; i++){
            in.push_back(Pcle(i,2*i,2.5*i));
                      f.push_back(Pcle());
        }

     }
    //broadcasting needed variables
    MPI_Bcast(&chunk, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&part,1,MPI_INT,0,MPI_COMM_WORLD);

    int sendcount[size];
   //creating the arguments for scatterv
   for(size_t ct = 0; ct<size; ct++){
         if (ct < part%size)
            sendcount[ct] = chunk+1;
        else
           sendcount[ct] = chunk;
    }

    int displs[size];

    displs[0]=0;

    for ( j =1; j< size; j++)
    displs[j] = displs[j-1] +sendcount[j];

    recvcount = sendcount[rank];


    c.reserve(recvcount);

    MPI_Scatterv(in.data(), sendcount, displs, MPI_PART, c.data(), recvcount, MPI_PART, 0, MPI_COMM_WORLD);

    for(int iteration =0; iteration <10; ++iteration){

        //two functions which changes some of the data in Pot but leaves ss the same and can be left out
        MPI_Gatherv(c.data(), sendcount[rank], MPI_PART, f.data(), sendcount, displs, MPI_PART, 0, MPI_COMM_WORLD);

    }
    if(rank ==0)
        for (int k=0; k<64; k++)
            cout<<f[k].ss<<" ";

    MPI_Finalize(); 
    return 0;

}

这段代码的输出是

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 Program ended with exit code: 0

最后若干个值仍然为零,这是错误的。随后我完全去掉了自己的类,把结构体中的 Class2 类型换成 int 类型,并相应地修改了代码,再次运行后结果是正确的:

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 Program ended with exit code: 0

据我判断,我没有为 MPI 正确定义这个类对应的数据类型。我用预处理器指令写的 #define MPI_CLASS2 MPI_FLOAT 可能有问题。

这是头文件。有了它,应该就可以编译代码并复现上面的输出。

           #pragma once

 #include <algorithm>
 #include <assert.h>
 #include <math.h>

 // Two-component float vector. The same two floats can be accessed as
 // Element[0..1], as X/Y or as U/V.
 struct Class2
 {
     union
     {
         float Element[2];
         struct { float X, Y; };
         struct { float U, V; };
     };

     // Leaves both components uninitialised.
     Class2() {}

     // Sets both components to the same value.
     Class2(float p_fValue)
     : X(p_fValue), Y(p_fValue) {}

     Class2(float p_x, float p_y)
     : X(p_x), Y(p_y) {}

     Class2(const Class2 &p_vector)
     : X(p_vector.X), Y(p_vector.Y) {}

     // Component access by index; the index is not range-checked.
     float operator[](int p_nIndex) const { return Element[p_nIndex]; }
     float& operator[](int p_nIndex) { return Element[p_nIndex]; }

     inline void Set(float p_x, float p_y) {
         X = p_x; Y = p_y;
     }

     // Component-wise comparison: true when neither component differs by
     // more than p_epsilon.
     inline bool Equals(const Class2 &p_vector, const float p_epsilon = 1e-5f) const
     {
         if (fabs(X - p_vector.X) > p_epsilon) return false;
         if (fabs(Y - p_vector.Y) > p_epsilon) return false;

         return true;
     }

     Class2& operator=(const Class2 &p_vector)
     {
         X = p_vector.X;
         Y = p_vector.Y;

         return *this;
     }

     // Equality is approximate: it uses Equals() with its default epsilon.
     inline bool operator==(const Class2 &p_vector) const {
         return Equals(p_vector);
     }

     inline bool operator!=(const Class2& p_vector) const {
         return !(*this == p_vector);
     }

     // Scales both components by p_fValue.
     inline Class2 operator*(float p_fValue) const {
         return Class2(p_fValue * X, p_fValue * Y);
     }

     // Divides both components by p_fValue; asserts that it is non-zero.
     inline Class2 operator/(float p_fValue) const
     {
         assert(p_fValue != 0.f);
         return Class2(*this * (1.0f / p_fValue));
     }

     // Component-wise product.
     inline Class2 operator*(const Class2 &p_vector) const {
         return Class2(p_vector.X * X, p_vector.Y * Y);
     }

     inline Class2 operator+(const Class2 &p_vector) const {
         return Class2(X + p_vector.X, Y + p_vector.Y);
     }

     inline Class2 operator-(const Class2 &p_vector) const {
         return Class2(X - p_vector.X, Y - p_vector.Y);
     }

     inline Class2 operator-(void) const {
         return Class2(-X, -Y);
     }

     inline Class2& operator*=(float p_fValue) {
         return *this = *this * p_fValue;
     }

     inline Class2& operator*=(const Class2 &p_vector) {
         return *this = *this * p_vector;
     }

     inline Class2& operator/=(float p_fValue) {
         return *this = *this / p_fValue;
     }

     inline Class2& operator+=(const Class2 &p_vector) {
         return *this = *this + p_vector;
     }

     inline Class2& operator-=(const Class2 &p_vector) {
         return *this = *this - p_vector;
     }

     inline float MaxComponent() const {
         return std::max(X, Y);
     }

     inline float MinComponent() const {
         return std::min(X, Y);
     }

     inline float MaxAbsComponent() const {
         return std::max(fabs(X), fabs(Y));
     }

     inline float MinAbsComponent() const
     {
         return std::min(fabs(X), fabs(Y));
     }

     // Component-wise maximum of two vectors.
     static Class2 Max(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return Class2(std::max(p_vector1.X, p_vector2.X),
                       std::max(p_vector1.Y, p_vector2.Y));
     }

     // Component-wise minimum of two vectors.
     static Class2 Min(const Class2 &p_vector1, const Class2 &p_vector2)
     {
         return Class2(std::min(p_vector1.X, p_vector2.X),
                       std::min(p_vector1.Y, p_vector2.Y));
     }

     // Euclidean length.
     inline float Length(void) const {
         return sqrt(X * X + Y * Y);
     }

     inline float LengthSquared(void) const {
         return X * X + Y * Y;
     }

     // Rescales this vector in place to unit length.
     inline void Normalize(void) {
         *this = Class2::Normalize(*this);
     }

     inline float Dot(const Class2 &p_vector) const {
         return Class2::Dot(*this, p_vector);
     }

     inline float AbsDot(const Class2 &p_vector) const {
         return Class2::AbsDot(*this, p_vector);
     }

     static float Dot(const Class2 &p_vector1, const Class2 &p_vector2) {
         return p_vector1.X * p_vector2.X + p_vector1.Y * p_vector2.Y;
     }

     // Absolute value of the dot product.
     static float AbsDot(const Class2 &p_vector1, const Class2 &p_vector2) {
         return fabs(p_vector1.X * p_vector2.X +
                     p_vector1.Y * p_vector2.Y);
     }

     // Returns p_vector scaled to unit length. The divisor is Length()
     // itself: Length() already takes the square root, so applying sqrt a
     // second time would not yield a unit vector. A zero-length input trips
     // the assertion in operator/.
     static Class2 Normalize(const Class2 &p_vector) {
         return p_vector / p_vector.Length();
     }

     static float DistanceSquared(const Class2 &p_point1, const Class2 &p_point2) {
         return (p_point2 - p_point1).LengthSquared();
     }

     static float Distance(const Class2 &p_point1, const Class2 &p_point2) {
         return (p_point2 - p_point1).Length();
     }
 };

// Scalar-on-the-left product: multiplies each component of p_vector by
// p_fValue and returns the result as a new vector.
inline Class2 operator*(float p_fValue, const Class2 &p_vector) {
    const float scaledX = p_fValue * p_vector.X;
    const float scaledY = p_fValue * p_vector.Y;
    return Class2(scaledX, scaledY);
}

首先,这段代码容易让人混淆。您不应使用 MPI_ 前缀来命名自己的符号,因为该前缀是为 MPI 实现保留的,这样做可能导致名称冲突,并带来难以调试的后果。其次,您将 MPI_CLASS2 定义为 MPI_FLOAT,而 Class2 是一个具有多个字段的结构的名称,并不只是一个标量,这确实令人困惑。请改用 MPI_Type_create_struct 或 MPI_Type_contiguous。由于 MPI_CLASS2 就是 MPI_FLOAT,MPI_PART 的类型映射与实际的数据布局并不对应,因此 MPI 在分散(scatter)和聚集(gather)操作中计算出的偏移量是错误的。

最简单的解决方案是将 MPI_CLASS2 的定义替换为以下内容:

// Replaces the `#define MPI_CLASS2 MPI_FLOAT` line: describes one Class2
// value as two consecutive MPI_FLOATs.
MPI_Datatype MPI_CLASS2;
MPI_Type_contiguous(2, MPI_FLOAT, &MPI_CLASS2);

更完善的解决方案需要注意:MPI 数据类型的范围(extent)并不总是与语言中对应类型的真实大小一致,因此 offsets[1] = 2*extent; 这类计算并不总是正确。建议改为实例化一个对象,并分别获取每个字段的地址(或者使用 <cstddef> 头文件中的 C 标准宏 offsetof):

// Datatype for one Class2 value: two consecutive floats.
MPI_Datatype dt_class2;
MPI_Type_contiguous(2, MPI_FLOAT, &dt_class2);

// Handles are of type MPI_Datatype (there is no type called MPI_Type).
MPI_Datatype dt_temp, dt_pcle;
MPI_Aint offsets[3], base;
int blockcounts[3];
MPI_Datatype oldtypes[3];

// Measure the real member offsets on an actual object, relative to the
// address of the object itself, instead of deriving them from extents.
Pcle dummy;
MPI_Get_address(&dummy, &base);
MPI_Get_address(&dummy.Pot, &offsets[0]);
MPI_Get_address(&dummy.Vy, &offsets[1]);
MPI_Get_address(&dummy.ss, &offsets[2]);

offsets[0] -= base;
blockcounts[0] = 1;
oldtypes[0] = dt_class2;

offsets[1] -= base;
blockcounts[1] = 1;
oldtypes[1] = dt_class2;

offsets[2] -= base;
blockcounts[2] = 1;
oldtypes[2] = MPI_FLOAT;

MPI_Type_create_struct(3, blockcounts, offsets, oldtypes, &dt_temp);
// Force the extent to sizeof(Pcle) so that consecutive array elements are
// addressed correctly even if the compiler inserts padding.
MPI_Type_create_resized(dt_temp, 0, sizeof(Pcle), &dt_pcle);
MPI_Type_commit(&dt_pcle);

// The intermediate types are no longer needed once dt_pcle is committed.
MPI_Type_free(&dt_temp);
MPI_Type_free(&dt_class2);

// Now dt_pcle is ready to use

MPI_Type_create_resized 用于确保 MPI 数据类型的范围(extent)与结构 Pcle 的大小完全一致,从而正确处理编译器可能插入的任何填充(在你的情况下不会有填充,因为结构中只有 float)。