NBody 问题并行化为相同的输入给出不同的结果

Question

这是 NBody 问题的 MPI 版本。我已经有一个 OpenMP 版本，其结果与具有一个线程的 nbody 版本相同，但 MPI 结果不同，最重要的是在最后的交互中。在第一次交互时，输出非常相似，但在最后，输出差异很大。

#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <vector>
#include <cstdlib>
#include <chrono> 
#include <stdio.h> 
#include <string.h> 
#include <omp.h>
#include <mpi.h>
#include "vector2.h"

/*
 * Constant definitions for field dimensions, and particle masses
 */
const int fieldWidth = 1000;
const int fieldHalfWidth = fieldWidth >> 1;
const int fieldHeight = 1000;
const int fieldHalfHeight = fieldHeight >> 1;

const float minBodyMass = 2.5f;
const float maxBodyMassVariance = 5.f;

/*
 * Particle structure
 */
struct Particle
{
    float PositionX;
  float PositionY;
    float VelocityX;
  float VelocityY;
    float   Mass;

    Particle(float mass, float x, float y):
        PositionX( x )
    , PositionY( y )
        , VelocityX( 0.f )
    , VelocityY( 0.f )
        , Mass ( mass )
    { }
    
};

/*
 * Compute forces of particles exerted on one another
 */
void ComputeForces(std::vector<Particle> &p_bodies, float p_gravitationalTerm, float p_deltaT)
{
    Vector2 direction,
        force, acceleration;

  Vector2 position1, position2,
    velocity1, velocity2;

    float distance;

    for (size_t j = 0; j < p_bodies.size(); ++j)
    {
    position1 = 0.f, position2 = 0.f;
        
    Particle &p1 = p_bodies[j];
      position1 = Vector2(p1.PositionX, p1.PositionY);
    
        force = 0.f, acceleration = 0.f;
        for (size_t k = 0; k < p_bodies.size(); ++k)
        {
            if (k == j) continue;
        
            Particle &p2 = p_bodies[k];
            position2 = Vector2(p2.PositionX, p2.PositionY);
      
            // Compute direction vector
            direction = position2 - position1;
            
            // Limit distance term to avoid singularities
            distance = std::max<float>( 0.5f * (p2.Mass + p1.Mass), fabs(direction.Length()) );
            
            // Accumulate force
            force += direction / (distance * distance * distance) * p2.Mass; 
        }
        // Compute acceleration for body 
        acceleration = force * p_gravitationalTerm;
        
        // Integrate velocity (m/s)
        p1.VelocityX += acceleration[0] * p_deltaT;
    p1.VelocityY += acceleration[1] * p_deltaT;
    }
}

/*
 * Update particle positions
 */
void MoveBodies(std::vector<Particle> &p_bodies, float p_deltaT)
{
    for (size_t j = 0; j < p_bodies.size(); ++j)
    {
        p_bodies[j].PositionX += p_bodies[j].VelocityX * p_deltaT;
     p_bodies[j].PositionY += p_bodies[j].VelocityY * p_deltaT;
    }
}

/*
 * Commit particle masses and positions to file in CSV format
 */
void PersistPositions(const std::string &p_strFilename, std::vector<Particle> &p_bodies)
{
    std::cout << "Writing to file: " << p_strFilename << std::endl;
    std::string path = "./NBodyOutputMPI/" + p_strFilename;
    std::ofstream output(path);
    
    if (output.is_open())
    {   
        for (int j = 0; j < p_bodies.size(); j++)
        {
            output <<   p_bodies[j].Mass << ", " <<
                p_bodies[j].PositionX << ", " <<
                p_bodies[j].PositionY << std::endl;
        }
        
        output.close();
    }
    else
        std::cerr << "Unable to persist data to file:" << p_strFilename << std::endl;

}



int main(int argc, char** argv)
{
    int particleCount = 1024;
    int maxIteration = 1000;
    float deltaT = 0.05f;
    float gTerm = 1.f;
    bool enableOutput = true; //Enable the output files or not
    bool randomParticles = true; //If particles are built randomly or not

    std::ifstream fileInput;
    std::stringstream fileOutput;
    std::vector<Particle> bodies;
    
    std::string line;
    std::string argument;
    std::string fileName;
    
    const char delimiter = ','; //To split the numbers.
  
  //Variables for the struct.
  MPI_Datatype particletype, oldtypes[1]; 
  MPI_Aint offsets[1];
  int blockcounts[1];
  
  //Start MPI.
  int rc = MPI_Init(&argc, &argv); 
  
  //Variables for MPI.
  int rank, numtasks;
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
  MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
  
  //Initialize variables for Type_struct.
  offsets[0] = 0;
  oldtypes[0] = MPI_FLOAT;
  blockcounts[0] = 5;
  
  //Define the struct.
  MPI_Type_create_struct(1, blockcounts, offsets, oldtypes, &particletype);
  MPI_Type_commit(&particletype);
    
//Introduce inputs

for(int i = 0; i < argc; i++){ //Walk through the arguments in the command line.
        argument = argv[i]; //Argument in the command line.       
      if(argument == "-f"){
        while(fileInput) //While the file is opened.
          {
            std::vector<float> vector (3);
              while(std::getline(fileInput,line)){ //Process each line.
                    std::stringstream splitlines;
                  splitlines << line;
                  std::string word;
                    int counter = 0;

                    while(std::getline(splitlines,word,delimiter)){ //Process each word in each line.
                    float number = std::stof(word);
                    vector[counter] = number;
                    counter++;
                    }   
                    
                  bodies.push_back(Particle(vector[0],vector[1],vector[2])); //Add particle to the vector.
              }         
      }
    }       



  std::vector<Particle> particles;
  std::vector<Particle> final;
  
  if (rank == 0)
    final.resize(bodies.size());
    
  particles.resize(bodies.size() / numtasks);
  
  /*if(bodies.size()%numtasks!=0){
    MPI_Abort(MPI_COMM_WORLD, MPI_ERR_SIZE);
  }*/
  
  //Start the timer.
    double start = MPI_Wtime();
    for (int iteration = 0; iteration < maxIteration; ++iteration)
    {
    MPI_Scatter(bodies.data(), particles.size(), particletype, particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

      ComputeForces(particles, gTerm, deltaT);
      MoveBodies(particles, deltaT);
      
    MPI_Gather(particles.data(), particles.size(), particletype, final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

      if(enableOutput && rank == 0){
            fileOutput.str(std::string());
            fileOutput << "nbody_" << iteration << ".txt";
            PersistPositions(fileOutput.str(), final);
        }
    }
  //Finish the timer.
    double finish = MPI_Wtime();

  //Calculate and show the time.
  if(rank == 0){
      double elapsed = finish - start;
      std::cout << "Elapsed time: " << elapsed << " s\n";
  }
    
  //End MPI.
  MPI_Finalize();
 
    return 0;
}

Answer 1

当运行多于一个 MPI 等级时，您的算法正在解决完全不同的问题，因此您会得到不同的结果。

您正在通过将粒子阵列分成几部分来执行域分解，然后每个等级计算的力仅考虑其自己子域中的粒子：

MPI_Scatter(bodies.data(), particles.size(), particletype,
            particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

ComputeForces(particles, gTerm, deltaT); // <---- only knows about its own particles
MoveBodies(particles, deltaT);
      
MPI_Gather(particles.data(), particles.size(), particletype,
           final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);

整个计算过程中不考虑域间交互。因此，您正在模拟许多非相互作用粒子簇的轨迹，这与原始问题不同。一开始差异很小，但随后集群开始漂移，差异呈指数增长。

相比之下，当您运行使用单个 MPI 等级时，只有一个包含所有粒子的子域，您仍在解决原始问题。这同样适用于 OpenMP 版本，其中每个线程都可以看到系统中的所有粒子。

NBody 问题并行化为相同的输入给出不同的结果

NBody problem parallelization gives different results for the same input

c++

mpi

openmpi