NBody 问题并行化为相同的输入给出不同的结果
NBody problem parallelization gives different results for the same input
这是 NBody 问题的 MPI 版本。我已经有一个 OpenMP 版本,其结果与具有一个线程的 nbody 版本相同,但 MPI 结果不同,最重要的是在最后的交互中。在第一次交互时,输出非常相似,但在最后,输出差异很大。
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <vector>
#include <cstdlib>
#include <chrono>
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include <mpi.h>
#include "vector2.h"
/*
* Constant definitions for field dimensions, and particle masses
*/
const int fieldWidth = 1000;
const int fieldHalfWidth = fieldWidth >> 1;
const int fieldHeight = 1000;
const int fieldHalfHeight = fieldHeight >> 1;
const float minBodyMass = 2.5f;
const float maxBodyMassVariance = 5.f;
/*
* Particle structure
*/
struct Particle
{
float PositionX;
float PositionY;
float VelocityX;
float VelocityY;
float Mass;
Particle(float mass, float x, float y):
PositionX( x )
, PositionY( y )
, VelocityX( 0.f )
, VelocityY( 0.f )
, Mass ( mass )
{ }
};
/*
* Compute forces of particles exerted on one another
*/
void ComputeForces(std::vector<Particle> &p_bodies, float p_gravitationalTerm, float p_deltaT)
{
Vector2 direction,
force, acceleration;
Vector2 position1, position2,
velocity1, velocity2;
float distance;
for (size_t j = 0; j < p_bodies.size(); ++j)
{
position1 = 0.f, position2 = 0.f;
Particle &p1 = p_bodies[j];
position1 = Vector2(p1.PositionX, p1.PositionY);
force = 0.f, acceleration = 0.f;
for (size_t k = 0; k < p_bodies.size(); ++k)
{
if (k == j) continue;
Particle &p2 = p_bodies[k];
position2 = Vector2(p2.PositionX, p2.PositionY);
// Compute direction vector
direction = position2 - position1;
// Limit distance term to avoid singularities
distance = std::max<float>( 0.5f * (p2.Mass + p1.Mass), fabs(direction.Length()) );
// Accumulate force
force += direction / (distance * distance * distance) * p2.Mass;
}
// Compute acceleration for body
acceleration = force * p_gravitationalTerm;
// Integrate velocity (m/s)
p1.VelocityX += acceleration[0] * p_deltaT;
p1.VelocityY += acceleration[1] * p_deltaT;
}
}
/*
* Update particle positions
*/
void MoveBodies(std::vector<Particle> &p_bodies, float p_deltaT)
{
for (size_t j = 0; j < p_bodies.size(); ++j)
{
p_bodies[j].PositionX += p_bodies[j].VelocityX * p_deltaT;
p_bodies[j].PositionY += p_bodies[j].VelocityY * p_deltaT;
}
}
/*
* Commit particle masses and positions to file in CSV format
*/
void PersistPositions(const std::string &p_strFilename, std::vector<Particle> &p_bodies)
{
std::cout << "Writing to file: " << p_strFilename << std::endl;
std::string path = "./NBodyOutputMPI/" + p_strFilename;
std::ofstream output(path);
if (output.is_open())
{
for (int j = 0; j < p_bodies.size(); j++)
{
output << p_bodies[j].Mass << ", " <<
p_bodies[j].PositionX << ", " <<
p_bodies[j].PositionY << std::endl;
}
output.close();
}
else
std::cerr << "Unable to persist data to file:" << p_strFilename << std::endl;
}
int main(int argc, char** argv)
{
int particleCount = 1024;
int maxIteration = 1000;
float deltaT = 0.05f;
float gTerm = 1.f;
bool enableOutput = true; //Enable the output files or not
bool randomParticles = true; //If particles are built randomly or not
std::ifstream fileInput;
std::stringstream fileOutput;
std::vector<Particle> bodies;
std::string line;
std::string argument;
std::string fileName;
const char delimiter = ','; //To split the numbers.
//Variables for the struct.
MPI_Datatype particletype, oldtypes[1];
MPI_Aint offsets[1];
int blockcounts[1];
//Start MPI.
int rc = MPI_Init(&argc, &argv);
//Variables for MPI.
int rank, numtasks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
//Initialize variables for Type_struct.
offsets[0] = 0;
oldtypes[0] = MPI_FLOAT;
blockcounts[0] = 5;
//Define the struct.
MPI_Type_create_struct(1, blockcounts, offsets, oldtypes, &particletype);
MPI_Type_commit(&particletype);
//Introduce inputs
for(int i = 0; i < argc; i++){ //Walk through the arguments in the command line.
argument = argv[i]; //Argument in the command line.
if(argument == "-f"){
while(fileInput) //While the file is opened.
{
std::vector<float> vector (3);
while(std::getline(fileInput,line)){ //Process each line.
std::stringstream splitlines;
splitlines << line;
std::string word;
int counter = 0;
while(std::getline(splitlines,word,delimiter)){ //Process each word in each line.
float number = std::stof(word);
vector[counter] = number;
counter++;
}
bodies.push_back(Particle(vector[0],vector[1],vector[2])); //Add particle to the vector.
}
}
}
std::vector<Particle> particles;
std::vector<Particle> final;
if (rank == 0)
final.resize(bodies.size());
particles.resize(bodies.size() / numtasks);
/*if(bodies.size()%numtasks!=0){
MPI_Abort(MPI_COMM_WORLD, MPI_ERR_SIZE);
}*/
//Start the timer.
double start = MPI_Wtime();
for (int iteration = 0; iteration < maxIteration; ++iteration)
{
MPI_Scatter(bodies.data(), particles.size(), particletype, particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
ComputeForces(particles, gTerm, deltaT);
MoveBodies(particles, deltaT);
MPI_Gather(particles.data(), particles.size(), particletype, final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
if(enableOutput && rank == 0){
fileOutput.str(std::string());
fileOutput << "nbody_" << iteration << ".txt";
PersistPositions(fileOutput.str(), final);
}
}
//Finish the timer.
double finish = MPI_Wtime();
//Calculate and show the time.
if(rank == 0){
double elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed << " s\n";
}
//End MPI.
MPI_Finalize();
return 0;
}
当 运行 多于一个 MPI 等级时,您的算法正在解决完全不同的问题,因此您会得到不同的结果。
您正在通过将粒子阵列分成几部分来执行域分解,然后每个等级计算的力仅考虑其自己子域中的粒子:
MPI_Scatter(bodies.data(), particles.size(), particletype,
particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
ComputeForces(particles, gTerm, deltaT); // <---- only knows about its own particles
MoveBodies(particles, deltaT);
MPI_Gather(particles.data(), particles.size(), particletype,
final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
整个计算过程中不考虑域间交互。因此,您正在模拟许多非相互作用粒子簇的轨迹,这与原始问题不同。一开始差异很小,但随后集群开始漂移,差异呈指数增长。
相比之下,当您 运行 使用单个 MPI 等级时,只有一个包含所有粒子的子域,您仍在解决原始问题。这同样适用于 OpenMP 版本,其中每个线程都可以看到系统中的所有粒子。
这是 NBody 问题的 MPI 版本。我已经有一个 OpenMP 版本,其结果与具有一个线程的 nbody 版本相同,但 MPI 结果不同,最重要的是在最后的交互中。在第一次交互时,输出非常相似,但在最后,输出差异很大。
#include <stdlib.h>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <vector>
#include <cstdlib>
#include <chrono>
#include <stdio.h>
#include <string.h>
#include <omp.h>
#include <mpi.h>
#include "vector2.h"
/*
* Constant definitions for field dimensions, and particle masses
*/
const int fieldWidth = 1000;
const int fieldHalfWidth = fieldWidth >> 1;
const int fieldHeight = 1000;
const int fieldHalfHeight = fieldHeight >> 1;
const float minBodyMass = 2.5f;
const float maxBodyMassVariance = 5.f;
/*
* Particle structure
*/
struct Particle
{
float PositionX;
float PositionY;
float VelocityX;
float VelocityY;
float Mass;
Particle(float mass, float x, float y):
PositionX( x )
, PositionY( y )
, VelocityX( 0.f )
, VelocityY( 0.f )
, Mass ( mass )
{ }
};
/*
* Compute forces of particles exerted on one another
*/
void ComputeForces(std::vector<Particle> &p_bodies, float p_gravitationalTerm, float p_deltaT)
{
Vector2 direction,
force, acceleration;
Vector2 position1, position2,
velocity1, velocity2;
float distance;
for (size_t j = 0; j < p_bodies.size(); ++j)
{
position1 = 0.f, position2 = 0.f;
Particle &p1 = p_bodies[j];
position1 = Vector2(p1.PositionX, p1.PositionY);
force = 0.f, acceleration = 0.f;
for (size_t k = 0; k < p_bodies.size(); ++k)
{
if (k == j) continue;
Particle &p2 = p_bodies[k];
position2 = Vector2(p2.PositionX, p2.PositionY);
// Compute direction vector
direction = position2 - position1;
// Limit distance term to avoid singularities
distance = std::max<float>( 0.5f * (p2.Mass + p1.Mass), fabs(direction.Length()) );
// Accumulate force
force += direction / (distance * distance * distance) * p2.Mass;
}
// Compute acceleration for body
acceleration = force * p_gravitationalTerm;
// Integrate velocity (m/s)
p1.VelocityX += acceleration[0] * p_deltaT;
p1.VelocityY += acceleration[1] * p_deltaT;
}
}
/*
* Update particle positions
*/
void MoveBodies(std::vector<Particle> &p_bodies, float p_deltaT)
{
for (size_t j = 0; j < p_bodies.size(); ++j)
{
p_bodies[j].PositionX += p_bodies[j].VelocityX * p_deltaT;
p_bodies[j].PositionY += p_bodies[j].VelocityY * p_deltaT;
}
}
/*
* Commit particle masses and positions to file in CSV format
*/
void PersistPositions(const std::string &p_strFilename, std::vector<Particle> &p_bodies)
{
std::cout << "Writing to file: " << p_strFilename << std::endl;
std::string path = "./NBodyOutputMPI/" + p_strFilename;
std::ofstream output(path);
if (output.is_open())
{
for (int j = 0; j < p_bodies.size(); j++)
{
output << p_bodies[j].Mass << ", " <<
p_bodies[j].PositionX << ", " <<
p_bodies[j].PositionY << std::endl;
}
output.close();
}
else
std::cerr << "Unable to persist data to file:" << p_strFilename << std::endl;
}
int main(int argc, char** argv)
{
int particleCount = 1024;
int maxIteration = 1000;
float deltaT = 0.05f;
float gTerm = 1.f;
bool enableOutput = true; //Enable the output files or not
bool randomParticles = true; //If particles are built randomly or not
std::ifstream fileInput;
std::stringstream fileOutput;
std::vector<Particle> bodies;
std::string line;
std::string argument;
std::string fileName;
const char delimiter = ','; //To split the numbers.
//Variables for the struct.
MPI_Datatype particletype, oldtypes[1];
MPI_Aint offsets[1];
int blockcounts[1];
//Start MPI.
int rc = MPI_Init(&argc, &argv);
//Variables for MPI.
int rank, numtasks;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
//Initialize variables for Type_struct.
offsets[0] = 0;
oldtypes[0] = MPI_FLOAT;
blockcounts[0] = 5;
//Define the struct.
MPI_Type_create_struct(1, blockcounts, offsets, oldtypes, &particletype);
MPI_Type_commit(&particletype);
//Introduce inputs
for(int i = 0; i < argc; i++){ //Walk through the arguments in the command line.
argument = argv[i]; //Argument in the command line.
if(argument == "-f"){
while(fileInput) //While the file is opened.
{
std::vector<float> vector (3);
while(std::getline(fileInput,line)){ //Process each line.
std::stringstream splitlines;
splitlines << line;
std::string word;
int counter = 0;
while(std::getline(splitlines,word,delimiter)){ //Process each word in each line.
float number = std::stof(word);
vector[counter] = number;
counter++;
}
bodies.push_back(Particle(vector[0],vector[1],vector[2])); //Add particle to the vector.
}
}
}
std::vector<Particle> particles;
std::vector<Particle> final;
if (rank == 0)
final.resize(bodies.size());
particles.resize(bodies.size() / numtasks);
/*if(bodies.size()%numtasks!=0){
MPI_Abort(MPI_COMM_WORLD, MPI_ERR_SIZE);
}*/
//Start the timer.
double start = MPI_Wtime();
for (int iteration = 0; iteration < maxIteration; ++iteration)
{
MPI_Scatter(bodies.data(), particles.size(), particletype, particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
ComputeForces(particles, gTerm, deltaT);
MoveBodies(particles, deltaT);
MPI_Gather(particles.data(), particles.size(), particletype, final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
if(enableOutput && rank == 0){
fileOutput.str(std::string());
fileOutput << "nbody_" << iteration << ".txt";
PersistPositions(fileOutput.str(), final);
}
}
//Finish the timer.
double finish = MPI_Wtime();
//Calculate and show the time.
if(rank == 0){
double elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed << " s\n";
}
//End MPI.
MPI_Finalize();
return 0;
}
当 运行 多于一个 MPI 等级时,您的算法正在解决完全不同的问题,因此您会得到不同的结果。
您正在通过将粒子阵列分成几部分来执行域分解,然后每个等级计算的力仅考虑其自己子域中的粒子:
MPI_Scatter(bodies.data(), particles.size(), particletype,
particles.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
ComputeForces(particles, gTerm, deltaT); // <---- only knows about its own particles
MoveBodies(particles, deltaT);
MPI_Gather(particles.data(), particles.size(), particletype,
final.data(), particles.size(), particletype, 0, MPI_COMM_WORLD);
整个计算过程中不考虑域间交互。因此,您正在模拟许多非相互作用粒子簇的轨迹,这与原始问题不同。一开始差异很小,但随后集群开始漂移,差异呈指数增长。
相比之下,当您 运行 使用单个 MPI 等级时,只有一个包含所有粒子的子域,您仍在解决原始问题。这同样适用于 OpenMP 版本,其中每个线程都可以看到系统中的所有粒子。