主要作业正常终止,但 1 个进程返回非零退出代码。根据用户指示,作业已中止
Primary job terminated normally, but 1 process returned a non-zero exit code. Per user-direction, the job has been aborted
#include <stdio.h>
#include <iostream>
using namespace std;
// Translates a linear element index into a (row, column) pair for a matrix
// with n columns. The row is written to i and the column to j.
// For non-negative elem_num the result equals i = elem_num / n and
// j = elem_num % n.
void findCoords(int elem_num, int n, int& i, int& j){
    // Work on the one-based position: a zero remainder then marks the last
    // column of a row.
    const int position = elem_num + 1;
    const int quotient = position / n;
    const int remainder = position % n;

    if (remainder == 0) {
        i = quotient - 1;
    } else {
        i = quotient;
    }
    j = (remainder - 1 + n) % n;
}
// Single-process rehearsal of the MPI matrix multiplication C = A * B.
// A loop over `rank` plays the role of the MPI processes: simulated rank r
// computes the output elements whose linear index is r, r + size,
// r + 2*size, ..., stages them as (value, row, column) triples and then
// copies them into C, as a gathering rank would. The finished matrix is
// printed to stdout, one row per line.
int main(int argc, char const *argv[])
{
    const int m=10,n=10,o=10,p=10;
    double A[m][n]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double B[o][p]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double C[m][p];

    const int size=10;               // number of simulated ranks
    const int total_elements=m*p;    // number of entries in C

    // Staging area for one rank's (value, row, column) triples. It is sized
    // for the worst case (a single rank owning every element) so that the
    // bound is a compile-time constant; a runtime-sized array is not
    // standard C++.
    double values_coords[total_elements][3];

    for (int rank=0;rank<size;rank++){
        // Count of the indices rank, rank+size, ... that are < total_elements.
        const int elements_number=((total_elements-1)-rank)/size+1;

        int oneD_idx=rank;
        for (int a=0;a<elements_number;a++){
            int i, j;
            findCoords(oneD_idx, p, i, j);
            oneD_idx+=size;

            // Dot product of row i of A with column j of B.
            double s=0;
            for (int k=0;k<n;k++){
                s+=A[i][k]*B[k][j];
            }
            values_coords[a][0]=s;
            values_coords[a][1]=i;
            values_coords[a][2]=j;
        }

        // "Gather" step: place this rank's results into the output matrix.
        for (int x=0;x<elements_number;x++){
            const int i=static_cast<int>(values_coords[x][1]);
            const int j=static_cast<int>(values_coords[x][2]);
            C[i][j]=values_coords[x][0];
        }
    }

    for (int i=0;i<m;i++){
        for (int j=0;j<p;j++){
            std::cout << C[i][j] << " ";
        }
        std::cout << '\n';
    }
    return 0;
}
上面的代码用循环来模拟 MPI,实现矩阵乘法。思路是:每个进程(rank)负责计算输出矩阵中一组特定坐标上的元素;进程数在 2 到 m*p 之间时都适用,其中 m 和 p 是输出矩阵的维度。这段代码运行正常。
然而,当我用 MPI 运行下面的代码时,总是得到第二段代码之后所示的错误。
#include <stdio.h>
#include <mpi.h>
#include <iostream>
using namespace std;
// Translates a linear element index into a (row, column) pair for a matrix
// with n columns. The row is written to i and the column to j.
// For non-negative oneD_idx the result equals i = oneD_idx / n and
// j = oneD_idx % n.
void findCoords(int oneD_idx, int n, int& i, int& j){
    // Work on the one-based position: a zero remainder then marks the last
    // column of a row.
    const int position = oneD_idx + 1;
    const int quotient = position / n;
    const int remainder = position % n;

    if (remainder == 0) {
        i = quotient - 1;
    } else {
        i = quotient;
    }
    j = (remainder - 1 + n) % n;
}
int main( int argc, char *argv[])
{
int rank, size;
/*const int m=10,n=10,o=10,p=10;
double A[m][n]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double B[o][p]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};*/
const int m=3,n=3,o=3,p=3;
double A[m][n]={
{1,2,3},
{1,2,3},
{1,2,3}
};
double B[o][p]={
{1,2,3},
{1,2,3},
{1,2,3}
};
if (n!=o)
{
printf("Can not multiply because of the wrong shape!");
return 0;
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int oneD_idx=rank;
int i, j;
int elements_number=((m*p-1)-rank)/size+1;
double values_coords[elements_number][3];
for (int a=0;a<elements_number;a++){
findCoords(oneD_idx, p, i, j);
oneD_idx+=size;
double s=0;
for (int k=0;k<n;k++){
s+=A[i][k]*B[k][j];
}
values_coords[a][0]=s;
values_coords[a][1]=i;
values_coords[a][2]=j;
}
MPI_Status status1, status2;
if (rank==0)
{
double C[m][p];
for (int x=0;x<elements_number;x++){
i=values_coords[x][1];
j=values_coords[x][2];
double value=values_coords[x][0];
C[i][j]=value;
}
for (int r=1;r<size;r++){
int recv_elements_number;
MPI_Recv( &recv_elements_number , 1 , MPI_INT , r , 403 , MPI_COMM_WORLD , &status1);
int recv_values_coords[recv_elements_number][3];
MPI_Recv( &recv_values_coords , recv_elements_number*3 , MPI_DOUBLE , r , 404 , MPI_COMM_WORLD , &status2);
for (int x=0;x<recv_elements_number;x++){
i=recv_values_coords[x][1];
j=recv_values_coords[x][2];
double value=recv_values_coords[x][0];
C[i][j]=value;
}
}
for (int i=0;i<m;i++){
for (int j=0;j<p;j++){
cout << C[i][j]<<" ";
}
cout<<endl;
}
}
else
{
MPI_Send( &elements_number , 1 , MPI_INT , 0 , 403 , MPI_COMM_WORLD);
MPI_Send( &values_coords , elements_number*3 , MPI_DOUBLE , 0 , 404 , MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
我使用 MPI_Send 和 MPI_Recv 来完成这项任务。看起来是通信环节出了问题,我卡住了。
正如 gilles-gouaillardet 在评论中指出的,问题出在 int 与 double 的类型不匹配上:接收缓冲区被声明为 int 数组,却按 MPI_DOUBLE 接收数据。我应该改用 struct。
#include <stdio.h>
#include <iostream>
using namespace std;
// Translates a linear element index into a (row, column) pair for a matrix
// with n columns. The row is written to i and the column to j.
// For non-negative elem_num the result equals i = elem_num / n and
// j = elem_num % n.
void findCoords(int elem_num, int n, int& i, int& j){
    // Work on the one-based position: a zero remainder then marks the last
    // column of a row.
    const int position = elem_num + 1;
    const int quotient = position / n;
    const int remainder = position % n;

    if (remainder == 0) {
        i = quotient - 1;
    } else {
        i = quotient;
    }
    j = (remainder - 1 + n) % n;
}
// Single-process rehearsal of the MPI matrix multiplication C = A * B.
// A loop over `rank` plays the role of the MPI processes: simulated rank r
// computes the output elements whose linear index is r, r + size,
// r + 2*size, ..., stages them as (value, row, column) triples and then
// copies them into C, as a gathering rank would. The finished matrix is
// printed to stdout, one row per line.
int main(int argc, char const *argv[])
{
    const int m=10,n=10,o=10,p=10;
    double A[m][n]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double B[o][p]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double C[m][p];

    const int size=10;               // number of simulated ranks
    const int total_elements=m*p;    // number of entries in C

    // Staging area for one rank's (value, row, column) triples. It is sized
    // for the worst case (a single rank owning every element) so that the
    // bound is a compile-time constant; a runtime-sized array is not
    // standard C++.
    double values_coords[total_elements][3];

    for (int rank=0;rank<size;rank++){
        // Count of the indices rank, rank+size, ... that are < total_elements.
        const int elements_number=((total_elements-1)-rank)/size+1;

        int oneD_idx=rank;
        for (int a=0;a<elements_number;a++){
            int i, j;
            findCoords(oneD_idx, p, i, j);
            oneD_idx+=size;

            // Dot product of row i of A with column j of B.
            double s=0;
            for (int k=0;k<n;k++){
                s+=A[i][k]*B[k][j];
            }
            values_coords[a][0]=s;
            values_coords[a][1]=i;
            values_coords[a][2]=j;
        }

        // "Gather" step: place this rank's results into the output matrix.
        for (int x=0;x<elements_number;x++){
            const int i=static_cast<int>(values_coords[x][1]);
            const int j=static_cast<int>(values_coords[x][2]);
            C[i][j]=values_coords[x][0];
        }
    }

    for (int i=0;i<m;i++){
        for (int j=0;j<p;j++){
            std::cout << C[i][j] << " ";
        }
        std::cout << '\n';
    }
    return 0;
}
上面的代码用循环来模拟 MPI,实现矩阵乘法。思路是:每个进程(rank)负责计算输出矩阵中一组特定坐标上的元素;进程数在 2 到 m*p 之间时都适用,其中 m 和 p 是输出矩阵的维度。这段代码运行正常。
然而,当我用 MPI 运行下面的代码时,总是得到第二段代码之后所示的错误。
#include <stdio.h>
#include <mpi.h>
#include <iostream>
using namespace std;
// Translates a linear element index into a (row, column) pair for a matrix
// with n columns. The row is written to i and the column to j.
// For non-negative oneD_idx the result equals i = oneD_idx / n and
// j = oneD_idx % n.
void findCoords(int oneD_idx, int n, int& i, int& j){
    // Work on the one-based position: a zero remainder then marks the last
    // column of a row.
    const int position = oneD_idx + 1;
    const int quotient = position / n;
    const int remainder = position % n;

    if (remainder == 0) {
        i = quotient - 1;
    } else {
        i = quotient;
    }
    j = (remainder - 1 + n) % n;
}
int main( int argc, char *argv[])
{
int rank, size;
/*const int m=10,n=10,o=10,p=10;
double A[m][n]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double B[o][p]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};*/
const int m=3,n=3,o=3,p=3;
double A[m][n]={
{1,2,3},
{1,2,3},
{1,2,3}
};
double B[o][p]={
{1,2,3},
{1,2,3},
{1,2,3}
};
if (n!=o)
{
printf("Can not multiply because of the wrong shape!");
return 0;
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int oneD_idx=rank;
int i, j;
int elements_number=((m*p-1)-rank)/size+1;
double values_coords[elements_number][3];
for (int a=0;a<elements_number;a++){
findCoords(oneD_idx, p, i, j);
oneD_idx+=size;
double s=0;
for (int k=0;k<n;k++){
s+=A[i][k]*B[k][j];
}
values_coords[a][0]=s;
values_coords[a][1]=i;
values_coords[a][2]=j;
}
MPI_Status status1, status2;
if (rank==0)
{
double C[m][p];
for (int x=0;x<elements_number;x++){
i=values_coords[x][1];
j=values_coords[x][2];
double value=values_coords[x][0];
C[i][j]=value;
}
for (int r=1;r<size;r++){
int recv_elements_number;
MPI_Recv( &recv_elements_number , 1 , MPI_INT , r , 403 , MPI_COMM_WORLD , &status1);
int recv_values_coords[recv_elements_number][3];
MPI_Recv( &recv_values_coords , recv_elements_number*3 , MPI_DOUBLE , r , 404 , MPI_COMM_WORLD , &status2);
for (int x=0;x<recv_elements_number;x++){
i=recv_values_coords[x][1];
j=recv_values_coords[x][2];
double value=recv_values_coords[x][0];
C[i][j]=value;
}
}
for (int i=0;i<m;i++){
for (int j=0;j<p;j++){
cout << C[i][j]<<" ";
}
cout<<endl;
}
}
else
{
MPI_Send( &elements_number , 1 , MPI_INT , 0 , 403 , MPI_COMM_WORLD);
MPI_Send( &values_coords , elements_number*3 , MPI_DOUBLE , 0 , 404 , MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
我使用 MPI_Send 和 MPI_Recv 来完成这项任务。看起来是通信环节出了问题,我卡住了。
正如 gilles-gouaillardet 在评论中指出的,问题出在 int 与 double 的类型不匹配上:接收缓冲区被声明为 int 数组,却按 MPI_DOUBLE 接收数据。我应该改用 struct。