C中的MPI高斯消除，每个处理器从文件中读取

Question

我正在尝试使用 MPI 执行高斯消元法，我对此还很陌生。据我所知，前向消除是可以并行完成的，而后向替换由于通信成本可以在主处理器上完成。

在下面的代码中，我尝试实现它，但似乎在执行正向消除时出现某种通信错误。输入矩阵是 N*(N+1) 的，因为它读作增广矩阵。每个处理器都应该读取它在矩阵中的份额，因此处理器需要相应地发送值。

输入矩阵被指定为以下格式的文件：

6.807000 5.249000 0.073000 3.658000 8.930000 1.272000 7.544000 0.878000 1.000000
7.709000 4.440000 8.165000 4.492000 3.042000 7.987000 2.503000 2.327000 2.000000
8.840000 2.612000 4.303000 3.169000 7.709000 7.157000 9.560000 0.933000 3.000000
0.278000 1.816000 5.335000 9.097000 7.826000 3.512000 9.267000 3.810000 4.000000
0.979000 9.149000 6.579000 8.821000 1.967000 0.672000 1.393000 9.336000 5.000000
1.745000 5.228000 4.091000 0.194000 6.357000 5.001000 1.153000 6.708000 6.000000
5.668000 1.490000 8.124000 2.196000 9.530000 0.903000 7.722000 4.666000 7.000000
8.024000 7.801000 6.853000 0.977000 7.408000 8.228000 4.933000 0.298000 8.000000

生成的矩阵应该是这样的：

1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -0.733315 
0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.806801 
0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.843402 
0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 -0.912938 
0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 -0.146581 
0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 -0.048743 
0.000000 0.000000 0.000000 0.000000 0.000000 -0.000000 1.000000 0.000000 0.849379 
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 -0.002284

然而，我得到的是：

1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan 
0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan 
7.431000 2.624000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -nan 
0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 -nan 
3.714586 -0.915696 -1.032000 -5.928000 1.000000 0.000000 0.000000 0.000000 -nan 
0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 -nan 
-8.653789 -0.773373 1.057501 6.074484 0.000000 0.000000 1.000000 0.000000 -nan 
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 -nan

任何人都可以提供一些关于出了什么问题的见解吗？我想使用障碍来确保处理器在所有进程都从文件中读取矩阵份额之前不会启动。前向淘汰后还有另一个障碍，以确保所有处理器在执行后向替换之前都已完成。需要这些障碍吗？

#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "math.h"
#include "mpi.h"

#define SUCCESS 0
#define ERROR -1

#define EPSILON 0.000001

void read_matrix_size_from_file(char *filename, int *rows, int *columns);
double ** read_user_matrix_from_file(char *filename, int rows, int columns, int rank, int nprocs);
double ** allocate_matrix(int, int);
void free_matrix(double **matrix, int rows);
void divide_by_max(double **, int, int);
void input_clicking_probabilities(double **, int, int, double *);
void write_clicking_probabilities_to_file(double *cp, int rows);
void print_best_acceptance_threshold(double *, int);
void print_matrix(double **, int, int);

int main (int argc, char** argv)
{
    if (argc != 2)
    {
        printf("please provide a user matrix!\n");
        return ERROR;
    }

    /* init */
    int rank, nprocs;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank( MPI_COMM_WORLD, &rank);
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs);
    clock_t begin1, end1, begin2, end2;

    /* setup */
    int rows, columns;
    double **A;
    double *cp;
    if (rank == 0)
    {
        printf("malloc cp process %d\n", rank); 
        read_matrix_size_from_file(argv[1], &rows, &columns);
        cp = malloc(columns * sizeof(double));
    }
    /* Each process will read its own subset of the larger matrix */


    MPI_Bcast(&rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&columns, 1, MPI_INT, 0, MPI_COMM_WORLD);


    /* Split the matrix into submatrixs */
    //int rpp = rows / rank;
    //int rrpp = rows % rank;
    printf("WE reached here process %d\n", rank);

    int i,j,k;
    int *map = malloc(sizeof(int) * rows);

    int stop = 0;
    while(1)
    {
        for(i=0;i<nprocs; i++)
        {
            if (rank == i)
            {
                printf("Reading for process %d\n", rank);
                A = read_user_matrix_from_file(argv[1], rows, columns, rank, nprocs);
                stop = 1;
            }   
        }
        if (stop)
        {
            break;
        }
    }




    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < rows; i++)
    {
        map[i] = i % nprocs;
    }
    printf("Process %d is here\n", rank);

    for(k=0; k<rows; k++)
    {
        //printf("Broadcast recieved/sent in process %d from %d\n", rank, map[k]);
        MPI_Bcast (&A[k][k],rows-k,MPI_DOUBLE,map[k],MPI_COMM_WORLD);
        printf("process %d, broadcast b[%d] as %G\n", rank,k,A[k][columns-1] );
        MPI_Bcast (&A[k][columns-1],1,MPI_DOUBLE,map[k],MPI_COMM_WORLD);
        double pivot;
        for(i= k+1; i<rows; i++) 
    {
      if(map[i] == rank)
      {
          pivot = A[i][k]/A[k][k];
      }
    }               
    for(i= k+1; i<rows; i++) 
    {       
      if(map[i] == rank)
      {
        for(j=0;j<rows;j++)
        {
            A[i][j]=A[i][j]-( pivot * A[k][j] );
        }
        A[i][columns-1]= A[i][columns-1]-( pivot * A[k][columns-1] );
        printf("b[%d] is %G\n", i, A[i][columns-1]);
      }
    }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    printf("Process %d finished\n", rank);

    if (rank == 0)
    {
            /* Back-substitution */
        int row,row2;
        for (row = rows-1; row >= 0; row--) {
            A[row][columns-1] = A[row][columns-1] / A[row][row];
            printf("divide %G by %G\n",A[row][columns-1] , A[row][row]);
            A[row][row] = 1;
            for (row2 = row-1; row2 >= 0; row2--) {
                A[row2][columns-1] += A[row2][row]*A[row][columns-1];
                A[row2][row] = 0;
            }
        }
    }

    if (rank == 0)
    {
        print_matrix(A, rows, columns);
        divide_by_max(A, rows, columns);
        /* results */
        input_clicking_probabilities(A, rows, columns, cp);
        print_best_acceptance_threshold(cp, rows);
        write_clicking_probabilities_to_file(cp, rows);
        printf("Free cp process %d\n", rank);
        free(cp);
        printf("Freed cp\n");
    }

    /* results */
    free_matrix(A, rows);
    MPI_Finalize();
    return SUCCESS;

}


void read_matrix_size_from_file(char *filename, int *rows, int *columns)
{
    FILE *file;
    file = fopen(filename, "r");

    /* get number of rows and columns*/
    *rows = 1;
    *columns = 1;
    char c;
    int columns_known = 0;
    while(!feof(file)) {
        c = fgetc(file);
        if (c == ' ') {
            if (!columns_known) (*columns)++;
        } 

        if (c == '\n') {
            (*rows)++;
            columns_known = 1;
            continue;
        }
    }

    printf("There are %d rows and %d columns\n", *rows, *columns);

    fclose(file);
}

double ** read_user_matrix_from_file(char *filename, int rows, int columns, int rank, int nprocs)
{
    FILE *file;
    file = fopen(filename, "r");

    /* read values into an array */
    //rewind(file);
    printf("Rank is %d\n", rank);
    printf("Nprocess is %d\n", nprocs);
    printf("Rows is %d and columns is %d\n", rows, columns);
    double **matrix = allocate_matrix(rows, columns);
    int i,j;
    for (i = 0; i < rows; i++)
    {
        if (rank == i % nprocs)
        {
            for (j = 0; j < columns; j++)
            {
                fscanf(file, "%lf", &matrix[i][j]);
            }
        }

    }
    fclose(file);

    return matrix;
}

double ** allocate_matrix(int rows, int columns)
{
    double ** matrix = (double **) malloc(sizeof(double *) * rows);
    int i;
    for (i = 0; i < rows; i++)
    {
        matrix[i] = (double *) malloc(sizeof(double) * columns);
    }

    return matrix;
}

void free_matrix(double **matrix, int rows)
{
    int i;
    for (i = 0; i < rows; i++)
    {
        free(matrix[i]);
    }
    free(matrix);
}

void input_clicking_probabilities(double **matrix, int rows, int columns, double *cp) {
    int row;
    for (row = 0; row < rows; row++) {
        cp[row] = matrix[row][columns-1];
    }
}

void write_clicking_probabilities_to_file(double *cp, int rows)
{
    /* write clicking probabilities to file */ 
    FILE *output_file;
    int row;
    output_file = fopen("clicking_probabilities.txt","w");
    for (row = 0; row < rows; row++) {
        fprintf(output_file, "%lf\n", cp[row]);
    }

    fclose(output_file);
}

void print_matrix(double **matrix, int rows, int columns)
{
    FILE *output_file;
    int row, column;
    output_file = fopen("row_reduced_matrix.txt","w");
    for (row = 0; row < rows; row++) {
        for (column = 0; column < columns; column++) {
            fprintf(output_file,"%lf ",matrix[row][column]);
        }
        fprintf(output_file,"\n");
    }   
    fclose(output_file);
}

void print_best_acceptance_threshold(double *cp, int rows) {

}

void divide_by_max(double **matrix, int rows, int columns) {
    double max = 0; 
    int row, column;

    /* get max so we can divide by this later to get probabilities */
    for (row = 0; row < rows; row++) {      
        if (max < fabs(matrix[row][columns-1])) max = fabs(matrix[row][columns-1]);
    }

    /* divide by max and take abs */
    for (row = 0; row < rows; row++) {
        /* check for division by zero */
        if (equals(max,0)) {
            matrix[row][columns-1] = 0;
        } else {
            matrix[row][columns-1] = fabs (matrix[row][columns-1]) / max;
        }
    }
}

int equals(double a, double b) {
    if (fabs(a-b) < EPSILON) return 1;
    else return 0;
}

Answer 1

上述结果（NAN）的原因是从文件中读取。在 read_user_matrix_from_file 函数中，有一个带有 fscanf 的 for 循环。请注意，递增循环确实 NOT 递增文件指针。通过读取行（而不是文件指针）来增加文件指针来解决问题。可能有更有效的方法来做到这一点，但为了简单起见，这就是可以做到的。另请注意，由于 MPI_Bcast 是阻塞调用，因此不需要障碍。从文件中读取的循环应如下所示：

for (i = 0; i < rows; i++)
    {
        printf("process %d, %d mod %d = %d\n", world_rank, i, nprocs, i% nprocs);
        if (world_rank == i % nprocs)
        {
            for (j = 0; j < columns; j++)
            {
                fscanf(file, "%lf", &matrix[i][j]);
            }
        }else {
            for ( j= 0; j < columns; j++)
            {
                fscanf(file, "%lf", &buf[j]);
                matrix[i][j] = 0;
            }
        }

    }

C中的MPI高斯消除，每个处理器从文件中读取

MPI Gaussian elimination in C with each processor reading from file

c

mpi