MPI 应用程序中的分段和中止陷阱错误

Question

我正在尝试并行化一些使用 OpenMPI 计算 Mandelbrot 集的串行代码。作为第一步，我试图像这样在不同的进程之间划分工作：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include "mpi.h"

// Main program
int main(int argc, char* argv[])
 {
    /* screen ( integer) coordinate */
    int iX,iY,i,j;
    const int iXmax = 5; // default
    const int iYmax = 5; // default

    /* world ( double) coordinate = parameter plane*/
    double Cx, Cy;
    const double CxMin = -2.5;
    const double CxMax = 1.5;
    const double CyMin = -2.0;
    const double CyMax = 2.0; 

    /* */
    double PixelWidth = (CxMax - CxMin)/iXmax;
    double PixelHeight = (CyMax - CyMin)/iYmax;


    int linePerProcess, remainingLines, processMinY,  processMaxY, lastProcessMaxY, result_offset;
    int my_rank, processors;
    unsigned char (*resultBuffer)[3] = NULL;
    unsigned char (*resultBufferTwo)[3] = NULL;
    unsigned char (*finalResultBuffer)[3] = NULL;
    MPI_Status stat;


    /* color component ( R or G or B) is coded from 0 to 255 */
    /* it is 24 bit color RGB file */
    const int MaxColorComponentValue = 255; 


    // RGB color array
    unsigned char color[3];

    /* Z = Zx + Zy*i;   Z0 = 0 */
    double Zx, Zy;
    double Zx2, Zy2; /* Zx2 = Zx*Zx;  Zy2 = Zy*Zy  */
    /*  */
    int Iteration;
    const int IterationMax = 1000; // default

    /* bail-out value , radius of circle ;  */
    const double EscapeRadius = 400;
    double ER2 = EscapeRadius * EscapeRadius;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &processors);

    linePerProcess = iYmax / processors;
    remainingLines = iYmax % processors;
    processMinY = my_rank * linePerProcess;
    processMaxY = processMinY + linePerProcess;
    lastProcessMaxY = processMaxY + remainingLines;


    if (my_rank == 0) {

        finalResultBuffer = malloc(iXmax * iYmax * sizeof(color));

        for(iY = processMinY; iY < processMaxY; iY++) {

            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }

            for(iX = 0; iX < iXmax; iX++)
            {

                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;

            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };

            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }

                finalResultBuffer[(iY*iXmax)+iX][0] = color[0];
                finalResultBuffer[(iY*iXmax)+iX][1] = color[1];
                finalResultBuffer[(iY*iXmax)+iX][2] = color[2];


            }
        }
        free(finalResultBuffer);


    } else if (my_rank == processors - 1) {

        resultBuffer = malloc((linePerProcess + remainingLines) * iXmax * sizeof(color));

        for(iY = processMinY; iY < lastProcessMaxY; iY++) {

            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }
            for(iX = 0; iX < iXmax; iX++)
            {

                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;

            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };

            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }
                resultBuffer[(iY*iXmax)+iX][0] = color[0];
                resultBuffer[(iY*iXmax)+iX][1] = color[1];
                resultBuffer[(iY*iXmax)+iX][2] = color[2];


            }
        }
        free(resultBuffer);

    } else {

        resultBufferTwo = malloc(linePerProcess * iXmax * sizeof(color));

        for(iY = processMinY; iY < processMaxY; iY++) {
            Cy = CyMin + (iY * PixelHeight);
            if (fabs(Cy) < (PixelHeight / 2))
            {
                Cy = 0.0; /* Main antenna */
            }
            for(iX = 0; iX < iXmax; iX++)
            {

                Cx = CxMin + (iX * PixelWidth);
                /* initial value of orbit = critical point Z= 0 */
                Zx = 0.0;
                Zy = 0.0;
                Zx2 = Zx * Zx;
                Zy2 = Zy * Zy;

            /* */
                for(Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
                {
                    Zy = (2 * Zx * Zy) + Cy;
                    Zx = Zx2 - Zy2 + Cx;
                    Zx2 = Zx * Zx;
                    Zy2 = Zy * Zy;
                };

            /* compute  pixel color (24 bit = 3 bytes) */
                if (Iteration == IterationMax)
                {
                    // Point within the set. Mark it as black
                    color[0] = 0;
                    color[1] = 0;
                    color[2] = 0;
                }
                else 
                {
                    // Point outside the set. Mark it as white
                    double c = 3*log((double)Iteration)/log((double)(IterationMax) - 1.0);
                    if (c < 1)
                    {
                        color[0] = 0;
                        color[1] = 0;
                        color[2] = 255*c;
                    }
                    else if (c < 2)
                    {
                        color[0] = 0;
                        color[1] = 255*(c-1);
                        color[2] = 255;
                    }
                    else
                    {
                        color[0] = 255*(c-2);
                        color[1] = 255;
                        color[2] = 255;
                    }
                }

                resultBufferTwo[(iY*iXmax)+iX][0] = color[0];
                resultBufferTwo[(iY*iXmax)+iX][1] = color[1];
                resultBufferTwo[(iY*iXmax)+iX][2] = color[2];

            }
        }
        free(resultBufferTwo);

    }

    MPI_Finalize();
    return 0;
 }

当我运行这样做时，我得到分段错误 (11) 以及中止陷阱 (6)，这仅发生在排名 1 及以上的进程中。 0号进程没有问题，谁能帮我理解这里的问题？

Answer 1

对于高于 0 的排名，您对结果缓冲区的索引是错误的。

缓冲区始终从索引 0 开始，无论代码在哪个级别运行。但是，对于高于 0 的排名，您将从 processMinY * iXmax 开始索引。然后这会覆盖任意内存，这可能会导致段错误。

要解决此问题，您应该在计算缓冲区中的索引时从 iY 中减去 processMinY，例如resultBufferTwo[((iY-processMinY)*iXmax)+iX][0] = color[0];.

我还建议您尽可能统一不同级别的代码。现在，您的大部分代码都是重复的，尽管实际上唯一不同的行是 malloc 调用和写入缓冲区。这使得代码难以遵循。您应该为传递给 malloc 的大小和缓冲区创建一个变量，然后您将在循环中使用它。这样只有这两个变量的初始化需要在等级之间有所不同。

MPI 应用程序中的分段和中止陷阱错误

Segmentation and Abort Trap error in MPI app

c

parallel-processing

mpi

openmpi