使用 C 和 OpenMP 进行并行编程

Parallel programming with C and OpenMP

我正在尝试把下面的代码改写成 OpenMP 版本,但一直无法正确转换,如能得到帮助,将不胜感激。我现在已经可以把简单的循环转换为 OpenMP,但涉及函数调用时就不知道该怎么做了,所以想弄清楚它是如何工作的。谢谢!

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* Number of filter coefficients (length of f). */
#define FILTERLENGTH 960
/* Number of output samples per trace (row length of out). */
#define TRACE_LENGTH 16384
/* Number of traces (rows of d and out). */
#define TRACE_COUNT 32
/* Row length of d: TRACE_LENGTH samples plus FILTERLENGTH extra samples. */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

/* Filter coefficients; filled in main(). */
float f[FILTERLENGTH];
/* Input traces, one per row; filled in main(). */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter results, one row per trace; written by filter(). */
float out[TRACE_COUNT][TRACE_LENGTH];

/*
 * Applies the FL coefficients in filt to each of the NT rows of traces
 * (each row holds TL + FL samples) and stores TL results per row in out.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Question's attempt: fills the filter coefficients and the input traces,
 * times one call to filter(), and prints the run parameters to stderr.
 *
 * NOTE(review): the whole body sits inside a single `omp parallel` region, so
 * every thread of the team executes all of it -- including the filter() call
 * and the fprintf() lines -- instead of sharing the work.
 */
int main(int argc, char *argv[])
{
    #pragma omp parallel
    {
        /* Declared inside the region, so each thread gets its own copies.
         * nc and id are never used. */
        int i, j, k, nc, id;
        struct timeval start, stop, elapse;

        /* Divisor that scales rand() into the range [0, 1]. */
        float fmax = (float)RAND_MAX;
        /* NOTE(review): `parallel for` inside an already active parallel
         * region opens a nested region; it does not split the iterations
         * among the threads of the enclosing team. */
        #pragma omp parallel for
        for (k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0;
        /* Each trace: FILTERLENGTH / 2 zeros, TRACE_LENGTH random samples,
         * then zeros up to TRACE_SPACE. */
        for (j = 0; j < TRACE_COUNT; j++)
        {
            /* NOTE(review): nested `parallel for` again, see above. */
            #pragma omp parallel for
            for (i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
        /* Wall-clock time around the filter() call only. */
        gettimeofday(&start, NULL);
        filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &elapse);

        /* Report the problem size and the elapsed time in seconds. */
        fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
        fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
        fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
        fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
    }
}

/*
 * Applies an FL-tap filter to every trace.
 *
 * For each trace j and output sample i:
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * FL      number of filter coefficients
 * filt    the FL coefficients
 * NT      number of traces (rows)
 * TL      number of output samples per trace
 * traces  input, NT rows of TL + FL samples each
 * out     output, NT rows of TL samples each; every element is overwritten
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            /*
             * Sum all FL products. Assigning each product straight to
             * out[j][i] would keep only the last one. The local accumulator
             * also means the result does not depend on what out held before.
             */
            float acc = 0.0f;
            for (int k = 0; k < FL; k++)
                acc += filt[k] * traces[j][i + k];
            out[j][i] = acc;
        }
    }
}

您的代码存在一些问题:在函数 main 中,循环的迭代并没有按您期望的方式分配给各个线程。由于您在已有的并行区域内部又使用了带 parallel 的 #pragma omp parallel for,而嵌套并行默认是禁用的(这里假设您没有启用它),外层并行区域创建的每个线程都会各自“顺序”地执行该区域内的全部代码。更多细节请参阅 OpenMP 文档中关于嵌套并行(nested parallelism)的说明。

此外,这段代码:

        /* When this sits inside a parallel region, every thread runs the
         * outer loop; only the first inner loop is shared among threads. */
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            #pragma omp for
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;

            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            /* Declares its own index like the two loops above. */
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

可以通过将 #pragma omp for 移动到外循环来改进:

        /* Zero padding at both ends of each trace. Rows are independent, so
         * the outer loop is shared among the threads of the team. */
        #pragma omp for
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

        /* rand() keeps hidden shared state and is not required to be
         * thread-safe, so a single thread draws all the random samples. */
        #pragma omp single
        for (int j = 0; j < TRACE_COUNT; j++)
            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;

此外,并行区域的范围也应该缩小。把以上修改合在一起:

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

/* Number of filter coefficients (length of f). */
#define FILTERLENGTH 960
/* Number of output samples per trace (row length of out). */
#define TRACE_LENGTH 16384
/* Number of traces (rows of d and out). */
#define TRACE_COUNT 32
/* Row length of d: TRACE_LENGTH samples plus FILTERLENGTH extra samples. */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

/* Filter coefficients; filled in main(). */
float f[FILTERLENGTH];
/* Input traces, one per row; filled in main(). */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter results, one row per trace; written by filter(). */
float out[TRACE_COUNT][TRACE_LENGTH];

/*
 * Applies the FL coefficients in filt to each of the NT rows of traces
 * (each row holds TL + FL samples) and stores TL results per row in out.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Fills the filter coefficients and the input traces, times one call to
 * filter(), and reports the run parameters and the elapsed seconds on stderr.
 * Command-line arguments are ignored.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* Divisor that scales rand() into the range [0, 1]. */
    const float fmax = (float)RAND_MAX;

    /* Only the independent, deterministic initialisation loops are shared
     * among threads; the region ends before the timed section. */
    #pragma omp parallel
    {
        /* nowait: the next loop writes d only and never touches f. */
        #pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0;

        /* Zero padding at both ends of each trace. nowait: the barrier that
         * ends the parallel region already synchronises the threads. */
        #pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
    }

    /* rand() keeps hidden shared state and is not required to be thread-safe,
     * so the random samples are drawn serially, outside the parallel region. */
    for (int j = 0; j < TRACE_COUNT; j++)
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
            d[j][i] = rand() / fmax;

    /* Wall-clock time around the filter() call only. */
    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);

    return 0;
}

您还可以进一步尝试并行化函数 filter。