使用 C 和 OpenMP 进行并行编程
Parallel programming with C and OpenMP
我正在尝试把下面的代码改写为 OpenMP 版本,但一直无法正确转换,如能得到帮助,我将不胜感激。我现在已经会把简单循环转换为 OpenMP,但还不知道如何处理涉及函数调用的情况,所以想弄清楚它是如何工作的。谢谢!
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients. */
#define FILTERLENGTH 960
/* Number of output samples computed for each trace. */
#define TRACE_LENGTH 16384
/* Number of traces. */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH
   extra elements (main zeroes FILTERLENGTH / 2 of them on each side). */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients, filled in by main. */
float f[FILTERLENGTH];
/* Input traces, one trace per row. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter output, one row per trace. */
float out[TRACE_COUNT][TRACE_LENGTH];
/* Applies the FL coefficients in filt to NT traces, writing TL samples per
   trace into out. */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Program entry point as posted in the question: fills the filter
 * coefficients f and the trace buffer d, calls filter() and prints the
 * problem sizes and the elapsed time to stderr.
 *
 * NOTE(review): the whole body is enclosed in one `#pragma omp parallel`
 * region, so every thread of the team executes all of it, including the
 * filter() call, the timing and the fprintf() calls.
 */
int main(int argc, char *argv[])
{
#pragma omp parallel
{
/* Declared inside the parallel region, so each thread has its own copies.
   nc and id are not used anywhere in this block. */
int i, j, k, nc, id;
struct timeval start, stop, elapse;
float fmax = (float)RAND_MAX;
/* NOTE(review): `parallel for` inside an already active parallel region
   opens a new (nested) region for each thread that reaches it; it does not
   divide the iterations among the threads that are already running. */
#pragma omp parallel for
for (k = 0; k < FILTERLENGTH; k++)
f[k] = k - (FILTERLENGTH - 1) / 2.0;
/* This j loop carries no work-sharing directive: each thread runs it in
   full and writes the shared array d. */
for (j = 0; j < TRACE_COUNT; j++)
{
/* NOTE(review): another nested `parallel for`, applied to the leading
   zero padding only. */
#pragma omp parallel for
for (i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
/* NOTE(review): rand() is reached by all threads concurrently here; the C
   standard does not require rand() to be free of data races. */
for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
/* Trailing zero padding. */
for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
/* Time a single filter() call (per thread, see the note at the top). */
gettimeofday(&start, NULL);
filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
gettimeofday(&stop, NULL);
timersub(&stop, &start, &elapse);
fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
/* Seconds plus microseconds converted to seconds. */
fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
}
}
/*
 * Apply an FL-tap filter to each of NT traces.
 *
 * For every trace j and output sample i:
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * FL      number of filter coefficients
 * filt    the FL coefficients
 * NT      number of traces
 * TL      number of output samples per trace
 * traces  input, NT rows of TL + FL samples each
 * out     output, NT rows of TL samples each; every element is overwritten
 *         (with 0 when FL is 0)
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            /* Sum into a local so the result does not depend on the previous
               contents of out; a plain assignment inside the k loop would
               keep only the last tap's product. */
            float acc = 0.0f;
            for (int k = 0; k < FL; k++) {
                acc += filt[k] * traces[j][i + k];
            }
            out[j][i] = acc;
        }
    }
}
您的代码存在一些问题。在函数 main 中,循环的迭代并没有按您期望的方式分配给各个线程。因为您在已有的并行区域内又给 #pragma omp for 加上了 parallel(即写成了 #pragma omp parallel for),在嵌套并行被禁用的情况下(默认即为禁用),外层并行区域中创建的每个线程都会“顺序地”完整执行该区域内的代码。更多细节请参阅 OpenMP 规范中有关嵌套并行的说明。
此外,这段代码:
/* Fills every row of d: zero padding, random samples, zero padding.
   Assumes it runs inside a parallel region: the j loop itself is not
   work-shared, so each thread executes all of its iterations. */
for (int j = 0; j < TRACE_COUNT; j++)
{
    /* Only this first fill is divided among the threads, and the
       work-sharing construct is entered once per value of j. */
#pragma omp for
    for (int i = 0; i < FILTERLENGTH / 2; i++)
        d[j][i] = 0;
    for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
        d[j][i] = rand() / fmax;
    /* Declares its own index like the loops above, so the snippet does not
       depend on an `i` declared elsewhere. */
    for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
        d[j][i] = 0;
}
可以通过将 #pragma omp for
移动到外循环来改进:
/* Work-share the rows of d: each iteration of j fills one complete row, so
   the fills inside need no directive of their own. */
#pragma omp for
for (int j = 0; j < TRACE_COUNT; j++)
{
    float *row = d[j];
    const int head_end = FILTERLENGTH / 2;                /* end of leading zeros  */
    const int data_end = TRACE_LENGTH + FILTERLENGTH / 2; /* end of random samples */
    int i = 0;

    for (; i < head_end; i++)
        row[i] = 0;
    for (; i < data_end; i++)
        row[i] = rand() / fmax;
    for (; i < TRACE_SPACE; i++)
        row[i] = 0;
}
并且并行区域的范围应该缩小。所有东西放在一起:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients. */
#define FILTERLENGTH 960
/* Number of output samples computed for each trace. */
#define TRACE_LENGTH 16384
/* Number of traces. */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH
   extra elements (main zeroes FILTERLENGTH / 2 of them on each side). */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients, filled in by main. */
float f[FILTERLENGTH];
/* Input traces, one trace per row. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter output, one row per trace. */
float out[TRACE_COUNT][TRACE_LENGTH];
/* Applies the FL coefficients in filt to NT traces, writing TL samples per
   trace into out. */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Build the test data, time one filter() call and report the problem sizes
 * and the elapsed time on stderr.
 *
 * The deterministic fills (filter coefficients and zero padding) are
 * work-shared with OpenMP; the random samples are generated by one thread.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    (void)argc; /* command-line arguments are not used */
    (void)argv;

    const float fmax = (float)RAND_MAX;

    /* rand() updates hidden shared state and is not required to be safe to
       call from several threads at once, so the samples are drawn serially.
       This also keeps the generated data independent of thread scheduling. */
    for (int j = 0; j < TRACE_COUNT; j++) {
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++) {
            d[j][i] = rand() / fmax;
        }
    }

#pragma omp parallel
    {
        /* nowait: the next loop writes only d and never reads f. */
#pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++) {
            f[k] = k - (FILTERLENGTH - 1) / 2.0;
        }

        /* Zero padding on both sides of each trace; one row per iteration.
           The barrier that ends the parallel region completes all writes
           before filter() runs. */
#pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++) {
            for (int i = 0; i < FILTERLENGTH / 2; i++) {
                d[j][i] = 0;
            }
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++) {
                d[j][i] = 0;
            }
        }
    }

    /* Time a single filter() call, outside any parallel region. */
    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    /* Seconds plus microseconds converted to seconds. */
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
    return 0;
}
您仍然可以尝试并行化函数 filter
。
我正在尝试把下面的代码改写为 OpenMP 版本,但一直无法正确转换,如能得到帮助,我将不胜感激。我现在已经会把简单循环转换为 OpenMP,但还不知道如何处理涉及函数调用的情况,所以想弄清楚它是如何工作的。谢谢!
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients. */
#define FILTERLENGTH 960
/* Number of output samples computed for each trace. */
#define TRACE_LENGTH 16384
/* Number of traces. */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH
   extra elements (main zeroes FILTERLENGTH / 2 of them on each side). */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients, filled in by main. */
float f[FILTERLENGTH];
/* Input traces, one trace per row. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter output, one row per trace. */
float out[TRACE_COUNT][TRACE_LENGTH];
/* Applies the FL coefficients in filt to NT traces, writing TL samples per
   trace into out. */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Program entry point as posted in the question: fills the filter
 * coefficients f and the trace buffer d, calls filter() and prints the
 * problem sizes and the elapsed time to stderr.
 *
 * NOTE(review): the whole body is enclosed in one `#pragma omp parallel`
 * region, so every thread of the team executes all of it, including the
 * filter() call, the timing and the fprintf() calls.
 */
int main(int argc, char *argv[])
{
#pragma omp parallel
{
/* Declared inside the parallel region, so each thread has its own copies.
   nc and id are not used anywhere in this block. */
int i, j, k, nc, id;
struct timeval start, stop, elapse;
float fmax = (float)RAND_MAX;
/* NOTE(review): `parallel for` inside an already active parallel region
   opens a new (nested) region for each thread that reaches it; it does not
   divide the iterations among the threads that are already running. */
#pragma omp parallel for
for (k = 0; k < FILTERLENGTH; k++)
f[k] = k - (FILTERLENGTH - 1) / 2.0;
/* This j loop carries no work-sharing directive: each thread runs it in
   full and writes the shared array d. */
for (j = 0; j < TRACE_COUNT; j++)
{
/* NOTE(review): another nested `parallel for`, applied to the leading
   zero padding only. */
#pragma omp parallel for
for (i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
/* NOTE(review): rand() is reached by all threads concurrently here; the C
   standard does not require rand() to be free of data races. */
for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
/* Trailing zero padding. */
for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
/* Time a single filter() call (per thread, see the note at the top). */
gettimeofday(&start, NULL);
filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
gettimeofday(&stop, NULL);
timersub(&stop, &start, &elapse);
fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
/* Seconds plus microseconds converted to seconds. */
fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
}
}
/*
 * Apply an FL-tap filter to each of NT traces.
 *
 * For every trace j and output sample i:
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * FL      number of filter coefficients
 * filt    the FL coefficients
 * NT      number of traces
 * TL      number of output samples per trace
 * traces  input, NT rows of TL + FL samples each
 * out     output, NT rows of TL samples each; every element is overwritten
 *         (with 0 when FL is 0)
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            /* Sum into a local so the result does not depend on the previous
               contents of out; a plain assignment inside the k loop would
               keep only the last tap's product. */
            float acc = 0.0f;
            for (int k = 0; k < FL; k++) {
                acc += filt[k] * traces[j][i + k];
            }
            out[j][i] = acc;
        }
    }
}
您的代码存在一些问题。在函数 main 中,循环的迭代并没有按您期望的方式分配给各个线程。因为您在已有的并行区域内又给 #pragma omp for 加上了 parallel(即写成了 #pragma omp parallel for),在嵌套并行被禁用的情况下(默认即为禁用),外层并行区域中创建的每个线程都会“顺序地”完整执行该区域内的代码。更多细节请参阅 OpenMP 规范中有关嵌套并行的说明。
此外,这段代码:
/* Fills every row of d: zero padding, random samples, zero padding.
   Assumes it runs inside a parallel region: the j loop itself is not
   work-shared, so each thread executes all of its iterations. */
for (int j = 0; j < TRACE_COUNT; j++)
{
    /* Only this first fill is divided among the threads, and the
       work-sharing construct is entered once per value of j. */
#pragma omp for
    for (int i = 0; i < FILTERLENGTH / 2; i++)
        d[j][i] = 0;
    for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
        d[j][i] = rand() / fmax;
    /* Declares its own index like the loops above, so the snippet does not
       depend on an `i` declared elsewhere. */
    for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
        d[j][i] = 0;
}
可以通过将 #pragma omp for
移动到外循环来改进:
/* Work-share the rows of d: each iteration of j fills one complete row, so
   the fills inside need no directive of their own. */
#pragma omp for
for (int j = 0; j < TRACE_COUNT; j++)
{
    float *row = d[j];
    const int head_end = FILTERLENGTH / 2;                /* end of leading zeros  */
    const int data_end = TRACE_LENGTH + FILTERLENGTH / 2; /* end of random samples */
    int i = 0;

    for (; i < head_end; i++)
        row[i] = 0;
    for (; i < data_end; i++)
        row[i] = rand() / fmax;
    for (; i < TRACE_SPACE; i++)
        row[i] = 0;
}
并且并行区域的范围应该缩小。所有东西放在一起:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients. */
#define FILTERLENGTH 960
/* Number of output samples computed for each trace. */
#define TRACE_LENGTH 16384
/* Number of traces. */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH
   extra elements (main zeroes FILTERLENGTH / 2 of them on each side). */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients, filled in by main. */
float f[FILTERLENGTH];
/* Input traces, one trace per row. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter output, one row per trace. */
float out[TRACE_COUNT][TRACE_LENGTH];
/* Applies the FL coefficients in filt to NT traces, writing TL samples per
   trace into out. */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Build the test data, time one filter() call and report the problem sizes
 * and the elapsed time on stderr.
 *
 * The deterministic fills (filter coefficients and zero padding) are
 * work-shared with OpenMP; the random samples are generated by one thread.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    (void)argc; /* command-line arguments are not used */
    (void)argv;

    const float fmax = (float)RAND_MAX;

    /* rand() updates hidden shared state and is not required to be safe to
       call from several threads at once, so the samples are drawn serially.
       This also keeps the generated data independent of thread scheduling. */
    for (int j = 0; j < TRACE_COUNT; j++) {
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++) {
            d[j][i] = rand() / fmax;
        }
    }

#pragma omp parallel
    {
        /* nowait: the next loop writes only d and never reads f. */
#pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++) {
            f[k] = k - (FILTERLENGTH - 1) / 2.0;
        }

        /* Zero padding on both sides of each trace; one row per iteration.
           The barrier that ends the parallel region completes all writes
           before filter() runs. */
#pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++) {
            for (int i = 0; i < FILTERLENGTH / 2; i++) {
                d[j][i] = 0;
            }
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++) {
                d[j][i] = 0;
            }
        }
    }

    /* Time a single filter() call, outside any parallel region. */
    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    /* Seconds plus microseconds converted to seconds. */
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
    return 0;
}
您仍然可以尝试并行化函数 filter
。