能够体现加速效果的 OpenMP 示例程序

A sample OpenMP program with speedup

有人能提供一个 OpenMP 示例程序,使其相比不使用 OpenMP 的版本有明显的加速吗?我发现很难获得加速效果,即使是下面这个简单的程序,在启用 OpenMP 后运行得反而更慢。我的处理器是 Intel® Core™ i3-2370M CPU @ 2.40GHz × 4,运行在 Linux (Ubuntu 14.10) 上。

#include <cmath>
#include <stdio.h>
#include <time.h> 
/*
 * Runs `size` identical busy loops, distributed over four OpenMP threads, and
 * reports both the CPU time and the wall-clock time they took.
 *
 * clock() returns the processor time used by the whole program; with several
 * threads busy at once it grows faster than elapsed time, so on its own it
 * makes a parallel run look as slow as (or slower than) a serial one. The
 * wall-clock figure is the one that shows the speedup.
 *
 * Returns 0.
 */
int main() {
    const int size = 4;
    struct timespec wall_begin, wall_end;

    clock_t cpu_ticks = clock();
    /* C11 calendar clock: not monotonic, but adequate for a short demo. */
    timespec_get(&wall_begin, TIME_UTC);

    #pragma omp parallel for num_threads(4)
    for (int n = 0; n < size; ++n) {
        /* The volatile store keeps the optimizer from deleting the loop. */
        volatile int spin = 0;
        for (int j = 0; j < 100000000; j++) {
            spin = j;
        }
        printf("\n");
    }

    timespec_get(&wall_end, TIME_UTC);
    cpu_ticks = clock() - cpu_ticks;

    double wall_seconds = (double)(wall_end.tv_sec - wall_begin.tv_sec)
                        + (double)(wall_end.tv_nsec - wall_begin.tv_nsec) / 1e9;

    /* clock_t has no printf specifier of its own, so convert explicitly. */
    printf("It took me %ld clicks (%f seconds of CPU time).\n",
           (long)cpu_ticks, ((double)cpu_ticks) / CLOCKS_PER_SEC);
    printf("Wall-clock time: %f seconds.\n", wall_seconds);

    return 0;
}

计算积分是一个经典的例子。调大 parts 常量会增加执行时间,从而让运行时间的差异更明显:parts 越大,执行时间越长。在一台单核双线程的 Intel Pentium 4 上,启用 OpenMP 时耗时 21.3 秒,不启用时耗时 26.7 秒:

#include <math.h>
#include <stdio.h>
#include <omp.h>

/*
 * Parameters of the numerical integration below.
 *
 *   from, to  integration bounds
 *   parts     number of sub-intervals
 *   step      width of one sub-interval
 *   x         midpoint of the first sub-interval
 *
 * The bounds are double literals so that step and x are computed in double
 * precision: with about 1e9 sub-intervals a single-precision step cannot tell
 * neighbouring sample points apart.
 *
 * NOTE: these are short lowercase macro names; any later identifier spelled
 * from, to, parts, step or x in this file is replaced by the preprocessor.
 */
#define from 0.0
#define to 2.0
#define parts 999999999
#define step ((to - from) / parts)
#define x (from + (step / 2.0))

int main()
{
        double integralSum = 0;
        int i;
        #pragma omp parallel for reduction(+:integralSum)
        for (i = 1; i < (parts+1); ++i)
        {
                integralSum = integralSum + (step * fabs(pow((x + (step * i)),2) + 4));
        }

        printf("%f\n", integralSum);

        return 0;
}

该程序计算 x^2 + 4 在区间 [0, 2] 上的定积分。

我遇到过一个与此相关的问题:当时我想找出数组中的最大值。我犯了和你一样的错误——用 clock() 来测量经过的时间。clock() 返回的是整个程序占用的处理器时间,而不是实际经过的时间,因此多个线程同时运行时它会高估耗时。为了解决这个问题,我改用了 clock_gettime(),现在可以正确测量了。

下面是一个可以测量到加速比的示例代码(请注意,您可能需要调整 N 的取值范围):

#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>

/*
 * Returns the time elapsed from `start` to `end`.
 *
 * The result is NOT a normalized timespec: the whole interval, expressed in
 * nanoseconds, is stored in tv_nsec, and tv_sec is always 0. Callers obtain
 * seconds by dividing tv_nsec by 1e9.
 *
 * NOTE: tv_nsec is a long, so where long is 32 bits the result overflows for
 * intervals longer than about 2.1 seconds.
 */
struct timespec diff(struct timespec start, struct timespec end)
{
    struct timespec temp;

    /* Set explicitly so that no field of the returned struct is indeterminate. */
    temp.tv_sec = 0;
    /* When both times fall in the same second the first term is simply 0. */
    temp.tv_nsec = (end.tv_sec - start.tv_sec) * 1000000000L
                 + (end.tv_nsec - start.tv_nsec);

    return temp;
}

/*
 * Times a maximum search over an array of random ints, once with an OpenMP
 * max-reduction and once sequentially, for array lengths N from 1,000,000 up
 * to 99,000,000 in steps of 1,000,000.
 *
 * Each line written to out.txt has the form
 *     <N>\t<parallel seconds>\t<sequential seconds>
 *
 * Returns 0 on success, or -1 if out.txt cannot be opened or written or if
 * an array cannot be allocated.
 */
int main()
{
    struct timespec t_start, t_end;

    srand((unsigned int)time(NULL));

    FILE *f = fopen("out.txt", "w");
    if (f == NULL) {
        printf("Could not open output\n");
        return -1;
    }

    for (unsigned int N = 1000000; N < 100000000; N += 1000000) {
        /* N is unsigned, so it is printed with %u rather than %d. */
        fprintf(f, "%u\t", N);

        int *array = malloc(sizeof *array * N);
        if (array == NULL) {
            printf("Not enough space\n");
            fclose(f); /* do not leak the output stream on the error path */
            return -1;
        }
        for (unsigned int i = 0; i < N; i++) {
            array[i] = rand();
        }

        /* rand() never returns a negative value, so 0 is a safe initial maximum. */
        int parallel_max = 0;

        clock_gettime(CLOCK_MONOTONIC, &t_start);
        #pragma omp parallel for reduction(max:parallel_max)
        for (unsigned int i = 0; i < N; i++) {
            if (array[i] > parallel_max) {
                parallel_max = array[i];
            }
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);

        /* diff() stores the whole interval, in nanoseconds, in tv_nsec. */
        fprintf(f, "%f\t", diff(t_start, t_end).tv_nsec / 1000000000.0);

        int sequential_max = 0;

        clock_gettime(CLOCK_MONOTONIC, &t_start);
        for (unsigned int i = 0; i < N; i++) {
            if (array[i] > sequential_max) {
                sequential_max = array[i];
            }
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);

        fprintf(f, "%f\n", diff(t_start, t_end).tv_nsec / 1000000000.0);

        /*
         * Using both results keeps the optimizer from discarding a timed
         * loop whose output is never read, and doubles as a sanity check.
         */
        if (parallel_max != sequential_max) {
            fprintf(stderr, "Maximum mismatch for N=%u: parallel %d, sequential %d\n",
                    N, parallel_max, sequential_max);
        }

        free(array);
    }

    /* fclose() flushes buffered data, so a failure here means lost output. */
    if (fclose(f) != 0) {
        printf("Could not write output\n");
        return -1;
    }

    return 0;
}