使用 pragma omp parallel for 计算 pi 的问题
Problem using pragma omp parallel for to compute pi
我编写了以下代码来计算 pi 的值并且它有效:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i, nthreads;
double tdata, pi, sum[NUM_THREADS];
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel
{
int i, id, nthrds;
double x;
id = omp_get_thread_num();
nthrds = omp_get_num_threads();
if (id == 0)
nthreads = nthrds;
for (i = id, sum[id] = 0.0; i < num_steps; i = i + nthrds)
{
x = (i + 0.5) * step;
sum[id] = sum[id] + 4.0 / (1.0 + x * x);
}
}
tdata = omp_get_wtime() - tdata;
for (i = 0, pi = 0.0; i < nthreads; i++)
{
pi = pi + sum[i] * step;
}
printf("pi=%f and it took %f seconds", pi, tdata);
}
然后我了解到我可以使用 #pragma omp parallel for
然后我就不必手动将计算中断到不同的线程。所以我写了这个:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i;
double tdata, pi, x, sum = 0.0;
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel for
{
for (i = 0; i < num_steps; i++)
{
x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
}
tdata = omp_get_wtime() - tdata;
pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}
然而,这不起作用并且输出了错误的圆周率值。我究竟做错了什么?谢谢。
您的代码中有两个竞争条件导致了不正确的结果:
- 评论中已经指出了一个,即变量
sum
的更新(在线程之间共享 ),可以用reduction子句解决;
- 另一个是变量
x
的更新,它也是线程共享的。这种竞争条件可以通过简单地使该变量 private 到线程来解决。
两种可能的解决方案:
使用 OpenMP 的 private 构造函数
#pragma omp parallel for reduction(+:sum) private(x)
for (i = 0; i < num_steps; i++)
{
x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
在范围内声明变量'x'并为
#pragma omp parallel for reduction(+:sum)
for (i = 0; i < num_steps; i++)
{
double x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
最终代码可能如下所示:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
#define NUM_THREADS 16
int main()
{
double sum = 0.0;
omp_set_num_threads(NUM_THREADS);
double step = 1.0 / (double)num_steps;
double tdata = omp_get_wtime();
#pragma omp parallel for reduction(+:sum)
for (int i = 0; i < num_steps; i++)
{
double x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
tdata = omp_get_wtime() - tdata;
double pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}
我编写了以下代码来计算 pi 的值并且它有效:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i, nthreads;
double tdata, pi, sum[NUM_THREADS];
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel
{
int i, id, nthrds;
double x;
id = omp_get_thread_num();
nthrds = omp_get_num_threads();
if (id == 0)
nthreads = nthrds;
for (i = id, sum[id] = 0.0; i < num_steps; i = i + nthrds)
{
x = (i + 0.5) * step;
sum[id] = sum[id] + 4.0 / (1.0 + x * x);
}
}
tdata = omp_get_wtime() - tdata;
for (i = 0, pi = 0.0; i < nthreads; i++)
{
pi = pi + sum[i] * step;
}
printf("pi=%f and it took %f seconds", pi, tdata);
}
然后我了解到我可以使用 #pragma omp parallel for
然后我就不必手动将计算中断到不同的线程。所以我写了这个:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
double step;
#define NUM_THREADS 16
int main()
{
int i;
double tdata, pi, x, sum = 0.0;
omp_set_num_threads(NUM_THREADS);
step = 1.0 / (double)num_steps;
tdata = omp_get_wtime();
#pragma omp parallel for
{
for (i = 0; i < num_steps; i++)
{
x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
}
tdata = omp_get_wtime() - tdata;
pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}
然而,这不起作用并且输出了错误的圆周率值。我究竟做错了什么?谢谢。
您的代码中有两个竞争条件导致了不正确的结果:
- 评论中已经指出了一个,即变量
sum
的更新(在线程之间共享 ),可以用reduction子句解决; - 另一个是变量
x
的更新,它也是线程共享的。这种竞争条件可以通过简单地使该变量 private 到线程来解决。
两种可能的解决方案:
使用 OpenMP 的 private 构造函数
#pragma omp parallel for reduction(+:sum) private(x) for (i = 0; i < num_steps; i++) { x = (i + 0.5) * step; sum = sum + 4.0 / (1.0 + x * x); }
在范围内声明变量'x'并为
#pragma omp parallel for reduction(+:sum) for (i = 0; i < num_steps; i++) { double x = (i + 0.5) * step; sum = sum + 4.0 / (1.0 + x * x); }
最终代码可能如下所示:
#include <omp.h>
#include <stdio.h>
static long num_steps = 1000000;
#define NUM_THREADS 16
int main()
{
double sum = 0.0;
omp_set_num_threads(NUM_THREADS);
double step = 1.0 / (double)num_steps;
double tdata = omp_get_wtime();
#pragma omp parallel for reduction(+:sum)
for (int i = 0; i < num_steps; i++)
{
double x = (i + 0.5) * step;
sum = sum + 4.0 / (1.0 + x * x);
}
tdata = omp_get_wtime() - tdata;
double pi = sum * step;
printf("pi = %f and compute time = %f seconds", pi, tdata);
}