使用 openmp 和私有子句的梯形规则集成
Trapezoidal rule integration using openmp and private clauses
我正在更改用于串行执行的代码,将其调整为并行执行 (openmp),但我得到的期望结果(pi 值)的近似值很差。我在下面显示了两个代码。
有什么问题吗?
program trap
use omp_lib
implicit none
double precision::suma=0.d0 ! sum is a scalar
double precision:: h,x,lima,limb
integer::n,i, istart, iend, thread_num, total_threads=4, ppt
integer(kind=8):: tic, toc, rate
double precision:: time
double precision, dimension(4):: pi= 0.d0
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; suma=0.0d0; n=10000000
h=(limb-lima)/n
suma=h*(f(lima)+f(limb))*0.5d0 !first and last points
ppt= n/total_threads
!$ call omp_set_num_threads(total_threads)
!$omp parallel private (istart, iend, thread_num, i)
thread_num = omp_get_thread_num()
!$ istart = thread_num*ppt +1
!$ iend = min(thread_num*ppt + ppt, n-1)
do i=istart,iend ! this will control the loop in different images
x=lima+i*h
suma=suma+f(x)
pi(thread_num+1)=suma
enddo
!$omp end parallel
suma=sum(pi)
suma=suma*h
print *,"The value of pi is= ",suma ! print once from the first image
!print*, 'pi=' , pi
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time ', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function f
end program trap
!----------------------------------------------------------------------------------
program trap
implicit none
double precision::sum ! sum is a scalar
double precision:: h,x,lima,limb
integer::n,i
integer(kind=8):: tic, toc, rate
double precision:: time
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; sum=0.0d0; n=10000000
h=(limb-lima)/n
sum=h*(f(lima)+f(limb))*0.5d0 !first and last points
do i=1,n-1 ! this will control the loop in different images
x=lima+i*h
sum=sum+f(x)
enddo
sum=sum*h
print *,"The value of pi is (serial exe)= ",sum ! print once from the first image
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time serial execution', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function f
end program trap
编译使用:
$ gfortran -fopenmp -Wall -Wextra -O2 -Wall -o prog.exe test.f90
$ ./prog.exe
和
$ gfortran -Wall -Wextra -O2 -Wall -o prog.exe testserial.f90
$ ./prog.exe
在串行执行中我得到了 pi (3.1415) 的良好近似值但是使用并行我得到了(我展示了几个并行执行):
The value of pi is= 3.6731101425922810
Time 3.3386986702680588E-002 s
-------------------------------------------------------
The value of pi is= 3.1556004791445953
Time 8.3681479096412659E-002 s
------------------------------------------------------
The value of pi is= 3.2505952856717966
Time 5.1473543047904968E-002 s
你的openmp parallel语句有问题。
您不断累加到变量 suma
。
因此,您需要指定一个 reduction
语句。
此外,您没有将变量 x
指定为私有。
我还更改了您的代码的更多部分
- 您明确告诉每个线程它应该使用哪个索引范围。大多数情况下,编译器可以自己更有效地解决这个问题。为此,我将
parallel
更改为 parallel do
。
- 最好将 openmp 并行区域中的变量属性设置为
default(none)
。您需要明确设置每个变量属性。
program trap
use omp_lib
implicit none
double precision :: suma,h,x,lima,limb, time
integer :: n, i
integer, parameter :: total_threads=5
integer(kind=8) :: tic, toc, rate
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; suma=0.0d0; n=10000000
h=(limb-lima)/n
suma=h*(f(lima)+f(limb))*0.5d0 !first and last points
call omp_set_num_threads(total_threads)
!$omp parallel do default(none) private(i, x) shared(lima, h, n) reduction(+: suma)
do i = 1, n
x=lima+i*h
suma=suma+f(x)
end do
!$omp end parallel do
suma=suma*h
print *,"The value of pi is= ", suma ! print once from the first image
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time ', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function
end program
我正在更改用于串行执行的代码,将其调整为并行执行 (openmp),但我得到的期望结果(pi 值)的近似值很差。我在下面显示了两个代码。
有什么问题吗?
program trap
use omp_lib
implicit none
double precision::suma=0.d0 ! sum is a scalar
double precision:: h,x,lima,limb
integer::n,i, istart, iend, thread_num, total_threads=4, ppt
integer(kind=8):: tic, toc, rate
double precision:: time
double precision, dimension(4):: pi= 0.d0
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; suma=0.0d0; n=10000000
h=(limb-lima)/n
suma=h*(f(lima)+f(limb))*0.5d0 !first and last points
ppt= n/total_threads
!$ call omp_set_num_threads(total_threads)
!$omp parallel private (istart, iend, thread_num, i)
thread_num = omp_get_thread_num()
!$ istart = thread_num*ppt +1
!$ iend = min(thread_num*ppt + ppt, n-1)
do i=istart,iend ! this will control the loop in different images
x=lima+i*h
suma=suma+f(x)
pi(thread_num+1)=suma
enddo
!$omp end parallel
suma=sum(pi)
suma=suma*h
print *,"The value of pi is= ",suma ! print once from the first image
!print*, 'pi=' , pi
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time ', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function f
end program trap
!----------------------------------------------------------------------------------
program trap
implicit none
double precision::sum ! sum is a scalar
double precision:: h,x,lima,limb
integer::n,i
integer(kind=8):: tic, toc, rate
double precision:: time
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; sum=0.0d0; n=10000000
h=(limb-lima)/n
sum=h*(f(lima)+f(limb))*0.5d0 !first and last points
do i=1,n-1 ! this will control the loop in different images
x=lima+i*h
sum=sum+f(x)
enddo
sum=sum*h
print *,"The value of pi is (serial exe)= ",sum ! print once from the first image
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time serial execution', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function f
end program trap
编译使用:
$ gfortran -fopenmp -Wall -Wextra -O2 -Wall -o prog.exe test.f90
$ ./prog.exe
和
$ gfortran -Wall -Wextra -O2 -Wall -o prog.exe testserial.f90
$ ./prog.exe
在串行执行中我得到了 pi (3.1415) 的良好近似值但是使用并行我得到了(我展示了几个并行执行):
The value of pi is= 3.6731101425922810
Time 3.3386986702680588E-002 s
-------------------------------------------------------
The value of pi is= 3.1556004791445953
Time 8.3681479096412659E-002 s
------------------------------------------------------
The value of pi is= 3.2505952856717966
Time 5.1473543047904968E-002 s
你的openmp parallel语句有问题。
您不断累加到变量 suma
。
因此,您需要指定一个 reduction
语句。
此外,您没有将变量 x
指定为私有。
我还更改了您的代码的更多部分
- 您明确告诉每个线程它应该使用哪个索引范围。大多数情况下,编译器可以自己更有效地解决这个问题。为此,我将
parallel
更改为parallel do
。 - 最好将 openmp 并行区域中的变量属性设置为
default(none)
。您需要明确设置每个变量属性。
program trap
use omp_lib
implicit none
double precision :: suma,h,x,lima,limb, time
integer :: n, i
integer, parameter :: total_threads=5
integer(kind=8) :: tic, toc, rate
call system_clock(count_rate = rate)
call system_clock(tic)
lima=0.0d0; limb=1.0d0; suma=0.0d0; n=10000000
h=(limb-lima)/n
suma=h*(f(lima)+f(limb))*0.5d0 !first and last points
call omp_set_num_threads(total_threads)
!$omp parallel do default(none) private(i, x) shared(lima, h, n) reduction(+: suma)
do i = 1, n
x=lima+i*h
suma=suma+f(x)
end do
!$omp end parallel do
suma=suma*h
print *,"The value of pi is= ", suma ! print once from the first image
call system_clock(toc)
time = real(toc-tic)/real(rate)
print*, 'Time ', time, 's'
contains
double precision function f(y)
double precision:: y
f=4.0d0/(1.0d0+y*y)
end function
end program