将 do-while 循环替换为 Fortran OpenMP 中的收敛查询
Replacing do-while loops as convergence query in Fortran OpenMP
我已经构建了一个程序,该程序使用差分进化来优化原子在成对势方面的位置,现在想将其与我还很陌生的 OpenMP 并行化。差分进化使用一个整体的 do-while 循环,其中收敛查询被用作退出条件。
这意味着
- 我知道我不能简单地
!$OMP PARALLEL DO
do-while 循环
- 我无法预测循环在什么时候终止
后续迭代也满足条件。以下是
我无与伦比的代码:
implicit none
integer, parameter :: dp = selected_real_kind(15, 307)
real(kind = dp) :: sum_dif, sum, sum_old, final_toler, F, CR, d1, d2, d3, max_step, this_step, scale, randomtest
integer :: pop, dim, arfa, beta, gama, delt, bi, jrand, kf, ki, kj, kg, dim_low, i, g, num_Ti, idum
logical :: monitor_progress, history
integer, dimension(0:) :: index
real(kind=dp), dimension(0:,0:) :: depop, tempop
real(kind=dp), dimension(0:) :: best,temp_best, proj, d_fx, t_fx
real(kind=dp) :: midpoint
logical :: best_DE, use_maxstep
integer*2 :: best_DE_num
sum_dif = 0.0
do while ((abs(sum_old - sum + sum_dif)) > final_toler) !UTILIZE DIFFERENTIAL EVOLUTION until convergence
! ↑ enclosing convergence loop
sum_old = sum
!**initialize DE-Parameters**
if (best_DE) then
F = 0.2_dp + 0.6_dp * ran2(idum)
CR = 0.6_dp + 0.4_dp * ran2(idum)
else
F = 0.4_dp + 0.4_dp * ran2(idum)
CR = 0.7_dp + 0.2_dp * ran2(idum)
end if
!******
!** Create Mutant Population AND Perform Population Update**
do i = 0, pop - 1
do
arfa = RNGgen(pop)
if (arfa /= i) exit
end do
do
beta = RNGgen(pop)
if (beta /= arfa) then
if (beta /= i) exit
end if
end do
do
gama = RNGgen(pop)
if (gama /= beta) then
if (gama /= arfa) then
if (gama /= i) exit
end if
end if
end do
do
delt = RNGgen(pop)
if (delt /= gama) then
if (delt /= beta) then
if (delt /= arfa) then
if (delt /= i) exit !loops will be continued until arfa!=beta!=gama!=delt!=i
end if
end if
end if
end do
jrand = RNGgen(dim + 1)/3
!**Create mutant population per atom not per dim
kf = 0
do while (kf < dim)
randomtest = ran2(idum)
if ((randomtest <= CR).or.(kf == jrand)) then
!**Create hybrid child**
d1 = F * (depop(arfa, kf) - depop(gama, kf) + best_DE_num * (depop(beta, kf) - depop(delt, kf)))
d2 = F * (depop(arfa, kf + 1) - depop(gama, kf + 1) + best_DE_num * (depop(beta, kf + 1) - depop(delt, kf + 1)))
d3 = F * (depop(arfa, kf + 2) - depop(gama, kf + 2) + best_DE_num * (depop(beta, kf + 2) - depop(delt, kf + 2)))
!******
if (use_maxstep) then
this_step = d1 * d1 + d2 * d2 + d3 * d3!norm^2
if (this_step > max_step) then
scale = sqrt(max_step/this_step)
d1 = d1 * scale
d2 = d2 * scale
d3 = d3 * scale
end if
end if !end if use_maxstep
tempop(i, kf) = best_DE_num * best(kf)+(1 - best_DE_num) * depop(beta, kf) + d1
tempop(i, kf + 1) = best_DE_num * best(kf + 1)+(1 - best_DE_num) * depop(beta, kf + 1) + d2
tempop(i, kf + 2) = best_DE_num * best(kf + 2)+(1 - best_DE_num) * depop(beta, kf + 2) + d3
else
tempop(i, kf) = depop(i, kf)
tempop(i, kf + 1) = depop(i, kf + 1)
tempop(i, kf + 2) = depop(i, kf + 2)
end if
kf = kf + 3
end do !end dim do loop
!******
call trans_to_cent(tempop(i,:), midpoint, midpoint, midpoint, 0, dim_low)
!******
!**Evaluate Objective Function for Mutant
tempop(i, dim) = Analytic_U(num_Ti, tempop(i,:))
t_fx(i) = tempop(i, dim) !store tempop fx values
!******
end do !end pop do loop
do i = 0, pop - 1
d_fx(i) = depop(i, dim) !store depop fx values
end do
!******
!**SORTED MUTANT REPLACEMENT**
index = Max_DelF(d_fx, t_fx)
do kg = 0, pop - 1
if (tempop(index(kg), dim) < depop(kg, dim)) then
depop(kg,:) = tempop(index(kg),:)
end if
d_fx(kg) = depop(kg, dim)
end do
!******
!**Obtain total cost function values for tolerance comparison**
sum = 0
do ki = 0, pop - 1
sum = sum + depop(ki, dim)
end do
sum = sum/pop !calculate average energy value
!******
!**Obtain global best**
do kj = 0, pop - 1
if (best(dim) > depop(kj, dim)) then
best = depop(kj,:)
bi = kj
end if !determine "best" vector
end do
!******
if (monitor_progress) then
print*, "Progress (min = 1.0): ", (abs(sum_old - sum + sum_dif))/final_toler
end if
g = g + 1 !increment iteration counter
end do !end generation (while) loop
在这里,顶部的循环就是有问题的循环。当一次迭代与下一次迭代的能量差低于某个阈值时,退出条件就会触发。该代码在该代码中包含其他几个 do 循环,但它们应该可以并行化而不会出现重大问题。
我现在的问题是:是否可以在不放弃大部分性能提升的情况下将封闭循环并行化,或者如果我尝试将其排除在并行区域之外,是否仍然会有提升?
如果我这样做,我也可以排除突变种群变量 arfa, beta, gama, delt
的生成,因为它们的生成必须用 !$OMP CRITICAL
来完成,无论如何都要有相当大的开销,因为它们是用 arfa!=beta!=gama!=delt!
,对吧?我在我的 RNGgen
函数中使用带有随机种子的 random_number
内在函数。我的编译器是 gfortran.
您提供的示例不完整:未编译。我决定建一个小
(完整的)程序,做一些我认为相似的事情。希望对您有所帮助!
程序启动一个并行会话,在其中发现新的种群。
每当发现比迄今为止最好的种群更好的种群时,最好的种群
已更新。当全局计算花费太多迭代时迭代停止
在连续改进之间。
在这个程序中,每个下一个种群都是完全从头开始构建的。
在你的程序中,有更高级的下一代人口。偏袒
'better' 人口超过 'worse' 人口。
在简单的并行化中,每个线程将按照自己的路径进行搜索
space,根据其他线程发现的内容,它不会 'learn'。
要在线程之间交换搜索信息,需要设计一个方法,然后
编程吧。这样做似乎超出了这个问题的范围。
程序来了:
program optimize_population
use Population ! contains the target function
use omp_lib
implicit none
integer, parameter :: really_long = 100000
real(kind=dp) :: best, targetFun
real(kind=dp) :: pop(2,npop), best_pop(2,npop)
integer, allocatable :: iter(:)
integer :: i, nt, it, ierr, last_improvement
call initTarget() ! initialize the target function
! Allocate an iteration count for every thread
nt = omp_get_max_threads()
allocate(iter(nt), stat=ierr)
if (ierr/=0) stop('allocation error')
iter = 0
best = -1e10
last_improvement = 0
!$OMP PARALLEL PRIVATE(it, pop, i, targetFun)
it = omp_get_thread_num()+1 ! thread number
do
iter(it) = iter(it) + 1
! Create a new population
do i = 1,npop
pop(1,i) = rand()
pop(2,i) = rand()
end do
! Evaluate target function
targetFun = popFun(pop)
if (targetFun>best) then
! If this is the best population so far,
! then register this
!$OMP CRITICAL
best_pop = pop
best = targetFun
print '(a,i0,a,i7,a,1p,e13.5)', '[',it,'] iteration ',sum(iter),' Best score until now: ',TargetFun
last_improvement = sum(iter) ! remember the global iteration count for the last time an improved population was found
!$OMP END CRITICAL
end if
! Done when further improvement takes too long
if (last_improvement < sum(iter) - really_long) exit
end do
!$OMP END PARALLEL
! Report the best population found
targetFun = popFun(best_pop)
print '(a,1p,e13.5)', 'Best score found: ',targetFun
print '(a,1p,e13.5)', ' best population found:'
do i = 1,npop
print '(1p,10(a,e13.5))', ' (',best_pop(1,i),',',best_pop(2,i),')'
end do
end program optimize_population
程序需要目标函数,由Population模块提供,如下:
module Population
integer, parameter :: npop = 20, ncenter = 3
integer, parameter :: dp = kind(1d0)
real(kind=dp) :: center(2,ncenter)
contains
subroutine initTarget()
implicit none
integer :: i
do i = 1,ncenter
center(1,i) = rand()
center(2,i) = rand()
end do
print '(a,i0,a)', &
'Looking for a population of ',npop,' points in the unit square,'
print '(a,i0,a)', &
'equally spread out in space, but clustered around the points'
print '(1p,10(a,e13.5))', &
' (',center(1,1),',',center(2,1), &
('), (',center(1,i),',',center(2,i), i=2,ncenter-1), &
') and (',center(1,ncenter),',',center(2,ncenter),')'
end subroutine initTarget
function popFun(pop) result(targetFun)
implicit none
real(kind=dp), intent(in) :: pop(:,:)
real(kind=dp) :: targetFun
integer :: i,j
real(kind=dp) :: sum_center, sum_dist
sum_dist = 0
sum_center = 0
do i = 1,npop
do j = i+1,npop
sum_dist = sum_dist + (pop(1,i)-pop(1,j))**2 + (pop(2,i)-pop(2,j))**2
end do
do j = 1,ncenter
sum_center = sum_center + (pop(1,i)-center(1,j))**2 + (pop(2,i)-center(2,j))**2
end do
end do
targetFun = sum_dist - sum_center
end function popFun
end module Population
我已经构建了一个程序,该程序使用差分进化来优化原子在成对势方面的位置,现在想将其与我还很陌生的 OpenMP 并行化。差分进化使用一个整体的 do-while 循环,其中收敛查询被用作退出条件。
这意味着
- 我知道我不能简单地
!$OMP PARALLEL DO
do-while 循环 - 我无法预测循环在什么时候终止
后续迭代也满足条件。以下是 我无与伦比的代码:
implicit none integer, parameter :: dp = selected_real_kind(15, 307) real(kind = dp) :: sum_dif, sum, sum_old, final_toler, F, CR, d1, d2, d3, max_step, this_step, scale, randomtest integer :: pop, dim, arfa, beta, gama, delt, bi, jrand, kf, ki, kj, kg, dim_low, i, g, num_Ti, idum logical :: monitor_progress, history integer, dimension(0:) :: index real(kind=dp), dimension(0:,0:) :: depop, tempop real(kind=dp), dimension(0:) :: best,temp_best, proj, d_fx, t_fx real(kind=dp) :: midpoint logical :: best_DE, use_maxstep integer*2 :: best_DE_num sum_dif = 0.0 do while ((abs(sum_old - sum + sum_dif)) > final_toler) !UTILIZE DIFFERENTIAL EVOLUTION until convergence ! ↑ enclosing convergence loop sum_old = sum !**initialize DE-Parameters** if (best_DE) then F = 0.2_dp + 0.6_dp * ran2(idum) CR = 0.6_dp + 0.4_dp * ran2(idum) else F = 0.4_dp + 0.4_dp * ran2(idum) CR = 0.7_dp + 0.2_dp * ran2(idum) end if !****** !** Create Mutant Population AND Perform Population Update** do i = 0, pop - 1 do arfa = RNGgen(pop) if (arfa /= i) exit end do do beta = RNGgen(pop) if (beta /= arfa) then if (beta /= i) exit end if end do do gama = RNGgen(pop) if (gama /= beta) then if (gama /= arfa) then if (gama /= i) exit end if end if end do do delt = RNGgen(pop) if (delt /= gama) then if (delt /= beta) then if (delt /= arfa) then if (delt /= i) exit !loops will be continued until arfa!=beta!=gama!=delt!=i end if end if end if end do jrand = RNGgen(dim + 1)/3 !**Create mutant population per atom not per dim kf = 0 do while (kf < dim) randomtest = ran2(idum) if ((randomtest <= CR).or.(kf == jrand)) then !**Create hybrid child** d1 = F * (depop(arfa, kf) - depop(gama, kf) + best_DE_num * (depop(beta, kf) - depop(delt, kf))) d2 = F * (depop(arfa, kf + 1) - depop(gama, kf + 1) + best_DE_num * (depop(beta, kf + 1) - depop(delt, kf + 1))) d3 = F * (depop(arfa, kf + 2) - depop(gama, kf + 2) + best_DE_num * (depop(beta, kf + 2) - depop(delt, kf + 2))) !****** if (use_maxstep) then this_step = d1 * d1 + d2 * d2 + d3 * d3!norm^2 if (this_step > max_step) then scale = sqrt(max_step/this_step) d1 = d1 * scale d2 = d2 * scale d3 = d3 * scale end if end if !end if use_maxstep tempop(i, kf) = best_DE_num * best(kf)+(1 - best_DE_num) * depop(beta, kf) + d1 tempop(i, kf + 1) = best_DE_num * best(kf + 1)+(1 - best_DE_num) * depop(beta, kf + 1) + d2 tempop(i, kf + 2) = best_DE_num * best(kf + 2)+(1 - best_DE_num) * depop(beta, kf + 2) + d3 else tempop(i, kf) = depop(i, kf) tempop(i, kf + 1) = depop(i, kf + 1) tempop(i, kf + 2) = depop(i, kf + 2) end if kf = kf + 3 end do !end dim do loop !****** call trans_to_cent(tempop(i,:), midpoint, midpoint, midpoint, 0, dim_low) !****** !**Evaluate Objective Function for Mutant tempop(i, dim) = Analytic_U(num_Ti, tempop(i,:)) t_fx(i) = tempop(i, dim) !store tempop fx values !****** end do !end pop do loop do i = 0, pop - 1 d_fx(i) = depop(i, dim) !store depop fx values end do !****** !**SORTED MUTANT REPLACEMENT** index = Max_DelF(d_fx, t_fx) do kg = 0, pop - 1 if (tempop(index(kg), dim) < depop(kg, dim)) then depop(kg,:) = tempop(index(kg),:) end if d_fx(kg) = depop(kg, dim) end do !****** !**Obtain total cost function values for tolerance comparison** sum = 0 do ki = 0, pop - 1 sum = sum + depop(ki, dim) end do sum = sum/pop !calculate average energy value !****** !**Obtain global best** do kj = 0, pop - 1 if (best(dim) > depop(kj, dim)) then best = depop(kj,:) bi = kj end if !determine "best" vector end do !****** if (monitor_progress) then print*, "Progress (min = 1.0): ", (abs(sum_old - sum + sum_dif))/final_toler end if g = g + 1 !increment iteration counter end do !end generation (while) loop
在这里,顶部的循环就是有问题的循环。当一次迭代与下一次迭代的能量差低于某个阈值时,退出条件就会触发。该代码在该代码中包含其他几个 do 循环,但它们应该可以并行化而不会出现重大问题。
我现在的问题是:是否可以在不放弃大部分性能提升的情况下将封闭循环并行化,或者如果我尝试将其排除在并行区域之外,是否仍然会有提升?
如果我这样做,我也可以排除突变种群变量 arfa, beta, gama, delt
的生成,因为它们的生成必须用 !$OMP CRITICAL
来完成,无论如何都要有相当大的开销,因为它们是用 arfa!=beta!=gama!=delt!
,对吧?我在我的 RNGgen
函数中使用带有随机种子的 random_number
内在函数。我的编译器是 gfortran.
您提供的示例不完整:未编译。我决定建一个小 (完整的)程序,做一些我认为相似的事情。希望对您有所帮助!
程序启动一个并行会话,在其中发现新的种群。 每当发现比迄今为止最好的种群更好的种群时,最好的种群 已更新。当全局计算花费太多迭代时迭代停止 在连续改进之间。
在这个程序中,每个下一个种群都是完全从头开始构建的。 在你的程序中,有更高级的下一代人口。偏袒 'better' 人口超过 'worse' 人口。
在简单的并行化中,每个线程将按照自己的路径进行搜索 space,根据其他线程发现的内容,它不会 'learn'。 要在线程之间交换搜索信息,需要设计一个方法,然后 编程吧。这样做似乎超出了这个问题的范围。
程序来了:
program optimize_population
use Population ! contains the target function
use omp_lib
implicit none
integer, parameter :: really_long = 100000
real(kind=dp) :: best, targetFun
real(kind=dp) :: pop(2,npop), best_pop(2,npop)
integer, allocatable :: iter(:)
integer :: i, nt, it, ierr, last_improvement
call initTarget() ! initialize the target function
! Allocate an iteration count for every thread
nt = omp_get_max_threads()
allocate(iter(nt), stat=ierr)
if (ierr/=0) stop('allocation error')
iter = 0
best = -1e10
last_improvement = 0
!$OMP PARALLEL PRIVATE(it, pop, i, targetFun)
it = omp_get_thread_num()+1 ! thread number
do
iter(it) = iter(it) + 1
! Create a new population
do i = 1,npop
pop(1,i) = rand()
pop(2,i) = rand()
end do
! Evaluate target function
targetFun = popFun(pop)
if (targetFun>best) then
! If this is the best population so far,
! then register this
!$OMP CRITICAL
best_pop = pop
best = targetFun
print '(a,i0,a,i7,a,1p,e13.5)', '[',it,'] iteration ',sum(iter),' Best score until now: ',TargetFun
last_improvement = sum(iter) ! remember the global iteration count for the last time an improved population was found
!$OMP END CRITICAL
end if
! Done when further improvement takes too long
if (last_improvement < sum(iter) - really_long) exit
end do
!$OMP END PARALLEL
! Report the best population found
targetFun = popFun(best_pop)
print '(a,1p,e13.5)', 'Best score found: ',targetFun
print '(a,1p,e13.5)', ' best population found:'
do i = 1,npop
print '(1p,10(a,e13.5))', ' (',best_pop(1,i),',',best_pop(2,i),')'
end do
end program optimize_population
程序需要目标函数,由Population模块提供,如下:
module Population
integer, parameter :: npop = 20, ncenter = 3
integer, parameter :: dp = kind(1d0)
real(kind=dp) :: center(2,ncenter)
contains
subroutine initTarget()
implicit none
integer :: i
do i = 1,ncenter
center(1,i) = rand()
center(2,i) = rand()
end do
print '(a,i0,a)', &
'Looking for a population of ',npop,' points in the unit square,'
print '(a,i0,a)', &
'equally spread out in space, but clustered around the points'
print '(1p,10(a,e13.5))', &
' (',center(1,1),',',center(2,1), &
('), (',center(1,i),',',center(2,i), i=2,ncenter-1), &
') and (',center(1,ncenter),',',center(2,ncenter),')'
end subroutine initTarget
function popFun(pop) result(targetFun)
implicit none
real(kind=dp), intent(in) :: pop(:,:)
real(kind=dp) :: targetFun
integer :: i,j
real(kind=dp) :: sum_center, sum_dist
sum_dist = 0
sum_center = 0
do i = 1,npop
do j = i+1,npop
sum_dist = sum_dist + (pop(1,i)-pop(1,j))**2 + (pop(2,i)-pop(2,j))**2
end do
do j = 1,ncenter
sum_center = sum_center + (pop(1,i)-center(1,j))**2 + (pop(2,i)-center(2,j))**2
end do
end do
targetFun = sum_dist - sum_center
end function popFun
end module Population