OpenMP:如何保护阵列免受竞争条件的影响
OpenMP: how to protect an array from race condition
这是问题 36182486、41421437 和其他几个问题的跟进。我想通过使用多个处理器并行处理单个元素来加快 FEM 计算的偏度和质量矩阵的组装。这个小 MWE 显示了操作的胆量。
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90
Program FEMassembly
use, intrinsic :: iso_c_binding
implicit none
real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,&
&2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element
integer (c_int) :: ke,ne=4,kx,nx=6,nodes(3)
real (c_double) :: L(6,6)
integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/))
!! first, no OMP
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
L(nodes,nodes)=L(nodes,nodes)+arrayM
end do
print *,'L no OMP'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
L=0
!$omp parallel do private (nodes)
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
!! !$omp atomic
L(nodes,nodes)=L(nodes,nodes)+arrayM
!! !$omp end atomic
end do
!$omp end parallel do
print *,'L with OMP and race'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
End Program FEMassembly
注释掉原子指令后,数组 L 包含几个错误值,可能是因为我试图通过原子指令避免竞争条件。结果是:
L no OMP
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 4. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. -0. 1. 2.
L with OMP and race
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 2. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. 0. 1. 2.
如果 "atomic" 指令未注释,编译器 return 错误:
错误:!$OMP ATOMIC 语句必须在 (1) 处设置内部类型的标量变量
其中 (1) 指向 L(nodes,nodes) 行中的 arrayM.....
我希望实现的是让每个元素(这里是微不足道的 arrayM)的耗时贡献并行发生,但是由于多个线程处理同一个矩阵元素,所以必须做一些事情才能求和以有序的方式。任何人都可以建议一种方法吗?
在 Fortran 中,最简单的方法是使用归约。这是因为 OpenMP for Fortran 支持对数组进行归约。以下是我认为您正在尝试做的事情,但请谨慎对待,因为
- 您没有提供正确的输出,因此很难测试
这么小的数组有时很难找到竞争条件
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90
Program FEMassembly
use, intrinsic :: iso_c_binding
Use omp_lib, Only : omp_get_num_threads
implicit none
real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,&
&2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element
integer (c_int) :: ke,ne=4,nodes(3)
real (c_double) :: L(6,6)
integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/))
! Not declared in original program
Integer :: nx, kx
! Not set in original program
nx = Size( L, Dim = 1 )
!$omp parallel default( none ) private ( ke, nodes ) shared( ne, t, L, arrayM )
!$omp single
Write( *, * ) 'Working on ', omp_get_num_threads(), ' threads'
!$omp end single
!$omp do reduction( +:L )
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
L(nodes,nodes)=L(nodes,nodes)+arrayM
end do
!$omp end do
!$omp end parallel
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
End Program FEMassembly
这是问题 36182486、41421437 和其他几个问题的跟进。我想通过使用多个处理器并行处理单个元素来加快 FEM 计算的偏度和质量矩阵的组装。这个小 MWE 显示了操作的胆量。
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90
Program FEMassembly
use, intrinsic :: iso_c_binding
implicit none
real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,&
&2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element
integer (c_int) :: ke,ne=4,kx,nx=6,nodes(3)
real (c_double) :: L(6,6)
integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/))
!! first, no OMP
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
L(nodes,nodes)=L(nodes,nodes)+arrayM
end do
print *,'L no OMP'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
L=0
!$omp parallel do private (nodes)
do ke=1,ne ! for each triangular element
nodes=t(ke,:)
!! !$omp atomic
L(nodes,nodes)=L(nodes,nodes)+arrayM
!! !$omp end atomic
end do
!$omp end parallel do
print *,'L with OMP and race'
write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx)
End Program FEMassembly
注释掉原子指令后,数组 L 包含几个错误值,可能是因为我试图通过原子指令避免竞争条件。结果是:
L no OMP
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 4. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. -0. 1. 2.
L with OMP and race
2. 1. 0. 1. 0. 0.
1. 6. 1. 2. 2. 0.
0. 1. 2. 0. 2. 1.
1. 2. 0. 4. 1. 0.
0. 2. 2. 1. 6. 1.
0. 0. 1. 0. 1. 2.
如果 "atomic" 指令未注释,编译器 return 错误: 错误:!$OMP ATOMIC 语句必须在 (1) 处设置内部类型的标量变量 其中 (1) 指向 L(nodes,nodes) 行中的 arrayM.....
我希望实现的是让每个元素(这里是微不足道的 arrayM)的耗时贡献并行发生,但是由于多个线程处理同一个矩阵元素,所以必须做一些事情才能求和以有序的方式。任何人都可以建议一种方法吗?
在 Fortran 中,最简单的方法是使用归约。这是因为 OpenMP for Fortran 支持对数组进行归约。以下是我认为您正在尝试做的事情,但请谨慎对待,因为
- 您没有提供正确的输出,因此很难测试
这么小的数组有时很难找到竞争条件
!! compile with gfortran -fopenmp -o FEMassembly FEMassembly.f90 Program FEMassembly use, intrinsic :: iso_c_binding Use omp_lib, Only : omp_get_num_threads implicit none real (c_double) :: arrayM(3,3)=reshape((/2.d0,1.d0,1.d0,1.d0,& &2.d0,1.d0,1.d0,1.d0,2.d0/),(/3,3/)) ! contrib from one element integer (c_int) :: ke,ne=4,nodes(3) real (c_double) :: L(6,6) integer (c_int) :: t(4,3)=reshape((/1,2,5,6,2,3,4,5,4,5,2,3/),(/4,3/)) ! Not declared in original program Integer :: nx, kx ! Not set in original program nx = Size( L, Dim = 1 ) !$omp parallel default( none ) private ( ke, nodes ) shared( ne, t, L, arrayM ) !$omp single Write( *, * ) 'Working on ', omp_get_num_threads(), ' threads' !$omp end single !$omp do reduction( +:L ) do ke=1,ne ! for each triangular element nodes=t(ke,:) L(nodes,nodes)=L(nodes,nodes)+arrayM end do !$omp end do !$omp end parallel write(*,fmt="(6(1x,f3.0))")(L(kx,1:6),kx=1,nx) End Program FEMassembly