OpenMP 计算错误

OpenMP computation errors

当我在下面的代码中启用 OpenMP 行时,我并不经常得到正确的解决方案(所以我怀疑并行化有问题)。我把代码翻了一遍又一遍,还是没找到问题出在哪里。

!$OMP PARALLEL SHARED(w, h, u, v, hu, hv, d)                                &
!$OMP& SHARED(nxw, nyw)                                                     &
!$OMP& SHARED(rx, ry, rxg, ryg)                                             &
!$OMP& SHARED(ispans, ispane, jspans, jspane, chunk)                        &
!$OMP& SHARED(a)                                                            &
!$OMP& PRIVATE(i, j, it)                                                    &
!$OMP& PRIVATE(ip1, im1, im2, jp1, jm1, jm2, iu1, jv1)                      &
!$OMP& PRIVATE(ww, hh, uu, vv, hu1, hu2, hv1, hv2, dd, df)                  &
!$OMP& PRIVATE(xvv, xuu, xve, xue, advx, advy)                              &
!$OMP& PRIVATE(c, dwdt_i, dwdt_f, dwdt, ref, coef)                          &
!$OMP& PRIVATE(du, dv, noflux)
ompstart = OMP_GET_WTIME()
do it = 1, itlast
    !$OMP DO SCHEDULE(DYNAMIC, CHUNK)
    do j = 2, nyw-1
        do i = 2, nxw-1
            if (d(i,j) .gt. rpmax) then
                ww = w(i,j,1) - rx*(u(i,j,1) - u(i-1,j,1))                  &
                                - ry*(v(i,j,1) - v(i,j-1,1))
                if (abs(ww) .lt. eps) ww = 0.0
                hh = ww + d(i,j)
                if (hh .ge. gx) then
                    h(i,j,2) = hh
                    w(i,j,2) = ww
                else
                    h(i,j,2) = 0.0
                    w(i,j,2) = -d(i,j)
                end if
            else
                h(i,j,2) = 0.0
                w(i,j,2) = -d(i,j)
            end if
        end do
    end do
    !$OMP END DO

    !$OMP DO SCHEDULE(DYNAMIC, CHUNK)
    do j = 1, nyw
        do i = 1, nxw-1
            hu1 = 0.25*(h(i,j,2) + h(i+1,j,2) + h(i,j,1) + h(i+1,j,1))
            hu2 = 0.50*(h(i,j,2) + h(i+1,j,2))
            if (hu1 .lt. gx) hu1 = 0.0
            if (hu2 .lt. gx) hu2 = 0.0
            hu(i,j,1) = hu1
            hu(i,j,2) = hu2
        end do
    end do
    !$OMP END DO

    !$OMP DO SCHEDULE(DYNAMIC, CHUNK)
    do j = 1, nyw-1
        do i = 1, nxw
            hv1 = 0.25*(h(i,j,2) + h(i,j+1,2) + h(i,j,1) + h(i,j+1,1))
            hv2 = 0.50*(h(i,j,2) + h(i,j+1,2))
            if (hv1 .lt. gx) hv1 = 0.0
            if (hv2 .lt. gx) hv2 = 0.0
            hv(i,j,1) = hv1
            hv(i,j,2) = hv2
        end do
    end do
    !$OMP END DO

    !$OMP DO SCHEDULE(DYNAMIC, CHUNK)
    do i = 1, nxw-1
        ip1 = i+1
        im1 = i-1
        if (im1 .le. 1) im1 = 1
        if (ip1 .ge. nxw-1) ip1 = nxw-1
        do j = 1, nyw
            noflux = 0
            jp1 = j+1
            jm1 = j-1
            jv1 = j
            if (jm1 .le. 1) jm1 = 1
            if (jp1 .ge. nyw) jp1 = nyw
            if (jv1 .ge. nyw-1) jv1 = nyw-1

            du = 0.5*(d(i,j) + d(i+1,j))
            if (d(i,j) .le. rpmax .or. du .le. rpmax) then
                u(i,j,2) = 0.0
                noflux = 1
            else                
                if (h(i,j,2) .gt. gx .and. h(i+1,j,2) .gt. gx) then         
                    dd = hu(i,j,2)
                    df = hu(i,j,1)
                else if (h(i,j,2) .gt. gx .and. h(i+1,j,2) .le. gx          &
                           .and. d(i+1,j) + w(i,j,2) .gt. gx) then
                    dd = 0.5*h(i,j,2)
                    df = dd
                else if (h(i,j,2) .le. gx .and. h(i+1,j,2) .gt. gx          &
                           .and. d(i,j) + w(i+1,j,2) .gt. gx) then
                    dd = 0.5*h(i+1,j,2)
                    df = dd
                else 
                    u(i,j,2) = 0.0
                    noflux = 1
                end if

                if (dd .lt. gx) then
                    u(i,j,2) = 0.0
                    noflux = 1
                end if
            end if

            if (noflux .ne. 1) then
                xvv = 0.25*(v(i,jv1,1) + v(i+1,jv1,1) +                     &
                            v(i,jm1,1) + v(i+1,jm1,1))
                uu = u(i,j,1) - rxg*dd*(w(i+1,j,2) - w(i,j,2))

                if (hu(i,j,1) .ge. gx .and.                                 &
                    (i .gt. ispans .and. i .lt. ispane .and.                &
                    j .gt. jspans .and. j .lt. jspane)) then
                    advx = 0.0
                    advy = 0.0

                    if (u(i,j,1) .lt. zero) then
                        if (hu(ip1,j,1) .lt. gx .or.                        &
                            h(ip1,j,2) .lt. gx) then
                            advx = rx*(-u(i,j,1)**2.0/hu(i,j,1))
                        else
                            advx = rx*(u(ip1,j,1)**2.0/hu(ip1,j,1)          &
                                        - u(i,j,1)**2.0/hu(i,j,1))
                        end if
                    else
                        if (hu(im1,j,1) .lt. gx .or.                        &
                            h(i,j,2) .lt. gx) then
                            advx = rx*(u(i,j,1)**2.0/hu(i,j,1))
                        else
                            advx = rx*(u(i,j,1)**2.0/hu(i,j,1)              &
                                        - u(im1,j,1)**2.0/hu(im1,j,1))
                        end if
                    end if

                    if (xvv .lt. zero) then
                        if (h(i,jp1,2) .lt. gx .or.                         &
                            h(ip1,jp1,2) .lt. gx) then
                            advy = ry*(-u(i,j,1)*xvv/hu(i,j,1))
                        else if (hu(i,jp1,1) .lt. gx) then
                            advy = ry*(-u(i,j,1)*xvv/hu(i,j,1))
                        else
                            xve = 0.25*(v(i,jp1,1) + v(ip1,jp1,1)           &
                                            + v(i,j,1) + v(ip1,j,1))
                            advy = ry*(u(i,jp1,1)*xve/hu(i,jp1,1)           &
                                            - u(i,j,1)*xvv/hu(i,j,1))
                        end if
                    else
                        if (h(i,jm1,2) .lt. gx .or.                         &
                            h(ip1,jm1,2) .lt. gx) then
                            advy = ry*(u(i,j,1)*xvv/hu(i,j,1))
                        else if (hu(i,jm1,1) .lt. gx) then
                            advy = ry*(u(i,j,1)*xvv/hu(i,j,1))
                        else
                            jm2 = j-2
                            if (jm2 .le. 1) jm2 = 1
                            xve = 0.25*(v(i,jm1,1) + v(ip1,jm1,1)           &
                                            + v(i,jm2,1) + v(ip1,jm2,1))
                            advy = ry*(u(i,j,1)*xvv/hu(i,j,1)               &
                                        - u(i,jm1,1)*xve/hu(i,jm1,1))
                        end if
                    end if

                    C = SQRT(GRAV*H(I,J,2))
                    DWDT_F = 0.08*C
                    DWDT_I = 0.65*C
                    DWDT = ABS(W(I,J,2) - W(I,J,1))/DELT
                    IF (DWDT .GT. DWDT_F) THEN
                        REF = (DWDT - DWDT_F)/(DWDT_I - DWDT_F)
                        A = 1.0
                        COEF = EXP(-A*REF)
                        ADVX = COEF*ADVX
                        ADVY = COEF*ADVY
                    END IF
                    uu = uu - advx - advy
                end if
                if (abs(uu) .lt. eps) uu = 0.0
                if (uu .gt. 20.0*dd) uu = 20.0*dd
                if (uu .lt. -20.0*dd) uu = -20.0*dd
                u(i,j,2) = uu
            else
                u(i,j,2) = 0.0
            end if
        end do
    end do
    !$OMP END DO

    !$OMP DO SCHEDULE(DYNAMIC, CHUNK)
    do i = 1, nxw
        ip1 = i+1
        im1 = i-1
        iu1 = i
        if (im1 .le. 1) im1 = 1
        if (ip1 .ge. nxw) ip1 = nxw
        if (iu1 .ge. nxw-1) iu1 = nxw-1
        do j = 1, nyw-1
            noflux = 0
            jp1 = j+1
            jm1 = j-1
            if (jm1 .le. 1) jm1 = 1
            if (jp1 .ge. nyw-1) jp1 = nyw-1

            dv = 0.5*(d(i,j) + d(i,j+1))
            if (d(i,j) .le. rpmax .or. dv .le. rpmax) then
                v(i,j,2) = 0.0
                noflux = 1
            else
                if (h(i,j,2) .gt. gx .and. h(i,j+1,2) .gt. gx) then
                    dd = hv(i,j,2)
                    df = hv(i,j,1)
                else if (h(i,j,2) .gt. gx .and. h(i,j+1,2) .le. gx          &
                            .and. d(i,j+1) + w(i,j,2) .gt. gx) then
                    dd = 0.5*h(i,j,2)
                    df = dd
                else if (h(i,j,2) .le. gx .and. h(i,j+1,2) .gt. gx          &
                            .and. d(i,j) + w(i,j+1,2) .gt. gx) then
                    dd = 0.5*h(i,j+1,2)
                    df = dd
                else
                    v(i,j,2) = 0.0
                    noflux = 1
                end if

                if (dd .lt. gx) then
                    v(i,j,2) = 0.0
                    noflux = 1
                end if
            end if

            if (noflux .ne. 1) then
                xuu = 0.25*(u(iu1,j,1) + u(iu1,jp1,1) +                     &
                            u(im1,j,1) + u(im1,jp1,1))
                vv = v(i,j,1) - ryg*dd*(w(i,j+1,2) - w(i,j,2))

                if (hv(i,j,1) .ge. gx .and.                                 &
                    (i .gt. ispans .and. i .lt. ispane .and.                &
                    j .gt. jspans .and. j .lt. jspane)) then
                    advx = 0.0
                    advy = 0.0

                    if (v(i,j,1) .lt. zero) then
                        if (hv(i,jp1,1) .lt. gx .or.                        &
                            h(i,jp1,2) .lt. gx) then
                            advy = ry*(-v(i,j,1)**2.0/hv(i,j,1))
                        else
                            advy = ry*(v(i,jp1,1)**2.0/hv(i,jp1,1)          &
                                        - v(i,j,1)**2.0/hv(i,j,1))
                        end if
                    else
                        if (hv(i,jm1,1) .lt. gx .or.                        &
                            h(i,j,2) .lt. gx) then
                            advy = ry*(v(i,j,1)**2.0/hv(i,j,1))
                        else
                            advy = ry*(v(i,j,1)**2.0/hv(i,j,1)              &
                                        - v(i,jm1,1)**2.0/hv(i,jm1,1))
                        end if
                    end if

                    if (xuu .lt. zero) then
                        if (h(ip1,j,2) .lt. gx .or.                         &
                            h(ip1,jp1,2) .lt. gx) then
                            advx = rx*(-v(i,j,1)*xuu/hv(i,j,1))
                        else if (hv(ip1,j,1) .lt. gx) then
                            advx = rx*(-v(i,j,1)*xuu/hv(i,j,1))
                        else
                            xue = 0.25*(u(ip1,j,1) + u(ip1,jp1,1)           &
                                            + u(i,j,1) + u(i,jp1,1))
                            advx = rx*(v(ip1,j,1)*xue/hv(ip1,j,1)           &
                                            - v(i,j,1)*xuu/hv(i,j,1))
                        end if
                    else
                        if (h(im1,j,2) .lt. gx .or.                         &
                            h(im1,jp1,2) .lt. gx) then
                            advx = rx*(v(i,j,1)*xuu/hv(i,j,1))
                        else if (hv(im1,j,1) .lt. gx) then
                            advx = rx*(v(i,j,1)*xuu/hv(i,j,1))
                        else
                            im2 = i-2
                            if (im2 .le. 1) im2 = 1
                            xue = 0.25*(u(im1,j,1) + u(im1,jp1,1)           &
                                            + u(im2,j,1) + u(im2,jp1,1))
                            advx = rx*(v(i,j,1)*xuu/hv(i,j,1)               &
                                            - v(im1,j,1)*xue/hv(im1,j,1))
                        end if
                    end if

                    C = SQRT(GRAV*H(I,J,2))
                    DWDT_F = 0.08*C
                    DWDT_I = 0.65*C
                    DWDT = ABS(W(I,J,2) - W(I,J,1))/DELT
                    IF (DWDT .GT. DWDT_F) THEN
                        REF = (DWDT - DWDT_F)/(DWDT_I - DWDT_F)
                        A = 1.0
                        COEF = EXP(-A*REF)
                        ADVX = COEF*ADVX
                        ADVY = COEF*ADVY
                    END IF
                    vv = vv - advx - advy
                end if
                if (abs(vv) .lt. eps) vv = 0.0
                if (vv .gt. 20.0*dd) vv = 20.0*dd
                if (vv .lt. -20.0*dd) vv = -20.0*dd
                v(i,j,2) = vv
            else
                v(i,j,2) = 0.0
            end if
        end do
    end do
    !$OMP END DO

    !$OMP SINGLE
    call tuna_openbc
    !$OMP END SINGLE

    !$OMP SINGLE
    call tuna_update
    !$OMP END SINGLE
end do
ompend = OMP_GET_WTIME()
!$OMP END PARALLEL

以上代码为计算部分(代码较长)。我确保所有循环计数器 it, i, j 和虚拟循环计数器 ip1, im1, jp1, jm1 等都设置为私有。此外,所有虚拟变量 ww, uu, vv 等都设置为私有。我不确定我哪里做错了。我所有的变量和常量都在一个模块中声明,如下所示。

real, parameter :: gx = 1.0e-5
real, parameter :: eps = 1.0e-10
real, parameter :: zero = 0.0
real, parameter :: rpmax = -20.0
real, parameter :: grav = 9.807

real, dimension(:,:,:), allocatable :: w
real, dimension(:,:,:), allocatable :: h
real, dimension(:,:,:), allocatable :: u
real, dimension(:,:,:), allocatable :: v
real, dimension(:,:,:), allocatable :: hu
real, dimension(:,:,:), allocatable :: hv
real, dimension(:,:), allocatable :: d

integer :: nxw, nyw
real :: delx, dely, delt

如有不妥之处请分享。

AFAICS,您的代码暴露了两个奇怪的地方:

  • 变量 a 被声明为 shared 而它看起来应该被声明为 private
  • ompstartompend 这两个变量可能应该在 parallel 区域之外初始化。否则,所有线程都会尝试并发更新它们。

所有其他变量对我来说都很好。此外,我尝试检查 uv 的各种索引之间的潜在依赖关系,但没有发现任何依赖关系。所以也许声明 a private 可能足以修复你的代码。