How to write a function which returns an allocated shared memory array in Fortran?

I want to create a subroutine that stands in for allocating an ALLOCATABLE array and instead returns an MPI shared-memory array.

I have a bunch of code written with MPI in which we use ALLOCATABLE arrays. Many of these arrays are identical from node to node, so it would be better to store them in some kind of shared-memory object. I found an example (MPI Fortran code: how to share data on node via openMP?) that works as standalone code, but when I try to implement it as a subroutine I get a segmentation fault from the C_F_POINTER call.

The driver routine looks like this:

PROGRAM TEST_SUBROUTINE
   ! Libraries
   USE MPI

   IMPLICIT NONE

   ! Variables
   INTEGER :: ier, myid, numprocs
   INTEGER :: myid_shar, numprocs_shar
   INTEGER :: MPI_COMM_SHARMEM, win_a
   DOUBLE PRECISION, POINTER :: A(:)

   ! Code
    CALL MPI_INIT(ier)
    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
    myid_shar=0
    CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
    CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
    CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)

    CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)

    A(myid_shar+1) = myid_shar
    CALL MPI_WIN_FENCE(0, win_a, ier)

    IF (myid == 0) THEN   ! print the shared array (example assumes at least 4 processes on the node)
       PRINT *,A(1)
       PRINT *,A(2)
       PRINT *,A(3)
       PRINT *,A(4)
    END IF

    ! FREE Window
    CALL MPI_WIN_FENCE(0, win_a, ier)
    CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
    CALL MPI_WIN_FREE(win_a,ier)

    ! FREE MPI_COMM_SHARMEM
    CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
    CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)

    ! END MPI
    CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
    CALL MPI_FINALIZE(ier)

END PROGRAM TEST_SUBROUTINE

The subroutine looks like this (note that I've tried various attributes on the array argument without much luck):

SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
    ! Libraries
    USE MPI
    USE ISO_C_BINDING

    IMPLICIT NONE

    ! Arguments
    DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
    INTEGER, INTENT(in) :: n1
    INTEGER, INTENT(in) :: subid
    INTEGER, INTENT(in) :: mymaster
    INTEGER, INTENT(inout) :: share_comm
    INTEGER, INTENT(inout) :: win

    ! Variables
    INTEGER :: disp_unit, ier
    INTEGER :: array_shape(1)
    INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
    TYPE(C_PTR) :: baseptr

    ier = 0
    array_shape(1) = n1
    disp_unit = 8   ! displacement unit in bytes (one DOUBLE PRECISION element)
    window_size = 0_MPI_ADDRESS_KIND
    IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*disp_unit ! window size is in bytes
    CALL MPI_BARRIER(share_comm, ier)
    CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win, ier)
    IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, 0, window_size, disp_unit, baseptr, ier)
    CALL C_F_POINTER(baseptr, array, array_shape)
    CALL MPI_WIN_FENCE(0, win, ier)

    RETURN

END SUBROUTINE mpialloc_1d_dbl

What I want is a subroutine that behaves like a simple ALLOCATE statement, returning the shared-memory pointer plus the window variable for FENCE calls.
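To make the goal concrete, here is a minimal sketch of the intended call pattern (variable names follow the driver above):

! Before: every MPI process allocates its own private copy
DOUBLE PRECISION, ALLOCATABLE :: B(:)
ALLOCATE(B(numprocs_shar))

! After: one shared copy per node, plus a window handle for FENCE calls
DOUBLE PRECISION, POINTER :: A(:)
CALL mpialloc_1d_dbl(A, numprocs_shar, myid_shar, 0, MPI_COMM_SHARMEM, win_a)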

OK, the error here is related to calling a Fortran 90 style subroutine through an implicit interface. See this link for a partial explanation (http://www.cs.rpi.edu/~szymansk/OOF90/bugs.html#8). A dummy argument with the POINTER attribute requires an explicit interface to be visible in the caller; without one, the argument is passed incorrectly and the C_F_POINTER call segfaults. Now, in the example above I had simply put the subroutine at the end of the program file, which has the effect of acting as an interface declaration (at least with the GFORTRAN and INTEL compilers), so my pseudocode worked fine. In my production code, however, the subroutine is part of a general-purpose library that many other pieces of code call. If I link my pseudocode against this library, which contains a copy-paste of the subroutine from the pseudocode, the code crashes just as it does in production. But if I add an INTERFACE block, everything works.

So where does that leave me? Well, for 'reasons' I didn't want to write yet another dedicated module, but it seems I'll have to anyway and put all the various shared-memory subroutines in it; a module procedure gives every caller the explicit interface automatically through USE (see the sketch below). The alternative is to add an INTERFACE block to every single piece of code that calls a shared-memory allocation routine (blagh).
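As a sketch of that module route (the module name shared_mem_mod is my placeholder, not something from the production library), the wrapper would look like this, and callers would simply USE shared_mem_mod instead of writing INTERFACE blocks:

MODULE shared_mem_mod
   ! Module wrapper: any code that does USE shared_mem_mod gets the
   ! explicit interface for mpialloc_1d_dbl automatically.
CONTAINS
   SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
      USE MPI
      USE ISO_C_BINDING
      IMPLICIT NONE
      DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
      INTEGER, INTENT(in) :: n1, subid, mymaster
      INTEGER, INTENT(inout) :: share_comm, win
      INTEGER :: disp_unit, ier
      INTEGER :: array_shape(1)
      INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
      TYPE(C_PTR) :: baseptr
      ier = 0
      array_shape(1) = n1
      disp_unit = 8   ! bytes per DOUBLE PRECISION element
      window_size = 0_MPI_ADDRESS_KIND
      IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*disp_unit ! size in bytes
      CALL MPI_BARRIER(share_comm, ier)
      CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win, ier)
      IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, 0, window_size, disp_unit, baseptr, ier)
      CALL C_F_POINTER(baseptr, array, array_shape)
      CALL MPI_WIN_FENCE(0, win, ier)
   END SUBROUTINE mpialloc_1d_dbl
END MODULE shared_mem_mod

The main program would then drop the INTERFACE block and add USE shared_mem_mod after USE MPI.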

Here is the fixed code using the INTERFACE-block approach. Note that you need to compile the subroutine and the main program separately and link them (for example, with assumed file names: mpif90 -c mpialloc_1d_dbl.f90, then mpif90 test_subroutine.f90 mpialloc_1d_dbl.o) in order to see the effect with and without the INTERFACE block.

Main program:

PROGRAM TEST_SUBROUTINE
   ! Libraries
   USE MPI

   IMPLICIT NONE

   INTERFACE
      SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
      DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
      INTEGER, INTENT(in) :: n1
      INTEGER, INTENT(in) :: subid
      INTEGER, INTENT(in) :: mymaster
      INTEGER, INTENT(inout) :: share_comm
      INTEGER, INTENT(inout) :: win
      END SUBROUTINE mpialloc_1d_dbl
   END INTERFACE

   ! Variables
   INTEGER :: ier, myid, numprocs
   INTEGER :: myid_shar, numprocs_shar
   INTEGER :: MPI_COMM_SHARMEM, win_a
   DOUBLE PRECISION, POINTER :: A(:)

   ! Code
    CALL MPI_INIT(ier)
    CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
    myid_shar=0
    CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
    CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
    CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)

    CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)

    A(myid_shar+1) = myid_shar
    CALL MPI_WIN_FENCE(0, win_a, ier)

    IF (myid == 0) THEN   ! print the shared array (example assumes at least 4 processes on the node)
       PRINT *,A(1)
       PRINT *,A(2)
       PRINT *,A(3)
       PRINT *,A(4)
    END IF

    ! FREE Window
    CALL MPI_WIN_FENCE(0, win_a, ier)
    CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
    CALL MPI_WIN_FREE(win_a,ier)

    ! FREE MPI_COMM_SHARMEM
    CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
    CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)

    ! END MPI
    CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
    CALL MPI_FINALIZE(ier)

END PROGRAM TEST_SUBROUTINE

Subroutine:

SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
    ! Libraries
    USE MPI
    USE ISO_C_BINDING

    IMPLICIT NONE

    ! Arguments
    DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
    INTEGER, INTENT(in) :: n1
    INTEGER, INTENT(in) :: subid
    INTEGER, INTENT(in) :: mymaster
    INTEGER, INTENT(inout) :: share_comm
    INTEGER, INTENT(inout) :: win

    ! Variables
    INTEGER :: disp_unit, ier
    INTEGER :: array_shape(1)
    INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
    TYPE(C_PTR) :: baseptr

    ier = 0
    array_shape(1) = n1
    disp_unit = 8   ! displacement unit in bytes (one DOUBLE PRECISION element)
    window_size = 0_MPI_ADDRESS_KIND
    IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*disp_unit ! window size is in bytes
    CALL MPI_BARRIER(share_comm, ier)
    CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win, ier)
    IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, 0, window_size, disp_unit, baseptr, ier)
    CALL C_F_POINTER(baseptr, array, array_shape)
    CALL MPI_WIN_FENCE(0, win, ier)

    RETURN

END SUBROUTINE mpialloc_1d_dbl