如何在 Fortran 中编写返回已分配共享内存数组的函数?
How to write a function which returns an allocated shared memory array in fortran?
我想创建一个子例程,它接受一个 ALLOCATABLE
数组,并返回一个 MPI 共享内存数组。
我有一堆用 MPI 编写的代码,我们在其中使用 ALLOCATABLE
数组。现在这些数组中的许多在节点之间是相同的,因此最好将它们存储在某种类型的共享内存对象中。现在我发现这个示例 (MPI Fortran code: how to share data on node via openMP?) 可以作为独立代码使用,但是当我尝试将其作为子例程实现时,我从 C_F_POINTER
调用中得到了分段错误。
驱动例程看起来像
PROGRAM TEST_SUBROUTINE
! Driver demonstrating mpialloc_1d_dbl: split MPI_COMM_WORLD into
! node-local (shared-memory) communicators, allocate a node-shared
! array A, let every node-local rank write its own slot, then have
! world rank 0 print the result.
! NOTE(review): mpialloc_1d_dbl is called here WITHOUT an explicit
! interface. Because it has a POINTER dummy argument, an explicit
! interface is required by the Fortran standard; calling it through
! an implicit interface is exactly what causes the segfault discussed
! in the surrounding text.
! Libraries
USE MPI
IMPLICIT NONE
! Variables
INTEGER :: ier, myid, numprocs          ! MPI error code; world rank/size
INTEGER :: myid_shar, numprocs_shar     ! node-local rank/size
INTEGER :: MPI_COMM_SHARMEM, win_a      ! node-local communicator; window handle
DOUBLE PRECISION, POINTER :: A(:)       ! pointer into the shared window
! Code
CALL MPI_INIT(ier)
CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
myid_shar=0
! Group ranks that can share memory (same node) into MPI_COMM_SHARMEM.
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)
! Allocate one shared DOUBLE PRECISION slot per node-local rank.
CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)
A(myid_shar+1) = myid_shar      ! each rank fills its own element
CALL MPI_WIN_FENCE(0, win_a, ier)  ! make all writes visible node-wide
IF (myid == 0) THEN
! NOTE(review): assumes at least 4 ranks on this node — confirm before reuse.
PRINT *,A(1)
PRINT *,A(2)
PRINT *,A(3)
PRINT *,A(4)
END IF
! FREE Window
CALL MPI_WIN_FENCE(0, win_a, ier)
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_WIN_FREE(win_a,ier)
! FREE MPI_COMM_SHARMEM
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)
! END MPI
CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
CALL MPI_FINALIZE(ier)
END PROGRAM TEST_SUBROUTINE
子例程看起来像(注意我试过使用数组变量但运气不佳)
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
! Allocate an MPI-3 shared-memory window holding n1 DOUBLE PRECISION
! values and return a Fortran pointer to it on every rank of share_comm.
! Only rank mymaster allocates memory; all other ranks query the
! master's segment and map the same physical memory.
!
! array      - on exit, points to the shared n1-element array
! n1         - number of DOUBLE PRECISION elements to allocate
! subid      - caller's rank within share_comm
! mymaster   - rank that owns the allocation (typically 0)
! share_comm - node-local (shared-memory) communicator
! win        - on exit, the window handle (needed for FENCE/FREE)
! Libraries
USE MPI
USE ISO_C_BINDING
IMPLICIT NONE
! Arguments
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
! Variables
INTEGER :: disp_unit, ier
INTEGER :: array_shape(1)
INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
TYPE(C_PTR) :: baseptr
ier = 0
array_shape(1) = n1
disp_unit = 8   ! bytes per DOUBLE PRECISION element (default INTEGER per MPI binding)
! MPI_WIN_ALLOCATE_SHARED takes the size in BYTES, so the element count
! must be scaled by disp_unit; non-master ranks request zero bytes and
! share the master's segment instead.
window_size = 0_MPI_ADDRESS_KIND
IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*INT(disp_unit,MPI_ADDRESS_KIND)
CALL MPI_BARRIER(share_comm, ier)
CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win ,ier)
! Non-master ranks look up the base address of the master's segment
! (query mymaster, not a hard-coded rank 0).
IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, mymaster, window_size, disp_unit, baseptr, ier)
! Map the C base address onto a Fortran pointer with the requested shape.
CALL C_F_POINTER(baseptr, array, array_shape)
CALL MPI_WIN_FENCE(0, win, ier)
RETURN
END SUBROUTINE mpialloc_1d_dbl
我想要的是一个子例程,其行为类似于简单的 ALLOCATE
语句,返回共享内存指针和 FENCE 调用的 window 变量。
好的,这里的错误与调用 Fortran 90 风格子例程的方式有关。请参阅此链接以获得部分解释 (http://www.cs.rpi.edu/~szymansk/OOF90/bugs.html#8)。在上面的示例中,我实际上只是把子例程放在了程序文件的末尾,这(至少在 GFORTRAN 和 INTEL 编译器中)起到了隐式接口声明的作用,所以我的示例代码可以正常运行。但在我的生产代码中,该子例程是作为通用库的一部分添加的,被许多其他代码调用。如果我把示例代码链接到这个库(库中的子例程就是示例中子例程的复制粘贴),代码就会像在生产代码中一样崩溃。但是,只要我添加一个 INTERFACE
块,一切就正常了。
那我为什么纠结呢?好吧,出于种种"原因",我本不想再写一个专门的模块,但看来我还是不得不写一个,并把所有各种共享内存分配子例程都放进去。另一种选择是在每一处调用共享内存分配子例程的代码里都添加接口块(唉)。
下面是修复后的代码。注意需要把子例程和主程序分开编译再链接,才能看出有/没有 INTERFACE
块的区别。
主程序:
PROGRAM TEST_SUBROUTINE
! Fixed driver: identical to the failing version except that an
! INTERFACE block supplies the explicit interface that a subroutine
! with a POINTER dummy argument requires. With the interface present,
! the call works even when mpialloc_1d_dbl is compiled separately
! into a library.
! Libraries
USE MPI
IMPLICIT NONE
INTERFACE
! Explicit interface for the separately compiled allocator; required
! because `array` is a POINTER dummy argument.
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
END SUBROUTINE mpialloc_1d_dbl
END INTERFACE
! Variables
INTEGER :: ier, myid, numprocs          ! MPI error code; world rank/size
INTEGER :: myid_shar, numprocs_shar     ! node-local rank/size
INTEGER :: MPI_COMM_SHARMEM, win_a      ! node-local communicator; window handle
DOUBLE PRECISION, POINTER :: A(:)       ! pointer into the shared window
! Code
CALL MPI_INIT(ier)
CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
myid_shar=0
! Group ranks that can share memory (same node) into MPI_COMM_SHARMEM.
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)
! Allocate one shared DOUBLE PRECISION slot per node-local rank.
CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)
A(myid_shar+1) = myid_shar      ! each rank fills its own element
CALL MPI_WIN_FENCE(0, win_a, ier)  ! make all writes visible node-wide
IF (myid == 0) THEN
! NOTE(review): assumes at least 4 ranks on this node — confirm before reuse.
PRINT *,A(1)
PRINT *,A(2)
PRINT *,A(3)
PRINT *,A(4)
END IF
! FREE Window
CALL MPI_WIN_FENCE(0, win_a, ier)
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_WIN_FREE(win_a,ier)
! FREE MPI_COMM_SHARMEM
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)
! END MPI
CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
CALL MPI_FINALIZE(ier)
END PROGRAM TEST_SUBROUTINE
子程序:
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
! Allocate an MPI-3 shared-memory window holding n1 DOUBLE PRECISION
! values and return a Fortran pointer to it on every rank of share_comm.
! Only rank mymaster allocates memory; all other ranks query the
! master's segment and map the same physical memory.
!
! array      - on exit, points to the shared n1-element array
! n1         - number of DOUBLE PRECISION elements to allocate
! subid      - caller's rank within share_comm
! mymaster   - rank that owns the allocation (typically 0)
! share_comm - node-local (shared-memory) communicator
! win        - on exit, the window handle (needed for FENCE/FREE)
! Libraries
USE MPI
USE ISO_C_BINDING
IMPLICIT NONE
! Arguments
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
! Variables
INTEGER :: disp_unit, ier
INTEGER :: array_shape(1)
INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
TYPE(C_PTR) :: baseptr
ier = 0
array_shape(1) = n1
disp_unit = 8   ! bytes per DOUBLE PRECISION element (default INTEGER per MPI binding)
! MPI_WIN_ALLOCATE_SHARED takes the size in BYTES, so the element count
! must be scaled by disp_unit; non-master ranks request zero bytes and
! share the master's segment instead.
window_size = 0_MPI_ADDRESS_KIND
IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*INT(disp_unit,MPI_ADDRESS_KIND)
CALL MPI_BARRIER(share_comm, ier)
CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win ,ier)
! Non-master ranks look up the base address of the master's segment
! (query mymaster, not a hard-coded rank 0).
IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, mymaster, window_size, disp_unit, baseptr, ier)
! Map the C base address onto a Fortran pointer with the requested shape.
CALL C_F_POINTER(baseptr, array, array_shape)
CALL MPI_WIN_FENCE(0, win, ier)
RETURN
END SUBROUTINE mpialloc_1d_dbl
我想创建一个子例程,它接受一个 ALLOCATABLE
数组,并返回一个 MPI 共享内存数组。
我有一堆用 MPI 编写的代码,我们在其中使用 ALLOCATABLE
数组。现在这些数组中的许多在节点之间是相同的,因此最好将它们存储在某种类型的共享内存对象中。现在我发现这个示例 (MPI Fortran code: how to share data on node via openMP?) 可以作为独立代码使用,但是当我尝试将其作为子例程实现时,我从 C_F_POINTER
调用中得到了分段错误。
驱动例程看起来像
PROGRAM TEST_SUBROUTINE
! Driver demonstrating mpialloc_1d_dbl: split MPI_COMM_WORLD into
! node-local (shared-memory) communicators, allocate a node-shared
! array A, let every node-local rank write its own slot, then have
! world rank 0 print the result.
! NOTE(review): mpialloc_1d_dbl is called here WITHOUT an explicit
! interface. Because it has a POINTER dummy argument, an explicit
! interface is required by the Fortran standard; calling it through
! an implicit interface is exactly what causes the segfault discussed
! in the surrounding text.
! Libraries
USE MPI
IMPLICIT NONE
! Variables
INTEGER :: ier, myid, numprocs          ! MPI error code; world rank/size
INTEGER :: myid_shar, numprocs_shar     ! node-local rank/size
INTEGER :: MPI_COMM_SHARMEM, win_a      ! node-local communicator; window handle
DOUBLE PRECISION, POINTER :: A(:)       ! pointer into the shared window
! Code
CALL MPI_INIT(ier)
CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
myid_shar=0
! Group ranks that can share memory (same node) into MPI_COMM_SHARMEM.
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)
! Allocate one shared DOUBLE PRECISION slot per node-local rank.
CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)
A(myid_shar+1) = myid_shar      ! each rank fills its own element
CALL MPI_WIN_FENCE(0, win_a, ier)  ! make all writes visible node-wide
IF (myid == 0) THEN
! NOTE(review): assumes at least 4 ranks on this node — confirm before reuse.
PRINT *,A(1)
PRINT *,A(2)
PRINT *,A(3)
PRINT *,A(4)
END IF
! FREE Window
CALL MPI_WIN_FENCE(0, win_a, ier)
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_WIN_FREE(win_a,ier)
! FREE MPI_COMM_SHARMEM
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)
! END MPI
CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
CALL MPI_FINALIZE(ier)
END PROGRAM TEST_SUBROUTINE
子例程看起来像(注意我试过使用数组变量但运气不佳)
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
! Allocate an MPI-3 shared-memory window holding n1 DOUBLE PRECISION
! values and return a Fortran pointer to it on every rank of share_comm.
! Only rank mymaster allocates memory; all other ranks query the
! master's segment and map the same physical memory.
!
! array      - on exit, points to the shared n1-element array
! n1         - number of DOUBLE PRECISION elements to allocate
! subid      - caller's rank within share_comm
! mymaster   - rank that owns the allocation (typically 0)
! share_comm - node-local (shared-memory) communicator
! win        - on exit, the window handle (needed for FENCE/FREE)
! Libraries
USE MPI
USE ISO_C_BINDING
IMPLICIT NONE
! Arguments
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
! Variables
INTEGER :: disp_unit, ier
INTEGER :: array_shape(1)
INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
TYPE(C_PTR) :: baseptr
ier = 0
array_shape(1) = n1
disp_unit = 8   ! bytes per DOUBLE PRECISION element (default INTEGER per MPI binding)
! MPI_WIN_ALLOCATE_SHARED takes the size in BYTES, so the element count
! must be scaled by disp_unit; non-master ranks request zero bytes and
! share the master's segment instead.
window_size = 0_MPI_ADDRESS_KIND
IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*INT(disp_unit,MPI_ADDRESS_KIND)
CALL MPI_BARRIER(share_comm, ier)
CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win ,ier)
! Non-master ranks look up the base address of the master's segment
! (query mymaster, not a hard-coded rank 0).
IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, mymaster, window_size, disp_unit, baseptr, ier)
! Map the C base address onto a Fortran pointer with the requested shape.
CALL C_F_POINTER(baseptr, array, array_shape)
CALL MPI_WIN_FENCE(0, win, ier)
RETURN
END SUBROUTINE mpialloc_1d_dbl
我想要的是一个子例程,其行为类似于简单的 ALLOCATE
语句,返回共享内存指针和 FENCE 调用的 window 变量。
好的,这里的错误与调用 Fortran 90 风格子例程的方式有关。请参阅此链接以获得部分解释 (http://www.cs.rpi.edu/~szymansk/OOF90/bugs.html#8)。在上面的示例中,我实际上只是把子例程放在了程序文件的末尾,这(至少在 GFORTRAN 和 INTEL 编译器中)起到了隐式接口声明的作用,所以我的示例代码可以正常运行。但在我的生产代码中,该子例程是作为通用库的一部分添加的,被许多其他代码调用。如果我把示例代码链接到这个库(库中的子例程就是示例中子例程的复制粘贴),代码就会像在生产代码中一样崩溃。但是,只要我添加一个 INTERFACE
块,一切就正常了。
那我为什么纠结呢?好吧,出于种种"原因",我本不想再写一个专门的模块,但看来我还是不得不写一个,并把所有各种共享内存分配子例程都放进去。另一种选择是在每一处调用共享内存分配子例程的代码里都添加接口块(唉)。
下面是修复后的代码。注意需要把子例程和主程序分开编译再链接,才能看出有/没有 INTERFACE
块的区别。
主程序:
PROGRAM TEST_SUBROUTINE
! Fixed driver: identical to the failing version except that an
! INTERFACE block supplies the explicit interface that a subroutine
! with a POINTER dummy argument requires. With the interface present,
! the call works even when mpialloc_1d_dbl is compiled separately
! into a library.
! Libraries
USE MPI
IMPLICIT NONE
INTERFACE
! Explicit interface for the separately compiled allocator; required
! because `array` is a POINTER dummy argument.
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
END SUBROUTINE mpialloc_1d_dbl
END INTERFACE
! Variables
INTEGER :: ier, myid, numprocs          ! MPI error code; world rank/size
INTEGER :: myid_shar, numprocs_shar     ! node-local rank/size
INTEGER :: MPI_COMM_SHARMEM, win_a      ! node-local communicator; window handle
DOUBLE PRECISION, POINTER :: A(:)       ! pointer into the shared window
! Code
CALL MPI_INIT(ier)
CALL MPI_COMM_RANK( MPI_COMM_WORLD, myid, ier )
CALL MPI_COMM_SIZE( MPI_COMM_WORLD, numprocs, ier)
myid_shar=0
! Group ranks that can share memory (same node) into MPI_COMM_SHARMEM.
CALL MPI_COMM_SPLIT_TYPE(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_RANK( MPI_COMM_SHARMEM, myid_shar, ier )
CALL MPI_COMM_SIZE( MPI_COMM_SHARMEM, numprocs_shar, ier)
! Allocate one shared DOUBLE PRECISION slot per node-local rank.
CALL mpialloc_1d_dbl(A,numprocs_shar,myid_shar,0,MPI_COMM_SHARMEM,win_a)
A(myid_shar+1) = myid_shar      ! each rank fills its own element
CALL MPI_WIN_FENCE(0, win_a, ier)  ! make all writes visible node-wide
IF (myid == 0) THEN
! NOTE(review): assumes at least 4 ranks on this node — confirm before reuse.
PRINT *,A(1)
PRINT *,A(2)
PRINT *,A(3)
PRINT *,A(4)
END IF
! FREE Window
CALL MPI_WIN_FENCE(0, win_a, ier)
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_WIN_FREE(win_a,ier)
! FREE MPI_COMM_SHARMEM
CALL MPI_BARRIER(MPI_COMM_SHARMEM, ier)
CALL MPI_COMM_FREE(MPI_COMM_SHARMEM,ier)
! END MPI
CALL MPI_BARRIER(MPI_COMM_WORLD, ier)
CALL MPI_FINALIZE(ier)
END PROGRAM TEST_SUBROUTINE
子程序:
SUBROUTINE mpialloc_1d_dbl(array,n1,subid,mymaster,share_comm,win)
! Allocate an MPI-3 shared-memory window holding n1 DOUBLE PRECISION
! values and return a Fortran pointer to it on every rank of share_comm.
! Only rank mymaster allocates memory; all other ranks query the
! master's segment and map the same physical memory.
!
! array      - on exit, points to the shared n1-element array
! n1         - number of DOUBLE PRECISION elements to allocate
! subid      - caller's rank within share_comm
! mymaster   - rank that owns the allocation (typically 0)
! share_comm - node-local (shared-memory) communicator
! win        - on exit, the window handle (needed for FENCE/FREE)
! Libraries
USE MPI
USE ISO_C_BINDING
IMPLICIT NONE
! Arguments
DOUBLE PRECISION, POINTER, INTENT(inout) :: array(:)
INTEGER, INTENT(in) :: n1
INTEGER, INTENT(in) :: subid
INTEGER, INTENT(in) :: mymaster
INTEGER, INTENT(inout) :: share_comm
INTEGER, INTENT(inout) :: win
! Variables
INTEGER :: disp_unit, ier
INTEGER :: array_shape(1)
INTEGER(KIND=MPI_ADDRESS_KIND) :: window_size
TYPE(C_PTR) :: baseptr
ier = 0
array_shape(1) = n1
disp_unit = 8   ! bytes per DOUBLE PRECISION element (default INTEGER per MPI binding)
! MPI_WIN_ALLOCATE_SHARED takes the size in BYTES, so the element count
! must be scaled by disp_unit; non-master ranks request zero bytes and
! share the master's segment instead.
window_size = 0_MPI_ADDRESS_KIND
IF (subid == mymaster) window_size = INT(n1,MPI_ADDRESS_KIND)*INT(disp_unit,MPI_ADDRESS_KIND)
CALL MPI_BARRIER(share_comm, ier)
CALL MPI_WIN_ALLOCATE_SHARED(window_size, disp_unit, MPI_INFO_NULL, share_comm, baseptr, win ,ier)
! Non-master ranks look up the base address of the master's segment
! (query mymaster, not a hard-coded rank 0).
IF (subid /= mymaster) CALL MPI_WIN_SHARED_QUERY(win, mymaster, window_size, disp_unit, baseptr, ier)
! Map the C base address onto a Fortran pointer with the requested shape.
CALL C_F_POINTER(baseptr, array, array_shape)
CALL MPI_WIN_FENCE(0, win, ier)
RETURN
END SUBROUTINE mpialloc_1d_dbl