Segmentation fault in mpi_gather using Fortran derived datatypes
I've been trying to write a program that computes millions of dipole-dipole interaction tensors as well as their derivatives. Because these tensors are trivially parallelizable, and often degenerate, I decided to construct a lookup table (LUT) and distribute the work. Ultimately, they will all be combined into one big matrix and diagonalized (I will eventually use scalapack for this; for now, the diag fits on nersc's nodes). To keep track of all the indices in fortran, I built a couple of derived data types.
type dtlrdr_lut
    sequence
    integer p
    integer q
    integer s
    integer i
    integer ind
    real(dp), dimension(3,3) :: dtlrdr
end type dtlrdr_lut

type dtlrdh_lut
    sequence
    integer p
    integer q
    integer ind
    real(dp), dimension(3, 3) :: TLR
    real(dp), dimension(3, 3, 3, 3) :: dTLRdh
end type dtlrdh_lut
In the subroutine where I want to parallelize all of this, I have:
type(dtlrdr_lut), dimension(:), allocatable :: my_dtlrdr, collected_dtlrdr
type(dtlrdh_lut), dimension(:), allocatable :: my_dtlrdh, collected_dtlrdh
integer :: dh_dtype, dr_dtype, dh_types(5), dr_types(6), dh_blocks(5), dr_blocks(6)
INTEGER(KIND=MPI_ADDRESS_KIND) :: dh_offsets(5), dr_offsets(6)
if(.not.allocated(my_dtlrdh)) allocate(my_dtlrdh(my_num_pairs))
if(.not.allocated(my_dtlrdr)) allocate(my_dtlrdr(my_num_pairs*3*nat))
if(me_image.eq.root_image) then
    if(.not.allocated(collected_dtlrdh)) allocate(collected_dtlrdh(num_pairs))
    if(.not.allocated(collected_dtlrdr)) allocate(collected_dtlrdr(num_pairs*3*nat))
end if
call mpi_get_address(my_dtlrdr(1)%p, dr_offsets(1), ierr)
call mpi_get_address(my_dtlrdr(1)%q, dr_offsets(2), ierr)
call mpi_get_address(my_dtlrdr(1)%s, dr_offsets(3), ierr)
call mpi_get_address(my_dtlrdr(1)%i, dr_offsets(4), ierr)
call mpi_get_address(my_dtlrdr(1)%ind, dr_offsets(5), ierr)
call mpi_get_address(my_dtlrdr(1)%dtlrdr, dr_offsets(6), ierr)
do i = 2, size(dr_offsets)
    dr_offsets(i) = dr_offsets(i) - dr_offsets(1)
end do
dr_offsets(1) = 0
dr_types = (/MPI_INTEGER, MPI_INTEGER, MPI_INTEGER, MPI_INTEGER, MPI_INTEGER, MPI_DOUBLE_PRECISION/)
dr_blocks = (/1, 1, 1, 1, 1, 3*3/)
call mpi_type_struct(6, dr_blocks, dr_offsets, dr_types, dr_dtype, ierr)
call mpi_type_commit(dr_dtype, ierr)
call mpi_get_address(my_dtlrdh(1)%p, dh_offsets(1), ierr)
call mpi_get_address(my_dtlrdh(1)%q, dh_offsets(2), ierr)
call mpi_get_address(my_dtlrdh(1)%ind, dh_offsets(3), ierr)
call mpi_get_address(my_dtlrdh(1)%TLR, dh_offsets(4), ierr)
call mpi_get_address(my_dtlrdh(1)%dTLRdh, dh_offsets(5), ierr)
do i = 2, size(dh_offsets)
    dh_offsets(i) = dh_offsets(i) - dh_offsets(1)
end do
dh_offsets(1) = 0
dh_types = (/MPI_INTEGER, MPI_INTEGER, MPI_INTEGER, MPI_DOUBLE_PRECISION, MPI_DOUBLE_PRECISION/)
dh_blocks = (/1, 1, 1, 3*3, 3*3*3*3/)
call mpi_type_struct(5, dh_blocks, dh_offsets, dh_types, dh_dtype, ierr)
call mpi_type_commit(dh_dtype, ierr)
call mpi_gather(my_dtlrdh, my_num_pairs, dh_dtype,       &
                collected_dtlrdh, num_pairs, dh_dtype,   &
                root_image, intra_image_comm)
call mp_barrier(intra_image_comm)
call mpi_gather(my_dtlrdr, my_num_pairs*3*nat, dr_dtype,     &
                collected_dtlrdr, num_pairs*3*nat, dr_dtype, &
                root_image, intra_image_comm)
What was the result of the code? Well, the root process got through the gather and the barrier, and then seg faulted:
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
#0 0x10bac04f2
#1 0x10bac0cae
#2 0x7fff8d7c1f19
In this simulation, on process 0:
size(my_dtlrdh) = 97
size(collected_dtlrdh) = 194
size(my_dtlrdr) = 873
size(collected_dtlrdr) = 1746
and on process 1:
size(my_dtlrdh) = 97
size(collected_dtlrdh) = 3
size(my_dtlrdr) = 873
size(collected_dtlrdr) = 1650521
When I print the offsets, blocks, etc. from process 0, I get:
printing dr vars 0
dr_blocks = 1 1 1 1 1 9
dr_offsets = 0 4 8 12 16 24
dr_types = 7 7 7 7 7 17
dr_dtype = 73
printing dh vars 0
dr_blocks = 1 1 1 9 81
dr_offsets = 0 4 8 16 88
dr_types = 7 7 7 17 17
dr_dtype = 74
and for process 1, I get:
printing dr vars 1
dr_blocks = 1 1 1 1 1 9
dr_offsets = 0 4 8 12 16 24
dr_types = 7 7 7 7 7 17
dr_dtype = 73
printing dh vars 1
dr_blocks = 1 1 1 9 81
dr_offsets = 0 4 8 16 88
dr_types = 7 7 7 17 17
dr_dtype = 74
The random size of dtlrdr on proc 1 shouldn't matter, though, since it isn't actually receiving anything. I can't seem to figure out what is going on, or why process 1 can't get through the gather without an invalid memory reference. Any ideas? Let me know if you need more information from me.
You forgot the error status flag (i.e. the last argument, ierr) in the last three subroutine calls you shared.
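A minimal sketch of what the corrected calls might look like, assuming the declarations and the committed datatypes from the question are unchanged. Two things here go beyond the original code: the mp_barrier wrapper is shown as a plain mpi_barrier purely for illustration, and the receive count is the per-rank my_num_pairs rather than the total num_pairs, because mpi_gather expects the count received from each process (this assumes every rank holds the same number of elements; otherwise mpi_gatherv would be needed):

! ierr added as the final argument to every MPI call
call mpi_gather(my_dtlrdh, my_num_pairs, dh_dtype,        &
                collected_dtlrdh, my_num_pairs, dh_dtype, &
                root_image, intra_image_comm, ierr)
call mpi_barrier(intra_image_comm, ierr)   ! plain MPI call shown in place of the mp_barrier wrapper
call mpi_gather(my_dtlrdr, my_num_pairs*3*nat, dr_dtype,        &
                collected_dtlrdr, my_num_pairs*3*nat, dr_dtype, &
                root_image, intra_image_comm, ierr)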
I bet you are using the Fortran header file mpif.h rather than the mpi module. If you used the latter, the number of arguments would be checked automatically and you would get the error message
"There is no matching specific subroutine for this generic subroutine call."
because of the incorrect number of arguments.
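For that second point, the switch is a one-line change at the top of the subroutine (or its enclosing module), assuming nothing else in the file still relies on mpif.h:

use mpi    ! replaces: include 'mpif.h'

With the module in place, the eight-argument mpi_gather calls above are rejected at compile time with the quoted "no matching specific subroutine" message, instead of the call going through with a missing argument and crashing at run time.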