Memory corruption with Fortran 90 deallocate statement
I have built a minimal example that distributes a Fortran derived type with MPI_PACK, MPI_SEND and MPI_RECV, and that also exchanges the boundaries of the distributed pieces to test MPI_SENDRECV on the MPI_PACKED derived type.
The code works, but it shows some strange behaviour, which I attribute to memory corruption, if I put a deallocate statement in the middle of the code, while it works fine with the deallocate statement at the end of the code. The deallocate statements in question are marked with (*) at the left margin of the main program.
The flow of the code is:
1) MPI_PACK the whole derived type.
2) Distribute it with MPI_SEND / MPI_RECV and MPI_UNPACK to recover the derived-type structure.
3) MPI_PACK the boundaries of the distributed local derived types.
4) Exchange the boundaries between adjacent processors with MPI_SENDRECV.
I have included the exact code that I tested, so it compiles as is with mpif90 mod_data_structure.f90 main.f90 -o main and the problem is fully reproducible. The results below are the output of mpirun -np 2 main.
module mod_data_structure
  implicit none

  type type_cell
    real(selected_real_kind(15,307)):: xc(2)
    real(selected_real_kind(15,307)):: values_c(8)
    integer :: flag_boundary
  end type type_cell

  type type_cell_list
    type(type_cell) :: cell(13,13)
  end type type_cell_list

  type type_cell_list_local
    type(type_cell),allocatable :: cell(:,:)
  end type type_cell_list_local

end module mod_data_structure
program main
  use MPI
  use mod_data_structure
  implicit none

  integer,parameter :: nxmax = 9, nymax = 9, nbc = 2
  integer :: i, j, k, ii, jj
  type(type_cell_list) :: A
  type(type_cell_list_local) :: A_local
  type(type_cell) :: acell
  character(len=20) :: write_fmt

  ! MPI variables
  integer :: n_proc, my_id, ierr, source, dest
  integer :: tag, tag_send, tag_recv
  integer :: status ( MPI_STATUS_SIZE ),   &
             status_l ( MPI_STATUS_SIZE ), &
             status_r ( MPI_STATUS_SIZE )
  integer,allocatable :: local_size(:), local_start(:)
  real(selected_real_kind(15,307)):: tmp
  character,allocatable :: buffer(:), buffer_l(:), buffer_lg(:), buffer_r(:), buffer_rg(:)
  integer :: bufsize, bufsize_gc
  integer :: left_proc, right_proc
  integer :: DBL_SIZE, INT_SIZE, position_local
  integer :: position_l, position_r
  integer,allocatable :: position(:)

  call MPI_INIT ( ierr )
  call MPI_COMM_RANK ( MPI_COMM_WORLD, my_id, ierr )
  call MPI_COMM_SIZE ( MPI_COMM_WORLD, n_proc, ierr )

  call MPI_PACK_SIZE(1,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,DBL_SIZE,ierr)
  call MPI_PACK_SIZE(1,MPI_INTEGER         ,MPI_COMM_WORLD,INT_SIZE,ierr)

  ! Construct the derived data types
  if ( my_id .eq. 0 ) then

    do i = 1,nxmax+2*nbc
      do j = 1,nymax+2*nbc
        A%cell(i,j)%flag_boundary = 0
        do k = 1,8
          A%cell(i,j)%values_c(k) = 0.d0
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 0.d0
        enddo
      enddo
    enddo

    do i = 1+nbc,nxmax+nbc
      do j = 1+nbc,nymax+nbc
        ii = i - nbc
        jj = j - nbc
        A%cell(i,j)%flag_boundary = 10*ii + jj
        do k = 1,8
          A%cell(i,j)%values_c(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
      enddo
    enddo

    write(write_fmt, '(a,i,a)') '(',nymax+2*nbc,'i3)'

    write(*,*) 'my_id ', my_id
    write(*,*) 'Total flag_boundary'
    do i = 1,nxmax+2*nbc
      write(*,write_fmt) A%cell(i,:)%flag_boundary
    enddo
    write(*,*) ' '

  endif
  !*** Test MPI_PACK and MPI_SEND / MPI_RECV
  ! Prepare for the distribution
  allocate ( local_size(n_proc), local_start(n_proc), position(n_proc) )
  local_size  = 0
  local_start = 1
  tmp = (nymax+2*nbc) / n_proc

  ! 'local_size'
  do i = 1,n_proc-1
    local_size(i) = ceiling(tmp)
  enddo
  local_size(n_proc) = nymax + 2*nbc - (n_proc - 1)*ceiling(tmp)

  allocate ( A_local%cell(nxmax+2*nbc,local_size(my_id+1)) ) ! ###

  ! 'local_start'
  do i = 1,n_proc-1
    local_start(i+1:n_proc) = local_start(i+1:n_proc) + local_size(i)
  enddo

  ! allocate 'buffer'
  bufsize = maxval(local_size) * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer(bufsize) )

  position = 0
  if ( my_id .eq. 0 ) then

    ! Assign 'A_local' for 'my_id .eq. 0' itself
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        A_local%cell(i,j) = A%cell(i,j)
      enddo
    enddo

    do k = 2, n_proc ! w/o 'my_id .eq. 0' itself
      do j = local_start(k), local_start(k) + local_size(k) - 1
        do i = 1,nxmax+2*nbc
          acell = A%cell(i,j)
          call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
        enddo
      enddo
      dest = k-1 ! ###
      tag  = k-1
      call MPI_SEND (buffer, bufsize, MPI_PACKED, dest, tag, MPI_COMM_WORLD, ierr )
    enddo

  else ! ( my_id .ne. 0 ) then

    source = 0
    tag    = my_id
    call MPI_RECV (buffer, bufsize, MPI_PACKED, source, tag, MPI_COMM_WORLD, status, ierr )

    position_local = 0
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        call MPI_UNPACK (buffer, bufsize, position_local, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)
        A_local%cell(i,j) = acell
      enddo
    enddo

  endif
(*)!deallocate ( buffer )
  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' Before MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
  enddo

  ! Test MPI_SENDRECV
  bufsize_gc = nbc * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer_l(bufsize_gc), buffer_lg(bufsize_gc), buffer_r(bufsize_gc), buffer_rg(bufsize_gc) )

  ! 'left_proc'
  if ( my_id .eq. 0 ) then
    left_proc = MPI_PROC_NULL
  else ! ( my_id .ne. 0 ) then
    left_proc = my_id - 1
  endif

  ! 'right_proc'
  if ( my_id .eq. n_proc-1 ) then
    right_proc = MPI_PROC_NULL
  else ! ( my_id .ne. n_proc - 1 )
    right_proc = my_id + 1
  endif

  ! pack 'buffer_l' & 'buffer_r'
  position_l = 0
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  tag_send = my_id
  tag_recv = right_proc
  call MPI_SENDRECV (buffer_l,  bufsize_gc, MPI_PACKED, left_proc,  0, &
                     buffer_rg, bufsize_gc, MPI_PACKED, right_proc, 0, &
                     MPI_COMM_WORLD, status_l, ierr )

  tag_send = my_id
  tag_recv = left_proc
  call MPI_SENDRECV (buffer_r,  bufsize_gc, MPI_PACKED, right_proc, 0, &
                     buffer_lg, bufsize_gc, MPI_PACKED, left_proc,  0, &
                     MPI_COMM_WORLD, status_r, ierr )

  ! fill left boundary
  position_l = 0
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc, position_l, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)
      A_local%cell(i,j) = acell
    enddo
  enddo

  ! fill right boundary
  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc, position_r, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)
      A_local%cell(i,j) = acell
    enddo
  enddo

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' After MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )
  enddo
(*)deallocate ( buffer )
  deallocate ( buffer_l, buffer_lg, buffer_r, buffer_rg )

  call MPI_FINALIZE ( ierr )

end program
With deallocate ( buffer ) at the end of the code (i.e. the (*) line in the middle left commented out, as shown above), the relevant part of the output looks like this, which is what I expect.
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
0 0 0 0 0 0
0 0 11 12 15 16
0 0 21 22 25 26
0 0 31 32 35 36
0 0 41 42 45 46
0 0 51 52 55 56
0 0 61 62 65 66
0 0 71 72 75 76
0 0 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0
But if I move deallocate ( buffer ) to the middle of the code (the commented (*) line just after the unpacking block), the same part of the output looks like this.
After MPI_SENDRECV
my_id 0
cols 6
0 0 0 0 0 0
****** 0 0 0 0
****** 11 12 15 16
****** 21 22 25 26
****** 31 32 35 36
****** 41 42 45 46
****** 51 52 55 56
****** 61 62 65 66
****** 71 72 75 76
****** 81 82 85 86
0 0 91 92 95 96
0 0 0 0 0 0
0 0 0 0 0 0
If I change the write format to show more digits, the corrupted entries turn out to be 10-digit integers such as 1079533568.
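For reference, the change I mean is just a wider edit descriptor in the same format string (the i12 width below is arbitrary, only wide enough to print the garbage values in full):

      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i12)'   ! i3 -> i12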
I have seen a similar report in Segmentation Fault using MPI_Sendrecv with a 2D contiguous array, but there is no clear answer there as to why deallocating, in the middle of the code, an allocatable variable that is not used anywhere later should cause such a problem.
Where does this problem come from?
I am not sure whether this answers the question directly, but my practical experience with derived types is that the safest way to handle them across different MPI implementations is to avoid the high-level MPI constructs altogether and keep all the derived-type handling on the Fortran side.
For example, I would write pure functions that pack and unpack your data type:
integer, parameter :: TYPE_CELL_BUFSIZE = 11

! real64 comes from the intrinsic module iso_fortran_env
pure function type_cell_pack(this) result(buffer)
   class(type_cell), intent(in) :: this
   real(real64) :: buffer(TYPE_CELL_BUFSIZE)

   buffer(1:8)  = this%values_c
   buffer(9:10) = this%xc

   ! It will be faster to not use a separate MPI command for this only
   buffer(11) = real(this%flag_boundary,real64)
end function type_cell_pack

pure type(type_cell) function type_cell_unpack(buffer) result(this)
   real(real64), intent(in) :: buffer(TYPE_CELL_BUFSIZE)

   this%values_c = buffer(1:8)
   this%xc = buffer(9:10)
   this%flag_boundary = nint(buffer(11))
end function type_cell_unpack
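As a quick sanity check (my own sketch, assuming type_cell, TYPE_CELL_BUFSIZE and the two functions above are collected in a module, here called mod_cell_pack), a pack/unpack round trip reproduces the original cell:

program check_roundtrip
   use, intrinsic :: iso_fortran_env, only: real64
   use mod_cell_pack   ! hypothetical module holding type_cell, TYPE_CELL_BUFSIZE, pack/unpack
   implicit none
   type(type_cell) :: c_in, c_out
   real(real64)    :: buf(TYPE_CELL_BUFSIZE)

   ! fill a cell with recognisable values
   c_in%xc            = [1.0_real64, 2.0_real64]
   c_in%values_c      = 3.0_real64
   c_in%flag_boundary = 42

   buf   = type_cell_pack(c_in)      ! flatten to 11 reals
   c_out = type_cell_unpack(buf)     ! rebuild the derived type

   print *, c_out%flag_boundary                      ! prints 42
   print *, all(c_out%values_c == c_in%values_c)     ! prints T
end program check_roundtrip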
Then write two wrappers for the MPI communication that use only MPI_send and MPI_recv, one for scalar quantities:
subroutine type_cell_send_scalar(this,fromCpu,toCpu,mpiWorld)
   type(type_cell), intent(inout) :: this
   integer, intent(in) :: fromCpu,toCpu,mpiWorld
   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)

   if (cpuid==fromCpu) then
      mpibuf = type_cell_pack(this)
      call mpi_send(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
   elseif (cpuid==toCpu) then
      call mpi_recv(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
      this = type_cell_unpack(mpibuf)
   endif
end subroutine type_cell_send_scalar
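For concreteness, here is one way the elided arguments could be filled in; the rank lookup, the fixed tag of 0 and the status variable are my own assumptions, not part of the sketch above (which assumes a module-level cpuid):

! hypothetical fully spelled-out version of the scalar wrapper
subroutine type_cell_send_scalar_full(this,fromCpu,toCpu,mpiWorld)
   use mpi
   type(type_cell), intent(inout) :: this
   integer, intent(in) :: fromCpu,toCpu,mpiWorld
   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)
   integer :: cpuid, ierr, istat(MPI_STATUS_SIZE)

   call mpi_comm_rank(mpiWorld, cpuid, ierr)   ! replaces the assumed cpuid module variable

   if (cpuid==fromCpu) then
      mpibuf = type_cell_pack(this)
      call mpi_send(mpibuf, TYPE_CELL_BUFSIZE, MPI_DOUBLE_PRECISION, toCpu,   0, mpiWorld, ierr)
   elseif (cpuid==toCpu) then
      call mpi_recv(mpibuf, TYPE_CELL_BUFSIZE, MPI_DOUBLE_PRECISION, fromCpu, 0, mpiWorld, istat, ierr)
      this = type_cell_unpack(mpibuf)
   endif
end subroutine type_cell_send_scalar_full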
And one for array quantities:
subroutine type_cell_send_array(these,fromCpu,toCpu,mpiWorld)
   type(type_cell), intent(inout) :: these(:)
   integer, intent(in) :: fromCpu,toCpu,mpiWorld
   integer :: i,ncell,bufsize
   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE*size(these))

   ncell   = size(these)
   bufsize = ncell*TYPE_CELL_BUFSIZE

   if (cpuid==fromCpu) then
      do i=1,ncell
         mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE) = type_cell_pack(these(i))
      end do
      call mpi_send(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
   elseif (cpuid==toCpu) then
      call mpi_recv(bufsize,mpibuf,...,MPI_DOUBLE_PRECISION,...)
      do i=1,ncell
         these(i) = type_cell_unpack(mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE))
      end do
   endif
end subroutine type_cell_send_array
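A usage sketch of the array wrapper, under my assumption that every rank calls it with an array of the same size (the buffer length is derived from size(these)); for example, shipping the columns of A%cell from rank 0 to rank 1 one at a time:

! hypothetical call site: all ranks participate, rank 0 packs and sends, rank 1 receives and unpacks
do j = 1, size(A%cell,2)
   call type_cell_send_array(A%cell(:,j), 0, 1, MPI_COMM_WORLD)
end do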