Memory corruption with fortran90 deallocate statement

I have built a minimal example that distributes a Fortran derived type with MPI_PACK, MPI_SEND and MPI_RECV, and that also exchanges its boundaries to test MPI_SENDRECV with MPI_PACKED derived types.

The code works, but it shows strange behaviour that I attribute to some memory corruption: if I place a deallocate statement in the middle of the code the output is wrong, while the code runs fine with the same deallocate statement at the end of the code. The deallocate statements in question are marked with (*) on the left side of the main program.

The flow of the code is:

1) MPI_PACK the whole derived type.

2) Distribute it with MPI_SEND / MPI_RECV and MPI_UNPACK to recover the derived-type structure.

3) MPI_PACK the boundaries of the distributed local derived types.

4) Exchange the boundaries between neighbouring processors with MPI_SENDRECV.
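
Schematically, the per-cell pack/unpack cycle of steps 1) and 2) looks like the sketch below. This is a condensed example of my own (it reuses the mod_data_structure module from the full code further down and packs/unpacks a single cell on one rank, just to show the call sequence and the MPI_PACK_SIZE-based buffer sizing):

program pack_roundtrip
  use MPI
  use mod_data_structure
  implicit none
  type(type_cell)        :: a, b
  character, allocatable :: buf(:)
  integer                :: bufsize, dbl_size, int_size, pos, ierr

  call MPI_INIT ( ierr )
  ! packed size of 10 doubles (xc(2) + values_c(8)) plus 1 integer
  call MPI_PACK_SIZE(10, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, dbl_size, ierr)
  call MPI_PACK_SIZE( 1, MPI_INTEGER         , MPI_COMM_WORLD, int_size, ierr)
  bufsize = dbl_size + int_size
  allocate ( buf(bufsize) )

  a%xc = 1.d0 ; a%values_c = 2.d0 ; a%flag_boundary = 11

  pos = 0   ! pack: 'pos' advances as each component is appended
  call MPI_PACK(a%xc,            2, MPI_DOUBLE_PRECISION, buf, bufsize, pos, MPI_COMM_WORLD, ierr)
  call MPI_PACK(a%values_c,      8, MPI_DOUBLE_PRECISION, buf, bufsize, pos, MPI_COMM_WORLD, ierr)
  call MPI_PACK(a%flag_boundary, 1, MPI_INTEGER         , buf, bufsize, pos, MPI_COMM_WORLD, ierr)

  pos = 0   ! unpack in exactly the same order
  call MPI_UNPACK(buf, bufsize, pos, b%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
  call MPI_UNPACK(buf, bufsize, pos, b%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
  call MPI_UNPACK(buf, bufsize, pos, b%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

  write(*,*) b%flag_boundary   ! should print 11
  deallocate ( buf )
  call MPI_FINALIZE ( ierr )
end program pack_roundtrip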

I have included the exact code I tested, so it compiles as-is with mpif90 mod_data_structure.f90 main.f90 -o main and the problem is fully reproducible. The results below are the output of mpirun -np 2 main.

module mod_data_structure
  implicit none

  type type_cell
    real(selected_real_kind(15,307)):: xc(2)
    real(selected_real_kind(15,307)):: values_c(8)
    integer                      :: flag_boundary
  end type type_cell

  type type_cell_list
    type(type_cell)              :: cell(13,13)
  end type type_cell_list

  type type_cell_list_local
    type(type_cell),allocatable  :: cell(:,:)
  end type type_cell_list_local

end module mod_data_structure
program main
  use MPI
  use mod_data_structure
  implicit none

  integer,parameter          :: nxmax = 9, nymax = 9, nbc = 2
  integer                    :: i, j, k, ii, jj
  type(type_cell_list)       :: A
  type(type_cell_list_local) :: A_local
  type(type_cell)            :: acell
  character(len=20)          :: write_fmt

  ! MPI variables
  integer                    :: n_proc, my_id, ierr, source, dest
  integer                    :: tag, tag_send, tag_recv
  integer                    :: status ( MPI_STATUS_SIZE ), &
                                status_l ( MPI_STATUS_SIZE ), &
                                status_r ( MPI_STATUS_SIZE )
  integer,allocatable        :: local_size(:), local_start(:)
  real(selected_real_kind(15,307)):: tmp
  character,allocatable      :: buffer(:), buffer_l(:), buffer_lg(:), buffer_r(:), buffer_rg(:) 
  integer                    :: bufsize, bufsize_gc
  integer                    :: left_proc, right_proc
  integer                    :: DBL_SIZE, INT_SIZE, position_local
  integer                    :: position_l, position_r
  integer,allocatable        :: position(:)

  call MPI_INIT ( ierr )
  call MPI_COMM_RANK ( MPI_COMM_WORLD, my_id,  ierr )
  call MPI_COMM_SIZE ( MPI_COMM_WORLD, n_proc, ierr )

  call MPI_PACK_SIZE(1,MPI_DOUBLE_PRECISION,MPI_COMM_WORLD,DBL_SIZE,ierr)
  call MPI_PACK_SIZE(1,MPI_INTEGER         ,MPI_COMM_WORLD,INT_SIZE,ierr)

  ! Construct the derived data types
  if ( my_id .eq. 0 ) then

    do i = 1,nxmax+2*nbc
      do j = 1,nymax+2*nbc
        A%cell(i,j)%flag_boundary = 0
        do k =  1,8
          A%cell(i,j)%values_c(k) = 0.d0
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 0.d0
        enddo
      enddo
    enddo

    do i = 1+nbc,nxmax+nbc
      do j = 1+nbc,nymax+nbc
        ii = i - nbc
        jj = j - nbc
        A%cell(i,j)%flag_boundary = 10*ii + jj
        do k = 1,8
          A%cell(i,j)%values_c(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
        do k = 1,2
          A%cell(i,j)%xc(k) = 1.d1*ii + jj + 0.1d0*k
        enddo
      enddo
    enddo

    write(write_fmt, '(a,i,a)') '(',nymax+2*nbc,'i3)'
    write(*,*) 'my_id ', my_id
    write(*,*) 'Total flag_boundary'
    do i = 1,nxmax+2*nbc
      write(*,write_fmt) A%cell(i,:)%flag_boundary
    enddo
    write(*,*) ' '

  endif

  !*** Test MPI_PACK and MPI_SEND / MPI_RECV
  ! Prepare for the distribution
  allocate ( local_size(n_proc), local_start(n_proc), position(n_proc) )
  local_size  = 0
  local_start = 1

  tmp  = (nymax+2*nbc) / n_proc
  ! 'local_size'
  do i = 1,n_proc-1
    local_size(i) = ceiling(tmp)
  enddo
  local_size(n_proc) = nymax + 2*nbc - (n_proc - 1)*ceiling(tmp)

  allocate ( A_local%cell(nxmax+2*nbc,local_size(my_id+1)) )  ! ###

  ! 'local_start'
  do i = 1,n_proc-1
    local_start(i+1:n_proc) = local_start(i+1:n_proc) + local_size(i)
  enddo

  ! allocate 'buffer'
  bufsize = maxval(local_size) * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer(bufsize) )

  position = 0
  if ( my_id .eq. 0 ) then

    ! Assign 'A_local' for 'my_id .eq. 0' itself
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        A_local%cell(i,j) = A%cell(i,j)
      enddo
    enddo

    do k = 2, n_proc ! w/o 'my_id .eq. 0' itself
      do j = local_start(k), local_start(k) + local_size(k) - 1
        do i = 1,nxmax+2*nbc
          acell = A%cell(i,j)
          call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
          call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer, bufsize, position(k), MPI_COMM_WORLD, ierr)
        enddo
      enddo

      dest = k-1 ! ###
      tag  = k-1
      call MPI_SEND (buffer, bufsize, MPI_PACKED, dest, tag, MPI_COMM_WORLD, ierr )

    enddo

  else ! ( my_id .ne. 0 ) then

    source = 0
    tag    = my_id
    call MPI_RECV (buffer, bufsize, MPI_PACKED, source, tag, MPI_COMM_WORLD, status, ierr )

    position_local = 0
    do j = 1, local_size(my_id+1)
      do i = 1, nxmax+2*nbc
        call MPI_UNPACK (buffer, bufsize, position_local, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
        call MPI_UNPACK (buffer, bufsize, position_local, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

        A_local%cell(i,j) = acell 
      enddo
    enddo

  endif

(*)!deallocate ( buffer ) 

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' Before MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols  ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )    
  enddo

  ! Test MPI_SENDRECV 
  bufsize_gc = nbc * ( nxmax + 2*nbc ) * ( (8+2)*DBL_SIZE + (1)*INT_SIZE )
  allocate ( buffer_l(bufsize_gc), buffer_lg(bufsize_gc), buffer_r(bufsize_gc), buffer_rg(bufsize_gc) )

  ! 'left_proc'
  if ( my_id .eq. 0 ) then
    left_proc = MPI_PROC_NULL
  else ! ( my_id .ne. 0 ) then
    left_proc = my_id - 1
  endif

  ! 'right_proc'
  if ( my_id .eq. n_proc-1 ) then
    right_proc = MPI_PROC_NULL
  else ! ( my_id .ne. n_proc - 1 )
    right_proc = my_id + 1
  endif

  ! pack 'buffer_l' & 'buffer_r'
  position_l = 0
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_l, bufsize_gc, position_l, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      acell = A_local%cell(i,j)
      call MPI_PACK(acell%xc,            2, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%values_c,      8, MPI_DOUBLE_PRECISION, buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
      call MPI_PACK(acell%flag_boundary, 1, MPI_INTEGER         , buffer_r, bufsize_gc, position_r, MPI_COMM_WORLD, ierr)
    enddo
  enddo

  tag_send = my_id
  tag_recv = right_proc
  call MPI_SENDRECV (buffer_l,  bufsize_gc, MPI_PACKED, left_proc,  0, &
                     buffer_rg, bufsize_gc, MPI_PACKED, right_proc, 0, &
                     MPI_COMM_WORLD, status_l, ierr )

  tag_send = my_id
  tag_recv = left_proc
  call MPI_SENDRECV (buffer_r,  bufsize_gc, MPI_PACKED, right_proc,  0, &
                     buffer_lg, bufsize_gc, MPI_PACKED, left_proc,   0, &
                     MPI_COMM_WORLD, status_r, ierr )

  ! fill left boundary
  position_l = 0 
  do j = 1,nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_lg, bufsize_gc , position_l, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

      A_local%cell(i,j) = acell
    enddo
  enddo

  ! fill right boundary
  position_r = 0
  do j = local_size(my_id+1)-nbc+1, local_size(my_id+1)-nbc+nbc
    do i = 1,nxmax+2*nbc
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%xc,            2, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%values_c,      8, MPI_DOUBLE_PRECISION, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK (buffer_rg, bufsize_gc , position_r, acell%flag_boundary, 1, MPI_INTEGER         , MPI_COMM_WORLD, ierr)

      A_local%cell(i,j) = acell
    enddo
  enddo

  do k = 1,n_proc
    if ( my_id .eq. (k-1) ) then
      write(write_fmt, '(a,i,a)') '(',local_size(my_id+1),'i3)'
      write(*,*) ' After MPI_SENDRECV'
      write(*,*) 'my_id ', my_id
      write(*,*) 'cols  ', local_size(my_id+1)
      do i = 1,nxmax+2*nbc
        write(*,write_fmt) A_local%cell(i,:)%flag_boundary
      enddo
      write(*,*) ' '
    endif
    !call MPI_BARRIER ( MPI_COMM_WORLD, ierr )    
  enddo

(*)deallocate ( buffer )
  deallocate ( buffer_l, buffer_lg, buffer_r, buffer_rg )

  call MPI_FINALIZE ( ierr )

end program

Without the deallocate(buffer) statement in the middle of the code (i.e. with the code exactly as shown above, where the first (*) line is commented out), the relevant part of the output looks like the following, which is what I expect.

  After MPI_SENDRECV
 my_id            0
 cols             6
  0  0  0  0  0  0
  0  0  0  0  0  0
  0  0 11 12 15 16
  0  0 21 22 25 26
  0  0 31 32 35 36
  0  0 41 42 45 46
  0  0 51 52 55 56
  0  0 61 62 65 66
  0  0 71 72 75 76
  0  0 81 82 85 86
  0  0 91 92 95 96
  0  0  0  0  0  0
  0  0  0  0  0  0

But if I place the deallocate(buffer) statement in the middle of the code instead (the first (*) line), the same part of the output looks like this:

  After MPI_SENDRECV
 my_id            0
 cols             6
  0  0  0  0  0  0
******  0  0  0  0
****** 11 12 15 16
****** 21 22 25 26
****** 31 32 35 36
****** 41 42 45 46
****** 51 52 55 56
****** 61 62 65 66
****** 71 72 75 76
****** 81 82 85 86
  0  0 91 92 95 96
  0  0  0  0  0  0
  0  0  0  0  0  0

If I change the write format to display more digits, these values turn out to be 10-digit integers such as 1079533568.
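
For reference, a wider edit descriptor that makes the full values visible would be something like this (my own variation on the format line, using the standard i0 descriptor for the column count):

  write(write_fmt, '(a,i0,a)') '(', local_size(my_id+1), 'i12)'   ! e.g. '(6i12)' instead of '(6i3)'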

I have seen this kind of problem in "Segmentation Fault using MPI_Sendrecv with a 2D contiguous array", but there was no clear answer as to why a deallocate statement in the middle of the code, for an allocatable variable that I no longer use in the rest of the code, can cause such a problem.

Where is the root of this problem?

I am not sure I am answering this question fairly, but my practical experience with derived types is that the safest way to handle them across different MPI implementations is to avoid any advanced MPI constructs and keep all the derived-type handling on the Fortran side.

For example, I would write pure functions that pack and unpack your data type:

! These would live in a module that has real64 from iso_fortran_env and type_cell in scope
integer, parameter :: TYPE_CELL_BUFSIZE = 11

pure function type_cell_pack(this) result(buffer)
   class(type_cell), intent(in) :: this
   real(real64) :: buffer(TYPE_CELL_BUFSIZE)

   buffer(1:8) = this%values_c
   buffer(9:10) = this%xc

   ! It will be faster to not use a separate MPI command for this only
   buffer(11) = real(this%flag_boundary,real64)

end function type_cell_pack

pure type(type_cell) function type_cell_unpack(buffer) result(this)
   real(real64), intent(in) :: buffer(TYPE_CELL_BUFSIZE)

   this%values_c = buffer(1:8)
   this%xc = buffer(9:10)
   this%flag_boundary = nint(buffer(11))

end function type_cell_unpack
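
A minimal round-trip check of these two functions (my own sketch, assuming they sit in a module together with type_cell, TYPE_CELL_BUFSIZE and real64 from iso_fortran_env) could look like:

   type(type_cell) :: c1, c2
   real(real64)    :: buf(TYPE_CELL_BUFSIZE)

   c1%xc = [1.0_real64, 2.0_real64]
   c1%values_c = 0.0_real64
   c1%flag_boundary = 11

   buf = type_cell_pack(c1)
   c2  = type_cell_unpack(buf)   ! c2 now matches c1 component by component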

Then write two wrappers for the MPI communication that use only MPI_send and MPI_recv, one for scalar quantities:

subroutine type_cell_send_scalar(this,fromCpu,toCpu,mpiWorld)
   type(type_cell), intent(inout) :: this
   integer, intent(in) :: fromCpu,toCpu,mpiWorld

   ! cpuid is assumed to hold this process's rank in mpiWorld
   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE)

   if (cpuid==fromCpu) then 
      mpibuf = type_cell_pack(this)
      call mpi_send(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
   elseif (cpuid==toCpu) then 
      call mpi_recv(...,mpibuf,...,MPI_DOUBLE_PRECISION,...)
      this = type_cell_unpack(mpibuf)
   endif

end subroutine type_cell_send_scalar
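
A call site would then be as simple as, for example (hypothetical arguments; acell is a type(type_cell) variable as in the question, and cpuid is assumed to be set up as in the wrapper above):

   call type_cell_send_scalar(acell, 0, 1, MPI_COMM_WORLD)   ! hand one cell from rank 0 to rank 1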

And one for array quantities:

subroutine type_cell_send_array(these,fromCpu,toCpu,mpiWorld)
   type(type_cell), intent(inout) :: these(:)
   integer, intent(in) :: fromCpu,toCpu,mpiWorld

   integer :: i,ncell,bufsize
   real(real64) :: mpibuf(TYPE_CELL_BUFSIZE*size(these))

   ncell = size(these)
   bufsize = ncell*TYPE_CELL_BUFSIZE

   if (cpuid==fromCpu) then 
      do i=1,ncell
        mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE) = type_cell_pack(these(i))
      end do

      call mpi_send(mpibuf,bufsize,...,MPI_DOUBLE_PRECISION,...)
   elseif (cpuid==toCpu) then 
      call mpi_recv(mpibuf,bufsize,...,MPI_DOUBLE_PRECISION,...)
      do i=1,ncell
        these(i) = type_cell_unpack(mpibuf((i-1)*TYPE_CELL_BUFSIZE+1:i*TYPE_CELL_BUFSIZE))
      end do
   endif

end subroutine type_cell_send_array
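
For the guard-cell exchange in the question, rank 1 could then hand its left boundary columns to rank 0 like this (a sketch in the same spirit, with A_local, nbc and nxmax as in the question; the edge ranks would still need the same kind of guarding the original code gets from MPI_PROC_NULL):

   type(type_cell), allocatable :: edge(:)

   edge = reshape( A_local%cell(:,1:nbc), [ (nxmax+2*nbc)*nbc ] )   ! left guard columns, flattened
   call type_cell_send_array(edge, 1, 0, MPI_COMM_WORLD)            ! rank 1 -> rank 0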