Abnormal MPI behaviour for large array

The array-summation code below gives the correct answer when I use max_rows = 10, 100, 1000 or 10000, but when I use max_rows = 100000 or more I get abnormal answers, and I even get a negative partial sum from one of the processes.
program sum_vector
  use mpi
  implicit none
  integer, parameter :: max_rows = 100000
  integer :: myrank, master = 0, ierr, status(mpi_status_size), num_procs
  integer :: i, rank, avg_rows_per_procs, sender
  integer :: num_rows_to_send, num_rows_to_receive, start_row, end_row, partial_sum, total_sum, st1, st2
  integer, allocatable :: vector(:), vector2(:)

  allocate(vector(max_rows), stat=st1)
  allocate(vector2(max_rows), stat=st2)
  if (st1 /= 0 .or. st2 /= 0) then
     print *, 'Cannot allocate'
     stop
  end if

  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world, myrank, ierr)
  call mpi_comm_size(mpi_comm_world, num_procs, ierr)

  if (myrank == 0) then
     ! Root fills the vector and sends each worker its chunk
     do i = 1, max_rows
        vector(i) = i
     end do
     avg_rows_per_procs = max_rows/num_procs
     do rank = 1, num_procs-1
        start_row = rank*avg_rows_per_procs + 1
        end_row = start_row + avg_rows_per_procs - 1
        if (rank == num_procs-1) end_row = max_rows
        num_rows_to_send = end_row - start_row + 1
        call mpi_send(num_rows_to_send, 1, mpi_int, rank, 101, mpi_comm_world, ierr)
        call mpi_send(vector(start_row), num_rows_to_send, mpi_int, rank, 102, mpi_comm_world, ierr)
     end do
     ! Root sums its own chunk, then collects the workers' partial sums
     total_sum = 0
     do i = 1, avg_rows_per_procs
        total_sum = total_sum + vector(i)
     end do
     print *, 'Partial sum=', total_sum, 'from root process'
     do rank = 1, num_procs-1
        call mpi_recv(partial_sum, 1, mpi_int, mpi_any_source, 103, mpi_comm_world, status, ierr)
        sender = status(mpi_source)
        print *, 'Partial sum=', partial_sum, 'from rank', sender
        total_sum = total_sum + partial_sum
     end do
     print *, 'Total sum=', total_sum
  else
     ! Workers receive their chunk, sum it and send the partial sum back
     call mpi_recv(num_rows_to_receive, 1, mpi_int, master, mpi_any_tag, mpi_comm_world, status, ierr)
     call mpi_recv(vector2, num_rows_to_receive, mpi_int, master, mpi_any_tag, mpi_comm_world, status, ierr)
     partial_sum = 0
     do i = 1, num_rows_to_receive
        partial_sum = partial_sum + vector2(i)
     end do
     call mpi_send(partial_sum, 1, mpi_int, master, 103, mpi_comm_world, ierr)
  end if

  call mpi_finalize(ierr)
  stop
end program sum_vector
It seems that total_sum and partial_sum suffer integer overflow for large max_rows, because the former grows roughly as max_rows**2 / 2. Changing the declarations to

use iso_fortran_env, only: int64
integer(int64) :: total_sum, partial_sum

and changing the MPI calls that send and receive partial_sum to

call mpi_recv(partial_sum,1,mpi_long_long_int,mpi_any_source,103,mpi_comm_world,status,ierr)

and

call mpi_send(partial_sum,1,mpi_long_long_int,master,103,mpi_comm_world,ierr)

probably gives the expected result. For example, with max_rows = 100000 and 4 processes (using gfortran 4.7 and openmpi 1.6.5) the result is
Partial sum= 312512500 from root process
Partial sum= 937512500 from rank 1
Partial sum= 1562512500 from rank 2
Partial sum= 2187512500 from rank 3
Total sum= 5000050000
The result for max_rows = 100000000 is
Partial sum= 312500012500000 from root process
Partial sum= 937500012500000 from rank 1
Partial sum= 1562500012500000 from rank 2
Partial sum= 2187500012500000 from rank 3
Total sum= 5000000050000000
This code works as long as max_rows is less than ~2*10^9 (max_rows itself, the loop counters, and the vector elements are still default integers).
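To isolate just the MPI side of that change, here is a minimal two-process sketch (a hypothetical stand-alone demo, not the program from the question) that transfers an integer(int64) value using mpi_long_long_int; run it with at least two processes, e.g. mpirun -np 2:

program int64_send_demo
  use mpi
  use iso_fortran_env, only: int64
  implicit none
  integer :: myrank, num_procs, ierr, status(mpi_status_size)
  integer(int64) :: partial_sum, received_sum

  call mpi_init(ierr)
  call mpi_comm_rank(mpi_comm_world, myrank, ierr)
  call mpi_comm_size(mpi_comm_world, num_procs, ierr)
  if (num_procs < 2) then
     if (myrank == 0) print *, 'run with at least 2 processes'
     call mpi_finalize(ierr)
     stop
  end if

  if (myrank == 1) then
     ! A value that does not fit in a default (32-bit) integer
     partial_sum = 2187512500_int64
     ! The buffer is integer(int64), so the matching MPI datatype is mpi_long_long_int
     call mpi_send(partial_sum, 1, mpi_long_long_int, 0, 103, mpi_comm_world, ierr)
  else if (myrank == 0) then
     call mpi_recv(received_sum, 1, mpi_long_long_int, 1, 103, mpi_comm_world, status, ierr)
     print *, 'received', received_sum   ! prints 2187512500, no overflow
  end if

  call mpi_finalize(ierr)
end program int64_send_demo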
Additional notes:
The exact answer is Total sum = max_rows * (max_rows + 1) / 2 (simply the sum of 1 through max_rows). The largest value a default integer can usually hold is about 2*10^9 (see integer), so once max_rows reaches 10^5 the sum, roughly (10^5)**2 / 2 = 5*10^9, is already larger than 2*10^9 and exceeds the limit of integer.
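To see the overflow itself without any MPI, a small serial sketch (a hypothetical demo; what a compiler does on integer overflow is processor-dependent, but gfortran by default simply wraps around) sums 1..max_rows into both a 32-bit and a 64-bit accumulator:

program overflow_demo
  use iso_fortran_env, only: int32, int64
  implicit none
  integer, parameter :: max_rows = 100000
  integer(int32) :: sum32
  integer(int64) :: sum64
  integer :: i

  sum32 = 0
  sum64 = 0_int64
  do i = 1, max_rows
     sum32 = sum32 + i               ! wraps once the running sum passes huge(sum32)
     sum64 = sum64 + int(i, int64)   ! exact up to ~9.2*10^18
  end do

  print *, 'huge(int32) =', huge(sum32)   ! 2147483647
  print *, '32-bit sum  =', sum32         ! wrong: the true sum does not fit
  print *, '64-bit sum  =', sum64         ! 5000050000 = max_rows*(max_rows+1)/2
end program overflow_demo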
EDIT: I have changed integer(8) to integer(int64) so that it is portable (see @casey's comment).
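For reference, a tiny sketch of the portable spellings (a hypothetical demo program; integer(8) happens to mean an 8-byte integer with gfortran, but kind numbers are compiler-specific and not guaranteed by the standard):

program kind_demo
  use iso_fortran_env, only: int64
  implicit none
  integer(int64) :: a                    ! Fortran 2008 named kind constant
  integer(selected_int_kind(18)) :: b    ! Fortran 90: at least 18 decimal digits
  a = 5000050000_int64
  b = a
  print *, a, b, huge(a)
end program kind_demo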