在 SCALAPACK 中找到分布式向量范数的有效方法
Efficient way to find norm of distributed vector in SCALAPACK
考虑以下使用 scalapack 的代码:
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
if (norm < tol) then
x=h
converged = .true.
exit
endif
s = r0 - alpha*v
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
它是我尝试的迭代求解器的一部分,问题是如果我的处理器网格是二维的,我的向量在这些过程中没有任何元素,因此 dnrm2 产生零或 norm
变量.因此导致循环中的一些过程提前退出,挂起整个循环。
除了人工播报等方式之外,确保正常值被正确分配的正确方法是什么?
注意:这适用于 1-d procs 分布,请参阅:
下面是我从维基百科文章中写的一个简单的 Bi-CGSTAB 求解器,它分别从文件 b.dat 和 A.dat 中读取向量和矩阵。并继续使用 bicgstab_self_sclpk 例程来解决它。
下面打印的是norm
的值
排名=4 运行:
...
current norm2 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
current norm2 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
...
一切都挂在这里。
排名 = 7 运行
. . .
current norm2 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
. . .
module bicgstab
contains
subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
use mpi
implicit none
real :: A(:,:), b(:,:), tol
integer(kind=8) :: N
integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
integer :: i, ierr, rank, maxit
real :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
real, allocatable :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
real, allocatable :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
logical :: converged
! ================== Initialize ======================
allocate(r0(mloc_vec,nloc_vec))
allocate(r(mloc_vec,nloc_vec))
allocate(rhat0(mloc_vec,nloc_vec))
allocate(x0(mloc_vec,nloc_vec))
allocate(x(mloc_vec,nloc_vec))
allocate(v0(mloc_vec,nloc_vec))
allocate(v(mloc_vec,nloc_vec))
allocate(p0(mloc_vec,nloc_vec))
allocate(p(mloc_vec,nloc_vec))
allocate(h(mloc_vec,nloc_vec))
allocate(s(mloc_vec,nloc_vec))
allocate(t(mloc_vec,nloc_vec))
allocate(tmp_vec(mloc_vec,nloc_vec))
x0 = 0
r0 = 0
r = 0
x = 0
v0 = 0
v = 0
p0 = 0
p = 0
h = 0
s = 0
t = 0
norm= 0
rhat0 = 0
rho0 = 1
rho = 0
alpha = 1
omega0 = 1
omega = 0
beta = 0
converged = .false.
r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
rhat0 = r0
tol = 1E-6
maxiter = 1000
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
print *, rank , mloc_vec, nloc_vec
! print *, "rank",rank,"descmat",descmat
! print *, "rank",rank,"descvec",descvec
! ======================================================
! ================Loop==================================
do i = 1, maxiter
! rho = dot_product(rhat0(:,1),r0(:,1))
call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
beta = (rho/rho0)*(alpha/omega0)
p = r0 + beta*(p0 - omega0*v0)
! v = matmul(A,p)
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0,v, 1, 1, descvec, 1)
! alpha = rho/dot_product(rhat0(:,1),v(:,1))
call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
alpha = rho/alpha
h = x0 + alpha*p
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
norm = 999.0
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
! print *, "current norm1", norm, rank
if (norm < tol) then
x=h
converged = .true.
exit
endif
if (i==1) print *,"rank",rank,"was here"
s = r0 - alpha*v
! t = matmul(A,s)
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
! t = tmp_vec
! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
omega = omega/tmp_real
x = h + omega*s
! if (norm2(x-x0)<tol) then
tmp_vec = x - x0
norm = 1000000
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
if (norm < tol) then
print *, "current norm2", norm
converged = .true.
exit
endif
r = s - omega*t
x0 =x
rho0 = rho
p0 = p
r0 = r
v0 = v
omega0 = omega
enddo
! =========================================================
if (converged) then
print *, "Bi-CG STAB solver converged in iteration #", i, norm
else
print *, "Maximum iteration cycles reached"
endif
call MPI_Barrier(MPI_COMM_WORLD,ierr)
b = x
! print *,"rank ",rank
! =================clean up!===============================
deallocate(r0)
deallocate(r)
deallocate(rhat0)
deallocate(x0)
deallocate(x)
deallocate(v0)
deallocate(v)
deallocate(p0)
deallocate(p)
deallocate(h)
deallocate(s)
deallocate(t)
print *,"End of bicgstab"
end subroutine bicgstab_self_sclpk
end module bicgstab
program test_bicgstab
use bicgstab
use mpi
implicit none
character, parameter :: UPLO="U"
character(len=7) :: char_size
integer :: info
integer(kind=8) :: N, i, j
real(kind=8), allocatable :: A_global(:,:), b_global(:,:)
integer(kind=8) :: count_start, count_end,count_rate, dummy_int
real(kind=8) :: time
! =========================BLACS and MPI=======================
integer :: ierr, size, rank,dims(2)
! -------------------------------------------------------------
integer, parameter :: block_size = 100
integer :: context, nprow, npcol, local_nprow, local_npcol
integer :: numroc, indxl2g, descmat(9),descvec(9)
integer :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
real(kind=8), allocatable :: A(:,:), x(:,:), b(:,:)
call blacs_pinfo(rank,size)
dims=0
call MPI_Dims_create(size, 2, dims, ierr)
nprow = dims(1);npcol = dims(2)
call blacs_get(0,0,context)
call blacs_gridinit(context, 'R', nprow, npcol)
call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)
N = 700
allocate(A_global(N,N))
if (rank==0) open(101,file='A.dat')
do i = 1, N
if (rank==0) read(101,*) A_global(i,1:N)
call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE,0,MPI_COMM_WORLD, ierr)
enddo
if (rank==0) close(101)
mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
allocate(A(mloc_mat,nloc_mat))
do i = 1, mloc_mat
do j = 1,nloc_mat
A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
enddo
enddo
if (rank==0) print *, "Read matrix"
allocate(b_global(N,1))
if (rank==0) then
open(103,file='b.dat')
do i = 1, N
read(103,*) b_global(i,1)
enddo
close(103)
endif
call MPI_Bcast(b_global(:,1), N,MPI_DOUBLE,0,MPI_COMM_WORLD, ierr)
! set up scalapack shared matrices
if (rank==0) print *, "Matrix broadcasted"
mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
print *,"Rank", rank, mloc_vec, nloc_vec
allocate(b(mloc_vec,nloc_vec))
allocate(x(mloc_vec,nloc_vec))
do i = 1, mloc_vec
do j = 1,nloc_vec
b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
x(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
enddo
enddo
call descinit(descmat , N, N, block_size, block_size, 0,0,context,max(1,mloc_mat),info)
call descinit(descvec , N, 1, block_size, block_size, 0,0,context,max(1,mloc_vec),info)
if (rank==0) print *, "Set up done,solving"
! setup done, call in the cavalary
call MPI_Barrier(MPI_COMM_WORLD, ierr)
call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
! print *,x
call MPI_Barrier(MPI_COMM_WORLD,ierr)
call blacs_gridexit(context)
call blacs_exit(0)
end program test_bicgstab
如果需要,矩阵文件可以在这里下载:https://github.com/ipcamit/temp_so
好的,首先,您发布的代码存在很多问题,请参阅下面我认为是使用随机数代替文件的更正版本。特别是你真的需要更加小心各种 reals
,我建议始终使用默认的 integer
s,你不需要长的 integer
s,并尝试使用它们只会造成不必要的痛苦。
也就是说,我认为您 运行 发现了 pdnrm2 中的错误。查看
中的源代码
http://www.netlib.org/scalapack/explore-html/d3/d5f/pdnrm2___8c_source.html
有
/*
* Process column 0 broadcasts the combined values of SCALE and SSQ within their
* process column
*/
top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
if( myrow == 0 )
{
Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 );
}
else
{
Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2,
0, mycol );
}
/*
现在我并不声称完全理解这一点,但评论强烈暗示结果仅沿着排名为 0 的进程列广播,而不是向所有进程广播。这适用于 1D 分解,但不适用于 2D 分解,这正是您所看到的。因此,恐怕 IMO 要解决此问题,您必须自己从进程零手动广播该值。
下面是我认为是你的代码的更正版本,它显示在 3 个过程(一维分解)而不是 4(使用 2x2 所以二维分解)
Module numbers
Use, Intrinsic :: iso_fortran_env, Only : wp => real64
Public :: wp
Private
End Module numbers
Module bicgstab
Contains
Subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
Use numbers
Use mpi
Implicit None
Real ( wp ) :: A(:,:), b(:,:), tol
Integer :: N
Integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
Integer :: i, ierr, rank, maxit
Real( wp ) :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
Real( wp ), Allocatable :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
Real( wp ), Allocatable :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
Logical :: converged
! ================== Initialize ======================
Allocate(r0(mloc_vec,nloc_vec))
Allocate(r(mloc_vec,nloc_vec))
Allocate(rhat0(mloc_vec,nloc_vec))
Allocate(x0(mloc_vec,nloc_vec))
Allocate(x(mloc_vec,nloc_vec))
Allocate(v0(mloc_vec,nloc_vec))
Allocate(v(mloc_vec,nloc_vec))
Allocate(p0(mloc_vec,nloc_vec))
Allocate(p(mloc_vec,nloc_vec))
Allocate(h(mloc_vec,nloc_vec))
Allocate(s(mloc_vec,nloc_vec))
Allocate(t(mloc_vec,nloc_vec))
Allocate(tmp_vec(mloc_vec,nloc_vec))
x0 = 0.0_wp
r0 = 0.0_wp
r = 0.0_wp
x = 0.0_wp
v0 = 0.0_wp
v = 0.0_wp
p0 = 0.0_wp
p = 0.0_wp
h = 0.0_wp
s = 0.0_wp
t = 0.0_wp
norm= 0.0_wp
rhat0 = 0.0_wp
rho0 = 1.0_wp
rho = 0.0_wp
alpha = 1.0_wp
omega0 = 1.0_wp
omega = 0.0_wp
beta = 0.0_wp
converged = .False.
r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
rhat0 = r0
tol = 1E-6_wp
maxiter = 1000
Call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
Print *, rank , mloc_vec, nloc_vec
! print *, "rank",rank,"descmat",descmat
! print *, "rank",rank,"descvec",descvec
! ======================================================
! ================Loop==================================
Do i = 1, maxiter
! rho = dot_product(rhat0(:,1),r0(:,1))
Call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
beta = (rho/rho0)*(alpha/omega0)
p = r0 + beta*(p0 - omega0*v0)
! v = matmul(A,p)
Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0_wp,v, 1, 1, descvec, 1)
! alpha = rho/dot_product(rhat0(:,1),v(:,1))
Call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
alpha = rho/alpha
h = x0 + alpha*p
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
! print *, "current norm1", norm, rank
If (norm < tol) Then
x=h
converged = .True.
Exit
Endif
If (i==1) Print *,"rank",rank,"was here"
s = r0 - alpha*v
! t = matmul(A,s)
Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0_wp,t, 1, 1, descvec, 1)
! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
! t = tmp_vec
! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
Call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
Call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
omega = omega/tmp_real
x = h + omega*s
! if (norm2(x-x0)<tol) then
tmp_vec = x - x0
norm = 1000000
Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
If (norm < tol) Then
Print *, "current norm2", norm
converged = .True.
Exit
Endif
r = s - omega*t
x0 =x
rho0 = rho
p0 = p
r0 = r
v0 = v
omega0 = omega
Enddo
! =========================================================
If (converged) Then
Print *, "Bi-CG STAB solver converged in iteration #", i, norm
Else
Print *, "Maximum iteration cycles reached"
Endif
Call MPI_Barrier(MPI_COMM_WORLD,ierr)
b = x
! print *,"rank ",rank
! =================clean up!===============================
Deallocate(r0)
Deallocate(r)
Deallocate(rhat0)
Deallocate(x0)
Deallocate(x)
Deallocate(v0)
Deallocate(v)
Deallocate(p0)
Deallocate(p)
Deallocate(h)
Deallocate(s)
Deallocate(t)
Print *,"End of bicgstab"
End Subroutine bicgstab_self_sclpk
End Module bicgstab
Program test_bicgstab
Use numbers
Use bicgstab
Use mpi
Implicit None
Character, Parameter :: UPLO="U"
Character(len=7) :: char_size
Integer :: info
Integer :: N, i, j
Real(wp), Allocatable :: A_global(:,:), b_global(:,:)
Integer :: count_start, count_end,count_rate, dummy_int
Real(wp) :: time
! =========================BLACS and MPI=======================
! Size is a really bad name for a variable as it clashes with the very useful intrinsic
!!$ Integer :: ierr, size, rank,dims(2)
Integer :: ierr, nprocs, rank,dims(2)
! -------------------------------------------------------------
Integer, Parameter :: block_size = 100
Integer :: context, nprow, npcol, local_nprow, local_npcol
Integer :: numroc, indxl2g, descmat(9),descvec(9)
Integer :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
Real(wp), Allocatable :: A(:,:), x(:,:), b(:,:)
! CAN NOT DO ANY MPI UNTILL CALLED MPI_INIT
Call MPI_Init( ierr )
Call blacs_pinfo(rank,nprocs)
dims=0
Call MPI_Dims_create(nprocs, 2, dims, ierr)
nprow = dims(1);npcol = dims(2)
Call blacs_get(0,0,context)
Call blacs_gridinit(context, 'R', nprow, npcol)
Call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)
N = 700
Allocate(A_global(N,N))
! No file, fill with random numbers
!!$ If (rank==0) Open(101,file='A.dat')
!!$ Do i = 1, N
!!$ If (rank==0) Read(101,*) A_global(i,1:N)
!!$ Call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
!!$ Enddo
!!$ If (rank==0) Close(101)
If( rank == 0 ) Then
Call Random_number( a_global )
! Add something onto the diagonal to hopefully avid horrible condition number
Do i = 1, n
a_global( i, i ) = a_global( i, i ) + n
End Do
End If
Call MPI_Bcast(A_global, Size( a_global ),MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
Allocate(A(mloc_mat,nloc_mat))
Do i = 1, mloc_mat
Do j = 1,nloc_mat
A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
Enddo
Enddo
If (rank==0) Print *, "Read matrix"
Allocate(b_global(N,1))
! No file, fill with random numbers
!!$
!!$ If (rank==0) Then
!!$ Open(103,file='b.dat')
!!$ Do i = 1, N
!!$ Read(103,*) b_global(i,1)
!!$ Enddo
!!$ Close(103)
!!$ Endif
If( Rank == 0 ) Then
Call Random_number( b_global )
End If
Call MPI_Bcast(b_global, Size( b_global ) ,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
! set up scalapack shared matrices
If (rank==0) Print *, "Matrix broadcasted"
mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
Print *,"Rank", rank, mloc_vec, nloc_vec
Allocate(b(mloc_vec,nloc_vec))
Allocate(x(mloc_vec,nloc_vec))
Do i = 1, mloc_vec
Do j = 1,nloc_vec
b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
x(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
Enddo
Enddo
Call descinit(descmat , N, N, block_size, block_size, 0,0,context,Max(1,mloc_mat),info)
Call descinit(descvec , N, 1, block_size, block_size, 0,0,context,Max(1,mloc_vec),info)
If (rank==0) Print *, "Set up done,solving"
! setup done, call in the cavalary
Call MPI_Barrier(MPI_COMM_WORLD, ierr)
Call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
! print *,x
Call MPI_Barrier(MPI_COMM_WORLD,ierr)
Call blacs_gridexit(context)
!!$ Call blacs_exit(0)
Call MPI_Finalize( ierr )
End Program test_bicgstab
ian@eris:~/work/stack$ mpif90 -g -Wall -Wextra -O -pedantic -fbacktrace -fcheck=all bicg.f90 -lblacsF77init-openmpi -lblacs-openmpi -lscalapack-openmpi
bicg.f90:21:63:
Integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
1
Warning: Unused variable ‘info’ declared at (1) [-Wunused-variable]
bicg.f90:23:47:
Integer :: i, ierr, rank, maxit
1
Warning: Unused variable ‘maxit’ declared at (1) [-Wunused-variable]
bicg.f90:160:42:
Character(len=7) :: char_size
1
Warning: Unused variable ‘char_size’ declared at (1) [-Wunused-variable]
bicg.f90:164:47:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_end’ declared at (1) [-Wunused-variable]
bicg.f90:164:58:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_rate’ declared at (1) [-Wunused-variable]
bicg.f90:164:36:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_start’ declared at (1) [-Wunused-variable]
bicg.f90:164:69:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘dummy_int’ declared at (1) [-Wunused-variable]
bicg.f90:165:33:
Real(wp) :: time
1
Warning: Unused variable ‘time’ declared at (1) [-Wunused-variable]
bicg.f90:159:37:
Character, Parameter :: UPLO="U"
1
Warning: Unused parameter ‘uplo’ declared at (1) [-Wunused-parameter]
ian@eris:~/work/stack$ mpirun -np 3 ./a.out
Read matrix
Matrix broadcasted
Rank 0 300 1
Rank 2 200 1
Set up done,solving
Rank 1 200 1
0 300 1
2 200 1
1 200 1
rank 2 was here
rank 0 was here
rank 1 was here
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
End of bicgstab
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
End of bicgstab
End of bicgstab
ian@eris:~/work/stack$ mpirun -np 4 ./a.out
Read matrix
Matrix broadcasted
Rank 3 300 0
Rank 0 400 1
Rank 1 400 0
Rank 2 300 1
Set up done,solving
1 400 0
3 300 0
2 300 1
0 400 1
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
rank 0 was here
rank 2 was here
^Cian@eris:~/work/stack$
考虑以下使用 scalapack 的代码:
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
if (norm < tol) then
x=h
converged = .true.
exit
endif
s = r0 - alpha*v
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
它是我尝试的迭代求解器的一部分,问题是如果我的处理器网格是二维的,我的向量在这些过程中没有任何元素,因此 dnrm2 产生零或 norm
变量.因此导致循环中的一些过程提前退出,挂起整个循环。
除了人工播报等方式之外,确保正常值被正确分配的正确方法是什么?
注意:这适用于 1-d procs 分布,请参阅:
下面是我从维基百科文章中写的一个简单的 Bi-CGSTAB 求解器,它分别从文件 b.dat 和 A.dat 中读取向量和矩阵。并继续使用 bicgstab_self_sclpk 例程来解决它。
下面打印的是norm
排名=4 运行:
...
current norm2 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
current norm2 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
...
一切都挂在这里。
排名 = 7 运行
. . .
current norm2 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
current norm2 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
Bi-CG STAB solver converged in iteration # 369 1.2377699991821143E-008
. . .
module bicgstab
contains
subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
use mpi
implicit none
real :: A(:,:), b(:,:), tol
integer(kind=8) :: N
integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
integer :: i, ierr, rank, maxit
real :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
real, allocatable :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
real, allocatable :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
logical :: converged
! ================== Initialize ======================
allocate(r0(mloc_vec,nloc_vec))
allocate(r(mloc_vec,nloc_vec))
allocate(rhat0(mloc_vec,nloc_vec))
allocate(x0(mloc_vec,nloc_vec))
allocate(x(mloc_vec,nloc_vec))
allocate(v0(mloc_vec,nloc_vec))
allocate(v(mloc_vec,nloc_vec))
allocate(p0(mloc_vec,nloc_vec))
allocate(p(mloc_vec,nloc_vec))
allocate(h(mloc_vec,nloc_vec))
allocate(s(mloc_vec,nloc_vec))
allocate(t(mloc_vec,nloc_vec))
allocate(tmp_vec(mloc_vec,nloc_vec))
x0 = 0
r0 = 0
r = 0
x = 0
v0 = 0
v = 0
p0 = 0
p = 0
h = 0
s = 0
t = 0
norm= 0
rhat0 = 0
rho0 = 1
rho = 0
alpha = 1
omega0 = 1
omega = 0
beta = 0
converged = .false.
r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
rhat0 = r0
tol = 1E-6
maxiter = 1000
call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
print *, rank , mloc_vec, nloc_vec
! print *, "rank",rank,"descmat",descmat
! print *, "rank",rank,"descvec",descvec
! ======================================================
! ================Loop==================================
do i = 1, maxiter
! rho = dot_product(rhat0(:,1),r0(:,1))
call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
beta = (rho/rho0)*(alpha/omega0)
p = r0 + beta*(p0 - omega0*v0)
! v = matmul(A,p)
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0,v, 1, 1, descvec, 1)
! alpha = rho/dot_product(rhat0(:,1),v(:,1))
call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
alpha = rho/alpha
h = x0 + alpha*p
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
norm = 999.0
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
! print *, "current norm1", norm, rank
if (norm < tol) then
x=h
converged = .true.
exit
endif
if (i==1) print *,"rank",rank,"was here"
s = r0 - alpha*v
! t = matmul(A,s)
call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
! t = tmp_vec
! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
omega = omega/tmp_real
x = h + omega*s
! if (norm2(x-x0)<tol) then
tmp_vec = x - x0
norm = 1000000
call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
if (norm < tol) then
print *, "current norm2", norm
converged = .true.
exit
endif
r = s - omega*t
x0 =x
rho0 = rho
p0 = p
r0 = r
v0 = v
omega0 = omega
enddo
! =========================================================
if (converged) then
print *, "Bi-CG STAB solver converged in iteration #", i, norm
else
print *, "Maximum iteration cycles reached"
endif
call MPI_Barrier(MPI_COMM_WORLD,ierr)
b = x
! print *,"rank ",rank
! =================clean up!===============================
deallocate(r0)
deallocate(r)
deallocate(rhat0)
deallocate(x0)
deallocate(x)
deallocate(v0)
deallocate(v)
deallocate(p0)
deallocate(p)
deallocate(h)
deallocate(s)
deallocate(t)
print *,"End of bicgstab"
end subroutine bicgstab_self_sclpk
end module bicgstab
program test_bicgstab
use bicgstab
use mpi
implicit none
character, parameter :: UPLO="U"
character(len=7) :: char_size
integer :: info
integer(kind=8) :: N, i, j
real(kind=8), allocatable :: A_global(:,:), b_global(:,:)
integer(kind=8) :: count_start, count_end,count_rate, dummy_int
real(kind=8) :: time
! =========================BLACS and MPI=======================
integer :: ierr, size, rank,dims(2)
! -------------------------------------------------------------
integer, parameter :: block_size = 100
integer :: context, nprow, npcol, local_nprow, local_npcol
integer :: numroc, indxl2g, descmat(9),descvec(9)
integer :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
real(kind=8), allocatable :: A(:,:), x(:,:), b(:,:)
call blacs_pinfo(rank,size)
dims=0
call MPI_Dims_create(size, 2, dims, ierr)
nprow = dims(1);npcol = dims(2)
call blacs_get(0,0,context)
call blacs_gridinit(context, 'R', nprow, npcol)
call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)
N = 700
allocate(A_global(N,N))
if (rank==0) open(101,file='A.dat')
do i = 1, N
if (rank==0) read(101,*) A_global(i,1:N)
call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE,0,MPI_COMM_WORLD, ierr)
enddo
if (rank==0) close(101)
mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
allocate(A(mloc_mat,nloc_mat))
do i = 1, mloc_mat
do j = 1,nloc_mat
A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
enddo
enddo
if (rank==0) print *, "Read matrix"
allocate(b_global(N,1))
if (rank==0) then
open(103,file='b.dat')
do i = 1, N
read(103,*) b_global(i,1)
enddo
close(103)
endif
call MPI_Bcast(b_global(:,1), N,MPI_DOUBLE,0,MPI_COMM_WORLD, ierr)
! set up scalapack shared matrices
if (rank==0) print *, "Matrix broadcasted"
mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
print *,"Rank", rank, mloc_vec, nloc_vec
allocate(b(mloc_vec,nloc_vec))
allocate(x(mloc_vec,nloc_vec))
do i = 1, mloc_vec
do j = 1,nloc_vec
b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
x(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
enddo
enddo
call descinit(descmat , N, N, block_size, block_size, 0,0,context,max(1,mloc_mat),info)
call descinit(descvec , N, 1, block_size, block_size, 0,0,context,max(1,mloc_vec),info)
if (rank==0) print *, "Set up done,solving"
! setup done, call in the cavalary
call MPI_Barrier(MPI_COMM_WORLD, ierr)
call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
! print *,x
call MPI_Barrier(MPI_COMM_WORLD,ierr)
call blacs_gridexit(context)
call blacs_exit(0)
end program test_bicgstab
如果需要,矩阵文件可以在这里下载:https://github.com/ipcamit/temp_so
好的,首先,您发布的代码存在很多问题,请参阅下面我认为是使用随机数代替文件的更正版本。特别是你真的需要更加小心各种 reals
,我建议始终使用默认的 integer
s,你不需要长的 integer
s,并尝试使用它们只会造成不必要的痛苦。
也就是说,我认为您 运行 发现了 pdnrm2 中的错误。查看
中的源代码http://www.netlib.org/scalapack/explore-html/d3/d5f/pdnrm2___8c_source.html
有
/*
* Process column 0 broadcasts the combined values of SCALE and SSQ within their
* process column
*/
top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
if( myrow == 0 )
{
Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 );
}
else
{
Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2,
0, mycol );
}
/*
现在我并不声称完全理解这一点,但评论强烈暗示结果仅沿着排名为 0 的进程列广播,而不是向所有进程广播。这适用于 1D 分解,但不适用于 2D 分解,这正是您所看到的。因此,恐怕 IMO 要解决此问题,您必须自己从进程零手动广播该值。
下面是我认为是你的代码的更正版本,它显示在 3 个过程(一维分解)而不是 4(使用 2x2 所以二维分解)
Module numbers
Use, Intrinsic :: iso_fortran_env, Only : wp => real64
Public :: wp
Private
End Module numbers
Module bicgstab
Contains
Subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
Use numbers
Use mpi
Implicit None
Real ( wp ) :: A(:,:), b(:,:), tol
Integer :: N
Integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
Integer :: i, ierr, rank, maxit
Real( wp ) :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
Real( wp ), Allocatable :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
Real( wp ), Allocatable :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
Logical :: converged
! ================== Initialize ======================
Allocate(r0(mloc_vec,nloc_vec))
Allocate(r(mloc_vec,nloc_vec))
Allocate(rhat0(mloc_vec,nloc_vec))
Allocate(x0(mloc_vec,nloc_vec))
Allocate(x(mloc_vec,nloc_vec))
Allocate(v0(mloc_vec,nloc_vec))
Allocate(v(mloc_vec,nloc_vec))
Allocate(p0(mloc_vec,nloc_vec))
Allocate(p(mloc_vec,nloc_vec))
Allocate(h(mloc_vec,nloc_vec))
Allocate(s(mloc_vec,nloc_vec))
Allocate(t(mloc_vec,nloc_vec))
Allocate(tmp_vec(mloc_vec,nloc_vec))
x0 = 0.0_wp
r0 = 0.0_wp
r = 0.0_wp
x = 0.0_wp
v0 = 0.0_wp
v = 0.0_wp
p0 = 0.0_wp
p = 0.0_wp
h = 0.0_wp
s = 0.0_wp
t = 0.0_wp
norm= 0.0_wp
rhat0 = 0.0_wp
rho0 = 1.0_wp
rho = 0.0_wp
alpha = 1.0_wp
omega0 = 1.0_wp
omega = 0.0_wp
beta = 0.0_wp
converged = .False.
r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
rhat0 = r0
tol = 1E-6_wp
maxiter = 1000
Call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
Print *, rank , mloc_vec, nloc_vec
! print *, "rank",rank,"descmat",descmat
! print *, "rank",rank,"descvec",descvec
! ======================================================
! ================Loop==================================
Do i = 1, maxiter
! rho = dot_product(rhat0(:,1),r0(:,1))
Call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
beta = (rho/rho0)*(alpha/omega0)
p = r0 + beta*(p0 - omega0*v0)
! v = matmul(A,p)
Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0_wp,v, 1, 1, descvec, 1)
! alpha = rho/dot_product(rhat0(:,1),v(:,1))
Call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
alpha = rho/alpha
h = x0 + alpha*p
! if (norm2(h-x0) < tol) then
tmp_vec = h - x0
Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
! print *, "current norm1", norm, rank
If (norm < tol) Then
x=h
converged = .True.
Exit
Endif
If (i==1) Print *,"rank",rank,"was here"
s = r0 - alpha*v
! t = matmul(A,s)
Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0_wp,t, 1, 1, descvec, 1)
! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
! t = tmp_vec
! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
Call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
Call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
omega = omega/tmp_real
x = h + omega*s
! if (norm2(x-x0)<tol) then
tmp_vec = x - x0
norm = 1000000
Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
If (norm < tol) Then
Print *, "current norm2", norm
converged = .True.
Exit
Endif
r = s - omega*t
x0 =x
rho0 = rho
p0 = p
r0 = r
v0 = v
omega0 = omega
Enddo
! =========================================================
If (converged) Then
Print *, "Bi-CG STAB solver converged in iteration #", i, norm
Else
Print *, "Maximum iteration cycles reached"
Endif
Call MPI_Barrier(MPI_COMM_WORLD,ierr)
b = x
! print *,"rank ",rank
! =================clean up!===============================
Deallocate(r0)
Deallocate(r)
Deallocate(rhat0)
Deallocate(x0)
Deallocate(x)
Deallocate(v0)
Deallocate(v)
Deallocate(p0)
Deallocate(p)
Deallocate(h)
Deallocate(s)
Deallocate(t)
Print *,"End of bicgstab"
End Subroutine bicgstab_self_sclpk
End Module bicgstab
Program test_bicgstab
Use numbers
Use bicgstab
Use mpi
Implicit None
Character, Parameter :: UPLO="U"
Character(len=7) :: char_size
Integer :: info
Integer :: N, i, j
Real(wp), Allocatable :: A_global(:,:), b_global(:,:)
Integer :: count_start, count_end,count_rate, dummy_int
Real(wp) :: time
! =========================BLACS and MPI=======================
! Size is a really bad name for a variable as it clashes with the very useful intrinsic
!!$ Integer :: ierr, size, rank,dims(2)
Integer :: ierr, nprocs, rank,dims(2)
! -------------------------------------------------------------
Integer, Parameter :: block_size = 100
Integer :: context, nprow, npcol, local_nprow, local_npcol
Integer :: numroc, indxl2g, descmat(9),descvec(9)
Integer :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
Real(wp), Allocatable :: A(:,:), x(:,:), b(:,:)
! CAN NOT DO ANY MPI UNTILL CALLED MPI_INIT
Call MPI_Init( ierr )
Call blacs_pinfo(rank,nprocs)
dims=0
Call MPI_Dims_create(nprocs, 2, dims, ierr)
nprow = dims(1);npcol = dims(2)
Call blacs_get(0,0,context)
Call blacs_gridinit(context, 'R', nprow, npcol)
Call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)
N = 700
Allocate(A_global(N,N))
! No file, fill with random numbers
!!$ If (rank==0) Open(101,file='A.dat')
!!$ Do i = 1, N
!!$ If (rank==0) Read(101,*) A_global(i,1:N)
!!$ Call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
!!$ Enddo
!!$ If (rank==0) Close(101)
If( rank == 0 ) Then
Call Random_number( a_global )
! Add something onto the diagonal to hopefully avid horrible condition number
Do i = 1, n
a_global( i, i ) = a_global( i, i ) + n
End Do
End If
Call MPI_Bcast(A_global, Size( a_global ),MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
Allocate(A(mloc_mat,nloc_mat))
Do i = 1, mloc_mat
Do j = 1,nloc_mat
A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
Enddo
Enddo
If (rank==0) Print *, "Read matrix"
Allocate(b_global(N,1))
! No file, fill with random numbers
!!$
!!$ If (rank==0) Then
!!$ Open(103,file='b.dat')
!!$ Do i = 1, N
!!$ Read(103,*) b_global(i,1)
!!$ Enddo
!!$ Close(103)
!!$ Endif
If( Rank == 0 ) Then
Call Random_number( b_global )
End If
Call MPI_Bcast(b_global, Size( b_global ) ,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
! set up scalapack shared matrices
If (rank==0) Print *, "Matrix broadcasted"
mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
Print *,"Rank", rank, mloc_vec, nloc_vec
Allocate(b(mloc_vec,nloc_vec))
Allocate(x(mloc_vec,nloc_vec))
Do i = 1, mloc_vec
Do j = 1,nloc_vec
b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
x(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
&indxl2g(j,block_size,local_npcol,0, npcol))
Enddo
Enddo
Call descinit(descmat , N, N, block_size, block_size, 0,0,context,Max(1,mloc_mat),info)
Call descinit(descvec , N, 1, block_size, block_size, 0,0,context,Max(1,mloc_vec),info)
If (rank==0) Print *, "Set up done,solving"
! setup done, call in the cavalary
Call MPI_Barrier(MPI_COMM_WORLD, ierr)
Call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
! print *,x
Call MPI_Barrier(MPI_COMM_WORLD,ierr)
Call blacs_gridexit(context)
!!$ Call blacs_exit(0)
Call MPI_Finalize( ierr )
End Program test_bicgstab
ian@eris:~/work/stack$ mpif90 -g -Wall -Wextra -O -pedantic -fbacktrace -fcheck=all bicg.f90 -lblacsF77init-openmpi -lblacs-openmpi -lscalapack-openmpi
bicg.f90:21:63:
Integer :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
1
Warning: Unused variable ‘info’ declared at (1) [-Wunused-variable]
bicg.f90:23:47:
Integer :: i, ierr, rank, maxit
1
Warning: Unused variable ‘maxit’ declared at (1) [-Wunused-variable]
bicg.f90:160:42:
Character(len=7) :: char_size
1
Warning: Unused variable ‘char_size’ declared at (1) [-Wunused-variable]
bicg.f90:164:47:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_end’ declared at (1) [-Wunused-variable]
bicg.f90:164:58:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_rate’ declared at (1) [-Wunused-variable]
bicg.f90:164:36:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘count_start’ declared at (1) [-Wunused-variable]
bicg.f90:164:69:
Integer :: count_start, count_end,count_rate, dummy_int
1
Warning: Unused variable ‘dummy_int’ declared at (1) [-Wunused-variable]
bicg.f90:165:33:
Real(wp) :: time
1
Warning: Unused variable ‘time’ declared at (1) [-Wunused-variable]
bicg.f90:159:37:
Character, Parameter :: UPLO="U"
1
Warning: Unused parameter ‘uplo’ declared at (1) [-Wunused-parameter]
ian@eris:~/work/stack$ mpirun -np 3 ./a.out
Read matrix
Matrix broadcasted
Rank 0 300 1
Rank 2 200 1
Set up done,solving
Rank 1 200 1
0 300 1
2 200 1
1 200 1
rank 2 was here
rank 0 was here
rank 1 was here
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
End of bicgstab
Bi-CG STAB solver converged in iteration # 3 6.1140242983184383E-008
End of bicgstab
End of bicgstab
ian@eris:~/work/stack$ mpirun -np 4 ./a.out
Read matrix
Matrix broadcasted
Rank 3 300 0
Rank 0 400 1
Rank 1 400 0
Rank 2 300 1
Set up done,solving
1 400 0
3 300 0
2 300 1
0 400 1
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
Bi-CG STAB solver converged in iteration # 1 0.0000000000000000
rank 0 was here
rank 2 was here
^Cian@eris:~/work/stack$