Efficient way to find norm of distributed vector in SCALAPACK

Consider the following code using ScaLAPACK:

        ! if (norm2(h-x0) < tol) then
        tmp_vec = h - x0
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        if (norm < tol) then
            x=h
            converged = .true.
            exit
        endif
        s = r0 - alpha*v
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)

It is part of an iterative solver I am writing. The problem is that if my processor grid is two-dimensional, the vector has no elements on some of the processes, so pdnrm2 returns zero in the norm variable there. That makes some processes pass the convergence test and exit the loop early, which hangs the whole loop.
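
To make the empty-process case concrete, here is a tiny standalone sketch (not part of my solver; it only needs to be linked against ScaLAPACK for numroc): for an N x 1 vector distributed over npcol process columns, numroc reports zero local columns everywhere except process column 0, so every other process column owns no vector elements at all.

    ! Sketch: how many columns of an N x 1 vector each process column owns.
    ! With the 2x2 grid used for the 4-process run only column 0 gets anything.
    program show_empty_locals
        implicit none
        integer, external  :: numroc          ! ScaLAPACK tool routine
        integer, parameter :: block_size = 100
        integer            :: npcol, mycol
        npcol = 2                             ! two process columns, as in a 2x2 grid
        do mycol = 0, npcol - 1
            print *, 'process column', mycol, 'owns', &
                     numroc(1, block_size, mycol, 0, npcol), 'vector column(s)'
        end do
    end program show_empty_locals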

Apart from broadcasting the value by hand, what is the proper way to make sure norm ends up with the correct value on every process?

Note: this works fine for a 1-D process distribution; see the 7-process run below.

Below is a simple Bi-CGSTAB solver I wrote following the Wikipedia article. It reads the vector and the matrix from the files b.dat and A.dat respectively and then calls the bicgstab_self_sclpk routine to solve the system. The values of norm printed during the runs are shown below.

Run with 4 processes:

...
 current norm2   0.0000000000000000
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000
 current norm2   0.0000000000000000
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000
...

Everything hangs here.


Run with 7 processes:

 . . .
 current norm2   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
. . .
module bicgstab

contains
subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
    use mpi
    implicit none
    real                :: A(:,:), b(:,:), tol
    integer(kind=8)     :: N
    integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec

    integer             :: i, ierr, rank, maxit
    real                :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
    real, allocatable   :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
    real, allocatable   :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
    logical             :: converged

    ! ================== Initialize ======================
    allocate(r0(mloc_vec,nloc_vec))
    allocate(r(mloc_vec,nloc_vec))
    allocate(rhat0(mloc_vec,nloc_vec))
    allocate(x0(mloc_vec,nloc_vec))
    allocate(x(mloc_vec,nloc_vec))
    allocate(v0(mloc_vec,nloc_vec))
    allocate(v(mloc_vec,nloc_vec))
    allocate(p0(mloc_vec,nloc_vec))
    allocate(p(mloc_vec,nloc_vec))
    allocate(h(mloc_vec,nloc_vec))
    allocate(s(mloc_vec,nloc_vec))
    allocate(t(mloc_vec,nloc_vec))
    allocate(tmp_vec(mloc_vec,nloc_vec))
    x0  = 0
    r0  = 0
    r   = 0
    x   = 0
    v0  = 0
    v   = 0
    p0  = 0
    p   = 0
    h   = 0
    s   = 0
    t   = 0
    norm= 0
    rhat0 = 0
    rho0 = 1
    rho = 0
    alpha = 1
    omega0 = 1
    omega = 0
    beta = 0
    converged = .false.

    r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
    rhat0 = r0
    tol = 1E-6
    maxiter = 1000
    call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
    print *, rank , mloc_vec, nloc_vec
    ! print *, "rank",rank,"descmat",descmat
    ! print *, "rank",rank,"descvec",descvec
    ! ======================================================

    ! ================Loop==================================
    do i = 1, maxiter
        ! rho = dot_product(rhat0(:,1),r0(:,1))
        call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
        beta = (rho/rho0)*(alpha/omega0)
        p = r0 + beta*(p0 - omega0*v0)

        ! v = matmul(A,p)
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0,v, 1, 1, descvec, 1)

        ! alpha = rho/dot_product(rhat0(:,1),v(:,1))
        call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
        alpha = rho/alpha
        h = x0 + alpha*p

        ! if (norm2(h-x0) < tol) then
        tmp_vec = h - x0
        norm = 999.0
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        ! print *, "current norm1", norm, rank
        if (norm < tol) then
            x=h
            converged = .true.
            exit
        endif
        if (i==1) print *,"rank",rank,"was here"
        s = r0 - alpha*v
        ! t = matmul(A,s)
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
        ! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
        ! t = tmp_vec
        ! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
        call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
        call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
        omega = omega/tmp_real

        x = h + omega*s

        ! if (norm2(x-x0)<tol) then
        tmp_vec = x - x0
        norm = 1000000
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        if (norm < tol) then
                print *, "current norm2", norm
                converged = .true.
                exit
        endif
        r = s - omega*t
        x0 =x
        rho0 = rho
        p0 = p
        r0 = r
        v0 = v
        omega0 = omega
    enddo
    ! =========================================================
    if (converged) then
        print *, "Bi-CG STAB solver converged in iteration #", i, norm
    else
        print *, "Maximum iteration cycles reached"
    endif
    call MPI_Barrier(MPI_COMM_WORLD,ierr)
    b = x
    ! print *,"rank ",rank
    ! =================clean up!===============================
    deallocate(r0)
    deallocate(r)
    deallocate(rhat0)
    deallocate(x0)
    deallocate(x)
    deallocate(v0)
    deallocate(v)
    deallocate(p0)
    deallocate(p)
    deallocate(h)
    deallocate(s)
    deallocate(t)
    print *,"End of bicgstab"
end subroutine bicgstab_self_sclpk
end module bicgstab

program test_bicgstab
    use bicgstab
    use mpi
    implicit none
    character, parameter        :: UPLO="U"
    character(len=7)            :: char_size
    integer                     :: info
    integer(kind=8)             :: N, i, j
    real(kind=8), allocatable   :: A_global(:,:), b_global(:,:)
    integer(kind=8)             :: count_start, count_end,count_rate, dummy_int
    real(kind=8)                :: time
    ! =========================BLACS and MPI=======================
    integer                     :: ierr, size, rank,dims(2)
    ! -------------------------------------------------------------
    integer, parameter          :: block_size = 100
    integer                     :: context, nprow, npcol, local_nprow, local_npcol
    integer                     :: numroc, indxl2g, descmat(9),descvec(9)
    integer                     :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
    real(kind=8), allocatable   :: A(:,:), x(:,:), b(:,:)

    call blacs_pinfo(rank,size)
    dims=0
    call MPI_Dims_create(size, 2, dims, ierr)
    nprow = dims(1);npcol = dims(2)
    call blacs_get(0,0,context)
    call blacs_gridinit(context, 'R', nprow, npcol)
    call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)

    N = 700

    allocate(A_global(N,N))

    if (rank==0) open(101,file='A.dat')
    do i = 1, N
        if (rank==0) read(101,*) A_global(i,1:N)
        call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE,0,MPI_COMM_WORLD,  ierr)
    enddo
    if (rank==0) close(101)

    mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
    nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
    allocate(A(mloc_mat,nloc_mat))

    do i = 1, mloc_mat
        do j = 1,nloc_mat
            A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                             &indxl2g(j,block_size,local_npcol,0, npcol))
        enddo
    enddo

    if (rank==0) print *, "Read matrix"

    allocate(b_global(N,1))

    if (rank==0) then
        open(103,file='b.dat')
        do i = 1, N
            read(103,*) b_global(i,1)
        enddo
        close(103)
    endif
    call MPI_Bcast(b_global(:,1), N,MPI_DOUBLE,0,MPI_COMM_WORLD,  ierr)

    ! set up scalapack shared matrices
    if (rank==0) print *, "Matrix broadcasted"
    mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
    nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
    print *,"Rank", rank, mloc_vec, nloc_vec
    allocate(b(mloc_vec,nloc_vec))
    allocate(x(mloc_vec,nloc_vec))

    do i = 1, mloc_vec
        do j = 1,nloc_vec 
            b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                             &indxl2g(j,block_size,local_npcol,0, npcol))
            x(i,j)   = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                               &indxl2g(j,block_size,local_npcol,0, npcol))
        enddo
    enddo

    call descinit(descmat    , N, N, block_size, block_size, 0,0,context,max(1,mloc_mat),info)
    call descinit(descvec    , N, 1, block_size, block_size, 0,0,context,max(1,mloc_vec),info)

    if (rank==0) print *, "Set up done,solving"

    ! setup done, call in the cavalry
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
    ! print *,x
    call MPI_Barrier(MPI_COMM_WORLD,ierr)
    call blacs_gridexit(context)
    call blacs_exit(0)

end program test_bicgstab

If needed, the matrix files can be downloaded from https://github.com/ipcamit/temp_so

OK, first off there are quite a few problems with the code you posted; below is what I believe to be a corrected version, which uses random numbers in place of the files. In particular you really need to be more careful about the kinds of your reals, and I recommend sticking to default integers: you don't need long integers here, and trying to use them only causes unnecessary pain.

That said, I think you have run into a bug in pdnrm2. Take a look at the source code at

http://www.netlib.org/scalapack/explore-html/d3/d5f/pdnrm2___8c_source.html

 /*
 *  Process column 0 broadcasts the combined values of SCALE and SSQ within their
 *  process column
 */
             top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
             if( myrow == 0 )
             {
                Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 );
             }
             else
             {
                Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2,
                          0, mycol );
             }
 /*

Now I don't claim to understand this fully, but the comment strongly suggests that the result is only broadcast along the column of processes containing rank zero, not to every process in the grid. That is fine for a 1-D decomposition but not for a 2-D one, which is exactly what you are seeing. So I'm afraid that, in my opinion, to work around this you will have to broadcast the value yourself from process zero.
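
For example, a minimal sketch of that workaround (my assumption being that the grid was created row-major on MPI_COMM_WORLD, so MPI rank 0 is grid process (0,0) and holds the correct result, and that norm is a Real(wp) as in the code below) is simply to follow each pdnrm2 with an MPI_Bcast:

    ! Sketch of the hand-broadcast workaround for the first convergence test
    tmp_vec = h - x0
    Call pdnrm2(N, norm, tmp_vec, 1, 1, descvec, 1)
    ! pdnrm2 only returns the correct value on part of the grid, so push the
    ! copy held by MPI rank 0 out to everybody before testing convergence
    Call MPI_Bcast(norm, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
    If (norm < tol) Then
       x = h
       converged = .True.
       Exit
    Endif

The same one-line broadcast after the second pdnrm2 call keeps the second convergence test consistent as well.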

Below is what I believe is a corrected version of your code. It works on 3 processes (a 1-D decomposition) but not on 4 (a 2x2, i.e. 2-D, decomposition):

Module numbers

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64

  Public :: wp

  Private

End Module numbers

Module bicgstab

Contains
  Subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
    Use numbers
    Use mpi
    Implicit None
    Real ( wp )               :: A(:,:), b(:,:), tol
    Integer :: N
    Integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec

    Integer             :: i, ierr, rank, maxit
    Real( wp )                :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
    Real( wp ), Allocatable   :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
    Real( wp ), Allocatable   :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
    Logical             :: converged

    ! ================== Initialize ======================
    Allocate(r0(mloc_vec,nloc_vec))
    Allocate(r(mloc_vec,nloc_vec))
    Allocate(rhat0(mloc_vec,nloc_vec))
    Allocate(x0(mloc_vec,nloc_vec))
    Allocate(x(mloc_vec,nloc_vec))
    Allocate(v0(mloc_vec,nloc_vec))
    Allocate(v(mloc_vec,nloc_vec))
    Allocate(p0(mloc_vec,nloc_vec))
    Allocate(p(mloc_vec,nloc_vec))
    Allocate(h(mloc_vec,nloc_vec))
    Allocate(s(mloc_vec,nloc_vec))
    Allocate(t(mloc_vec,nloc_vec))
    Allocate(tmp_vec(mloc_vec,nloc_vec))
    x0  = 0.0_wp
    r0  = 0.0_wp
    r   = 0.0_wp
    x   = 0.0_wp
    v0  = 0.0_wp
    v   = 0.0_wp
    p0  = 0.0_wp
    p   = 0.0_wp
    h   = 0.0_wp
    s   = 0.0_wp
    t   = 0.0_wp
    norm= 0.0_wp
    rhat0 = 0.0_wp
    rho0 = 1.0_wp
    rho = 0.0_wp
    alpha = 1.0_wp
    omega0 = 1.0_wp
    omega = 0.0_wp
    beta = 0.0_wp
    converged = .False.

    r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
    rhat0 = r0
    tol = 1E-6_wp
    maxiter = 1000
    Call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
    Print *, rank , mloc_vec, nloc_vec
    ! print *, "rank",rank,"descmat",descmat
    ! print *, "rank",rank,"descvec",descvec
    ! ======================================================

    ! ================Loop==================================
    Do i = 1, maxiter
       ! rho = dot_product(rhat0(:,1),r0(:,1))
       Call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
       beta = (rho/rho0)*(alpha/omega0)
       p = r0 + beta*(p0 - omega0*v0)

       ! v = matmul(A,p)
       Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0_wp,v, 1, 1, descvec, 1)

       ! alpha = rho/dot_product(rhat0(:,1),v(:,1))
       Call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
       alpha = rho/alpha
       h = x0 + alpha*p

       ! if (norm2(h-x0) < tol) then
       tmp_vec = h - x0
       Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
       ! print *, "current norm1", norm, rank
       If (norm < tol) Then
          x=h
          converged = .True.
          Exit
       Endif
       If (i==1) Print *,"rank",rank,"was here"
       s = r0 - alpha*v
       ! t = matmul(A,s)
       Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0_wp,t, 1, 1, descvec, 1)
       ! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
       ! t = tmp_vec
       ! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
       Call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
       Call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
       omega = omega/tmp_real

       x = h + omega*s

       ! if (norm2(x-x0)<tol) then
       tmp_vec = x - x0
       norm = 1000000
       Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
       If (norm < tol) Then
          Print *, "current norm2", norm
          converged = .True.
          Exit
       Endif
       r = s - omega*t
       x0 =x
       rho0 = rho
       p0 = p
       r0 = r
       v0 = v
       omega0 = omega
    Enddo
    ! =========================================================
    If (converged) Then
       Print *, "Bi-CG STAB solver converged in iteration #", i, norm
    Else
       Print *, "Maximum iteration cycles reached"
    Endif
    Call MPI_Barrier(MPI_COMM_WORLD,ierr)
    b = x
    ! print *,"rank ",rank
    ! =================clean up!===============================
    Deallocate(r0)
    Deallocate(r)
    Deallocate(rhat0)
    Deallocate(x0)
    Deallocate(x)
    Deallocate(v0)
    Deallocate(v)
    Deallocate(p0)
    Deallocate(p)
    Deallocate(h)
    Deallocate(s)
    Deallocate(t)
    Print *,"End of bicgstab"
  End Subroutine bicgstab_self_sclpk
End Module bicgstab

Program test_bicgstab
  Use numbers
  Use bicgstab
  Use mpi
  Implicit None
  Character, Parameter        :: UPLO="U"
  Character(len=7)            :: char_size
  Integer                     :: info
  Integer             :: N, i, j
  Real(wp), Allocatable   :: A_global(:,:), b_global(:,:)
  Integer             :: count_start, count_end,count_rate, dummy_int
  Real(wp)                :: time
  ! =========================BLACS and MPI=======================
  ! Size is a really bad name for a variable as it clashes with the very useful intrinsic
!!$  Integer                     :: ierr, size, rank,dims(2)
  Integer                     :: ierr, nprocs, rank,dims(2)
  ! -------------------------------------------------------------
  Integer, Parameter          :: block_size = 100
  Integer                     :: context, nprow, npcol, local_nprow, local_npcol
  Integer                     :: numroc, indxl2g, descmat(9),descvec(9)
  Integer                     :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
  Real(wp), Allocatable   :: A(:,:), x(:,:), b(:,:)

  ! CANNOT DO ANY MPI UNTIL MPI_INIT HAS BEEN CALLED
  Call MPI_Init( ierr )

  Call blacs_pinfo(rank,nprocs)
  dims=0
  Call MPI_Dims_create(nprocs, 2, dims, ierr)
  nprow = dims(1);npcol = dims(2)
  Call blacs_get(0,0,context)
  Call blacs_gridinit(context, 'R', nprow, npcol)
  Call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)

  N = 700

  Allocate(A_global(N,N))

  ! No file, fill with random numbers
!!$  If (rank==0) Open(101,file='A.dat')
!!$  Do i = 1, N
!!$     If (rank==0) Read(101,*) A_global(i,1:N)
!!$     Call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)
!!$  Enddo
!!$  If (rank==0) Close(101)
  If( rank == 0 ) Then
     Call Random_number( a_global )
     ! Add something onto the diagonal to hopefully avoid a horrible condition number
     Do i = 1, n
        a_global( i, i ) = a_global( i, i ) + n
     End Do
  End If
  Call MPI_Bcast(A_global, Size( a_global ),MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)

  mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
  nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
  Allocate(A(mloc_mat,nloc_mat))

  Do i = 1, mloc_mat
     Do j = 1,nloc_mat
        A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
     Enddo
  Enddo

  If (rank==0) Print *, "Read matrix"

  Allocate(b_global(N,1))
  ! No file, fill with random numbers
!!$
!!$  If (rank==0) Then
!!$     Open(103,file='b.dat')
!!$     Do i = 1, N
!!$        Read(103,*) b_global(i,1)
!!$     Enddo
!!$     Close(103)
!!$  Endif
  If( Rank == 0 ) Then
     Call Random_number( b_global )
  End If
  Call MPI_Bcast(b_global, Size( b_global ) ,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)

  ! set up scalapack shared matrices
  If (rank==0) Print *, "Matrix broadcasted"
  mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
  nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
  Print *,"Rank", rank, mloc_vec, nloc_vec
  Allocate(b(mloc_vec,nloc_vec))
  Allocate(x(mloc_vec,nloc_vec))

  Do i = 1, mloc_vec
     Do j = 1,nloc_vec 
        b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
        x(i,j)   = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
     Enddo
  Enddo

  Call descinit(descmat    , N, N, block_size, block_size, 0,0,context,Max(1,mloc_mat),info)
  Call descinit(descvec    , N, 1, block_size, block_size, 0,0,context,Max(1,mloc_vec),info)

  If (rank==0) Print *, "Set up done,solving"

  ! setup done, call in the cavalry
  Call MPI_Barrier(MPI_COMM_WORLD, ierr)
  Call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
  ! print *,x
  Call MPI_Barrier(MPI_COMM_WORLD,ierr)
  Call blacs_gridexit(context)
!!$  Call blacs_exit(0)
  Call MPI_Finalize( ierr )

End Program test_bicgstab
ian@eris:~/work/stack$ mpif90 -g -Wall -Wextra -O -pedantic -fbacktrace -fcheck=all bicg.f90 -lblacsF77init-openmpi  -lblacs-openmpi -lscalapack-openmpi
bicg.f90:21:63:

     Integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
                                                               1
Warning: Unused variable ‘info’ declared at (1) [-Wunused-variable]
bicg.f90:23:47:

     Integer             :: i, ierr, rank, maxit
                                               1
Warning: Unused variable ‘maxit’ declared at (1) [-Wunused-variable]
bicg.f90:160:42:

   Character(len=7)            :: char_size
                                          1
Warning: Unused variable ‘char_size’ declared at (1) [-Wunused-variable]
bicg.f90:164:47:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                               1
Warning: Unused variable ‘count_end’ declared at (1) [-Wunused-variable]
bicg.f90:164:58:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                                          1
Warning: Unused variable ‘count_rate’ declared at (1) [-Wunused-variable]
bicg.f90:164:36:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                    1
Warning: Unused variable ‘count_start’ declared at (1) [-Wunused-variable]
bicg.f90:164:69:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                                                     1
Warning: Unused variable ‘dummy_int’ declared at (1) [-Wunused-variable]
bicg.f90:165:33:

   Real(wp)                :: time
                                 1
Warning: Unused variable ‘time’ declared at (1) [-Wunused-variable]
bicg.f90:159:37:

   Character, Parameter        :: UPLO="U"
                                     1
Warning: Unused parameter ‘uplo’ declared at (1) [-Wunused-parameter]
ian@eris:~/work/stack$ mpirun -np 3 ./a.out
 Read matrix
 Matrix broadcasted
 Rank           0         300           1
 Rank           2         200           1
 Set up done,solving
 Rank           1         200           1
           0         300           1
           2         200           1
           1         200           1
 rank           2 was here
 rank           0 was here
 rank           1 was here
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 End of bicgstab
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 End of bicgstab
 End of bicgstab
ian@eris:~/work/stack$ mpirun -np 4 ./a.out
 Read matrix
 Matrix broadcasted
 Rank           3         300           0
 Rank           0         400           1
 Rank           1         400           0
 Rank           2         300           1
 Set up done,solving
           1         400           0
           3         300           0
           2         300           1
           0         400           1
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000     
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000     
 rank           0 was here
 rank           2 was here
^Cian@eris:~/work/stack$