运行 仅针对 Fortran 中指定的整数集循环

Running loops only for specified sets of integers in fortran

        do i=1,n
                s=0
                do l=1,n
                do m=1,n


                    s=s-a(i,l,m)*q0(l)*q0(m)

                end do
                end do


                f0(i)=s-g(i)*q0(i)
        end do

这是我的代码的一部分。由于我必须同时 运行 三个循环,整体执行变得非常慢。

重要的事实是,在这里,数组 a(i,l,m) 仅对于 a(l,m,n) 的一组值是非零的。下面是设置 a(i,l,m) 的代码。

do i=1,n
    do l=1,n
        do m=1,n

        if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
        a1=1
        else 
        a1=0
        end if



        if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
        b1=1
        else
        b1=0
        end if

    a(i,l,m)=(a1-b1)    (!multiplied with some long function, erased for ease of understanding)

end do
    end do
        end do

现在,在 fortran 中有什么方法可以 运行 循环只针对 a(i,l,m) 不为零的 (i,l,m) 的值?(非零集只在计算中起作用,可以看出)这将节省大量时间。

Fortran 和类似的大量其他编程语言具有跳转语句,允许人们在标准循环控制之外操纵循环迭代。在 Fortran 中,这些语句是 CYCLE 和 EXIT:

CYCLE statement: Execution of a loop iteration can be curtailed by executing a CYCLE statement that belongs to the construct
EXIT statement: The EXIT statement provides one way of terminating a loop, or completing execution of another construct.

使用这些构造,当特定索引与计算无关时,现在可以快速循环循环。在 OP 的情况下,可以做类似的事情:

do i=1,n
   s=0
   do l=1,n
      do m=1,n
         if (a(i,l,m) == 0) cycle
         s=s-a(i,l,m)*q0(l)*q0(m)
      end do
   end do
   f0(i)=s-g(i)*q0(i)
end do

当然,您应该始终考虑到这仍然是一个复杂度为 O(n^3) 的问题。

然而,关于如何构建 3d 数组的更多信息 a。由于 a(i,l,m) = a1 - b1a1b1 只能根据条件具有值 01,因此元素 a(i,l,m) 不同于如果仅满足其中一个条件,则为 0。现在很容易检查是否满足第一个条件:

i == l+m .or. i == -l+m .or. i == l-m

第二个条件从未满足:

i+l+m == 2*n+2 .or. i-l+m == 2*n+2 .or. i+l-m == 2*n+2 .or. i-l-m == 2*n+2

所以只能同时满足其中一个条件。这给了你一些额外的杠杆来加快速度并删除内部循环使得这个 O(n^2):

do i=1,n
  s=0
  do l=1,n
     m=i-l
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=i+l
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=l-i
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=2*n+2-i-l
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=2*n+2-i+l
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=-(2*n+2-i-l)
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
     m=-(2*n+2-i+l)
     if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
  end do
  f0(i)=s-g(i)*q0(i)
end do

肯定还有进一步改进的可能。

你不需要存储所有的数组,你可以在需要的时候计算它的元素,并使用守卫(额外的数组元素来避免越界索引)来避免 if 条件。这是您可能会做的,将问题减少到 O(n^2) 并使用更少的内存。另请注意,我提供了一个完整的测试程序,这使得回答问题变得非常非常容易 - 以后请自己动手!

ijb@ianbushdesktop ~/work/stack $ cat o3.f90
Program o3

  Implicit None

  Integer, Parameter :: wp = Selected_real_kind( 12, 70 )

  Real( wp ), Dimension( :, :, : ), Allocatable :: a

  Real( wp ), Dimension( : ), Allocatable :: q0, g, f0, s2

  Real( wp ) :: a1, b1
  Real( wp ) :: s

  Integer :: n
  Integer :: start, finish, rate
  Integer :: i, l, m

  Write( *, * ) 'n ?'
  Read ( *, * ) n 

  Allocate( a( 1:n, 1:n, 1:n ) )
  Allocate( q0( 1:n ) )
  Allocate( f0( 1:n ) )
  Allocate(  g( 1:n ) )
  Allocate( s2( -4 * n - 2:4 * n + 2 ) ) ! guards to avoid out of bounds - haven't thought very carefully about
                                         ! what they should be!!

  Call Random_number( q0 )
  Call Random_number( g )

  Call system_clock( start , rate )
  b1 = 0.0_wp
  do i=1,n
     do l=1,n
        do m=1,n

           if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
              a1=1.0_wp
           else 
              a1=0.0_wp
           end if

           if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
              b1=1.0_wp
           else
              b1=0.0_wp
           end if

           a(i,l,m)=(a1-b1)    !(multiplied with some long function, erased for ease of understanding)

        end do
     end do
  end do
  do i=1,n
     s=0.0_wp
     do l=1,n
        do m=1,n

           s=s-a(i,l,m)*q0(l)*q0(m)

        end do
     end do
     f0(i)=s-g(i)*q0(i)
  end do
  Call system_clock( finish, rate )
  Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate

  Call system_clock( start , rate )
  s2 = 0.0_wp
  Do l = 1, n
     Do m = 1, n

        ! First condition
        i = l + m
        a1 = 1.0_wp
        s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )

        ! Second condition
        i = - l + m
        a1 = 1.0_wp
        s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )

        ! Third condition
        i = l - m
        a1 = 1.0_wp
        s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )

        ! Fourth Condition
        i = 2 * n + 2 - l - m
        b1 = 1.0_wp
        s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )

        ! Fifth Condition
        i = 2 * n + 2 + l - m
        b1 = 1.0_wp
        s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )


        ! Sixth Condition
        i = 2 * n + 2 - l + m
        b1 = 1.0_wp
        s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )

        ! Seventh Condition
        i = 2 * n + 2 + l + m
        b1 = 1.0_wp
        s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )


     End Do
  End Do
  Do i = 1, n
     f0( i ) = s2( i ) - g( i ) * q0( i )
  End Do
  Call system_clock( finish, rate )
  Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate
End Program o3
ijb@ianbushdesktop ~/work/stack $ gfortran --version
GNU Fortran (GCC) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -fcheck=all -O o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
 n ?
300
 Sum f0, time:   -23660.711846511185       0.446999997    
 Sum f0, time:   -23660.711846511185        1.00000005E-03
ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -O3 o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
 n ?
300
 Sum f0, time:   -21932.467299817898       0.298999995    
 Sum f0, time:   -21932.467299817898        0.00000000    
ijb@ianbushdesktop ~/work/stack $ ./a.out
 n ?
1000
 Sum f0, time:   -238036.00437753636        52.4760017    
 Sum f0, time:   -238036.00437753636        2.00000009E-03
ijb@ianbushdesktop ~/work/stack $