运行 仅针对 Fortran 中指定的整数集循环
Running loops only for specified sets of integers in fortran
do i=1,n
s=0
do l=1,n
do m=1,n
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
这是我的代码的一部分。由于我必须同时 运行 三个循环,整体执行变得非常慢。
重要的事实是,在这里,数组 a(i,l,m) 仅对于 a(l,m,n) 的一组值是非零的。下面是设置 a(i,l,m) 的代码。
do i=1,n
do l=1,n
do m=1,n
if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
a1=1
else
a1=0
end if
if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
b1=1
else
b1=0
end if
a(i,l,m)=(a1-b1) (!multiplied with some long function, erased for ease of understanding)
end do
end do
end do
现在,在 fortran 中有什么方法可以 运行 循环只针对 a(i,l,m) 不为零的 (i,l,m) 的值?(非零集只在计算中起作用,可以看出)这将节省大量时间。
Fortran 和类似的大量其他编程语言具有跳转语句,允许人们在标准循环控制之外操纵循环迭代。在 Fortran 中,这些语句是 CYCLE 和 EXIT:
CYCLE statement: Execution of a loop iteration can be curtailed by executing a CYCLE statement that belongs to the construct
EXIT statement: The EXIT statement provides one way of terminating a loop, or completing execution of another construct.
使用这些构造,当特定索引与计算无关时,现在可以快速循环循环。在 OP 的情况下,可以做类似的事情:
do i=1,n
s=0
do l=1,n
do m=1,n
if (a(i,l,m) == 0) cycle
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
当然,您应该始终考虑到这仍然是一个复杂度为 O(n^3) 的问题。
然而,关于如何构建 3d 数组的更多信息 a
。由于 a(i,l,m) = a1 - b1
和 a1
和 b1
只能根据条件具有值 0
或 1
,因此元素 a(i,l,m)
不同于如果仅满足其中一个条件,则为 0。现在很容易检查是否满足第一个条件:
i == l+m .or. i == -l+m .or. i == l-m
第二个条件从未满足:
i+l+m == 2*n+2 .or. i-l+m == 2*n+2 .or. i+l-m == 2*n+2 .or. i-l-m == 2*n+2
所以只能同时满足其中一个条件。这给了你一些额外的杠杆来加快速度并删除内部循环使得这个 O(n^2):
do i=1,n
s=0
do l=1,n
m=i-l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=i+l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=l-i
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=2*n+2-i-l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=2*n+2-i+l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=-(2*n+2-i-l)
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=-(2*n+2-i+l)
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
end do
f0(i)=s-g(i)*q0(i)
end do
肯定还有进一步改进的可能。
你不需要存储所有的数组,你可以在需要的时候计算它的元素,并使用守卫(额外的数组元素来避免越界索引)来避免 if 条件。这是您可能会做的,将问题减少到 O(n^2) 并使用更少的内存。另请注意,我提供了一个完整的测试程序,这使得回答问题变得非常非常容易 - 以后请自己动手!
ijb@ianbushdesktop ~/work/stack $ cat o3.f90
Program o3
Implicit None
Integer, Parameter :: wp = Selected_real_kind( 12, 70 )
Real( wp ), Dimension( :, :, : ), Allocatable :: a
Real( wp ), Dimension( : ), Allocatable :: q0, g, f0, s2
Real( wp ) :: a1, b1
Real( wp ) :: s
Integer :: n
Integer :: start, finish, rate
Integer :: i, l, m
Write( *, * ) 'n ?'
Read ( *, * ) n
Allocate( a( 1:n, 1:n, 1:n ) )
Allocate( q0( 1:n ) )
Allocate( f0( 1:n ) )
Allocate( g( 1:n ) )
Allocate( s2( -4 * n - 2:4 * n + 2 ) ) ! guards to avoid out of bounds - haven't thought very carefully about
! what they should be!!
Call Random_number( q0 )
Call Random_number( g )
Call system_clock( start , rate )
b1 = 0.0_wp
do i=1,n
do l=1,n
do m=1,n
if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
a1=1.0_wp
else
a1=0.0_wp
end if
if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
b1=1.0_wp
else
b1=0.0_wp
end if
a(i,l,m)=(a1-b1) !(multiplied with some long function, erased for ease of understanding)
end do
end do
end do
do i=1,n
s=0.0_wp
do l=1,n
do m=1,n
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
Call system_clock( finish, rate )
Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate
Call system_clock( start , rate )
s2 = 0.0_wp
Do l = 1, n
Do m = 1, n
! First condition
i = l + m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Second condition
i = - l + m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Third condition
i = l - m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Fourth Condition
i = 2 * n + 2 - l - m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Fifth Condition
i = 2 * n + 2 + l - m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Sixth Condition
i = 2 * n + 2 - l + m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Seventh Condition
i = 2 * n + 2 + l + m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
End Do
End Do
Do i = 1, n
f0( i ) = s2( i ) - g( i ) * q0( i )
End Do
Call system_clock( finish, rate )
Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate
End Program o3
ijb@ianbushdesktop ~/work/stack $ gfortran --version
GNU Fortran (GCC) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -fcheck=all -O o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
300
Sum f0, time: -23660.711846511185 0.446999997
Sum f0, time: -23660.711846511185 1.00000005E-03
ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -O3 o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
300
Sum f0, time: -21932.467299817898 0.298999995
Sum f0, time: -21932.467299817898 0.00000000
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
1000
Sum f0, time: -238036.00437753636 52.4760017
Sum f0, time: -238036.00437753636 2.00000009E-03
ijb@ianbushdesktop ~/work/stack $
do i=1,n
s=0
do l=1,n
do m=1,n
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
这是我的代码的一部分。由于我必须同时 运行 三个循环,整体执行变得非常慢。
重要的事实是,在这里,数组 a(i,l,m) 仅对于 a(l,m,n) 的一组值是非零的。下面是设置 a(i,l,m) 的代码。
do i=1,n
do l=1,n
do m=1,n
if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
a1=1
else
a1=0
end if
if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
b1=1
else
b1=0
end if
a(i,l,m)=(a1-b1) (!multiplied with some long function, erased for ease of understanding)
end do
end do
end do
现在,在 fortran 中有什么方法可以 运行 循环只针对 a(i,l,m) 不为零的 (i,l,m) 的值?(非零集只在计算中起作用,可以看出)这将节省大量时间。
Fortran 和类似的大量其他编程语言具有跳转语句,允许人们在标准循环控制之外操纵循环迭代。在 Fortran 中,这些语句是 CYCLE 和 EXIT:
CYCLE statement: Execution of a loop iteration can be curtailed by executing a CYCLE statement that belongs to the construct
EXIT statement: The EXIT statement provides one way of terminating a loop, or completing execution of another construct.
使用这些构造,当特定索引与计算无关时,现在可以快速循环循环。在 OP 的情况下,可以做类似的事情:
do i=1,n
s=0
do l=1,n
do m=1,n
if (a(i,l,m) == 0) cycle
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
当然,您应该始终考虑到这仍然是一个复杂度为 O(n^3) 的问题。
然而,关于如何构建 3d 数组的更多信息 a
。由于 a(i,l,m) = a1 - b1
和 a1
和 b1
只能根据条件具有值 0
或 1
,因此元素 a(i,l,m)
不同于如果仅满足其中一个条件,则为 0。现在很容易检查是否满足第一个条件:
i == l+m .or. i == -l+m .or. i == l-m
第二个条件从未满足:
i+l+m == 2*n+2 .or. i-l+m == 2*n+2 .or. i+l-m == 2*n+2 .or. i-l-m == 2*n+2
所以只能同时满足其中一个条件。这给了你一些额外的杠杆来加快速度并删除内部循环使得这个 O(n^2):
do i=1,n
s=0
do l=1,n
m=i-l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=i+l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=l-i
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=2*n+2-i-l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=2*n+2-i+l
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=-(2*n+2-i-l)
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
m=-(2*n+2-i+l)
if (m > 0 .and. m <= n) s=s-a(i,l,m)*q0(l)*q0(m)
end do
f0(i)=s-g(i)*q0(i)
end do
肯定还有进一步改进的可能。
你不需要存储所有的数组,你可以在需要的时候计算它的元素,并使用守卫(额外的数组元素来避免越界索引)来避免 if 条件。这是您可能会做的,将问题减少到 O(n^2) 并使用更少的内存。另请注意,我提供了一个完整的测试程序,这使得回答问题变得非常非常容易 - 以后请自己动手!
ijb@ianbushdesktop ~/work/stack $ cat o3.f90
Program o3
Implicit None
Integer, Parameter :: wp = Selected_real_kind( 12, 70 )
Real( wp ), Dimension( :, :, : ), Allocatable :: a
Real( wp ), Dimension( : ), Allocatable :: q0, g, f0, s2
Real( wp ) :: a1, b1
Real( wp ) :: s
Integer :: n
Integer :: start, finish, rate
Integer :: i, l, m
Write( *, * ) 'n ?'
Read ( *, * ) n
Allocate( a( 1:n, 1:n, 1:n ) )
Allocate( q0( 1:n ) )
Allocate( f0( 1:n ) )
Allocate( g( 1:n ) )
Allocate( s2( -4 * n - 2:4 * n + 2 ) ) ! guards to avoid out of bounds - haven't thought very carefully about
! what they should be!!
Call Random_number( q0 )
Call Random_number( g )
Call system_clock( start , rate )
b1 = 0.0_wp
do i=1,n
do l=1,n
do m=1,n
if(i.eq.l+m .or. i.eq.-l+m .or. i.eq.l-m) then
a1=1.0_wp
else
a1=0.0_wp
end if
if(i+l+m.eq.n+n+2 .or. i-l+m.eq.n+n+2 .or. i+l-m.eq.n+n+2 .or. i-l-m.eq.n+n+2) then
b1=1.0_wp
else
b1=0.0_wp
end if
a(i,l,m)=(a1-b1) !(multiplied with some long function, erased for ease of understanding)
end do
end do
end do
do i=1,n
s=0.0_wp
do l=1,n
do m=1,n
s=s-a(i,l,m)*q0(l)*q0(m)
end do
end do
f0(i)=s-g(i)*q0(i)
end do
Call system_clock( finish, rate )
Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate
Call system_clock( start , rate )
s2 = 0.0_wp
Do l = 1, n
Do m = 1, n
! First condition
i = l + m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Second condition
i = - l + m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Third condition
i = l - m
a1 = 1.0_wp
s2( i ) = s2( i ) - a1 * q0( l ) * q0( m )
! Fourth Condition
i = 2 * n + 2 - l - m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Fifth Condition
i = 2 * n + 2 + l - m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Sixth Condition
i = 2 * n + 2 - l + m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
! Seventh Condition
i = 2 * n + 2 + l + m
b1 = 1.0_wp
s2( i ) = s2( i ) - ( - b1 ) * q0( l ) * q0( m )
End Do
End Do
Do i = 1, n
f0( i ) = s2( i ) - g( i ) * q0( i )
End Do
Call system_clock( finish, rate )
Write( *, * ) 'Sum f0, time: ', Sum( f0 ), Real( finish - start ) / rate
End Program o3
ijb@ianbushdesktop ~/work/stack $ gfortran --version
GNU Fortran (GCC) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -fcheck=all -O o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
300
Sum f0, time: -23660.711846511185 0.446999997
Sum f0, time: -23660.711846511185 1.00000005E-03
ijb@ianbushdesktop ~/work/stack $ gfortran -Wall -Wextra -std=f2008 -O3 o3.f90
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
300
Sum f0, time: -21932.467299817898 0.298999995
Sum f0, time: -21932.467299817898 0.00000000
ijb@ianbushdesktop ~/work/stack $ ./a.out
n ?
1000
Sum f0, time: -238036.00437753636 52.4760017
Sum f0, time: -238036.00437753636 2.00000009E-03
ijb@ianbushdesktop ~/work/stack $