为什么我的程序返回的结果不准确?

Why does my program return an inaccurate result?

我先在 MATLAB 中编写(并优化)了著名的 spectral-norm 算法,随后又用 Fortran 重写了它。直接移植到 Fortran 后速度至少提升了 18 倍,但问题是 Fortran 程序的输出不准确。正确的输出应该是 1.274224153,但我的 Fortran 程序输出的是 1.273722712。我在 Fortran 中做错了什么?

program perf_spectralnorm
! Driver for the spectral-norm computation: applies Au and Atu in turn to
! the vectors u, v, w and prints sqrt(dot_product(u,v)/dot_product(v,v)).
implicit none
! n  : length of all work vectors.
! dp : real kind of the double-precision literal 0.d0.
integer, parameter :: n = 5500, dp = kind(0.d0) 
! u starts as a vector of ones; v and w are filled by Atu/Au before use.
real(dp) :: u(n) = 1, v(n), w(n), vBv, vv, res
! NOTE(review): this i is also the loop index used inside Au and Atu, which
! declare no local i and therefore reach this variable by host association.
! The host-level j is not referenced anywhere in the program as shown.
integer  :: i, j, nvec(n)

! nvec = 1, 2, ..., n; Au and Atu use nvec-1 as the zero-based index vector.
nvec = [(i, i=1,n)]
! Loop written as 10 passes of w = Au(u), v = Atu(w), w = Au(v), u = Atu(w).
! NOTE(review): Au and Atu run their own "do i = 1,n" loops on this same
! host variable i, so the do-variable is redefined while this loop is
! active. That is not permitted by the Fortran standard, so the number of
! passes actually executed is not guaranteed to be 10. Declaring a local
! "integer :: i" inside Au and Atu removes the problem.
do i = 1,10
   call Au(w, u)   ! change w
   call Atu(v, w)  ! change v
   call Au(w, v)   ! change w
   call Atu(u, w)  ! change u
end do
vBv = dot_product(u, v) 
vv  = dot_product(v, v)
res = sqrt(vBv/vv)

! Print the result with nine digits after the decimal point.
print '(f12.9)', res

contains 

elemental real(dp) function A(i, j)
   ! Returns 1 / ((i+j)*(i+j+1)/2 + i + 1) in kind dp.
   ! The dummy arguments i and j are local to A and hide the host's i and j.
   ! Because A is elemental, either index may be passed as an array
   ! (Au and Atu pass nvec-1 for one of them).
   integer, intent(in) :: i, j
   A = 1.0_dp / ((i+j) * (i+j+1.0_dP)/2 + i + 1)
end

subroutine Au(w, u)
   ! Sets w(i) = dot_product(A(i-1, nvec-1), u) for i = 1..n, i.e. the
   ! entries A(i-1, 0..n-1) (first index fixed) combined with u.
   ! NOTE(review): no local i is declared here, so the loop index below is
   ! the host program's i. The dummies also carry no INTENT.
   real(dp) :: w(:), u(:)  
   do i = 1,n 
      w(i) = dot_product(A(i-1,nvec-1) , u)  
   end do
end

subroutine Atu(v, w)
   ! Sets v(i) = dot_product(A(nvec-1, i-1), w) for i = 1..n, i.e. the
   ! entries A(0..n-1, i-1) (second index fixed) combined with w; this is
   ! the transposed counterpart of Au.
   ! NOTE(review): as in Au, the loop index below is the host program's i.
   real(dp) :: v(:), w(:)     
   do i = 1,n  
      v(i) = dot_product(A(nvec-1,i-1) , w)       
   end do
end

end program perf_spectralnorm

我在 MATLAB 中的原始实现(其输出是正确的)如下:

% Driver: evaluate perf_spectralnorm for vectors of length n and print the
% result with nine digits after the decimal point.
n = 5500; 
fprintf("%.9f\n", perf_spectralnorm(n))

function res = A(i,j)
%A  Matrix entry for the index pair (i, j).
%   res = A(i, j) evaluates 1 / ((i+j)*(i+j+1)/2 + i + 1) element-wise,
%   so i and j may be scalars or arrays of compatible sizes.
    idxSum = i + j;
    denom  = idxSum .* (idxSum + 1) / 2 + i + 1;
    res    = 1 ./ denom;
end

function w = Au(u,w)
%AU  Fill w with the products of the rows of A and the vector u.
%   w = Au(u, w) sets w(r) = dot(A(r-1, 0:len-1), u) for r = 1..length(u)
%   and returns the updated w. The incoming w only supplies the storage.
    len = length(u);
    colIdx = 0:len-1;                 % zero-based second index of A
    for r = 1:len
        rowOfA = A(r-1, colIdx);
        w(r) = dot(rowOfA, u);
    end
end

function v = Atu(w,v)
%ATU  Fill v with the products of the columns of A and the vector w.
%   v = Atu(w, v) sets v(c) = dot(A(0:len-1, c-1), w) for c = 1..length(w)
%   and returns the updated v. The incoming v only supplies the storage.
    len = length(w);
    rowIdx = 0:len-1;                 % zero-based first index of A
    for c = 1:len
        colOfA = A(rowIdx, c-1);
        v(c) = dot(colOfA, w);
    end
end

function res = perf_spectralnorm(n)
%PERF_SPECTRALNORM  Spectral-norm computation on vectors of length n.
%   Starting from u = ones(n,1), performs 10 rounds of
%       w = Au(u);  v = Atu(w);  w = Au(v);  u = Atu(w);
%   and returns sqrt(dot(u,v) / dot(v,v)).
    numRounds = 10;

    % Work vectors: u is the starting vector, v and w are scratch storage.
    w = zeros(n,1);
    v = zeros(n,1);
    u = ones(n,1);

    for round = 1:numRounds
        w = Au(u,w);
        v = Atu(w,v);
        w = Au(v,w);
        u = Atu(w,u);
    end

    numerator   = dot(u,v);
    denominator = dot(v,v);
    res = sqrt(numerator/denominator);
end

子例程 Au 和 Atu 通过宿主关联(host association)使用主程序的变量 i 作为循环变量。这样一来,主程序中正在执行的 do 循环的循环变量 i 就被修改了,而这是无效(不符合标准)的。要解决这个问题,您需要在 Au 和 Atu 中将 i 声明为局部变量。例如,

subroutine Au(w, u)
   ! Computes w(i) = dot_product(A(i-1, nvec-1), u) for i = 1..n, i.e. the
   ! product of the matrix defined by A (zero-based indices) with u.
   ! n, dp, nvec and A are taken from the host program.
   !
   ! w : receives the n products; every element 1..n is assigned.
   ! u : vector being multiplied; not modified.
   real(dp), intent(out) :: w(:)
   real(dp), intent(in)  :: u(:)
   ! Local loop index: keeps the loop from redefining the host program's i,
   ! which is the do-variable of the loop that calls this subroutine.
   integer :: i

   do i = 1, n
      ! First index fixed at i-1, second index running over 0..n-1.
      ! (A(nvec-1, i-1) would be the transposed product, which is Atu's job.)
      w(i) = dot_product(A(i-1, nvec-1), u)
   end do
end subroutine Au

请注意,我还冒昧地为哑元(dummy argument)加上了 INTENT 属性。