Fortran 提高大型数组写入文件速度的最佳方法
Fortran best way to improve write to file speed for large arrays
我有一个包含 20,000,000 个元素的非常大的数组,我想将其写入未格式化的文件。
它是一个自相关函数。
在不写入文件的情况下使用-O4 优化编译标志非常快。
但是一旦我写入文件,它似乎需要一天才能完成。
最后是f90程序。以下是未写入文件和写入文件的输出。
显然,写入一个数组的单个元素大约需要 10 毫秒。
20,000,000 x 0.01 = 200,000 秒 = 3,333 分钟 = 55 小时
读文件只需要 45 秒,怎么可能写这么长时间?我可以做些什么来提高速度?
注释
系统:Ubuntu20.04
编译行:fortran -o acorr.exe -O4 acorr.f90
没有文件写入
elapsed time for reading: 43.4389992
Size of Jx: 20000000
Loop Start Time: 43.5009995
correlation time magnitude 1e0 elapsed time: 43.5009995
correlation time magnitude 1e1 elapsed time: 43.5009995
correlation time magnitude 1e2 elapsed time: 43.5009995
correlation time magnitude 1e3 elapsed time: 43.5009995
correlation time magnitude 1e4 elapsed time: 43.5009995
correlation time magnitude 1e5 elapsed time: 43.5009995
correlation time magnitude 1e6 elapsed time: 43.5029984
correlation time magnitude 1e7 elapsed time: 43.5190010
elapsed time: 43.5369987
带文件写入
elapsed time for reading: 43.6349983
Size of Jx: 20000000
Loop Start Time: 43.6949997
correlation time magnitude 1e0 elapsed time: 43.7319984
correlation time magnitude 1e1 elapsed time: 43.8969994
correlation time magnitude 1e2 elapsed time: 45.4980011
correlation time magnitude 1e3 elapsed time: 61.5289993
acorr.f90
!> Read a time series from 'DiamHeat.log', compute its normalised
!> autocorrelation and store the result in the unformatted file 'acorr.txt'.
!>
!> Input : 'DiamHeat.log' - skip_lines header lines followed by n_samples rows
!>         of five list-directed values; only the third value of each row is kept.
!> Output: 'acorr.txt'    - one unformatted sequential record holding corr(1:n).
program acorr
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  integer, parameter :: n_samples = 20000000   ! number of data rows to read
  integer, parameter :: skip_lines = 4         ! header lines preceding the data

  real :: a, b, c, d, mean, var
  ! Allocatable instead of fixed-size locals: three arrays of this size are
  ! too large to place safely on the stack.
  real, allocatable :: Jx(:), Jxm(:), corr(:)
  integer :: i, jsize, t_start, t_now, rate
  integer :: in_unit, out_unit, ios
  integer :: next_mark, magnitude
  character(len=2) :: mag_text
  character(len=256) :: msg

  call system_clock(t_start, rate)

  allocate(Jx(n_samples), Jxm(n_samples), corr(n_samples), stat=ios)
  if (ios /= 0) then
    write(error_unit, '(a)') 'acorr: cannot allocate work arrays'
    error stop 1
  end if

  ! ---- read the input series -------------------------------------------
  open(newunit=in_unit, file='DiamHeat.log', status='old', action='read', &
       iostat=ios, iomsg=msg)
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: cannot open DiamHeat.log: ', trim(msg)
    error stop 1
  end if
  do i = 1, skip_lines
    read(in_unit, *, iostat=ios, iomsg=msg)
    if (ios /= 0) exit
  end do
  if (ios == 0) then
    do i = 1, n_samples
      read(in_unit, *, iostat=ios, iomsg=msg) a, b, Jx(i), c, d
      if (ios /= 0) exit
    end do
  end if
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: error reading DiamHeat.log: ', trim(msg)
    error stop 1
  end if
  call system_clock(t_now)
  print *, "elapsed time for reading: ", real(t_now - t_start) / real(rate)
  close(in_unit)

  ! Open the output early so that a bad path fails before the long computation.
  open(newunit=out_unit, file='acorr.txt', form='unformatted', &
       status='replace', action='write', iostat=ios, iomsg=msg)
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: cannot open acorr.txt: ', trim(msg)
    error stop 1
  end if

  jsize = size(Jx)
  print *, "Size of Jx: ", jsize

  ! ---- mean and variance -----------------------------------------------
  mean = sum(Jx) / jsize
  Jxm = Jx - mean
  var = dot_product(Jxm, Jxm) / jsize

  ! ---- autocorrelation ---------------------------------------------------
  ! Lag i uses a dot product of length jsize-i, so the whole loop performs
  ! roughly jsize**2/2 multiply-adds. This loop, not the final write, is
  ! what dominates the run time.
  call system_clock(t_now)
  print *, "Loop Start Time: ", real(t_now - t_start) / real(rate)

  next_mark = 1      ! next lag at which a progress line is printed (1, 10, 100, ...)
  magnitude = 0      ! decimal exponent of next_mark
  do i = 0, jsize - 1
    corr(i+1) = dot_product(Jxm(i+1:jsize), Jxm(1:jsize-i)) / var / (jsize - i)

    if (i == next_mark) then
      call system_clock(t_now)
      write(mag_text, '(i0)') magnitude
      print *, "correlation time magnitude 1e" // trim(mag_text) // " elapsed time: ", &
               real(t_now - t_start) / real(rate)
      magnitude = magnitude + 1
      ! Stop reporting rather than overflow once the next power of ten
      ! no longer fits in a default integer.
      if (next_mark <= huge(next_mark) / 10) then
        next_mark = next_mark * 10
      else
        next_mark = -1
      end if
    end if
  end do

  ! ---- store the result --------------------------------------------------
  ! The unit is unformatted, so the write must not carry a format: the
  ! original list-directed "write(20,*)" is an error on such a unit.
  write(out_unit, iostat=ios, iomsg=msg) corr
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: error writing acorr.txt: ', trim(msg)
    error stop 1
  end if
  close(out_unit)

  call system_clock(t_now)
  print *, "elapsed time: ", real(t_now - t_start) / real(rate)
end program acorr
正如 @francescalus 在评论中指出的那样,除非计算结果在别处被用到,否则编译器似乎会把这段计算整个优化掉(跳过)。
在循环之后添加
print *, sum(corr)
似乎就能让程序真正执行循环中的点积计算。计算本身确实需要很长时间,但此时程序已经在全速运行。
再次感谢@francescalus
我有一个包含 20,000,000 个元素的非常大的数组,我想将其写入未格式化的文件。 它是一个自相关函数。
在不写入文件的情况下使用-O4 优化编译标志非常快。 但是一旦我写入文件,它似乎需要一天才能完成。
最后是f90程序。以下是未写入文件和写入文件的输出。
显然,写入一个数组的单个元素大约需要 10 毫秒。
20,000,000 x 0.01 = 200,000 秒 = 3,333 分钟 = 55 小时
读文件只需要 45 秒,怎么可能写这么长时间?我可以做些什么来提高速度?
注释
系统:Ubuntu20.04
编译行:fortran -o acorr.exe -O4 acorr.f90
没有文件写入
elapsed time for reading: 43.4389992
Size of Jx: 20000000
Loop Start Time: 43.5009995
correlation time magnitude 1e0 elapsed time: 43.5009995
correlation time magnitude 1e1 elapsed time: 43.5009995
correlation time magnitude 1e2 elapsed time: 43.5009995
correlation time magnitude 1e3 elapsed time: 43.5009995
correlation time magnitude 1e4 elapsed time: 43.5009995
correlation time magnitude 1e5 elapsed time: 43.5009995
correlation time magnitude 1e6 elapsed time: 43.5029984
correlation time magnitude 1e7 elapsed time: 43.5190010
elapsed time: 43.5369987
带文件写入
elapsed time for reading: 43.6349983
Size of Jx: 20000000
Loop Start Time: 43.6949997
correlation time magnitude 1e0 elapsed time: 43.7319984
correlation time magnitude 1e1 elapsed time: 43.8969994
correlation time magnitude 1e2 elapsed time: 45.4980011
correlation time magnitude 1e3 elapsed time: 61.5289993
acorr.f90
!> Read a time series from 'DiamHeat.log', compute its normalised
!> autocorrelation and store the result in the unformatted file 'acorr.txt'.
!>
!> Input : 'DiamHeat.log' - skip_lines header lines followed by n_samples rows
!>         of five list-directed values; only the third value of each row is kept.
!> Output: 'acorr.txt'    - one unformatted sequential record holding corr(1:n).
program acorr
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  integer, parameter :: n_samples = 20000000   ! number of data rows to read
  integer, parameter :: skip_lines = 4         ! header lines preceding the data

  real :: a, b, c, d, mean, var
  ! Allocatable instead of fixed-size locals: three arrays of this size are
  ! too large to place safely on the stack.
  real, allocatable :: Jx(:), Jxm(:), corr(:)
  integer :: i, jsize, t_start, t_now, rate
  integer :: in_unit, out_unit, ios
  integer :: next_mark, magnitude
  character(len=2) :: mag_text
  character(len=256) :: msg

  call system_clock(t_start, rate)

  allocate(Jx(n_samples), Jxm(n_samples), corr(n_samples), stat=ios)
  if (ios /= 0) then
    write(error_unit, '(a)') 'acorr: cannot allocate work arrays'
    error stop 1
  end if

  ! ---- read the input series -------------------------------------------
  open(newunit=in_unit, file='DiamHeat.log', status='old', action='read', &
       iostat=ios, iomsg=msg)
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: cannot open DiamHeat.log: ', trim(msg)
    error stop 1
  end if
  do i = 1, skip_lines
    read(in_unit, *, iostat=ios, iomsg=msg)
    if (ios /= 0) exit
  end do
  if (ios == 0) then
    do i = 1, n_samples
      read(in_unit, *, iostat=ios, iomsg=msg) a, b, Jx(i), c, d
      if (ios /= 0) exit
    end do
  end if
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: error reading DiamHeat.log: ', trim(msg)
    error stop 1
  end if
  call system_clock(t_now)
  print *, "elapsed time for reading: ", real(t_now - t_start) / real(rate)
  close(in_unit)

  ! Open the output early so that a bad path fails before the long computation.
  open(newunit=out_unit, file='acorr.txt', form='unformatted', &
       status='replace', action='write', iostat=ios, iomsg=msg)
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: cannot open acorr.txt: ', trim(msg)
    error stop 1
  end if

  jsize = size(Jx)
  print *, "Size of Jx: ", jsize

  ! ---- mean and variance -----------------------------------------------
  mean = sum(Jx) / jsize
  Jxm = Jx - mean
  var = dot_product(Jxm, Jxm) / jsize

  ! ---- autocorrelation ---------------------------------------------------
  ! Lag i uses a dot product of length jsize-i, so the whole loop performs
  ! roughly jsize**2/2 multiply-adds. This loop, not the final write, is
  ! what dominates the run time.
  call system_clock(t_now)
  print *, "Loop Start Time: ", real(t_now - t_start) / real(rate)

  next_mark = 1      ! next lag at which a progress line is printed (1, 10, 100, ...)
  magnitude = 0      ! decimal exponent of next_mark
  do i = 0, jsize - 1
    corr(i+1) = dot_product(Jxm(i+1:jsize), Jxm(1:jsize-i)) / var / (jsize - i)

    if (i == next_mark) then
      call system_clock(t_now)
      write(mag_text, '(i0)') magnitude
      print *, "correlation time magnitude 1e" // trim(mag_text) // " elapsed time: ", &
               real(t_now - t_start) / real(rate)
      magnitude = magnitude + 1
      ! Stop reporting rather than overflow once the next power of ten
      ! no longer fits in a default integer.
      if (next_mark <= huge(next_mark) / 10) then
        next_mark = next_mark * 10
      else
        next_mark = -1
      end if
    end if
  end do

  ! ---- store the result --------------------------------------------------
  ! The unit is unformatted, so the write must not carry a format: the
  ! original list-directed "write(20,*)" is an error on such a unit.
  write(out_unit, iostat=ios, iomsg=msg) corr
  if (ios /= 0) then
    write(error_unit, '(2a)') 'acorr: error writing acorr.txt: ', trim(msg)
    error stop 1
  end if
  close(out_unit)

  call system_clock(t_now)
  print *, "elapsed time: ", real(t_now - t_start) / real(rate)
end program acorr
正如 @francescalus 在评论中指出的那样,除非计算结果在别处被用到,否则编译器似乎会把这段计算整个优化掉(跳过)。 在循环之后添加
print *, sum(corr)
似乎就能让程序真正执行循环中的点积计算。计算本身确实需要很长时间,但此时程序已经在全速运行。
再次感谢@francescalus