读取输入文件中选定的一行
Read a selected line in an input file
我有一个格式化文件,我想反复地随机选择其中一行并读取它。
由于内存限制,无法先把所有数据读入并保存在数组中,再从中每次选取一行。
我用这种方法解决了(只报告了相关代码),但是速度很慢,我想知道是否有人可以帮助我找到最快的方法(我不是 fortran 专家)
编辑:是的,我需要多次调用此例程(约 100 万次),用它来定义后续分析的起始参数
! Pick one record of gene_rid.txt at random and read its seven fields.
! The file is opened only on the first pass through this code and is
! rewound after every lookup, so each lookup starts again at record 1.
! After this code runs, iostat is zero when the wanted record was read and
! non-zero when the file ended (or failed) before the wanted record.
! NOTE(review): the DATA statement below is only valid if LFIRST is
! declared LOGICAL; presumably that declaration is outside this excerpt.
PARAMETER(NLINES=10000000)
REAL ID,E,X,Y,Z,COSX,COSY
SAVE LOO
DATA LFIRST / .TRUE. /
IF ( LFIRST ) THEN
  LFIRST = .FALSE.
  OPEN(UNIT=88,FILE="../../../gene_rid.txt",STATUS="OLD")
END IF
! Presumably FLRNDM returns a value in [0,1), which puts LINE in 1..NLINES.
XI = FLRNDM(XDUMMY)
LINE = INT(XI * DBLE(NLINES)) + 1
! Skip only the LINE-1 records in front of the wanted one. Skipping LINE
! records would return record LINE+1: record 1 could never be selected
! and LINE = NLINES would run off the end of the file.
! A READ with an empty input list advances one record without parsing it.
DO LOO=1,LINE-1
  READ(88,*,IOSTAT=iostat)
  IF ( iostat .NE. 0 ) EXIT
END DO
! Read the wanted record. If the skip loop stopped early at end of file,
! this READ reports the same condition through iostat.
READ(88,*,IOSTAT=iostat) ID,E,COSX,COSY,X,Y,Z
REWIND(88)
这是输入文件的格式
head gene_rid.txt
7 0.933549E-03 -.162537E+00 0.136150E-01 -.4791E+01 0.3356E+00 0.2900E+02
7 0.203748E-02 -.115359E+00 -.217682E+00 -.3453E+01 -.6606E+01 0.2900E+02
7 0.289498E-02 0.159572E+00 -.954033E-01 0.4767E+01 -.2730E+01 0.2900E+02
读取文件一次,然后将其写入大量 one-line 文件可能会有所帮助,例如
! Split the big input file into one small file per record, named
! "gene_rid<i>" for record number i, so that a later random lookup can open
! just the file it needs instead of scanning the whole input.
integer, parameter :: nlines = 10000000   ! integer literal, not the real 1e7
character(100) :: line
character(100) :: filename
integer :: i
integer :: src, dst   ! unit numbers chosen by the runtime via newunit=
integer :: ios

open(newunit=src, file="../../../gene_rid.txt", status="old", action="read")
do i = 1, nlines
  read(src, '(a)', iostat=ios) line
  ! Stop cleanly if the input holds fewer than nlines records.
  if (ios /= 0) exit
  write(filename, '(a,i0)') "gene_rid", i
  open(newunit=dst, file=trim(filename), status="new", action="write")
  ! '(a)' with trim() copies the record as it was read; list-directed
  ! output would prepend a blank and write the blank-padded buffer.
  write(dst, '(a)') trim(line)
  close(dst)
enddo
close(src)
然后在你的主程序中你只需要打开正确的 one-line 文件并每次读取一行,而不是滚动浏览整个大文件。
如果写入 1e7 个单行文件对您的文件系统来说太多了,可以改为写入 1e4 个千行文件(每个文件 1000 行),或采用类似的折中方案。
鉴于评论中说所有行的长度都相同,我会使用直接访问(direct access)方式读取文件,类似下面这样。其中的计时结果仅供参考,因为无法确定操作系统做了哪些缓存。
ijb@ijb-Latitude-5410:~/work/stack$ cat rl.f90
program testit
  ! Benchmark for random single-record reads from a formatted file whose
  ! records are all written with the same fixed-width format. The file is
  ! first written sequentially, then reopened for direct access so that any
  ! record can be fetched by number (rec=) without scanning from the start.
  use, intrinsic :: iso_fortran_env, only: wp => real64, li => int64
  implicit none

  integer, parameter :: name_len   = 128
  integer, parameter :: buffer_len = 8192
  integer, parameter :: n_lines    = 10000000   ! records written to the test file
  integer, parameter :: n_tests    = 1000000    ! timed random reads
  character(len=*), parameter :: fmt = '( i8, 1x, f14.2 )'

  character(len=name_len)   :: data_file
  character(len=buffer_len) :: sample
  real(wp) :: u, val, elapsed
  integer  :: tick0, tick1, ticks_per_sec
  integer  :: record_len, lun, wanted, found, k
  logical  :: worked

  data_file = 'test.dat'

  ! Step 1: write the test data; record k holds k as an integer and as a real.
  open(newunit=lun, file=data_file, form='formatted')
  call system_clock(tick0, ticks_per_sec)
  do k = 1, n_lines
    write(lun, fmt) k, real(k, wp)
  end do
  call system_clock(tick1, ticks_per_sec)
  elapsed = real(tick1 - tick0, wp) / ticks_per_sec
  write(*, *) 'Write time ', elapsed
  write(*, *) 'Av. time per write = ', elapsed / n_lines
  close(lun)

  ! Step 2: work out the record length by formatting one sample record and
  ! adding the length of the end-of-record marker.
  write(sample, fmt) 1, 1.0_wp
  record_len = len_trim(sample) + len(new_line('a'))

  ! Step 3: reopen for direct access and time random reads. The integer
  ! field of each record is its own record number, which is used to check
  ! that the right record came back.
  open(newunit=lun, file=data_file, access='direct', &
       recl=record_len, form='formatted')
  worked = .true.
  call system_clock(tick0, ticks_per_sec)
  do k = 1, n_tests
    call random_number(u)
    wanted = int(u * n_lines) + 1
    read(lun, fmt, rec=wanted) found, val
    worked = (found == wanted)
    if (.not. worked) then
      write(*, *) 'Failed to read right line'
      exit
    end if
  end do
  call system_clock(tick1, ticks_per_sec)
  close(lun)

  ! Timings are reported only when every read returned the expected record.
  if (worked) then
    elapsed = real(tick1 - tick0, wp) / ticks_per_sec
    write(*, *)
    write(*, *) 'Read successful'
    write(*, *) 'Read time ', elapsed
    write(*, *) 'Av. time per read = ', elapsed / n_tests
  end if
end program testit
ijb@ijb-Latitude-5410:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ijb-Latitude-5410:~/work/stack$ gfortran -std=f2008 -fcheck=all -Wall -Wextra -g -O rl.f90
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Write time 8.8620000000000001
Av. time per write = 8.8619999999999998E-007
Read successful
Read time 2.3670000000000000
Av. time per read = 2.3670000000000000E-006
ijb@ijb-Latitude-5410:~/work/stack$
我有一个格式化文件,我想反复地随机选择其中一行并读取它。 由于内存限制,无法先把所有数据读入并保存在数组中,再从中每次选取一行。
我用这种方法解决了(只报告了相关代码),但是速度很慢,我想知道是否有人可以帮助我找到最快的方法(我不是 fortran 专家)
编辑:是的,我需要多次调用此例程(约 100 万次),用它来定义后续分析的起始参数
! Pick one record of gene_rid.txt at random and read its seven fields.
! The file is opened only on the first pass through this code and is
! rewound after every lookup, so each lookup starts again at record 1.
! After this code runs, iostat is zero when the wanted record was read and
! non-zero when the file ended (or failed) before the wanted record.
! NOTE(review): the DATA statement below is only valid if LFIRST is
! declared LOGICAL; presumably that declaration is outside this excerpt.
PARAMETER(NLINES=10000000)
REAL ID,E,X,Y,Z,COSX,COSY
SAVE LOO
DATA LFIRST / .TRUE. /
IF ( LFIRST ) THEN
  LFIRST = .FALSE.
  OPEN(UNIT=88,FILE="../../../gene_rid.txt",STATUS="OLD")
END IF
! Presumably FLRNDM returns a value in [0,1), which puts LINE in 1..NLINES.
XI = FLRNDM(XDUMMY)
LINE = INT(XI * DBLE(NLINES)) + 1
! Skip only the LINE-1 records in front of the wanted one. Skipping LINE
! records would return record LINE+1: record 1 could never be selected
! and LINE = NLINES would run off the end of the file.
! A READ with an empty input list advances one record without parsing it.
DO LOO=1,LINE-1
  READ(88,*,IOSTAT=iostat)
  IF ( iostat .NE. 0 ) EXIT
END DO
! Read the wanted record. If the skip loop stopped early at end of file,
! this READ reports the same condition through iostat.
READ(88,*,IOSTAT=iostat) ID,E,COSX,COSY,X,Y,Z
REWIND(88)
这是输入文件的格式
head gene_rid.txt
7 0.933549E-03 -.162537E+00 0.136150E-01 -.4791E+01 0.3356E+00 0.2900E+02
7 0.203748E-02 -.115359E+00 -.217682E+00 -.3453E+01 -.6606E+01 0.2900E+02
7 0.289498E-02 0.159572E+00 -.954033E-01 0.4767E+01 -.2730E+01 0.2900E+02
读取文件一次,然后将其写入大量 one-line 文件可能会有所帮助,例如
! Split the big input file into one small file per record, named
! "gene_rid<i>" for record number i, so that a later random lookup can open
! just the file it needs instead of scanning the whole input.
integer, parameter :: nlines = 10000000   ! integer literal, not the real 1e7
character(100) :: line
character(100) :: filename
integer :: i
integer :: src, dst   ! unit numbers chosen by the runtime via newunit=
integer :: ios

open(newunit=src, file="../../../gene_rid.txt", status="old", action="read")
do i = 1, nlines
  read(src, '(a)', iostat=ios) line
  ! Stop cleanly if the input holds fewer than nlines records.
  if (ios /= 0) exit
  write(filename, '(a,i0)') "gene_rid", i
  open(newunit=dst, file=trim(filename), status="new", action="write")
  ! '(a)' with trim() copies the record as it was read; list-directed
  ! output would prepend a blank and write the blank-padded buffer.
  write(dst, '(a)') trim(line)
  close(dst)
enddo
close(src)
然后在你的主程序中你只需要打开正确的 one-line 文件并每次读取一行,而不是滚动浏览整个大文件。
如果写入 1e7 个单行文件对您的文件系统来说太多了,可以改为写入 1e4 个千行文件(每个文件 1000 行),或采用类似的折中方案。
鉴于评论中说所有行的长度都相同,我会使用直接访问(direct access)方式读取文件,类似下面这样。其中的计时结果仅供参考,因为无法确定操作系统做了哪些缓存。
ijb@ijb-Latitude-5410:~/work/stack$ cat rl.f90
program testit
  ! Benchmark for random single-record reads from a formatted file whose
  ! records are all written with the same fixed-width format. The file is
  ! first written sequentially, then reopened for direct access so that any
  ! record can be fetched by number (rec=) without scanning from the start.
  use, intrinsic :: iso_fortran_env, only: wp => real64, li => int64
  implicit none

  integer, parameter :: name_len   = 128
  integer, parameter :: buffer_len = 8192
  integer, parameter :: n_lines    = 10000000   ! records written to the test file
  integer, parameter :: n_tests    = 1000000    ! timed random reads
  character(len=*), parameter :: fmt = '( i8, 1x, f14.2 )'

  character(len=name_len)   :: data_file
  character(len=buffer_len) :: sample
  real(wp) :: u, val, elapsed
  integer  :: tick0, tick1, ticks_per_sec
  integer  :: record_len, lun, wanted, found, k
  logical  :: worked

  data_file = 'test.dat'

  ! Step 1: write the test data; record k holds k as an integer and as a real.
  open(newunit=lun, file=data_file, form='formatted')
  call system_clock(tick0, ticks_per_sec)
  do k = 1, n_lines
    write(lun, fmt) k, real(k, wp)
  end do
  call system_clock(tick1, ticks_per_sec)
  elapsed = real(tick1 - tick0, wp) / ticks_per_sec
  write(*, *) 'Write time ', elapsed
  write(*, *) 'Av. time per write = ', elapsed / n_lines
  close(lun)

  ! Step 2: work out the record length by formatting one sample record and
  ! adding the length of the end-of-record marker.
  write(sample, fmt) 1, 1.0_wp
  record_len = len_trim(sample) + len(new_line('a'))

  ! Step 3: reopen for direct access and time random reads. The integer
  ! field of each record is its own record number, which is used to check
  ! that the right record came back.
  open(newunit=lun, file=data_file, access='direct', &
       recl=record_len, form='formatted')
  worked = .true.
  call system_clock(tick0, ticks_per_sec)
  do k = 1, n_tests
    call random_number(u)
    wanted = int(u * n_lines) + 1
    read(lun, fmt, rec=wanted) found, val
    worked = (found == wanted)
    if (.not. worked) then
      write(*, *) 'Failed to read right line'
      exit
    end if
  end do
  call system_clock(tick1, ticks_per_sec)
  close(lun)

  ! Timings are reported only when every read returned the expected record.
  if (worked) then
    elapsed = real(tick1 - tick0, wp) / ticks_per_sec
    write(*, *)
    write(*, *) 'Read successful'
    write(*, *) 'Read time ', elapsed
    write(*, *) 'Av. time per read = ', elapsed / n_tests
  end if
end program testit
ijb@ijb-Latitude-5410:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ijb-Latitude-5410:~/work/stack$ gfortran -std=f2008 -fcheck=all -Wall -Wextra -g -O rl.f90
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Write time 8.8620000000000001
Av. time per write = 8.8619999999999998E-007
Read successful
Read time 2.3670000000000000
Av. time per read = 2.3670000000000000E-006
ijb@ijb-Latitude-5410:~/work/stack$