读取输入文件中选定的一行

Read a selected line in an input file

我有一个格式化文件,我想反复地随机选取其中一行并读取它。由于内存限制,无法先把所有数据读入并保存在数组中,然后再每次从中选取一行。

我用这种方法解决了(只报告了相关代码),但是速度很慢,我想知道是否有人可以帮助我找到最快的方法(我不是 fortran 专家)

编辑:是的,我需要多次调用此例程(约 10^6 次),用它来为后续分析定义起始参数。

  ! Pick one record of the data file at random and read its seven fields.
  ! The file is opened once, on the first pass; each pass then skips the
  ! LINE-1 records in front of the chosen one, reads record LINE, and
  ! rewinds the unit so the next pass starts from the top again.
  PARAMETER(NLINES=10000000)
  REAL ID,E,X,Y,Z,COSX,COSY
  ! NOTE(review): LFIRST is the variable that must persist between
  ! passes; confirm it is declared LOGICAL and SAVEd in the part of the
  ! routine that is not shown here.
  SAVE LOO
  DATA LFIRST / .TRUE. /

  IF ( LFIRST ) THEN
  LFIRST = .FALSE.
  OPEN(UNIT=88,FILE="../../../gene_rid.txt",STATUS="OLD")
  END IF

  ! FLRNDM is defined outside this fragment; presumably it returns a
  ! uniform deviate in [0,1), which puts LINE in 1..NLINES -- confirm.
  XI = FLRNDM(XDUMMY)
  LINE = INT(XI * DBLE(NLINES)) + 1

  ! Skip exactly LINE-1 records so that the READ below lands on record
  ! LINE. Looping to LINE would return record LINE+1: record 1 could
  ! never be chosen and LINE = NLINES would run past the end of the file.
  ! The empty input list advances one record without converting any field.
  DO LOO=1,LINE-1
     READ(88,*,IOSTAT=iostat)
  END DO
  ! Single-line statement: no continuation marker, so it is valid in
  ! both fixed and free source form.
  READ(88,*,IOSTAT=iostat) ID,E,COSX,COSY,X,Y,Z
  REWIND(88)

这是输入文件的格式

head gene_rid.txt 
  7  0.933549E-03  -.162537E+00  0.136150E-01   -.4791E+01   0.3356E+00   0.2900E+02
  7  0.203748E-02  -.115359E+00  -.217682E+00   -.3453E+01   -.6606E+01   0.2900E+02
  7  0.289498E-02  0.159572E+00  -.954033E-01   0.4767E+01   -.2730E+01   0.2900E+02

读取文件一次,然后将其写入大量 one-line 文件可能会有所帮助,例如

! Split the big data file into one small file per record, named
! gene_rid1, gene_rid2, ..., so that a later run can open only the
! file holding the record it needs.
integer, parameter :: nlines = 10000000
character(100) :: line
character(100) :: filename

integer :: i
integer :: ios

open(unit=88, file="../../../gene_rid.txt", status="old")
do i=1,nlines
  read(88,'(a)',iostat=ios) line
  ! Stop cleanly if the input holds fewer than nlines records (or a
  ! read error occurs) instead of aborting the program.
  if (ios /= 0) exit
  write(filename,'(a,i0)') "gene_rid", i
  ! status="new" makes the open fail rather than overwrite an existing file.
  open(unit=89, file=filename, status="new")
  ! Copy the record verbatim: list-directed output would prepend a
  ! blank and pad the line to the full 100 characters.
  write(89,'(a)') trim(line)
  close(89)
enddo
close(88)

然后在你的主程序中你只需要打开正确的 one-line 文件并每次读取一行,而不是滚动浏览整个大文件。

如果写入 1e7 个单行文件对您的文件系统来说太多了,可以改为写入 1e4 个各含一千行的文件,或采用类似的划分方式。

鉴于评论中说所有行的长度都相同,我会使用直接访问(direct access)文件,类似下面这样。计时结果仅供参考,因为无法知道操作系统做了哪些缓存。

ijb@ijb-Latitude-5410:~/work/stack$ cat rl.f90
!> Benchmark random single-record reads from a fixed-width text file
!> using formatted direct access.
!>
!> The program first writes n_lines records of identical width to
!> 'test.dat', then reopens that file with access='direct' and reads
!> n_tests randomly chosen records, checking that the integer stored in
!> each record equals the record number that was requested.
program testit

  use, intrinsic :: iso_fortran_env, only: wp => real64

  implicit none

  integer, parameter :: max_file_length = 128
  integer, parameter :: max_line_length = 8192
  integer, parameter :: n_lines         = 10000000
  integer, parameter :: n_tests         = 1000000

  ! Every record is produced with this format, so all have the same width.
  character(len=*), parameter :: fmt = '( i8, 1x, f14.2 )'

  character(len=max_file_length) :: data_file
  character(len=max_line_length) :: sample

  real(wp) :: draw
  real(wp) :: payload
  real(wp) :: elapsed

  integer :: tick_begin, tick_end, ticks_per_second
  integer :: record_length
  integer :: io_unit
  integer :: wanted
  integer :: found
  integer :: k

  logical :: all_matched

  data_file = 'test.dat'

  ! Phase 1: generate the test data sequentially and time it.
  open (newunit=io_unit, file=data_file, form='formatted')
  call system_clock(tick_begin, ticks_per_second)
  do k = 1, n_lines
    write (io_unit, fmt) k, real(k, wp)
  end do
  call system_clock(tick_end, ticks_per_second)
  elapsed = real(tick_end - tick_begin, wp) / ticks_per_second
  write (*, *) 'Write time ', elapsed
  write (*, *) 'Av. time per write = ', &
       elapsed / n_lines
  close (io_unit)

  ! Phase 2: random reads through direct access.
  ! The record length is the width of one formatted record plus the
  ! end-of-record marker; format a sample record to measure it.
  write (sample, fmt) 1, 1.0_wp
  record_length = len_trim(sample) + len(new_line('a'))

  open (newunit=io_unit, file=data_file, &
       access='direct', recl=record_length, form='formatted')
  all_matched = .true.
  call system_clock(tick_begin, ticks_per_second)
  probe: do k = 1, n_tests
    call random_number(draw)
    wanted = int(draw * n_lines) + 1
    read (io_unit, fmt, rec=wanted) found, payload
    ! The first field of each record is its own record number.
    if (found == wanted) cycle probe
    write (*, *) 'Failed to read right line'
    all_matched = .false.
    exit probe
  end do probe
  call system_clock(tick_end, ticks_per_second)
  close (io_unit)

  ! Timings are reported only when every probe returned the right record.
  if (all_matched) then
    elapsed = real(tick_end - tick_begin, wp) / ticks_per_second
    write (*, *)
    write (*, *) 'Read successful'
    write (*, *) 'Read time ', elapsed
    write (*, *) 'Av. time per read = ', &
         elapsed / n_tests
  end if

end program testit
ijb@ijb-Latitude-5410:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ijb@ijb-Latitude-5410:~/work/stack$ gfortran -std=f2008 -fcheck=all -Wall -Wextra -g -O rl.f90 
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Write time    8.8620000000000001     
 Av. time per write =    8.8619999999999998E-007

 Read successful
 Read time    2.3670000000000000     
 Av. time per read =    2.3670000000000000E-006
ijb@ijb-Latitude-5410:~/work/stack$