F#:如何正确枚举多个文件?
F#: How to enumerate through multiple files correctly?
我有一堆几个 MiB 大小的文件,非常简单:
- 它们的大小是 8 的倍数
- 它们只包含小端字节序的双精度浮点数(double),因此可以使用
BinaryReader
的 ReadDouble()
方法读取
按字典顺序排序时,它们包含按所需顺序排列的所有值。
我无法将所有内容作为 float list
或 float array
保存在内存中,因此我需要一个 float seq
在实际访问时遍历必要的文件。遍历该序列的那部分代码实际上是通过 GetEnumerator()
以命令式方式执行的,因为我不希望出现任何资源泄漏,并希望正确关闭所有文件。
我的第一个函数式方法是:
/// Lazily reads the file at `file` as a sequence of floats, pulling the data
/// in chunks of at most 1 MiB and reinterpreting every 8 bytes as one float.
///
/// The file is opened when enumeration starts. The reader is disposed only
/// once the end of the stream has been reached, so an enumeration that stops
/// early leaves the file open.
let readFile file =
    // Upper bound on the number of bytes pulled from the stream per step.
    let chunkSize = 1048576L

    let rec readReader (maybeReader : BinaryReader option) : seq<float> =
        match maybeReader with
        | None ->
            // Everything in this sequence expression is deferred until the
            // sequence is actually enumerated.
            seq {
                printfn "Opening the file"
                let opened =
                    new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
                yield! readReader (Some opened)
            }
        | Some reader ->
            let stream = reader.BaseStream
            if stream.Position >= stream.Length then
                // Nothing left to read: release the file and stop.
                printfn "Closing the file"
                reader.Dispose()
                Seq.empty
            else
                printfn "Reading from position %d" stream.Position
                let remaining = stream.Length - stream.Position
                let bytesToRead = int (Math.Min(chunkSize, remaining))
                let raw = reader.ReadBytes bytesToRead
                let chunk = Array.zeroCreate<float> (bytesToRead / 8)
                // Raw byte-wise copy into the float array (no per-value conversion).
                Buffer.BlockCopy(raw, 0, chunk, 0, bytesToRead)
                seq {
                    yield! chunk
                    // The next chunk is read only after this one has been consumed.
                    yield! readReader maybeReader
                }

    readReader None
然后,当我有一个包含所有文件的 string list
时,我可以这样说:
// Map every path in `files` (defined elsewhere) through readFile and
// concatenate the resulting sequences into one float sequence.
let values = files |> Seq.collect readFile
// Take the enumerator explicitly; `use` disposes it when it goes out of scope.
use ve = values.GetEnumerator()
// Do stuff that only gets partial data from one file
但是,这只会在 reader 读到末尾时才关闭文件(看一下函数就很清楚)。因此,作为第二种方法,我以命令式方式实现了文件枚举:
/// Enumerator that yields the contents of `file` one float at a time via
/// BinaryReader.ReadDouble. The file is opened on construction and closed
/// when the enumerator is disposed; both events are logged to stdout.
type FileEnumerator(file : string) =
    let input = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read)
    let reader = new BinaryReader(input)
    // Last value read; NaN until the first successful MoveNext.
    let mutable latest = nan
    do printfn "Enumerator active for %s" file

    interface IEnumerator<float> with
        member self.Current = latest

    interface IEnumerator with
        member self.Current = box latest
        // Rewinds the underlying stream to the start of the file.
        member self.Reset() = input.Position <- 0L
        // Reads the next float; returns false once the end of the stream is reached.
        member self.MoveNext() =
            if input.Position < input.Length then
                latest <- reader.ReadDouble()
                true
            else
                false

    interface IDisposable with
        member self.Dispose() =
            reader.Dispose()
            printfn "Enumerator disposed for %s" file
/// Sequence view over `file`: every GetEnumerator call creates a fresh
/// FileEnumerator for that file.
type FileEnumerable(file : string) =
    // Single construction point shared by both interface implementations.
    let openEnumerator () = new FileEnumerator(file)

    interface IEnumerable<float> with
        member self.GetEnumerator() = openEnumerator () :> IEnumerator<float>

    interface IEnumerable with
        member self.GetEnumerator() = openEnumerator () :> IEnumerator
/// Wraps `file` in a FileEnumerable and exposes it as a float sequence.
let readFile' file : seq<float> =
    upcast (new FileEnumerable(file))
现在,当我说
// Map every path in `files` (defined elsewhere) through readFile' and
// concatenate the resulting sequences into one float sequence.
let values = files |> Seq.collect readFile'
// Take the enumerator explicitly; `use` disposes it when it goes out of scope.
use ve = values.GetEnumerator()
// do stuff with the enumerator
这样,对枚举器的正确释放(Dispose)就会一直传递到我的命令式枚举器。
虽然这对我想要实现的目标来说是一个可行的解决方案(我可以像第一个函数式方法那样通过分块读取来加快速度,但为了简洁,这里没有这样做),我想知道是否存在一种真正的函数式方法,可以避免枚举器中的可变状态。
当您说使用 GetEnumerator() 将防止资源泄漏并允许正确关闭所有文件时,我不太明白您的意思。以下是我对此的尝试(出于演示目的忽略块复制部分),我认为它会导致文件正确关闭。
/// Returns true when the reader's underlying stream has no more data to read.
///
/// Uses `>=` rather than `=` so that a position past the end of the stream
/// also counts as end-of-file, matching the end checks used by the other
/// readers in this file.
let eof (br : BinaryReader) =
    br.BaseStream.Position >= br.BaseStream.Length
/// Lazily reads the file at `filePath` as a sequence of floats, one
/// ReadDouble call per element.
///
/// Both the stream and the reader are bound with `use` inside the sequence
/// expression, so they are disposed when enumeration finishes or when the
/// enumerator is disposed early.
let readFileAsFloats filePath =
    seq {
        use stream =
            new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read)
        use input = new BinaryReader(stream)

        while input |> eof |> not do
            yield input.ReadDouble()
    }
/// Reads every file in `filePaths` with readFileAsFloats and concatenates
/// the results, in order, into a single float sequence.
let readFilesAsFloats filePaths =
    Seq.collect readFileAsFloats filePaths
// Verbatim strings (@"...") keep the backslashes literal. In a regular string
// "\f" is the form-feed escape, so "D:\floatFile1.txt" would not be the
// intended path.
let floats = readFilesAsFloats [@"D:\floatFile1.txt"; @"D:\floatFile2.txt"]
这是你的想法吗?
我有一堆几个 MiB 大小的文件,非常简单:
- 它们的大小是 8 的倍数
- 它们只包含小端字节序的双精度浮点数(double),因此可以使用
BinaryReader
的ReadDouble()
方法读取
按字典顺序排序时,它们包含按所需顺序排列的所有值。
我无法将所有内容作为 float list
或 float array
保存在内存中,因此我需要一个 float seq
在实际访问时遍历必要的文件。遍历该序列的那部分代码实际上是通过 GetEnumerator()
以命令式方式执行的,因为我不希望出现任何资源泄漏,并希望正确关闭所有文件。
我的第一个函数式方法是:
/// Lazily reads the file at `file` as a sequence of floats, pulling the data
/// in chunks of at most 1 MiB and reinterpreting every 8 bytes as one float.
///
/// The file is opened when enumeration starts. The reader is disposed only
/// once the end of the stream has been reached, so an enumeration that stops
/// early leaves the file open.
let readFile file =
    // Upper bound on the number of bytes pulled from the stream per step.
    let chunkSize = 1048576L

    let rec readReader (maybeReader : BinaryReader option) : seq<float> =
        match maybeReader with
        | None ->
            // Everything in this sequence expression is deferred until the
            // sequence is actually enumerated.
            seq {
                printfn "Opening the file"
                let opened =
                    new BinaryReader(new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
                yield! readReader (Some opened)
            }
        | Some reader ->
            let stream = reader.BaseStream
            if stream.Position >= stream.Length then
                // Nothing left to read: release the file and stop.
                printfn "Closing the file"
                reader.Dispose()
                Seq.empty
            else
                printfn "Reading from position %d" stream.Position
                let remaining = stream.Length - stream.Position
                let bytesToRead = int (Math.Min(chunkSize, remaining))
                let raw = reader.ReadBytes bytesToRead
                let chunk = Array.zeroCreate<float> (bytesToRead / 8)
                // Raw byte-wise copy into the float array (no per-value conversion).
                Buffer.BlockCopy(raw, 0, chunk, 0, bytesToRead)
                seq {
                    yield! chunk
                    // The next chunk is read only after this one has been consumed.
                    yield! readReader maybeReader
                }

    readReader None
然后,当我有一个包含所有文件的 string list
时,我可以这样说:
// Map every path in `files` (defined elsewhere) through readFile and
// concatenate the resulting sequences into one float sequence.
let values = files |> Seq.collect readFile
// Take the enumerator explicitly; `use` disposes it when it goes out of scope.
use ve = values.GetEnumerator()
// Do stuff that only gets partial data from one file
但是,这只会在 reader 读到末尾时才关闭文件(看一下函数就很清楚)。因此,作为第二种方法,我以命令式方式实现了文件枚举:
/// Enumerator that yields the contents of `file` one float at a time via
/// BinaryReader.ReadDouble. The file is opened on construction and closed
/// when the enumerator is disposed; both events are logged to stdout.
type FileEnumerator(file : string) =
    let input = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read)
    let reader = new BinaryReader(input)
    // Last value read; NaN until the first successful MoveNext.
    let mutable latest = nan
    do printfn "Enumerator active for %s" file

    interface IEnumerator<float> with
        member self.Current = latest

    interface IEnumerator with
        member self.Current = box latest
        // Rewinds the underlying stream to the start of the file.
        member self.Reset() = input.Position <- 0L
        // Reads the next float; returns false once the end of the stream is reached.
        member self.MoveNext() =
            if input.Position < input.Length then
                latest <- reader.ReadDouble()
                true
            else
                false

    interface IDisposable with
        member self.Dispose() =
            reader.Dispose()
            printfn "Enumerator disposed for %s" file
/// Sequence view over `file`: every GetEnumerator call creates a fresh
/// FileEnumerator for that file.
type FileEnumerable(file : string) =
    // Single construction point shared by both interface implementations.
    let openEnumerator () = new FileEnumerator(file)

    interface IEnumerable<float> with
        member self.GetEnumerator() = openEnumerator () :> IEnumerator<float>

    interface IEnumerable with
        member self.GetEnumerator() = openEnumerator () :> IEnumerator
/// Wraps `file` in a FileEnumerable and exposes it as a float sequence.
let readFile' file : seq<float> =
    upcast (new FileEnumerable(file))
现在,当我说
// Map every path in `files` (defined elsewhere) through readFile' and
// concatenate the resulting sequences into one float sequence.
let values = files |> Seq.collect readFile'
// Take the enumerator explicitly; `use` disposes it when it goes out of scope.
use ve = values.GetEnumerator()
// do stuff with the enumerator
这样,对枚举器的正确释放(Dispose)就会一直传递到我的命令式枚举器。
虽然这对我想要实现的目标来说是一个可行的解决方案(我可以像第一个函数式方法那样通过分块读取来加快速度,但为了简洁,这里没有这样做),我想知道是否存在一种真正的函数式方法,可以避免枚举器中的可变状态。
当您说使用 GetEnumerator() 将防止资源泄漏并允许正确关闭所有文件时,我不太明白您的意思。以下是我对此的尝试(出于演示目的忽略块复制部分),我认为它会导致文件正确关闭。
/// Returns true when the reader's underlying stream has no more data to read.
///
/// Uses `>=` rather than `=` so that a position past the end of the stream
/// also counts as end-of-file, matching the end checks used by the other
/// readers in this file.
let eof (br : BinaryReader) =
    br.BaseStream.Position >= br.BaseStream.Length
/// Lazily reads the file at `filePath` as a sequence of floats, one
/// ReadDouble call per element.
///
/// Both the stream and the reader are bound with `use` inside the sequence
/// expression, so they are disposed when enumeration finishes or when the
/// enumerator is disposed early.
let readFileAsFloats filePath =
    seq {
        use stream =
            new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read)
        use input = new BinaryReader(stream)

        while input |> eof |> not do
            yield input.ReadDouble()
    }
/// Reads every file in `filePaths` with readFileAsFloats and concatenates
/// the results, in order, into a single float sequence.
let readFilesAsFloats filePaths =
    Seq.collect readFileAsFloats filePaths
// Verbatim strings (@"...") keep the backslashes literal. In a regular string
// "\f" is the form-feed escape, so "D:\floatFile1.txt" would not be the
// intended path.
let floats = readFilesAsFloats [@"D:\floatFile1.txt"; @"D:\floatFile2.txt"]
这是你的想法吗?