C# FileStream.Read 没有读取最后一个块

C# FileStream.Read doesn't read last block

我将二进制文件逐块读取为十六进制。

用FileStream.Read和File.ReadAllBytes

不一样
  1. FileSteram.Read

    int limit = 0;
    if (openFileDlg.FileName.Length > 0)
    {
        fileName = openFileDlg.FileName;
        FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read);
        fsLen = (int)fs.Length;
        int count = 0;
        limit = 100;
        byte[] read_buff = new byte[limit];
        StringBuilder sb = new StringBuilder();
        while ( (count = fs.Read(read_buff, 0, limit)) > 0)
        {
             foreach (byte b in read_buff)
             {
                sb.Append(Convert.ToString(b, 16).PadLeft(2, '0'));
             }
    
        }
        rtxb_bin.AppendText(sb.ToString() + "\n");
    }
    
  2. File.ReadAllBytes

    if (openFileDlg.FileName.Length > 0)
    {
            fileName = openFileDlg.FileName;
            byte[] fileBytes = File.ReadAllBytes(fileName);
            StringBuilder sb2 = new StringBuilder();
    
            foreach (byte b2 in fileBytes)
            {
                sb2.Append(Convert.ToString(b2, 16).PadLeft(2, '0'));
            }
            rtxb_allbin.AppendText(sb2.ToString());
     }
    

案例 1,结果是...

    ........04c0020f00452a00421346108129844f2138448500208020250405250043188510812e0

案例 2 是

      .......04c0020f00452a00421346108129844f2138448500208020250405250043188510812e044f212cc48120c24125404f2069c2c0008bff35f8f401efbd17047

FileStream.Read 在“12e0”之后没有读取 缺少“44f212cc48120c24125404f2069c2c0008bff35f8f401efbd17047”

如何使用 FileStream.Read 读取所有字节?

为什么 FileStream.Read 不读取最后一个块?

您需要更新偏移量和计数。

Sintaxis

public override int Read(
    byte[] array,
    int offset,
    int count
)

Example

public static byte[] ReadFile(string filePath)
{
  byte[] buffer;
  FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
  try
  {
    int length = (int)fileStream.Length;  // get file length
    buffer = new byte[length];            // create buffer
    int count;                            // actual number of bytes read
    int sum = 0;                          // total number of bytes read

    // read until Read method returns 0 (end of the stream has been reached)
    while ((count = fileStream.Read(buffer, sum, length - sum)) > 0)
      sum += count;  // sum is a buffer offset for next reading
  }
  finally
  {
    fileStream.Close();
  }
  return buffer;
}

Reference

很可能您觉得它没有读取最后一个块。假设您有长度为 102 的文件。循环的第一次迭代读取前 100 个字节,一切都很好。但是第二个(最后一个)会发生什么?您将两个字节读入长度为 100 的 read_buff。现在缓冲区包含最后一个块的 2 个字节和前一个(第一个)块的 98 个字节,因为 Read 不会清除缓冲区。然后你继续:

 foreach (byte b in read_buff)
 {
    sb.Append(Convert.ToString(b, 16).PadLeft(2, '0'));
 }

结果,sb 第一个块有 100 个字节,最后一个块有 2 个字节,然后第一个块有 98 个字节。如果你不仔细看,它可能看起来只是跳过了最后一个块,而实际上它重复了前一个块的一部分。

要修复,请使用 count(表示实际读入缓冲区的字节数)仅适用于 read_buff:

的有效部分
for (int i = 0; i < count; i++) {
    sb.Append(Convert.ToString(read_buff[i], 16).PadLeft(2, '0'));
}
   public static void ReadAndProcessLargeFile(string theFilename, long whereToStartReading = 0)
    {
        FileInfo info = new FileInfo(theFilename);
        long fileLength = info.Length;
        long timesToRead = (fileLength / megabyte);
        long ctr = 0;
        long timesRead = 0;

        FileStream fileStram = new FileStream(theFilename, FileMode.Open, FileAccess.Read);
        using (fileStram)
        {
            byte[] buffer = new byte[megabyte];

            fileStram.Seek(whereToStartReading, SeekOrigin.Begin);

            int bytesRead = 0;
            
            //bytesRead = fileStram.Read(buffer, 0, megabyte);
            //ctr = ctr + 1;

            while ((bytesRead = fileStram.Read(buffer, 0, megabyte)) > 0)
            {
                ProcessChunk(buffer, bytesRead);
                buffer = new byte[megabyte]; // This solves last read prob
            }

        }
    }

    private static void ProcessChunk(byte[] buffer, int bytesRead)
    {
        // Do the processing here
        string utfString = Encoding.UTF8.GetString(buffer, 0, bytesRead);
        Console.Write(utfString);
    }