使用 ReadFile 时,一半的读取缓冲区已损坏
Half of read buffer is corrupt when using ReadFile
传给 ReadFile 的缓冲区有一半内容是损坏的。无论缓冲区多大,总有一半是相同的损坏字符。我已经排查过所有可能导致读取提前停止的因素。如果我增大缓冲区,就能看到文件的更多内容,所以问题并不是出在文件的某个特定位置。
环境:Visual Studio 2019,Windows 10。
#define MAXBUFFERSIZE 1024
DWORD bufferSize = MAXBUFFERSIZE;
_int64 fileRemaining;
HANDLE hFile;
DWORD dwBytesRead = 0;
//OVERLAPPED ol = { 0 };
LARGE_INTEGER dwPosition;
TCHAR* buffer;
hFile = CreateFile(
inputFilePath, // file to open
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file | FILE_FLAG_OVERLAPPED
NULL); // no attr. template
if (hFile == INVALID_HANDLE_VALUE)
{
DisplayErrorBox((LPWSTR)L"CreateFile");
return 0;
}
LARGE_INTEGER size;
GetFileSizeEx(hFile, &size);
_int64 fileSize = (__int64)size.QuadPart;
double gigabytes = fileSize * 9.3132e-10;
sendToReportWindow(L"file size: %lld bytes \(%.1f gigabytes\)\n", fileSize, gigabytes);
if(fileSize > MAXBUFFERSIZE)
{
buffer = new TCHAR[MAXBUFFERSIZE];
}
else
{
buffer = new TCHAR[fileSize];
}
fileRemaining = fileSize;
sendToReportWindow(L"file remaining: %lld bytes\n", fileRemaining);
while (fileRemaining) // outer loop. while file remaining, read file chunk to buffer
{
sendToReportWindow(L"fileRemaining:%d\n", fileRemaining);
if (bufferSize > fileRemaining) // as fileremaining gets smaller as file is processed, it eventually is smaller than the buffer
bufferSize = fileRemaining;
if (FALSE == ReadFile(hFile, buffer, bufferSize, &dwBytesRead, NULL))
{
sendToReportWindow(L"file read failed\n");
CloseHandle(hFile);
return 0;
}
fileRemaining -= bufferSize;
// bunch of commented out code (verified that it does not cause the corruption)
}
delete [] buffer;
调试器html视图(512 字节缓冲区)
调试器html 视图(1024 字节缓冲区)。这表明该文件可能不是损坏的来源。
杂项说明:有人告诉我,文件的内存映射没有任何优势,因为我是按顺序处理文件的。这种方法的另一个优点是,当我在 WARC 文件中检测到特定的和重复出现的标签时,我可以向前跳过约 500 个字节并继续处理。这提高了速度。
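原因是你使用了 TCHAR 类型的缓冲区数组。在 Unicode 构建中,TCHAR 类型的大小是 2 个字节,而 ReadFile 的 nNumberOfBytesToRead 参数是以字节为单位的。因此调用 ReadFile 时只会向缓冲区写入 bufferSize 个字节。
但是缓冲区的实际大小是 sizeof(TCHAR) * bufferSize 个字节,也就是读取量的两倍,所以您看到缓冲区数组的后一半是“损坏的”——那部分内存其实从未被写入。把缓冲区改为 char(或 BYTE)数组即可解决。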
传给 ReadFile 的缓冲区有一半内容是损坏的。无论缓冲区多大,总有一半是相同的损坏字符。我已经排查过所有可能导致读取提前停止的因素。如果我增大缓冲区,就能看到文件的更多内容,所以问题并不是出在文件的某个特定位置。
环境:Visual Studio 2019,Windows 10。
#define MAXBUFFERSIZE 1024
DWORD bufferSize = MAXBUFFERSIZE;
_int64 fileRemaining;
HANDLE hFile;
DWORD dwBytesRead = 0;
//OVERLAPPED ol = { 0 };
LARGE_INTEGER dwPosition;
TCHAR* buffer;
hFile = CreateFile(
inputFilePath, // file to open
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // default security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file | FILE_FLAG_OVERLAPPED
NULL); // no attr. template
if (hFile == INVALID_HANDLE_VALUE)
{
DisplayErrorBox((LPWSTR)L"CreateFile");
return 0;
}
LARGE_INTEGER size;
GetFileSizeEx(hFile, &size);
_int64 fileSize = (__int64)size.QuadPart;
double gigabytes = fileSize * 9.3132e-10;
sendToReportWindow(L"file size: %lld bytes \(%.1f gigabytes\)\n", fileSize, gigabytes);
if(fileSize > MAXBUFFERSIZE)
{
buffer = new TCHAR[MAXBUFFERSIZE];
}
else
{
buffer = new TCHAR[fileSize];
}
fileRemaining = fileSize;
sendToReportWindow(L"file remaining: %lld bytes\n", fileRemaining);
while (fileRemaining) // outer loop. while file remaining, read file chunk to buffer
{
sendToReportWindow(L"fileRemaining:%d\n", fileRemaining);
if (bufferSize > fileRemaining) // as fileremaining gets smaller as file is processed, it eventually is smaller than the buffer
bufferSize = fileRemaining;
if (FALSE == ReadFile(hFile, buffer, bufferSize, &dwBytesRead, NULL))
{
sendToReportWindow(L"file read failed\n");
CloseHandle(hFile);
return 0;
}
fileRemaining -= bufferSize;
// bunch of commented out code (verified that it does not cause the corruption)
}
delete [] buffer;
调试器html视图(512 字节缓冲区)
调试器html 视图(1024 字节缓冲区)。这表明该文件可能不是损坏的来源。
杂项说明:有人告诉我,文件的内存映射没有任何优势,因为我是按顺序处理文件的。这种方法的另一个优点是,当我在 WARC 文件中检测到特定的和重复出现的标签时,我可以向前跳过约 500 个字节并继续处理。这提高了速度。
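原因是你使用了 TCHAR 类型的缓冲区数组。在 Unicode 构建中,TCHAR 类型的大小是 2 个字节,而 ReadFile 的 nNumberOfBytesToRead 参数是以字节为单位的。因此调用 ReadFile 时只会向缓冲区写入 bufferSize 个字节。
但是缓冲区的实际大小是 sizeof(TCHAR) * bufferSize 个字节,也就是读取量的两倍,所以您看到缓冲区数组的后一半是“损坏的”——那部分内存其实从未被写入。把缓冲区改为 char(或 BYTE)数组即可解决。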