要读取一个文件,哪个会更快?使用 fstream、FILE* 还是内存映射?
To read a file,which one will be faster?using fstream,FILE* or memory mapping?
这里是我的项目配置:vs2013,win32,Debug。
我想知道对于不同的文件大小,3种文件读取方法中哪一种更快。它们分别是 C++ 风格的 fstream、C 风格的文件读写(FILE*)和内存映射。
但是执行后,这是我的结果:
File Size 1225284
fstream time 47
c file pointer time 0
memory mapping time 0
File Size 14856192
fstream time 15
c file pointer time 0
memory mapping time 47
File Size 97198080
fstream time 16
c file pointer time 0
memory mapping time 265
File Size 1259530844
fstream time 31
c file pointer time 16
memory mapping time 11138
似乎对于 fstream 和 FILE* 的读取,读取一个文件所需的时间并不随文件大小的增加而增加;但对于内存映射,时间确实随文件大小增加。这种现象很奇怪。
因为在我看来,对于大文件,内存映射会更快。
这是我的代码:
// Benchmark script: for each of four input files, time three ways of reading
// the file one byte at a time -- std::ifstream, C FILE*, and a Win32 file
// mapping -- and append the elapsed GetTickCount() ticks (milliseconds) to a
// report file. The whole measurement is repeated 100 times.
string ifile = "M:/Thesis/FileReadCmp/1.txt";
string os = "M:/Thesis/FileReadCmp/new_cmp1.txt";
int page_size = 2 * 1024 * 64 * 1024;//128M, size of one mapped view
for (int j = 0; j < 100; ++j){
    // Patch the character just before ".txt" to select the j-th report file.
    // NOTE(review): for j >= 9 this produces non-digit characters (':' at
    // j == 9), which are not valid in Windows file names -- confirm intent.
    os[os.size() - 5] = '1' + j;
    ofstream o(os);
    for (int i = 0; i < 4; ++i){
        // Same trick for the input: 1.txt .. 4.txt.
        ifile[ifile.size() - 5] = '1' + i;
        ifstream in(ifile);
        in.seekg(0, ios::end);
        o << "File Size " << in.tellg() << endl;
        o << endl;
        in.close();

        //using fstream to read file
        // NOTE(review): the stream is opened in text mode and operator>>
        // skips whitespace; on Windows, text mode may stop at a Ctrl-Z byte,
        // so fewer bytes than "File Size" may actually be read.
        long long st = GetTickCount();
        in.open(ifile);
        char c;
        while (in >> c){
            ;
        }
        in.close();
        long long et = GetTickCount();
        o << "fstream time " << et - st << endl;

        //using FILE* to read file
        // NOTE(review): "r" is text mode, with the same Ctrl-Z / CR-LF
        // caveat as above; "rb" is needed to read every byte.
        st = GetTickCount();
        FILE* cpf = fopen(ifile.c_str(), "r");
        if (cpf != NULL){
            // fgetc returns int: storing it in a char makes a 0xFF data byte
            // indistinguishable from EOF (or EOF unreachable if char is
            // unsigned), so the result must stay an int.
            int cc = fgetc(cpf);
            while (cc != EOF){
                cc = fgetc(cpf);
            }
            fclose(cpf);
        }
        else{
            o << "c file pointer open failed" << endl;
        }
        et = GetTickCount();
        o << "c file pointer time " << et - st << endl;

        //using memory mapping to read file
        const char* pc = ifile.c_str();
        st = GetTickCount();
        HANDLE hFile = CreateFile(pc, GENERIC_WRITE | GENERIC_READ, 0,
            NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
        if (hFile != INVALID_HANDLE_VALUE){
            int file_size = GetFileSize(hFile, NULL);
            HANDLE hFileMap = OpenFileMapping(FILE_MAP_READ | FILE_MAP_WRITE, FALSE,
                TEXT("SharedData"));
            if (hFileMap == NULL){
                // if no such object,create a file mapping object
                hFileMap = CreateFileMapping(hFile, NULL, PAGE_READWRITE,
                    0, 0, TEXT("SharedData"));
            }
            if (hFileMap != NULL){
                // Walk the file in views of at most page_size bytes; the
                // last view covers whatever remains.
                int rem_file_size = file_size;
                int offset = 0;
                while (rem_file_size > 0){
                    int view_size = rem_file_size > page_size ? page_size : rem_file_size;
                    PVOID pvFileView = MapViewOfFile(hFileMap, FILE_MAP_WRITE, 0, offset, view_size);
                    if (pvFileView == NULL){
                        o << "memory mapping view failed" << endl;
                        break;
                    }
                    char* asc_dex = (char*)pvFileView;
                    for (int k = 0; k < view_size; ++k){
                        char ch = asc_dex[k];
                    }
                    // Release every view before mapping the next one: leaked
                    // views eat address space and keep the named mapping
                    // object alive, so a later OpenFileMapping("SharedData")
                    // would return the previous file's mapping.
                    UnmapViewOfFile(pvFileView);
                    offset += view_size;
                    rem_file_size -= view_size;
                }
                CloseHandle(hFileMap);
            }
            else{
                o << "memory mapping create failed" << endl;
            }
            CloseHandle(hFile);
        }
        else{
            o << "memory mapping open failed" << endl;
        }
        et = GetTickCount();
        o << "memory mapping time " << et - st << endl;
        o << endl;
    }
}
最有效的方法是保持输入流流畅。通常,这意味着读取大块数据(每个请求的数据更多)。
一种方法是使用多个缓冲区(在互联网上搜索 "double buffering")和多个线程。
读取线程将读取并填满一个缓冲区,然后发出数据准备就绪的信号。读取线程继续填充下一个缓冲区。
处理线程(等待读取信号)唤醒并开始处理缓冲区,然后输出数据。
目标之一是使用数量足够多、容量足够大的缓冲区,以平衡数据读取速度与处理速度之间的差异。同样,其核心思想是让数据读取相对于处理保持连续、不中断。
可能是打开模式的问题。内存映射默认以二进制方式打开文件。
当我把in.open(ifile);
改成in.open(ifile, ios::binary);
并把FILE* cpf = fopen(ifile.c_str(), "r");
改成FILE* cpf = fopen(ifile.c_str(), "rb");
3种方法的实际运行时间就与理论预期一致了。结果如下:
File Size 1225284
fstream time 297
c file pointer time 93
memory mapping time 0
File Size 14856192
fstream time 3167
c file pointer time 1045
memory mapping time 47
File Size 97198080
fstream time 20779
c file pointer time 6833
memory mapping time 281
File Size 1259530844
fstream time 268696
c file pointer time 88406
memory mapping time 11216
这里是我的项目配置:vs2013,win32,Debug。
我想知道对于不同的文件大小,3种文件读取方法中哪一种更快。它们分别是 C++ 风格的 fstream、C 风格的文件读写(FILE*)和内存映射。
但是执行后,这是我的结果:
File Size 1225284
fstream time 47
c file pointer time 0
memory mapping time 0
File Size 14856192
fstream time 15
c file pointer time 0
memory mapping time 47
File Size 97198080
fstream time 16
c file pointer time 0
memory mapping time 265
File Size 1259530844
fstream time 31
c file pointer time 16
memory mapping time 11138
似乎对于 fstream 和 FILE* 的读取,读取一个文件所需的时间并不随文件大小的增加而增加;但对于内存映射,时间确实随文件大小增加。这种现象很奇怪。
因为在我看来,对于大文件,内存映射会更快。
这是我的代码:
// Benchmark script: for each of four input files, time three ways of reading
// the file one byte at a time -- std::ifstream, C FILE*, and a Win32 file
// mapping -- and append the elapsed GetTickCount() ticks (milliseconds) to a
// report file. The whole measurement is repeated 100 times.
string ifile = "M:/Thesis/FileReadCmp/1.txt";
string os = "M:/Thesis/FileReadCmp/new_cmp1.txt";
int page_size = 2 * 1024 * 64 * 1024;//128M, size of one mapped view
for (int j = 0; j < 100; ++j){
    // Patch the character just before ".txt" to select the j-th report file.
    // NOTE(review): for j >= 9 this produces non-digit characters (':' at
    // j == 9), which are not valid in Windows file names -- confirm intent.
    os[os.size() - 5] = '1' + j;
    ofstream o(os);
    for (int i = 0; i < 4; ++i){
        // Same trick for the input: 1.txt .. 4.txt.
        ifile[ifile.size() - 5] = '1' + i;
        ifstream in(ifile);
        in.seekg(0, ios::end);
        o << "File Size " << in.tellg() << endl;
        o << endl;
        in.close();

        //using fstream to read file
        // NOTE(review): the stream is opened in text mode and operator>>
        // skips whitespace; on Windows, text mode may stop at a Ctrl-Z byte,
        // so fewer bytes than "File Size" may actually be read.
        long long st = GetTickCount();
        in.open(ifile);
        char c;
        while (in >> c){
            ;
        }
        in.close();
        long long et = GetTickCount();
        o << "fstream time " << et - st << endl;

        //using FILE* to read file
        // NOTE(review): "r" is text mode, with the same Ctrl-Z / CR-LF
        // caveat as above; "rb" is needed to read every byte.
        st = GetTickCount();
        FILE* cpf = fopen(ifile.c_str(), "r");
        if (cpf != NULL){
            // fgetc returns int: storing it in a char makes a 0xFF data byte
            // indistinguishable from EOF (or EOF unreachable if char is
            // unsigned), so the result must stay an int.
            int cc = fgetc(cpf);
            while (cc != EOF){
                cc = fgetc(cpf);
            }
            fclose(cpf);
        }
        else{
            o << "c file pointer open failed" << endl;
        }
        et = GetTickCount();
        o << "c file pointer time " << et - st << endl;

        //using memory mapping to read file
        const char* pc = ifile.c_str();
        st = GetTickCount();
        HANDLE hFile = CreateFile(pc, GENERIC_WRITE | GENERIC_READ, 0,
            NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
        if (hFile != INVALID_HANDLE_VALUE){
            int file_size = GetFileSize(hFile, NULL);
            HANDLE hFileMap = OpenFileMapping(FILE_MAP_READ | FILE_MAP_WRITE, FALSE,
                TEXT("SharedData"));
            if (hFileMap == NULL){
                // if no such object,create a file mapping object
                hFileMap = CreateFileMapping(hFile, NULL, PAGE_READWRITE,
                    0, 0, TEXT("SharedData"));
            }
            if (hFileMap != NULL){
                // Walk the file in views of at most page_size bytes; the
                // last view covers whatever remains.
                int rem_file_size = file_size;
                int offset = 0;
                while (rem_file_size > 0){
                    int view_size = rem_file_size > page_size ? page_size : rem_file_size;
                    PVOID pvFileView = MapViewOfFile(hFileMap, FILE_MAP_WRITE, 0, offset, view_size);
                    if (pvFileView == NULL){
                        o << "memory mapping view failed" << endl;
                        break;
                    }
                    char* asc_dex = (char*)pvFileView;
                    for (int k = 0; k < view_size; ++k){
                        char ch = asc_dex[k];
                    }
                    // Release every view before mapping the next one: leaked
                    // views eat address space and keep the named mapping
                    // object alive, so a later OpenFileMapping("SharedData")
                    // would return the previous file's mapping.
                    UnmapViewOfFile(pvFileView);
                    offset += view_size;
                    rem_file_size -= view_size;
                }
                CloseHandle(hFileMap);
            }
            else{
                o << "memory mapping create failed" << endl;
            }
            CloseHandle(hFile);
        }
        else{
            o << "memory mapping open failed" << endl;
        }
        et = GetTickCount();
        o << "memory mapping time " << et - st << endl;
        o << endl;
    }
}
最有效的方法是保持输入流流畅。通常,这意味着读取大块数据(每个请求的数据更多)。
一种方法是使用多个缓冲区(在互联网上搜索 "double buffering")和多个线程。
读取线程将读取并填满一个缓冲区,然后发出数据准备就绪的信号。读取线程继续填充下一个缓冲区。
处理线程(等待读取信号)唤醒并开始处理缓冲区,然后输出数据。
目标之一是使用数量足够多、容量足够大的缓冲区,以平衡数据读取速度与处理速度之间的差异。同样,其核心思想是让数据读取相对于处理保持连续、不中断。
可能是打开模式的问题。内存映射默认以二进制方式打开文件。
当我把in.open(ifile);
改成in.open(ifile, ios::binary);
并把FILE* cpf = fopen(ifile.c_str(), "r");
改成FILE* cpf = fopen(ifile.c_str(), "rb");
3种方法的实际运行时间就与理论预期一致了。结果如下:
File Size 1225284
fstream time 297
c file pointer time 93
memory mapping time 0
File Size 14856192
fstream time 3167
c file pointer time 1045
memory mapping time 47
File Size 97198080
fstream time 20779
c file pointer time 6833
memory mapping time 281
File Size 1259530844
fstream time 268696
c file pointer time 88406
memory mapping time 11216