独立于平台获取文件大小 (C)
Getting file size platform-independently (C)
我正在寻找一些独立于平台的 C 代码来确定给定文件的大小。首先,我阅读了以下答案:
这些答案将 fseek(配合 SEEK_END)与 ftell 结合使用。现在,我的问题是我在 C 标准中找到了以下引文。
Setting the file position indicator to end-of-file, as with fseek(file, 0, SEEK_END), has undefined behavior for a binary stream (because of possible trailing null characters) or for any stream with state-dependent encoding that does not assuredly end in the initial shift state.
和
A binary stream need not meaningfully support fseek calls with a whence value of SEEK_END.
看来我有问题了。可能,以下计算读取字节数的代码是一种解决方法。
/* Open the file as a binary stream ("rb") so that bytes are read as stored. */
file = fopen(file_path, "rb");
/* ... */
/* Count one byte per successful fgetc call. fgetc returns EOF both at
 * end-of-file and on a read error, so the loop stops in either case. */
while ( EOF != fgetc(file) ) {
ret = size_t_inc(&file_size_); /* essentially, this does ++file_size_ */
/* ... */
}
/* feof is nonzero only when the end-of-file indicator is set, which
 * distinguishes a normal end of input from a read error. */
ret = feof(file);
/* ... */
if (!ret) {
return 1; /* Error! */
}
(这里的整个函数:https://github.com/630R6/bytelev/blob/8e3d0dd14042f16086f3ca4e9a33d49a0629630e/main.c#L138)
不过,我正在寻找更好的解决方案。
非常感谢您的宝贵时间!
首先,让我指出,就其表述而言,这个问题有点徒劳无功:在有文件这类概念的平台上,都有明确定义的途径,能以最高效的方式获取此信息。鉴于通常需要支持的环境数量有限,将该功能抽象为简单的、平台特定的函数才是可行的做法。
所以,基本上,只要 "file" 的大小有限,读取整个文件并沿途计算字节数是唯一通用的方法。
在细节上可以稍作优化(比如,对于一个 10GB 的文件,我们真的需要 100 亿次 fgetc 调用吗?)
例如,下面我使用 fread 以最大 64K 的块读取文件:
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/* Largest value a size_t can hold; used to detect overflow of the byte count. */
static const size_t SIZE_T_MAX = (size_t) -1;
/* Number of bytes requested from fread on each call. */
static const size_t CHUNK_SIZE = 64 * 1024;

/*
 * Determine the size of a file by reading it to the end and counting bytes.
 *
 * The file is opened as a binary stream and consumed in chunks of at most
 * CHUNK_SIZE bytes.
 *
 * filename: path passed to fopen.
 * size:     receives the running byte count. It is reset to 0 once the file
 *           has been opened and holds the total only when 0 is returned.
 *
 * Returns 0 on success. On failure returns a nonzero value:
 *   - ENOMEM if the read buffer cannot be allocated,
 *   - EOVERFLOW if the byte count does not fit in a size_t,
 *   - the errno value left by the failing fopen/fread call, or -1 when that
 *     call did not set errno.
 * The stream and the buffer are released on every path.
 */
int
calc_file_size_slowly(const char *const filename, size_t *size) {
    FILE *fp;
    int ret = -1;
    unsigned char *buffer = malloc(CHUNK_SIZE);

    /* Report allocation failure to the caller; an assert would vanish
     * under NDEBUG and leave a null buffer to be passed to fread. */
    if (!buffer) {
        return ENOMEM;
    }

    errno = 0;
    fp = fopen(filename, "rb");
    if (!fp) {
        /* The C standard does not require fopen to set errno, so never
         * let a failure be reported as 0. */
        ret = (errno != 0) ? errno : -1;
        goto FREE_BUFFER;
    }

    *size = 0;
    for (;;) {
        size_t bytes_read;

        errno = 0;
        bytes_read = fread(buffer, 1, CHUNK_SIZE, fp);

        /* Check the error indicator even when no bytes came back: a
         * failed read can return 0 just like end-of-file does. */
        if (ferror(fp)) {
            ret = (errno != 0) ? errno : -1;
            break;
        }

        if (bytes_read > SIZE_T_MAX - *size) {
            ret = EOVERFLOW;
            break;
        }
        *size += bytes_read;

        if (feof(fp)) {
            ret = 0;
            break;
        }

        /* No data, no EOF and no error: give up rather than spin forever. */
        if (bytes_read == 0) {
            ret = -1;
            break;
        }
    }

    /* ret is already final, so fclose may overwrite errno freely. */
    fclose(fp);

FREE_BUFFER:
    free(buffer);
    return ret;
}
/*
 * Print the byte count of every file named on the command line.
 *
 * Each argument is passed to calc_file_size_slowly. Successful results are
 * written to stdout as "<name>: <count> bytes"; failures are reported on
 * stderr together with the error code returned by the call.
 *
 * Returns EXIT_SUCCESS when every file was measured, EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[]) {
    int status = EXIT_SUCCESS;
    int i;

    for (i = 1; i < argc; i += 1) {
        size_t size = 0;
        int rc = calc_file_size_slowly(argv[i], &size);

        if (rc == 0) {
            /* %zu is the conversion for size_t; %lu is undefined behaviour
             * where unsigned long is narrower than size_t. */
            printf("%s: %zu bytes\n", argv[i], size);
        }
        else {
            fprintf(stderr, "%s: could not determine size (error %d)\n", argv[i], rc);
            status = EXIT_FAILURE;
        }
    }
    return status;
}
输出:
C:\...\Temp> dir wpi.msi
...
2015-05-15 12:12 PM 1,859,584 wpi.msi
...
C:\...\Temp> mysizer wpi.msi
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=65536
Bytes read=24576
wpi.msi: 1859584 bytes
我正在寻找一些独立于平台的 C 代码来确定给定文件的大小。首先,我阅读了以下答案:
这些答案将 fseek(配合 SEEK_END)与 ftell 结合使用。现在,我的问题是我在 C 标准中找到了以下引文。
Setting the file position indicator to end-of-file, as with fseek(file, 0, SEEK_END), has undefined behavior for a binary stream (because of possible trailing null characters) or for any stream with state-dependent encoding that does not assuredly end in the initial shift state.
和
A binary stream need not meaningfully support fseek calls with a whence value of SEEK_END.
看来我有问题了。可能,以下计算读取字节数的代码是一种解决方法。
/* Open the file as a binary stream ("rb") so that bytes are read as stored. */
file = fopen(file_path, "rb");
/* ... */
/* Count one byte per successful fgetc call. fgetc returns EOF both at
 * end-of-file and on a read error, so the loop stops in either case. */
while ( EOF != fgetc(file) ) {
ret = size_t_inc(&file_size_); /* essentially, this does ++file_size_ */
/* ... */
}
/* feof is nonzero only when the end-of-file indicator is set, which
 * distinguishes a normal end of input from a read error. */
ret = feof(file);
/* ... */
if (!ret) {
return 1; /* Error! */
}
(这里的整个函数:https://github.com/630R6/bytelev/blob/8e3d0dd14042f16086f3ca4e9a33d49a0629630e/main.c#L138)
不过,我正在寻找更好的解决方案。
非常感谢您的宝贵时间!
首先,让我指出,就其表述而言,这个问题有点徒劳无功:在有文件这类概念的平台上,都有明确定义的途径,能以最高效的方式获取此信息。鉴于通常需要支持的环境数量有限,将该功能抽象为简单的、平台特定的函数才是可行的做法。
所以,基本上,只要 "file" 的大小有限,读取整个文件并沿途计算字节数是唯一通用的方法。
在细节上可以稍作优化(比如,对于一个 10GB 的文件,我们真的需要 100 亿次 fgetc 调用吗?)
例如,下面我使用 fread 以最大 64K 的块读取文件:
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/* Largest value a size_t can hold; used to detect overflow of the byte count. */
static const size_t SIZE_T_MAX = (size_t) -1;
/* Number of bytes requested from fread on each call. */
static const size_t CHUNK_SIZE = 64 * 1024;

/*
 * Determine the size of a file by reading it to the end and counting bytes.
 *
 * The file is opened as a binary stream and consumed in chunks of at most
 * CHUNK_SIZE bytes.
 *
 * filename: path passed to fopen.
 * size:     receives the running byte count. It is reset to 0 once the file
 *           has been opened and holds the total only when 0 is returned.
 *
 * Returns 0 on success. On failure returns a nonzero value:
 *   - ENOMEM if the read buffer cannot be allocated,
 *   - EOVERFLOW if the byte count does not fit in a size_t,
 *   - the errno value left by the failing fopen/fread call, or -1 when that
 *     call did not set errno.
 * The stream and the buffer are released on every path.
 */
int
calc_file_size_slowly(const char *const filename, size_t *size) {
    FILE *fp;
    int ret = -1;
    unsigned char *buffer = malloc(CHUNK_SIZE);

    /* Report allocation failure to the caller; an assert would vanish
     * under NDEBUG and leave a null buffer to be passed to fread. */
    if (!buffer) {
        return ENOMEM;
    }

    errno = 0;
    fp = fopen(filename, "rb");
    if (!fp) {
        /* The C standard does not require fopen to set errno, so never
         * let a failure be reported as 0. */
        ret = (errno != 0) ? errno : -1;
        goto FREE_BUFFER;
    }

    *size = 0;
    for (;;) {
        size_t bytes_read;

        errno = 0;
        bytes_read = fread(buffer, 1, CHUNK_SIZE, fp);

        /* Check the error indicator even when no bytes came back: a
         * failed read can return 0 just like end-of-file does. */
        if (ferror(fp)) {
            ret = (errno != 0) ? errno : -1;
            break;
        }

        if (bytes_read > SIZE_T_MAX - *size) {
            ret = EOVERFLOW;
            break;
        }
        *size += bytes_read;

        if (feof(fp)) {
            ret = 0;
            break;
        }

        /* No data, no EOF and no error: give up rather than spin forever. */
        if (bytes_read == 0) {
            ret = -1;
            break;
        }
    }

    /* ret is already final, so fclose may overwrite errno freely. */
    fclose(fp);

FREE_BUFFER:
    free(buffer);
    return ret;
}
/*
 * Print the byte count of every file named on the command line.
 *
 * Each argument is passed to calc_file_size_slowly. Successful results are
 * written to stdout as "<name>: <count> bytes"; failures are reported on
 * stderr together with the error code returned by the call.
 *
 * Returns EXIT_SUCCESS when every file was measured, EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[]) {
    int status = EXIT_SUCCESS;
    int i;

    for (i = 1; i < argc; i += 1) {
        size_t size = 0;
        int rc = calc_file_size_slowly(argv[i], &size);

        if (rc == 0) {
            /* %zu is the conversion for size_t; %lu is undefined behaviour
             * where unsigned long is narrower than size_t. */
            printf("%s: %zu bytes\n", argv[i], size);
        }
        else {
            fprintf(stderr, "%s: could not determine size (error %d)\n", argv[i], rc);
            status = EXIT_FAILURE;
        }
    }
    return status;
}
输出:
C:\...\Temp> dir wpi.msi ... 2015-05-15 12:12 PM 1,859,584 wpi.msi ... C:\...\Temp> mysizer wpi.msi Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=65536 Bytes read=24576 wpi.msi: 1859584 bytes