如何确定文件(所有内容)的大小以便我可以立即为其分配内存?
How to determine the size of (all the content) a file so I can allocate memory for it at once?
我正在尝试为包含单词(分隔符:\n)的文件内容分配内存。
如何替换 16000 以使其可用于更大的文件?
我的代码:
/*
 * One node of the word tree built by load().
 * NOTE(review): 27 child slots presumably means one per letter plus one
 * extra symbol -- confirm against the code that fills the tree.
 */
typedef struct node node;

struct node {
    bool is_word;        /* flag stored per node; set by code outside this snippet */
    node *children[27];  /* links to child nodes; unused slots stay NULL */
};

/* Entry point into the node structure; load() points it at the node pool. */
node *root;
/*
 * Opens the dictionary file and allocates the zero-initialised pool of nodes
 * that the global `root` points at afterwards.
 *
 * dictionary: path of the word file to open (binary mode).
 * Returns true on success; false when the path is NULL, the file cannot be
 * opened, or the node pool cannot be allocated. On failure `root` is left
 * untouched and nothing stays allocated or open.
 *
 * Ownership: the pool is handed over to `root`; it has to be released later
 * by another function with a single free(root).
 */
bool load(const char* dictionary)
{
    /* Fixed pool capacity; the tree-building code must not hand out more nodes than this. */
    enum { NODE_BUCKET_CAPACITY = 16000 };

    if (dictionary == NULL) {
        return false;
    }

    FILE *fp = fopen(dictionary, "rb");
    if (fp == NULL) {
        return false;
    }

    node *node_bucket = calloc(NODE_BUCKET_CAPACITY, sizeof *node_bucket);
    if (node_bucket == NULL) {
        fclose(fp);
        return false;
    }

    /* Next unused slot of the pool, for the tree-building code below. */
    node *next_free_node = node_bucket;
    // compute...

    /* The stream is only needed while building; close it on the success path too. */
    fclose(fp);

    // to later free the memory with another function
    root = node_bucket;
    return true;
}
谢谢
您可以在不知道文件有多大的情况下动态分配内存。我使用的块大小是 2 的幂,通常对块 I/O 更友好。当最后一个块只被部分使用时会浪费一点,但这里有一个例子,你可以调整它来使用你的节点结构:
#include <stdio.h>
#include <stdlib.h>
#define BLOCKSIZE 16384

/*
 * Reads all of "test.txt" into a heap buffer that grows by BLOCKSIZE bytes
 * per iteration, then prints the total number of bytes read.
 *
 * Exits with status 1 when the file cannot be opened, the buffer cannot be
 * grown, or a read error occurs; returns 0 otherwise.
 */
int main(void) {
    unsigned char *buf = NULL;
    size_t totalread = 0;
    size_t currentsize = 0;
    size_t currentread = 0;
    FILE *fp = fopen("test.txt", "rb");

    if (fp == NULL) {
        exit(1);
    }

    do {
        /* Grow before reading so there is always room for one more full block. */
        unsigned char *tmp = realloc(buf, currentsize + BLOCKSIZE);
        if (tmp == NULL) {
            /* realloc left the old buffer valid; release everything before exiting. */
            free(buf);
            fclose(fp);
            exit(1);
        }
        buf = tmp;
        currentsize += BLOCKSIZE;

        currentread = fread(&buf[totalread], 1, BLOCKSIZE, fp);
        totalread += currentread;
    } while (currentread == BLOCKSIZE);

    /* A short read means EOF or an error; only EOF makes the count a real size. */
    if (ferror(fp)) {
        free(buf);
        fclose(fp);
        exit(1);
    }

    printf("Total size was %zu\n", totalread);
    free(buf);
    fclose(fp);
    return 0;
}
获取文件大小的最简单方法是先用 fseek() 定位到文件末尾,再调用 ftell():
fseek(fp, 0, SEEK_END); // non-portable
long size = ftell(fp);
不过,正如代码注释所说,这种做法是不可移植的,因为 N1570(C11 标准草案)在“7.21.9.2 The fseek function”一节中写道:
2 ...... A binary stream need not meaningfully support fseek calls with a
whence value of SEEK_END.
或者,您可以自己编写一个函数来获取文件的大小:
/*
 * Counts the bytes between the current position of fp and end-of-file.
 *
 * fp: open, readable stream; it is consumed, so on return it sits at EOF
 *     (or at the point where reading failed).
 * Returns the byte count, or 0 when reading stopped for any reason other
 * than end-of-file. An empty stream also yields 0.
 *
 * Reads through a fixed on-stack buffer, so nothing is allocated or leaked.
 */
size_t fSize(FILE *fp)
{
    unsigned char chunk[4096];
    size_t size = 0;
    size_t got;

    /* A short read is the first sign of EOF or an error; stop there. */
    do {
        got = fread(chunk, 1, sizeof chunk, fp);
        size += got;
    } while (got == sizeof chunk);

    if (feof(fp)) {
        return size;
    }
    return 0; // reading error
}
准确性与效率的权衡:
/*
 * Estimates the number of bytes between the current position of fp and
 * end-of-file by counting whole 1024-byte blocks.
 *
 * fp: open, readable stream; it is consumed by the call.
 * Returns the size rounded DOWN to a multiple of 1024 (a trailing partial
 * block is not counted), or 0 when reading stopped for any reason other
 * than end-of-file.
 */
size_t fRoughSize(FILE *fp)
{
    unsigned char block[1024];
    size_t size = 0;

    /* One element of 1024 bytes is requested, so a successful read returns 1. */
    while (fread(block, sizeof block, 1, fp) == 1) {
        size += sizeof block;
    }

    if (feof(fp)) {
        return size;
    }
    return 0; // reading error
}
我正在尝试为包含单词(分隔符:\n)的文件内容分配内存。
如何替换 16000 以使其可用于更大的文件?
我的代码:
/*
 * One node of the word tree built by load().
 * NOTE(review): 27 child slots presumably means one per letter plus one
 * extra symbol -- confirm against the code that fills the tree.
 */
typedef struct node node;

struct node {
    bool is_word;        /* flag stored per node; set by code outside this snippet */
    node *children[27];  /* links to child nodes; unused slots stay NULL */
};

/* Entry point into the node structure; load() points it at the node pool. */
node *root;
/*
 * Opens the dictionary file and allocates the zero-initialised pool of nodes
 * that the global `root` points at afterwards.
 *
 * dictionary: path of the word file to open (binary mode).
 * Returns true on success; false when the path is NULL, the file cannot be
 * opened, or the node pool cannot be allocated. On failure `root` is left
 * untouched and nothing stays allocated or open.
 *
 * Ownership: the pool is handed over to `root`; it has to be released later
 * by another function with a single free(root).
 */
bool load(const char* dictionary)
{
    /* Fixed pool capacity; the tree-building code must not hand out more nodes than this. */
    enum { NODE_BUCKET_CAPACITY = 16000 };

    if (dictionary == NULL) {
        return false;
    }

    FILE *fp = fopen(dictionary, "rb");
    if (fp == NULL) {
        return false;
    }

    node *node_bucket = calloc(NODE_BUCKET_CAPACITY, sizeof *node_bucket);
    if (node_bucket == NULL) {
        fclose(fp);
        return false;
    }

    /* Next unused slot of the pool, for the tree-building code below. */
    node *next_free_node = node_bucket;
    // compute...

    /* The stream is only needed while building; close it on the success path too. */
    fclose(fp);

    // to later free the memory with another function
    root = node_bucket;
    return true;
}
谢谢
您可以在不知道文件有多大的情况下动态分配内存。我使用的块大小是 2 的幂,通常对块 I/O 更友好。当最后一个块只被部分使用时会浪费一点,但这里有一个例子,你可以调整它来使用你的节点结构:
#include <stdio.h>
#include <stdlib.h>
#define BLOCKSIZE 16384

/*
 * Reads all of "test.txt" into a heap buffer that grows by BLOCKSIZE bytes
 * per iteration, then prints the total number of bytes read.
 *
 * Exits with status 1 when the file cannot be opened, the buffer cannot be
 * grown, or a read error occurs; returns 0 otherwise.
 */
int main(void) {
    unsigned char *buf = NULL;
    size_t totalread = 0;
    size_t currentsize = 0;
    size_t currentread = 0;
    FILE *fp = fopen("test.txt", "rb");

    if (fp == NULL) {
        exit(1);
    }

    do {
        /* Grow before reading so there is always room for one more full block. */
        unsigned char *tmp = realloc(buf, currentsize + BLOCKSIZE);
        if (tmp == NULL) {
            /* realloc left the old buffer valid; release everything before exiting. */
            free(buf);
            fclose(fp);
            exit(1);
        }
        buf = tmp;
        currentsize += BLOCKSIZE;

        currentread = fread(&buf[totalread], 1, BLOCKSIZE, fp);
        totalread += currentread;
    } while (currentread == BLOCKSIZE);

    /* A short read means EOF or an error; only EOF makes the count a real size. */
    if (ferror(fp)) {
        free(buf);
        fclose(fp);
        exit(1);
    }

    printf("Total size was %zu\n", totalread);
    free(buf);
    fclose(fp);
    return 0;
}
获取文件大小的最简单方法是先用 fseek() 定位到文件末尾,再调用 ftell():
fseek(fp, 0, SEEK_END); // non-portable
long size = ftell(fp);
不过,正如代码注释所说,这种做法是不可移植的,因为 N1570(C11 标准草案)在“7.21.9.2 The fseek function”一节中写道:
2 ...... A binary stream need not meaningfully support fseek calls with a whence value of SEEK_END.
或者,您可以自己编写一个函数来获取文件的大小:
/*
 * Counts the bytes between the current position of fp and end-of-file.
 *
 * fp: open, readable stream; it is consumed, so on return it sits at EOF
 *     (or at the point where reading failed).
 * Returns the byte count, or 0 when reading stopped for any reason other
 * than end-of-file. An empty stream also yields 0.
 *
 * Reads through a fixed on-stack buffer, so nothing is allocated or leaked.
 */
size_t fSize(FILE *fp)
{
    unsigned char chunk[4096];
    size_t size = 0;
    size_t got;

    /* A short read is the first sign of EOF or an error; stop there. */
    do {
        got = fread(chunk, 1, sizeof chunk, fp);
        size += got;
    } while (got == sizeof chunk);

    if (feof(fp)) {
        return size;
    }
    return 0; // reading error
}
准确性与效率的权衡:
/*
 * Estimates the number of bytes between the current position of fp and
 * end-of-file by counting whole 1024-byte blocks.
 *
 * fp: open, readable stream; it is consumed by the call.
 * Returns the size rounded DOWN to a multiple of 1024 (a trailing partial
 * block is not counted), or 0 when reading stopped for any reason other
 * than end-of-file.
 */
size_t fRoughSize(FILE *fp)
{
    unsigned char block[1024];
    size_t size = 0;

    /* One element of 1024 bytes is requested, so a successful read returns 1. */
    while (fread(block, sizeof block, 1, fp) == 1) {
        size += sizeof block;
    }

    if (feof(fp)) {
        return size;
    }
    return 0; // reading error
}