如何读取文本文件中的逗号分隔行并将其字段插入到结构指针数组中?
How do I read a comma separated line in a text file and insert its fields into an array of struct pointers?
我已经尝试弄清楚这个问题有一段时间了,我觉得我必须接近了。基本上,我有一个数据文件,其中包含由换行符分隔的各个国家/地区的记录。每条记录都包含逗号分隔的字段,我正在尝试提取其中的某些字段。
例如(单行):
60,AFG,Afghanistan,Asia,Southern and Central Asia,652090,1919,22720000,45.9,5976,Afganistan/Afqanestan,Islamic Emirate,Mohammad Omar,1,AF
这些行中的每一行都将构成一个结构。本质上,我想读取每一行并将其插入到一个结构指针数组中(如此动态地)。我也只想要特定的领域。当我 "tokenize" 行时,我想要代码、姓名、人口和预期寿命的字段。分别是:
AFG, 阿富汗, 22720000, 45.
我的想法是使用 fgets() 读取文件中的每一行,并在循环 malloc() 中为指针分配一些内存,标记我想要的字段,然后插入。但是,我正在做的事情一定是错误的,因为各种测试似乎没有在我的输出中显示任何内容。
这是我到目前为止的工作。我将不胜感激任何帮助。
#include "allheaders.h" // contains all common headers for personal use
#define BUF_SIZE 512
#define NUM_RECS 238
typedef struct {
char code[4];
char name[40];
int population;
float lifeExpectancy;
} Country;
typedef Country *countryPtr;
int main( int argc, const char* argv[] ) {
/* Opening the file */
FILE *filePtr; // pointer to file
if ((filePtr = fopen("AllCountries.dat", "r")) == NULL) { // if couldn't open file
printf("Error opening file\n"); // error message
exit(1);
}
/* Reading the file */
char buffer[BUF_SIZE]; // buffer to read
int index = 0;
char *token;
countryPtr *myCountries = malloc(sizeof(*myCountries) * NUM_RECS);
for(int i = 0; i < NUM_RECS; ++i) {
myCountries[i] = malloc(sizeof(*myCountries[i]));
}
while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
token = strtok(buffer,",");
token = strtok(NULL, ",");
strcpy(myCountries[index]->code, token);
token = strtok(NULL, ",");
strcpy(myCountries[index]->name, token);
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
myCountries[index]->population = atoi(token);
token = strtok(NULL, ",");
myCountries[index]->lifeExpectancy = atof(token);
//printf("%s", buffer);
index++;
}
printf("%s", myCountries[1]->code); // test?
free(myCountries);
}
看看下面的内容。
首先,您需要做一些工作来改善标记为 NYI
的区域
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#define BUF_SIZE 512
#define NUM_RECS 238
typedef struct {
char code[4]; // NYI - magic numbers
char name[41]; // NYI - magic numbers
int population; // NYI - what if atoi fails?
float lifeExpectancy; // NYI - what if atof fails?
} Country;
typedef Country* countryPtr;
int main( int argc, const char* argv[] ) {
/* Opening the file */
FILE *filePtr; // pointer to file
if ((filePtr = fopen("a.txt", "r")) == NULL) { // if couldn't open file
printf("Error opening file\n"); // error message
exit(1);
}
/* Reading the file */
char buffer[BUF_SIZE]; // buffer to read
int index=0;
char *token; // NYI - initial value
countryPtr* myCountries = calloc(NUM_RECS, sizeof(countryPtr));
for(int i = 0; i < NUM_RECS; ++i) {
myCountries[i] = calloc(1, sizeof(Country));
}
while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
// NYI - magic lengths / overflow strcpy targets
token = strtok(buffer,","); // NYI - This is probably not the best way to do this. At least fold into a loop.
token = strtok(NULL, ",");
strcpy(myCountries[index]->code, token);
token = strtok(NULL, ",");
strcpy(myCountries[index]->name, token);
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
myCountries[index]->population = atoi(token); // NYI - atoi failure
token = strtok(NULL, ",");
myCountries[index]->lifeExpectancy = atof(token); // NYI - atof failure
printf("%s", buffer);
index++;
}
printf("%s\n", myCountries[0]->code); // test? NYI - need more proof
free(myCountries); // NYI - this is a sequence - need to free each of the new elements
}
我根据你的代码和数据文件采取了不同的方法来解决它。我测试了它。它适用于您显示的记录类型的文件。希望它能解释一些事情并使您的工作更轻松,并为您提供一个良好的工作场所。
我不喜欢按照一般原则 pre-count(耗时)或 pre-know 文件中记录数的方式编写程序,除非在极少数情况下。因此,在读取文件时,我更喜欢边读边分配内存。现在如果有一个大文件和大量数据,那么你必须想出一个比将它们全部保存在内存中更好的内存管理方案。在某些时候,您最好使用某种固定的数据库解决方案。 MySQL、API、库、解析器等...但这应该适用于小文件。
通常在UNIX的C中,exit(0)
表示成功,exit(-1)
表示失败。此外,由于您的国家/地区代码是 3 个字符,因此保留它的字段必须至少有 4 个字符用于尾随 '\0'
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <strings.h>
#define MAXRECL 512
#define MAXFIELDS 100
#define MAXFIELDL 80
// Field indicies
#define COUNTRY_CODE 1
#define COUNTRY_NAME 2
#define POPULATION 7
#define LIFE_EXPECTANCY 8
#define CCMAX 3
#define CNMAX 40
typedef struct Country {
struct Country *next;
char code[CCMAX + 1]; // (Need room for trailing '[=10=]')
char name[CNMAX + 1]; // (Need room for trailing '[=10=]')
int population;
float lifeExpectancy;
} country_t;
country_t *countryRecords;
int main( int argc, const char* argv[] ) {
FILE *fp;
if ((fp = fopen("AllCountries.dat", "r")) == NULL) {
printf("Error opening file\n");
exit(-1);
}
int totalCountries = 0;
char buf[MAXRECL];
char fields[MAXFIELDS][MAXFIELDL];
country_t *prev_country = NULL;
while (fgets(buf, MAXRECL, fp) != NULL) {
++totalCountries;
country_t *country = calloc(sizeof(struct Country), 1);
if (country == NULL) {
fprintf(stderr, "Out of memory\n");
exit(-1);
}
char *field = strtok(buf, ",");
int i = 0;
while(field != NULL) {
strncpy(fields[i++], field, MAXFIELDL);
field = strtok(NULL, ",");
}
strcpy(country->code, fields[COUNTRY_CODE]);
strcpy(country->name, fields[COUNTRY_NAME]);
country->population = atoi(fields[POPULATION]);
country->lifeExpectancy = atof(fields[LIFE_EXPECTANCY]);
if (countryRecords == NULL)
countryRecords = country;
else
prev_country->next = country;
prev_country = country;
}
printf("Total countries: %d\n", totalCountries);
country_t *country = countryRecords;
while(country != NULL) {
printf("%3s %30s Population: %7d Life Expectancy: %5.2f\n",
country->code, country->name, country->population, country->lifeExpectancy);
country_t *prev_country = country;
country = country->next;
free(prev_country);
}
printf("Done\n");
exit(0);
}
我已经尝试弄清楚这个问题有一段时间了,我觉得我必须接近了。基本上,我有一个数据文件,其中包含由换行符分隔的各个国家/地区的记录。每条记录都包含逗号分隔的字段,我正在尝试提取其中的某些字段。
例如(单行):
60,AFG,Afghanistan,Asia,Southern and Central Asia,652090,1919,22720000,45.9,5976,Afganistan/Afqanestan,Islamic Emirate,Mohammad Omar,1,AF
这些行中的每一行都将构成一个结构。本质上,我想读取每一行并将其插入到一个结构指针数组中(如此动态地)。我也只想要特定的领域。当我 "tokenize" 行时,我想要代码、姓名、人口和预期寿命的字段。分别是:
AFG, 阿富汗, 22720000, 45.
我的想法是使用 fgets() 读取文件中的每一行,并在循环 malloc() 中为指针分配一些内存,标记我想要的字段,然后插入。但是,我正在做的事情一定是错误的,因为各种测试似乎没有在我的输出中显示任何内容。
这是我到目前为止的工作。我将不胜感激任何帮助。
#include "allheaders.h" // contains all common headers for personal use
#define BUF_SIZE 512
#define NUM_RECS 238
typedef struct {
char code[4];
char name[40];
int population;
float lifeExpectancy;
} Country;
typedef Country *countryPtr;
int main( int argc, const char* argv[] ) {
/* Opening the file */
FILE *filePtr; // pointer to file
if ((filePtr = fopen("AllCountries.dat", "r")) == NULL) { // if couldn't open file
printf("Error opening file\n"); // error message
exit(1);
}
/* Reading the file */
char buffer[BUF_SIZE]; // buffer to read
int index = 0;
char *token;
countryPtr *myCountries = malloc(sizeof(*myCountries) * NUM_RECS);
for(int i = 0; i < NUM_RECS; ++i) {
myCountries[i] = malloc(sizeof(*myCountries[i]));
}
while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
token = strtok(buffer,",");
token = strtok(NULL, ",");
strcpy(myCountries[index]->code, token);
token = strtok(NULL, ",");
strcpy(myCountries[index]->name, token);
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
myCountries[index]->population = atoi(token);
token = strtok(NULL, ",");
myCountries[index]->lifeExpectancy = atof(token);
//printf("%s", buffer);
index++;
}
printf("%s", myCountries[1]->code); // test?
free(myCountries);
}
看看下面的内容。 首先,您需要做一些工作来改善标记为 NYI
的区域#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#define BUF_SIZE 512
#define NUM_RECS 238
typedef struct {
char code[4]; // NYI - magic numbers
char name[41]; // NYI - magic numbers
int population; // NYI - what if atoi fails?
float lifeExpectancy; // NYI - what if atof fails?
} Country;
typedef Country* countryPtr;
int main( int argc, const char* argv[] ) {
/* Opening the file */
FILE *filePtr; // pointer to file
if ((filePtr = fopen("a.txt", "r")) == NULL) { // if couldn't open file
printf("Error opening file\n"); // error message
exit(1);
}
/* Reading the file */
char buffer[BUF_SIZE]; // buffer to read
int index=0;
char *token; // NYI - initial value
countryPtr* myCountries = calloc(NUM_RECS, sizeof(countryPtr));
for(int i = 0; i < NUM_RECS; ++i) {
myCountries[i] = calloc(1, sizeof(Country));
}
while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
// NYI - magic lengths / overflow strcpy targets
token = strtok(buffer,","); // NYI - This is probably not the best way to do this. At least fold into a loop.
token = strtok(NULL, ",");
strcpy(myCountries[index]->code, token);
token = strtok(NULL, ",");
strcpy(myCountries[index]->name, token);
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
token = strtok(NULL, ",");
myCountries[index]->population = atoi(token); // NYI - atoi failure
token = strtok(NULL, ",");
myCountries[index]->lifeExpectancy = atof(token); // NYI - atof failure
printf("%s", buffer);
index++;
}
printf("%s\n", myCountries[0]->code); // test? NYI - need more proof
free(myCountries); // NYI - this is a sequence - need to free each of the new elements
}
我根据你的代码和数据文件采取了不同的方法来解决它。我测试了它。它适用于您显示的记录类型的文件。希望它能解释一些事情并使您的工作更轻松,并为您提供一个良好的工作场所。
我不喜欢按照一般原则 pre-count(耗时)或 pre-know 文件中记录数的方式编写程序,除非在极少数情况下。因此,在读取文件时,我更喜欢边读边分配内存。现在如果有一个大文件和大量数据,那么你必须想出一个比将它们全部保存在内存中更好的内存管理方案。在某些时候,您最好使用某种固定的数据库解决方案。 MySQL、API、库、解析器等...但这应该适用于小文件。
通常在UNIX的C中,exit(0)
表示成功,exit(-1)
表示失败。此外,由于您的国家/地区代码是 3 个字符,因此保留它的字段必须至少有 4 个字符用于尾随 '\0'
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <strings.h>
#define MAXRECL 512
#define MAXFIELDS 100
#define MAXFIELDL 80
// Field indicies
#define COUNTRY_CODE 1
#define COUNTRY_NAME 2
#define POPULATION 7
#define LIFE_EXPECTANCY 8
#define CCMAX 3
#define CNMAX 40
typedef struct Country {
struct Country *next;
char code[CCMAX + 1]; // (Need room for trailing '[=10=]')
char name[CNMAX + 1]; // (Need room for trailing '[=10=]')
int population;
float lifeExpectancy;
} country_t;
country_t *countryRecords;
int main( int argc, const char* argv[] ) {
FILE *fp;
if ((fp = fopen("AllCountries.dat", "r")) == NULL) {
printf("Error opening file\n");
exit(-1);
}
int totalCountries = 0;
char buf[MAXRECL];
char fields[MAXFIELDS][MAXFIELDL];
country_t *prev_country = NULL;
while (fgets(buf, MAXRECL, fp) != NULL) {
++totalCountries;
country_t *country = calloc(sizeof(struct Country), 1);
if (country == NULL) {
fprintf(stderr, "Out of memory\n");
exit(-1);
}
char *field = strtok(buf, ",");
int i = 0;
while(field != NULL) {
strncpy(fields[i++], field, MAXFIELDL);
field = strtok(NULL, ",");
}
strcpy(country->code, fields[COUNTRY_CODE]);
strcpy(country->name, fields[COUNTRY_NAME]);
country->population = atoi(fields[POPULATION]);
country->lifeExpectancy = atof(fields[LIFE_EXPECTANCY]);
if (countryRecords == NULL)
countryRecords = country;
else
prev_country->next = country;
prev_country = country;
}
printf("Total countries: %d\n", totalCountries);
country_t *country = countryRecords;
while(country != NULL) {
printf("%3s %30s Population: %7d Life Expectancy: %5.2f\n",
country->code, country->name, country->population, country->lifeExpectancy);
country_t *prev_country = country;
country = country->next;
free(prev_country);
}
printf("Done\n");
exit(0);
}