从一个文件中过滤数据并将该数据写入新文件
Filtering data from one file and writing that data to a new file
我正在尝试将文件 (customers.txt) 中的数据过滤后复制到新文件 (filtered_customers.txt)。现在我的程序只是读取 "customers.txt" 文件中的所有内容并将其原样复制到新文件中。我想修改它,使得只有第一列为 "Dublin" 且最后一列为 1 的条目才会被复制到新文件中。
我正在研究使用 strcmp()
函数将第一列中的值与字符串 "Dublin" 进行比较,并用同样的方法将最后一列中的值与 "1" 进行比较,但我以前从未真正做过任何文件处理,所以完全不知道从哪里开始。非常感谢任何有关如何处理此问题的建议,谢谢。
代码 - 将一个文件中的所有条目复制到另一个文件中
#include <stdio.h>
#include <stdlib.h> // For exit()
#include <conio.h>
/*
 * Copy customers.txt byte-for-byte into filtered_customers.txt.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if either file cannot be
 * opened or if the output cannot be written completely.
 */
int main()
{
    FILE *fptr1;
    FILE *fptr2;
    /* fgetc() returns an int so that EOF can be told apart from every valid
     * byte value; storing it in a char either never matches EOF or matches
     * a legitimate 0xFF byte, depending on the signedness of char. */
    int c;
    /* Backslashes must be doubled inside a C string literal; a single one
     * starts an escape sequence (\U and \u are universal character names,
     * \f is a form feed), so the original paths were malformed. */
    const char *filename = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\customers.txt";
    const char *filename2 = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\filtered_customers.txt";

    // Open customers.txt file for reading
    fptr1 = fopen(filename, "r");
    if (fptr1 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename);
        exit(EXIT_FAILURE);
    }

    // Open file for writing
    fptr2 = fopen(filename2, "w");
    if (fptr2 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(fptr1);
        exit(EXIT_FAILURE);
    }

    // Copy the contents one character at a time
    while ((c = fgetc(fptr1)) != EOF)
    {
        fputc(c, fptr2);
    }

    fclose(fptr1);
    /* fclose() flushes buffered output, so a write failure may only show up
     * here; do not report success unless the flush worked. */
    if (ferror(fptr2) | (fclose(fptr2) != 0))
    {
        fprintf(stderr, "Cannot write file %s \n", filename2);
        exit(EXIT_FAILURE);
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n", filename, filename2);
    return 0;
}
customers.txt
Dublin Dunne 865463389 21 1
Dublin Milford 865438990 22 0
Dublin Mowlds 876765443 24 1
Dublin Wang 873456789 22 1
Dublin Smith 875432234 25 1
Dublin Henry 876654429 22 0
Dublin Gupta 896765443 24 1
Wicklow Monahan 865432245 22 1
Wicklow Brunsdon 865678894 22 1
Wexford Tyson 865434566 22 0
Wexford Browne 865564766 20 1
Dublin Dunne 890065443 27 1
Dublin Connolly 876733999 20 0
Louth Jennings 987645673 60 1
Mayo Wang 876232123 29 1
您只是逐个字符地复制文件。
输入是一系列具有单独列的行。
我们必须根据各列是否匹配来决定接受还是拒绝某一行。
所以,首先,我们需要一次读取并检查一整行。因此,我们应该使用 fgets 而不是 fgetc。
然后,我们需要将行拆分成列。有多种方法可以做到这一点,但最简单的方法是先制作该行的副本,再使用 strtok 拆分该副本。
然后,可以将第一列与 Dublin 进行比较,将最后一列与 1 进行比较。
如果两者都匹配,我们可以用 fputs 输出[原始]行。
这是为执行此操作而重构的代码:
#include <stdio.h>
#include <stdlib.h> // For exit()
#include <string.h>
//#include <conio.h>
#define BUFMAX 1000
/*
 * Decide whether one input line passes the filter.
 *
 * Columns are separated by any run of spaces, tabs or newlines. A line is
 * accepted when its first column is exactly "Dublin" and its last column is
 * exactly "1".
 *
 * src: NUL-terminated line; it is only read, never modified.
 *
 * Returns 1 when the line matches, 0 otherwise (including a NULL pointer,
 * an empty string, or a line that contains only whitespace).
 */
int
is_valid(const char *src)
{
    static const char delims[] = " \t\n";
    const char *first = NULL;
    const char *last = NULL;
    size_t first_len = 0;
    size_t last_len = 0;
    const char *p;

    if (src == NULL)
        return 0;

    // walk the line in place, remembering only the first and last columns;
    // this needs no scratch copy and no fixed-size token table, so neither
    // the line length nor the column count can overflow anything
    p = src;
    while (1) {
        p += strspn(p, delims);
        if (*p == '\0')
            break;

        size_t len = strcspn(p, delims);

        if (first == NULL) {
            first = p;
            first_len = len;
        }
        last = p;
        last_len = len;
        p += len;
    }

    // a blank line has no columns at all, so it cannot match
    if (first == NULL)
        return 0;

    // first column must be "Dublin"
    if (first_len != 6 || memcmp(first, "Dublin", 6) != 0)
        return 0;

    // last column must be "1"
    return last_len == 1 && *last == '1';
}
/*
 * Copy the lines of customers.txt that is_valid() accepts into
 * filtered_customers.txt.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if either file cannot be
 * opened or if the output cannot be written completely.
 */
int
main()
{
    FILE *finp;
    FILE *fout;
    const char *filename = "customers.txt";
    const char *filename2 = "filtered_customers.txt";

    // Open customers.txt file for reading
    finp = fopen(filename, "r");
    if (finp == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename);
        exit(EXIT_FAILURE);
    }

    // Open file for writing
    fout = fopen(filename2, "w");
    if (fout == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(finp);
        exit(EXIT_FAILURE);
    }

    // Read the input one line at a time
    char buf[BUFMAX];
    while (fgets(buf, sizeof(buf), finp) != NULL) {
        // output the line if it matches our criteria
        if (is_valid(buf))
            fputs(buf, fout);
    }

    fclose(finp);

    // fclose() flushes buffered output, so a write failure may only surface
    // here; do not claim success unless the data really reached the file
    if (ferror(fout) | (fclose(fout) != 0)) {
        fprintf(stderr, "Cannot write file %s \n", filename2);
        exit(EXIT_FAILURE);
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n",
        filename, filename2);

    return 0;
}
这里有一些想法。请注意,如果最后一列之前的空白字符是制表符而不是空格,这将完全失败,并且它不会验证每行中是否有 4 列。毫无疑问,它还遗漏了许多其他边缘情况,这些就留给读者作为练习:
/* A limited implementation of awk '$1 == "Dublin" && $NF == 1' */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE * xfopen(const char *path, const char *mode);

/*
 * Copy to the output every input line that starts with the column "Dublin"
 * and ends with " 1".
 *
 * argv[1] names the input and argv[2] the output; when an argument is
 * absent, "-" is handed to xfopen() instead.
 *
 * Lines must fit in a 256-byte buffer; a longer line aborts the program.
 * The last column must be preceded by a space (not a tab).
 *
 * Returns 0 on success; exits with EXIT_FAILURE on an over-long line or a
 * write failure.
 */
int
main(int argc, char **argv)
{
    FILE *fptr1 = xfopen( argc > 1 ? argv[1] : "-", "r");
    FILE *fptr2 = xfopen( argc > 2 ? argv[2] : "-", "w");
    char buf[256];

    while( fgets(buf, sizeof buf, fptr1) != NULL ){
        size_t len = strlen(buf);
        /* len can be 0 if the line starts with a NUL byte, so test it
         * before indexing buf[len - 1]. */
        int has_newline = len > 0 && buf[len - 1] == '\n';
        size_t text_len = has_newline ? len - 1 : len;

        /* A missing newline means the line was truncated, unless we hit
         * end of file: the final line may legitimately lack one. */
        if( !has_newline && !feof(fptr1) ){
            fprintf(stderr, "input error: line too long\n");
            exit(EXIT_FAILURE);
        }
        /* "Dublin 1" (8 characters) is the shortest possible match; the
         * length check keeps buf + text_len - 2 inside the buffer. The
         * character after "Dublin" must be a separator so that e.g.
         * "Dubliner" is not accepted. */
        if( text_len >= 8 &&
            strncmp("Dublin", buf, 6) == 0 &&
            (buf[6] == ' ' || buf[6] == '\t') &&
            strncmp(buf + text_len - 2, " 1", 2) == 0 ){
            fwrite(buf, 1, text_len, fptr2);
            fputc('\n', fptr2);
        }
    }
    fclose(fptr1);
    /* fclose() flushes, so this is where a write failure becomes visible. */
    if( ferror(fptr2) | (fclose(fptr2) != 0) ){
        fprintf(stderr, "output error: write failed\n");
        exit(EXIT_FAILURE);
    }
    return 0;
}
/*
 * fopen() wrapper that never returns NULL.
 *
 * path: file to open; the exact string "-" selects a standard stream
 *       instead (stdin when mode starts with 'r', stdout otherwise).
 * mode: fopen() mode string.
 *
 * Returns the open stream. If fopen() fails, prints the reason with
 * perror(path) and exits with EXIT_FAILURE.
 */
FILE *
xfopen(const char *path, const char *mode)
{
    FILE *fp;

    /* Compare against the NUL terminator so that only "-" itself, and not
     * a name that merely begins with '-', maps to a standard stream. */
    if( path[0] == '-' && path[1] == '\0' ){
        fp = *mode == 'r' ? stdin : stdout;
    } else {
        fp = fopen(path, mode);
    }
    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}
我正在尝试将文件 (customers.txt) 中的数据过滤后复制到新文件 (filtered_customers.txt)。现在我的程序只是读取 "customers.txt" 文件中的所有内容并将其原样复制到新文件中。我想修改它,使得只有第一列为 "Dublin" 且最后一列为 1 的条目才会被复制到新文件中。
我正在研究使用 strcmp()
函数将第一列中的值与字符串 "Dublin" 进行比较,并用同样的方法将最后一列中的值与 "1" 进行比较,但我以前从未真正做过任何文件处理,所以完全不知道从哪里开始。非常感谢任何有关如何处理此问题的建议,谢谢。
代码 - 将一个文件中的所有条目复制到另一个文件中
#include <stdio.h>
#include <stdlib.h> // For exit()
#include <conio.h>
/*
 * Copy customers.txt byte-for-byte into filtered_customers.txt.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if either file cannot be
 * opened or if the output cannot be written completely.
 */
int main()
{
    FILE *fptr1;
    FILE *fptr2;
    /* fgetc() returns an int so that EOF can be told apart from every valid
     * byte value; storing it in a char either never matches EOF or matches
     * a legitimate 0xFF byte, depending on the signedness of char. */
    int c;
    /* Backslashes must be doubled inside a C string literal; a single one
     * starts an escape sequence (\U and \u are universal character names,
     * \f is a form feed), so the original paths were malformed. */
    const char *filename = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\customers.txt";
    const char *filename2 = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\filtered_customers.txt";

    // Open customers.txt file for reading
    fptr1 = fopen(filename, "r");
    if (fptr1 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename);
        exit(EXIT_FAILURE);
    }

    // Open file for writing
    fptr2 = fopen(filename2, "w");
    if (fptr2 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(fptr1);
        exit(EXIT_FAILURE);
    }

    // Copy the contents one character at a time
    while ((c = fgetc(fptr1)) != EOF)
    {
        fputc(c, fptr2);
    }

    fclose(fptr1);
    /* fclose() flushes buffered output, so a write failure may only show up
     * here; do not report success unless the flush worked. */
    if (ferror(fptr2) | (fclose(fptr2) != 0))
    {
        fprintf(stderr, "Cannot write file %s \n", filename2);
        exit(EXIT_FAILURE);
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n", filename, filename2);
    return 0;
}
customers.txt
Dublin Dunne 865463389 21 1
Dublin Milford 865438990 22 0
Dublin Mowlds 876765443 24 1
Dublin Wang 873456789 22 1
Dublin Smith 875432234 25 1
Dublin Henry 876654429 22 0
Dublin Gupta 896765443 24 1
Wicklow Monahan 865432245 22 1
Wicklow Brunsdon 865678894 22 1
Wexford Tyson 865434566 22 0
Wexford Browne 865564766 20 1
Dublin Dunne 890065443 27 1
Dublin Connolly 876733999 20 0
Louth Jennings 987645673 60 1
Mayo Wang 876232123 29 1
您只是逐个字符地复制文件。
输入是一系列具有单独列的行。
我们必须根据各列是否匹配来决定接受还是拒绝某一行。
所以,首先,我们需要一次读取并检查一整行。因此,我们应该使用 fgets 而不是 fgetc。
然后,我们需要将行拆分成列。有多种方法可以做到这一点,但最简单的方法是先制作该行的副本,再使用 strtok 拆分该副本。
然后,可以将第一列与 Dublin 进行比较,将最后一列与 1 进行比较。
如果两者都匹配,我们可以用 fputs 输出原始行。
这是为执行此操作而重构的代码:
#include <stdio.h>
#include <stdlib.h> // For exit()
#include <string.h>
//#include <conio.h>
#define BUFMAX 1000
/*
 * Decide whether one input line passes the filter.
 *
 * Columns are separated by any run of spaces, tabs or newlines. A line is
 * accepted when its first column is exactly "Dublin" and its last column is
 * exactly "1".
 *
 * src: NUL-terminated line; it is only read, never modified.
 *
 * Returns 1 when the line matches, 0 otherwise (including a NULL pointer,
 * an empty string, or a line that contains only whitespace).
 */
int
is_valid(const char *src)
{
    static const char delims[] = " \t\n";
    const char *first = NULL;
    const char *last = NULL;
    size_t first_len = 0;
    size_t last_len = 0;
    const char *p;

    if (src == NULL)
        return 0;

    // walk the line in place, remembering only the first and last columns;
    // this needs no scratch copy and no fixed-size token table, so neither
    // the line length nor the column count can overflow anything
    p = src;
    while (1) {
        p += strspn(p, delims);
        if (*p == '\0')
            break;

        size_t len = strcspn(p, delims);

        if (first == NULL) {
            first = p;
            first_len = len;
        }
        last = p;
        last_len = len;
        p += len;
    }

    // a blank line has no columns at all, so it cannot match
    if (first == NULL)
        return 0;

    // first column must be "Dublin"
    if (first_len != 6 || memcmp(first, "Dublin", 6) != 0)
        return 0;

    // last column must be "1"
    return last_len == 1 && *last == '1';
}
/*
 * Copy the lines of customers.txt that is_valid() accepts into
 * filtered_customers.txt.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if either file cannot be
 * opened or if the output cannot be written completely.
 */
int
main()
{
    FILE *finp;
    FILE *fout;
    const char *filename = "customers.txt";
    const char *filename2 = "filtered_customers.txt";

    // Open customers.txt file for reading
    finp = fopen(filename, "r");
    if (finp == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename);
        exit(EXIT_FAILURE);
    }

    // Open file for writing
    fout = fopen(filename2, "w");
    if (fout == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(finp);
        exit(EXIT_FAILURE);
    }

    // Read the input one line at a time
    char buf[BUFMAX];
    while (fgets(buf, sizeof(buf), finp) != NULL) {
        // output the line if it matches our criteria
        if (is_valid(buf))
            fputs(buf, fout);
    }

    fclose(finp);

    // fclose() flushes buffered output, so a write failure may only surface
    // here; do not claim success unless the data really reached the file
    if (ferror(fout) | (fclose(fout) != 0)) {
        fprintf(stderr, "Cannot write file %s \n", filename2);
        exit(EXIT_FAILURE);
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n",
        filename, filename2);

    return 0;
}
这里有一些想法。请注意,如果最后一列之前的空白字符是制表符而不是空格,这将完全失败,并且它不会验证每行中是否有 4 列。毫无疑问,它还遗漏了许多其他边缘情况,这些就留给读者作为练习:
/* A limited implementation of awk '$1 == "Dublin" && $NF == 1' */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE * xfopen(const char *path, const char *mode);

/*
 * Copy to the output every input line that starts with the column "Dublin"
 * and ends with " 1".
 *
 * argv[1] names the input and argv[2] the output; when an argument is
 * absent, "-" is handed to xfopen() instead.
 *
 * Lines must fit in a 256-byte buffer; a longer line aborts the program.
 * The last column must be preceded by a space (not a tab).
 *
 * Returns 0 on success; exits with EXIT_FAILURE on an over-long line or a
 * write failure.
 */
int
main(int argc, char **argv)
{
    FILE *fptr1 = xfopen( argc > 1 ? argv[1] : "-", "r");
    FILE *fptr2 = xfopen( argc > 2 ? argv[2] : "-", "w");
    char buf[256];

    while( fgets(buf, sizeof buf, fptr1) != NULL ){
        size_t len = strlen(buf);
        /* len can be 0 if the line starts with a NUL byte, so test it
         * before indexing buf[len - 1]. */
        int has_newline = len > 0 && buf[len - 1] == '\n';
        size_t text_len = has_newline ? len - 1 : len;

        /* A missing newline means the line was truncated, unless we hit
         * end of file: the final line may legitimately lack one. */
        if( !has_newline && !feof(fptr1) ){
            fprintf(stderr, "input error: line too long\n");
            exit(EXIT_FAILURE);
        }
        /* "Dublin 1" (8 characters) is the shortest possible match; the
         * length check keeps buf + text_len - 2 inside the buffer. The
         * character after "Dublin" must be a separator so that e.g.
         * "Dubliner" is not accepted. */
        if( text_len >= 8 &&
            strncmp("Dublin", buf, 6) == 0 &&
            (buf[6] == ' ' || buf[6] == '\t') &&
            strncmp(buf + text_len - 2, " 1", 2) == 0 ){
            fwrite(buf, 1, text_len, fptr2);
            fputc('\n', fptr2);
        }
    }
    fclose(fptr1);
    /* fclose() flushes, so this is where a write failure becomes visible. */
    if( ferror(fptr2) | (fclose(fptr2) != 0) ){
        fprintf(stderr, "output error: write failed\n");
        exit(EXIT_FAILURE);
    }
    return 0;
}
/*
 * fopen() wrapper that never returns NULL.
 *
 * path: file to open; the exact string "-" selects a standard stream
 *       instead (stdin when mode starts with 'r', stdout otherwise).
 * mode: fopen() mode string.
 *
 * Returns the open stream. If fopen() fails, prints the reason with
 * perror(path) and exits with EXIT_FAILURE.
 */
FILE *
xfopen(const char *path, const char *mode)
{
    FILE *fp;

    /* Compare against the NUL terminator so that only "-" itself, and not
     * a name that merely begins with '-', maps to a standard stream. */
    if( path[0] == '-' && path[1] == '\0' ){
        fp = *mode == 'r' ? stdin : stdout;
    } else {
        fp = fopen(path, mode);
    }
    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}