从一个文件中过滤数据并将该数据写入新文件

Filtering data from one file and writing that data to a new file

我正在尝试在将文件 (customers.txt) 复制到新文件 (filtered_customers.txt) 的同时过滤数据。现在我的程序只是读取 "customers.txt" 文件中的所有内容并将其原样复制到新文件中。我想修改它,使得只有第一列为 "Dublin" 并且 (AND) 最后一列为 1 的条目才会被复制到新文件中。

我正在研究使用 strcmp() 函数将第一列中的值与字符串“Dublin”进行比较,并用同样的方法将最后一列中的值与“1”进行比较,但我以前从未真正做过任何文件处理,所以完全不知道从哪里开始。非常感谢任何有关如何处理此问题的建议,谢谢。

代码 - 将一个文件中的所有条目复制到另一个文件中

#include <stdio.h> 
#include <stdlib.h> // For exit()
#include <conio.h> 

/*
 * Copy customers.txt into filtered_customers.txt one character at a time.
 * No filtering is done here: every byte of the input is written to the
 * output unchanged.
 *
 * Returns 0 on success and EXIT_FAILURE if either file cannot be opened.
 */
int main(void)
{
    /* Backslashes must be doubled inside a C string literal; a bare "\U",
       "\u" or "\f" would be parsed as an escape sequence. */
    const char *filename = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\customers.txt";
    const char *filename2 = "C:\\Users\\uzair\\Desktop\\Comp-Programming 2\\filtered_customers.txt";

    /* Open customers.txt file for reading */
    FILE *fptr1 = fopen(filename, "r");
    if (fptr1 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename);
        return EXIT_FAILURE;
    }

    /* Open file for writing */
    FILE *fptr2 = fopen(filename2, "w");
    if (fptr2 == NULL)
    {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(fptr1);
        return EXIT_FAILURE;
    }

    /* fgetc returns an int so that EOF can be told apart from every valid
       byte value; storing the result in a char loses that distinction. */
    int c;
    while ((c = fgetc(fptr1)) != EOF)
    {
        fputc(c, fptr2);
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n", filename, filename2);

    fclose(fptr1);
    fclose(fptr2);
    return 0;
}

customers.txt

Dublin  Dunne       865463389   21  1
Dublin  Milford     865438990   22  0
Dublin  Mowlds      876765443   24  1
Dublin  Wang        873456789   22  1
Dublin  Smith       875432234   25  1
Dublin  Henry       876654429   22  0
Dublin  Gupta       896765443   24  1
Wicklow Monahan     865432245   22  1
Wicklow Brunsdon    865678894   22  1
Wexford Tyson       865434566   22  0
Wexford Browne      865564766   20  1
Dublin  Dunne       890065443   27  1
Dublin  Connolly    876733999   20  0
Louth   Jennings    987645673   60  1
Mayo    Wang        876232123   29  1

只是逐个字符地复制文件。

输入是一系列具有单独列的行。

我们必须根据各列是否匹配来决定接受 (accept) 还是拒绝 (reject) 某一行。

所以,首先,我们要一次读取/检查 (read/examine) 一整行。因此,我们应该使用 fgets 而不是 fgetc。

然后,我们需要将行拆分成列。有多种方法可以做到这一点,但最简单的方法是先制作该行的副本 (copy),再使用 strtok 拆分该副本。

然后,可以将第一列与 Dublin 进行比较,将最后一列与 1 进行比较。

如果两者都匹配,我们就可以用 fputs 输出[原始]行。

这是为执行此操作而重构的代码:

#include <stdio.h>
#include <stdlib.h>                     // For exit()
#include <string.h>
//#include <conio.h>

#define BUFMAX      1000

/* Return nonzero when the len-byte token starting at tok spells exactly word. */
static int
token_equals(const char *tok, size_t len, const char *word)
{
    return len == strlen(word) && memcmp(tok, word, len) == 0;
}

/*
 * Decide whether one input line should be kept.
 *
 * The line is treated as columns separated by runs of spaces, tabs and
 * newlines. It is accepted when its first column is exactly "Dublin" and
 * its last column is exactly "1".
 *
 * src: NUL-terminated line (a trailing newline is allowed); not modified.
 *
 * Returns 1 when the line matches and 0 otherwise. A NULL pointer and a
 * line with no columns at all (empty or whitespace only) return 0.
 */
int
is_valid(const char *src)
{
    static const char delims[] = " \t\n";
    const char *first = NULL;
    const char *last = NULL;
    size_t first_len = 0;
    size_t last_len = 0;

    if (src == NULL)
        return 0;

    // Walk the columns in place. Only the first and the last one matter, so
    // nothing is copied and neither the line length nor the column count is
    // limited by a fixed-size buffer.
    const char *cp = src;
    while (1) {
        cp += strspn(cp, delims);           // skip the separators
        if (*cp == '\0')
            break;

        size_t len = strcspn(cp, delims);   // length of this column
        if (first == NULL) {
            first = cp;
            first_len = len;
        }
        last = cp;
        last_len = len;
        cp += len;
    }

    // no columns at all: nothing to compare
    if (first == NULL)
        return 0;

    // first column must be "Dublin"
    if (! token_equals(first, first_len, "Dublin"))
        return 0;

    // last column must be "1"
    return token_equals(last, last_len, "1");
}

/*
 * Copy the lines of customers.txt that is_valid() accepts into
 * filtered_customers.txt, leaving each copied line unchanged.
 *
 * Returns 0 on success and EXIT_FAILURE when a file cannot be opened or an
 * I/O error is detected while copying.
 */
int
main(void)
{
    const char *filename = "customers.txt";
    const char *filename2 = "filtered_customers.txt";

    // Open customers.txt file for reading
    FILE *finp = fopen(filename, "r");
    if (finp == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename);
        return EXIT_FAILURE;
    }

    // Open file for writing
    FILE *fout = fopen(filename2, "w");
    if (fout == NULL) {
        fprintf(stderr, "Cannot open file %s \n", filename2);
        fclose(finp);
        return EXIT_FAILURE;
    }

    // Read the input one line at a time. A line longer than BUFMAX - 1
    // characters is returned by fgets in several pieces, and each piece is
    // then judged separately.
    char buf[BUFMAX];
    while (fgets(buf, sizeof(buf), finp) != NULL) {
        // output the line if it matches our criteria
        if (is_valid(buf))
            fputs(buf, fout);
    }

    // fgets returns NULL both at end of file and on a read error, so check
    // the error indicators before reporting success.
    int failed = ferror(finp) || ferror(fout);

    fclose(finp);
    // Buffered output is flushed by fclose; a failure here means the
    // filtered file is incomplete.
    if (fclose(fout) != 0)
        failed = 1;

    if (failed) {
        fprintf(stderr, "I/O error while copying %s to %s\n",
            filename, filename2);
        return EXIT_FAILURE;
    }

    printf("\nContents Copied From ---\n%s\nTo\n%s\nSuccessfully :)\n",
        filename, filename2);

    return 0;
}

这里有一些想法。请注意,如果最后一列之前的空白字符是制表符而不是空格,这段代码将完全失效,并且它不会验证每行中是否有 4 列。毫无疑问,它还遗漏了许多其他边缘情况,这些就留给读者作为练习:

/* A limited implementation of awk '$1 == "Dublin" && $NF == 1' */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE * xfopen(const char *path, const char *mode);

/*
 * Copy to the output every input line whose first column is "Dublin" and
 * whose last column is "1".
 *
 * argv[1] is the input path and argv[2] the output path; each is passed to
 * xfopen() and defaults to "-" when the argument is missing.
 *
 * Known limitations: the last column must be preceded by a single space
 * (not a tab) and must not be followed by trailing whitespace, and the
 * number of columns is not checked.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on an over-long line or an
 * I/O error.
 */
int
main(int argc, char **argv)
{
        FILE *fptr1 = xfopen( argc > 1 ? argv[1] : "-", "r");
        FILE *fptr2 = xfopen( argc > 2 ? argv[2] : "-", "w");
        char buf[256];

        while( fgets(buf, sizeof buf, fptr1) != NULL ){
                size_t len = strlen(buf);
                size_t body = len;      /* line length without the newline */

                if( len > 0 && buf[len - 1] == '\n' ){
                        body--;
                } else if( !feof(fptr1) ){
                        /* No newline and not at end of file: the line did
                         * not fit in buf. The len > 0 test also keeps a
                         * line starting with a NUL byte from indexing
                         * buf[-1]. A final line that lacks a newline sets
                         * the end-of-file indicator and is accepted. */
                        fprintf(stderr, "input error: line too long\n");
                        exit(EXIT_FAILURE);
                }

                /* The shortest matching line is "Dublin 1" (8 characters).
                 * Checking buf[6] rejects longer first columns such as
                 * "Dublinia". */
                if( body >= 8 &&
                        strncmp("Dublin", buf, 6) == 0 &&
                        ( buf[6] == ' ' || buf[6] == '\t' ) &&
                        buf[body - 2] == ' ' && buf[body - 1] == '1' ){
                        if( fwrite(buf, 1, len, fptr2) != len ){
                                perror("write");
                                exit(EXIT_FAILURE);
                        }
                }
        }
        if( ferror(fptr1) ){
                perror("read");
                exit(EXIT_FAILURE);
        }
        fclose(fptr1);
        /* fclose flushes buffered output, so a failure here is a lost write. */
        if( fclose(fptr2) != 0 ){
                perror("close");
                exit(EXIT_FAILURE);
        }
        return 0;
}

/*
 * fopen() wrapper that never returns NULL.
 *
 * path: file to open. The exact string "-" selects a standard stream
 *       instead of a file: stdin when mode starts with 'r', stdout
 *       otherwise.
 * mode: fopen() mode string.
 *
 * Returns the opened stream. If fopen() fails, prints the reason with
 * perror() and terminates the program with EXIT_FAILURE.
 */
FILE *
xfopen(const char *path, const char *mode)
{
        /* Compare against the NUL terminator so that only "-" itself, and
         * not a name that merely begins with '-', maps to a standard stream. */
        int use_std = ( path[0] == '-' && path[1] == '\0' );
        FILE *fp = use_std ? ( *mode == 'r' ? stdin : stdout )
                           : fopen(path, mode);
        if( fp == NULL ){
                perror(path);
                exit(EXIT_FAILURE);
        }
        return fp;
}