C语言中如何使用strstr()函数比较字符数组中的内容？

Question

这个小程序的目的是比较两个独立文件（file1.csv和file2.csv）中的字符串，并将其结果提供给file3.csv。如果file2.csv中的字符串在file1.csv中找到，则程序将file1.csv中的字符串复制到file3.csv。

file1.csv的内容（6列）：

10000001    text1   text1   text1   text1   text1
10000002    text2   text2   text2   text2   text2
10000003    text3   text3   text3   text3   text3
10000004    text4   text4   text4   text4   text4
10000005    text5   text5   text5   text5   text5
10000006    text6   text6   text6   text6   text6
10000007    text7   text7   text7   text7   text7
10000008    text8   text8   text8   text8   text8
10000009    text9   text9   text9   text9   text9
10000010    text10  text10  text10  text10  text10
10000011    text11  text11  text11  text11  text11
10000012    text12  text12  text12  text12  text12
10000013    text13  text13  text13  text13  text13
10000014    text14  text14  text14  text14  text14
10000015    text15  text15  text15  text15  text15
10000016    text16  text16  text16  text16  text16
10000017    text17  text17  text17  text17  text17
10000018    text18  text18  text18  text18  text18
10000019    text19  text19  text19  text19  text19
10000020    text20  text20  text20  text20  text20
10000021    text21  text21  text21  text21  text21
10000022    text22  text22  text22  text22  text22
10000023    text23  text23  text23  text23  text23
10000024    text24  text24  text24  text24  text24
10000025    text25  text25  text25  text25  text25

file2.csv的内容（只有一栏）：

预期结果将是：

10000001    text1   text1   text1   text1   text1
10000003    text3   text3   text3   text3   text3
10000004    text4   text4   text4   text4   text4
10000006    text6   text6   text6   text6   text6
10000007    text7   text7   text7   text7   text7
10000008    text8   text8   text8   text8   text8
10000009    text9   text9   text9   text9   text9
10000011    text11  text11  text11  text11  text11
10000012    text12  text12  text12  text12  text12
10000015    text15  text15  text15  text15  text15
10000025    text25  text25  text25  text25  text25

我使用了strstr()函数来比较字符串但是没有用，代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Question code, kept exactly as posted (it does not produce the expected
 * output). Intent: write to file3.csv every line of file1.csv that contains
 * a line of file2.csv as a substring.
 */
int main()
{
    /* Storage for up to 150 lines of at most 79 characters each; every row
     * starts out as an empty string because of the {0} initialiser. */
    char line[150][80] = {0};  
    char line2[150][80] = {0};  

    /* i: next free row in line[]; b: next free row in line2[]. */
    int i = 0, b = 0;

    /* NOTE: none of the three fopen() results is checked for NULL. */
    FILE *file1 = fopen("file1.csv", "r");
    FILE *file2 = fopen("file2.csv", "r");
    FILE *file3 = fopen("file3.csv", "w");

    /* Read one line of file1.csv per iteration into line[i]. */
    while (fscanf(file1, "%79[^\n]\n", line[i]) != EOF) {
        /* After this increment, i indexes the next, still-empty row rather
         * than the line that was just read. */
        i++;
        /* Reads all remaining lines of file2.csv. The stream is never
         * rewound, so after the first outer iteration this loop hits EOF
         * immediately. b ends up one past the last row that was filled. */
        while (fscanf(file2, "%79[^\n]\n", line2[b]) != EOF) {
            b++;
        }
        /* line[i] and line2[b] are both rows that have not been filled, i.e.
         * empty strings. strstr() with an empty needle returns a non-NULL
         * pointer, so the test is always true and the empty line[i] is
         * written: this is why the comparison appears not to work. */
        if (strstr(line[i],line2[b]))
        fprintf(file3, "%s\n", line[i]);
     }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

提前致谢！

Answer 1

编辑：正如 Peter Miehle 所说，下面的代码只有在满足以下条件时才有效：file2 中的每一行在 file1 中都有且仅有一个匹配项，并且两个文件都已排序。

如果不想倒回（rewind）file2.csv，就需要在主循环中读取 file2.csv，在子循环中读取 file1.csv。子字符串的搜索必须放在子循环中，每读取一行新内容就执行一次。下面是一段看起来可行的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Write to file3.csv every line of file1.csv that contains a line of
 * file2.csv as a substring, reading each input file exactly once.
 *
 * For each line of file2.csv, file1.csv is scanned forward from its current
 * position until a line containing it is found. Because neither file is ever
 * rewound, this only finds every match when both files are sorted the same
 * way and each file2.csv line matches exactly one file1.csv line.
 *
 * Returns 0 on success, 1 if any of the three files cannot be opened.
 */
int main()
{
    /* A single buffer per file is enough: a line is never needed again once
     * the scan has moved past it, so no 150-row arrays (and no risk of
     * indexing past them on long files). */
    char line[80] = "";
    char line2[80] = "";
    int status = 1;

    FILE *file1 = fopen("file1.csv", "r");
    FILE *file2 = fopen("file2.csv", "r");
    FILE *file3 = fopen("file3.csv", "w");

    if (file1 == NULL || file2 == NULL || file3 == NULL) {
        fprintf(stderr, "Failed to open file1.csv, file2.csv or file3.csv\n");
        goto cleanup;
    }

    /* fscanf() returns 1 only when a line was actually stored. Testing
     * against EOF instead would loop forever on a return value of 0, which
     * happens when the very first character of a file is a newline. */
    while (fscanf(file2, "%79[^\n]\n", line2) == 1) {
        while (fscanf(file1, "%79[^\n]\n", line) == 1) {
            if (strstr(line, line2)) {
                printf("\nFound: %s", line2);
                fprintf(file3, "%s\n", line);
                break; /* continue with the next file2.csv line */
            }
        }
    }
    status = 0;

cleanup:
    /* fclose(NULL) is undefined, so close only what was opened. */
    if (file1 != NULL) {
        fclose(file1);
    }
    if (file2 != NULL) {
        fclose(file2);
    }
    if (file3 != NULL) {
        fclose(file3);
    }
    return status;
}

Answer 2

正如 unwind 和其他评论者正确指出的那样，在每次读取 file1.csv 的迭代中都重新读取 file2.csv 是没有意义的。只需读取一次，然后循环遍历 line2[] 数组即可。

其次，您可能会逐行处理 file1.csv 并在处理完后立即忘记当前行的内容，因此也不需要将 line[][] 设为数组。

这是您修改后的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return 1 from the enclosing function when `file` is NULL, after printing
 * which file (`name`) could not be opened. Only usable inside a function
 * that returns int.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement and stays correct inside an unbraced if/else;
 * the arguments are parenthesised so any expression can be passed.
 */
#define CHECK_FILE_OPEN(file, name) \
    do { \
        if ((file) == NULL) { \
            printf("Failed to open %s\n", (name)); \
            return 1; \
        } \
    } while (0)

/*
 * Write to file3.csv every line of file1.csv that contains at least one
 * line of file2.csv as a substring.
 *
 * file2.csv is loaded into memory once (at most 150 filter lines of up to
 * 79 characters each; anything beyond that is ignored), then file1.csv is
 * streamed line by line and each line is tested against the filters.
 *
 * Returns 0 on success; CHECK_FILE_OPEN returns 1 if a file fails to open.
 */
int main()
{
    char line[80] = "";
    char line2[150][80] = {0};
    const int maxFilters = (int)(sizeof line2 / sizeof line2[0]);

    int b = 0;
    int filterCount = 0;

    FILE *file1 = fopen("file1.csv", "r");
    CHECK_FILE_OPEN(file1, "file1.csv");
    FILE *file2 = fopen("file2.csv", "r");
    CHECK_FILE_OPEN(file2, "file2.csv");
    FILE *file3 = fopen("file3.csv", "w");
    CHECK_FILE_OPEN(file3, "file3.csv");

    /* Stop at the capacity of line2[] instead of writing past its end, and
     * require a return value of 1: fscanf() returns 0 (not EOF) when no
     * characters matched, which would otherwise never terminate the loop. */
    while (filterCount < maxFilters &&
           fscanf(file2, "%79[^\n]\n", line2[filterCount]) == 1) {
        filterCount++;
    }

    while (fscanf(file1, "%79[^\n]\n", line) == 1) {
        for (b = 0; b < filterCount; b++) {
            if (strstr(line, line2[b])) {
                fprintf(file3, "%s\n", line);
                break; /* one match is enough; write the line only once */
            }
        }
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

但是，您的输入数据似乎足够具体，可以做出一些优化假设。特别是，(a) 所有文件都按第一列值排序，并且 (b) 第二个只是第一个的缩减副本（根据行索引值）。

在这种情况下，您可能希望实现单趟（一次性）处理：逐行扫描两个文件并比较索引值。每次迭代都前进到第一个文件的下一行；而对于第二个文件，只有当它的当前行已在第一个文件中找到时，才移动到下一行。

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return 1 from the enclosing function when `file` is NULL, after printing
 * which file (`name`) could not be opened. Only usable inside a function
 * that returns int.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement and stays correct inside an unbraced if/else;
 * the arguments are parenthesised so any expression can be passed.
 */
#define CHECK_FILE_OPEN(file, name) \
    do { \
        if ((file) == NULL) { \
            printf("Failed to open %s\n", (name)); \
            return 1; \
        } \
    } while (0)

/*
 * Single-pass filter: write to file3.csv every line of file1.csv that
 * contains the current line of file2.csv as a substring.
 *
 * Both inputs are walked in lock-step. file1.csv advances on every
 * iteration; file2.csv advances only after its current line has been found.
 * This relies on both files being sorted the same way and on every
 * file2.csv line occurring in file1.csv.
 *
 * Returns 0 on success; CHECK_FILE_OPEN returns 1 if a file fails to open.
 */
int main()
{
    char line1[150] = "";
    char line2[150] = "";

    FILE *file1 = fopen("file1.csv", "r");
    CHECK_FILE_OPEN(file1, "file1.csv");
    FILE *file2 = fopen("file2.csv", "r");
    CHECK_FILE_OPEN(file2, "file2.csv");
    FILE *file3 = fopen("file3.csv", "w");
    CHECK_FILE_OPEN(file3, "file3.csv");

    /* A stream is treated as exhausted as soon as fscanf() fails to store a
     * line (any result other than 1). Comparing with EOF alone would miss a
     * return value of 0 and keep looping on the same unread input. */
    bool eof1 = fscanf(file1, "%79[^\n]\n", line1) != 1;
    bool eof2 = fscanf(file2, "%79[^\n]\n", line2) != 1;

    while (!eof1 && !eof2) {
        if (strstr(line1, line2)) {
            fprintf(file3, "%s\n", line1);
            /* Current filter satisfied: move on to the next one. */
            eof2 = fscanf(file2, "%79[^\n]\n", line2) != 1;
        }
        eof1 = fscanf(file1, "%79[^\n]\n", line1) != 1;
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

Answer 3

非常感谢所有花时间和精力回答和评论我的问题的人。根据您的回答和建议，我刚刚弄明白了，这是代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Count the lines of the text file named `fn`.
 *
 * Returns the number of successful fgets() reads, or 0 if the file cannot
 * be opened. A line longer than 1023 characters is read in several pieces
 * and therefore counted more than once.
 */
int numberOfLines(char *fn)
{
    FILE *file = fopen(fn, "r");
    char line[1024] = "";
    int no = 0;

    if (!file) {
        return 0;
    }
    /* Count only reads that succeed. Looping on !feof() counts one extra
     * line for files ending in a newline, because the end-of-file flag is
     * set only after a read has already failed. */
    while (fgets(line, sizeof(line), file) != NULL) {
        no++;
    }
    fclose(file);
    return no;
}


/*
 * Write to file3.csv every line of file1.csv that contains at least one
 * line of file2.csv as a substring.
 *
 * file2.csv is loaded once (at most 150 lines of up to 79 characters),
 * then file1.csv is processed line by line.
 *
 * Returns 0 on success, 1 if any of the three files cannot be opened.
 */
int main()
{
    char line[80] = "";
    char line2[150][80] = {0};
    const int maxFilters = (int)(sizeof line2 / sizeof line2[0]);

    int i, j;
    int filterCount = 0;
    int status = 1;

    char fileOne[100] = "file1.csv";
    char fileTwo[100] = "file2.csv";
    int nolFileone = numberOfLines(fileOne);
    int nolFiletwo = numberOfLines(fileTwo);

    FILE *file1 = fopen(fileOne, "r");
    FILE *file2 = fopen(fileTwo, "r");
    FILE *file3 = fopen("file3.csv", "w");

    if (file1 == NULL || file2 == NULL || file3 == NULL) {
        fprintf(stderr, "Failed to open file1.csv, file2.csv or file3.csv\n");
        goto cleanup;
    }

    /* Never load more filter lines than line2[] can hold. */
    if (nolFiletwo > maxFilters) {
        nolFiletwo = maxFilters;
    }

    /* Read file2.csv once, up front. Reading it inside the file1.csv loop
     * without rewinding only yields data on the first pass. */
    for (j = 0; j < nolFiletwo; j++) {
        /* fscanf() skips blank lines that fgets() counted, so it may run
         * out of input before j reaches the counted total. */
        if (fscanf(file2, "%79[^\n]\n", line2[filterCount]) != 1) {
            break;
        }
        filterCount++;
    }

    for (i = 0; i < nolFileone; i++) {
        /* Stop on a failed read instead of searching a stale buffer. */
        if (fscanf(file1, "%79[^\n]\n", line) != 1) {
            break;
        }
        for (j = 0; j < filterCount; j++) {
            if (strstr(line, line2[j])) {
                fprintf(file3, "%s\n", line);
                break; /* write each matching line only once */
            }
        }
    }
    status = 0;

cleanup:
    /* fclose(NULL) is undefined, so close only what was opened. */
    if (file1 != NULL) {
        fclose(file1);
    }
    if (file2 != NULL) {
        fclose(file2);
    }
    if (file3 != NULL) {
        fclose(file3);
    }
    return status;
}

C语言中如何使用strstr()函数比较字符数组中的内容？

How to use strstr() function to compare content in character arrays in C?

c

strstr