查找除子元素外所有包含搜索词的文件

Find all files containing search term except sub elements

我正在尝试制作一个 C 程序来显示包含给定搜索词的所有文件和文件夹。搜索词在执行程序时作为参数给出。如果文件夹/文件的名称包含搜索词(不区分大小写),则会将其显示到标准输出。 困难在于我不想输出包含在包含搜索词的文件夹中的文件和子文件夹。这是一个例子:

假设我的搜索词是 docker,这是当前输出:

"/Users/me/.docker"
"/Users/me/.docker/contexts"
"/Users/me/.docker/contexts/meta"
"/Users/me/.docker/config.json"
"/Users/me/.docker/scan"
"/Users/me/.docker/scan/config.json"
"/Users/me/.docker/application-template"
"/Users/me/.docker/application-template/logs"
"/Users/me/.docker/application-template/logs/com.docker.log"
"/Users/me/.docker/daemon.json"
"/Users/me/.docker/run"
"/Users/me/Library/Application Support/Docker Desktop"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage/6965e70b-e33a-4415-b9a8-e19996fe221d"

但这是我要实现的输出:

"/Users/me/.docker"
"/Users/me/Library/Application Support/Docker Desktop"

到目前为止,这是我的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

// Print "<fpath>/<fname>" in double quotes when the directory path fpath
// contains the search term exactly once (case-insensitive).
//
// Only fpath is scanned; fname is used for output and is never matched.
// Occurrences are counted without overlap, because the scan resumes right
// after each hit. An empty term prints nothing.
//
// NOTE: strcasestr() is not ISO C. It is a BSD/GNU extension; with glibc it
// is declared only when _GNU_SOURCE is defined before including <string.h>.
//
// Always returns 0.
static int display_info(const char *fpath, const char * fname, const char * term) {
    size_t what_len = strlen(term);
    int count = 0;

    // The scan never writes through this pointer, so keep it const instead
    // of silently dropping the qualifier of fpath.
    const char *where = fpath;

    if (what_len) {
        while ((where = strcasestr(where, term))) {
            where += what_len;
            count++;
        }

        if (count == 1) {
            printf("\"%s/%s\"\n", fpath, fname);
        }
    }

    return 0;
}


// Walk the tree rooted at basePath depth-first. Every entry except "." and
// ".." is handed to display_info() together with its parent directory and
// the search term, and is then visited recursively.
//
// Entries that cannot be opened as a directory (regular files, unreadable
// directories) end the recursion silently, because opendir() fails on them.
static void listFilesRecursively(char * basePath, const char * searchTerm) {
    char path[1000];
    struct dirent * dp;
    DIR * dir = opendir(basePath);

    // Unable to open directory stream
    if (!dir)
        return;

    while ((dp = readdir(dir)) != NULL) {
        if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
            continue;

        display_info(basePath, dp->d_name, searchTerm);

        // Build "<basePath>/<name>" with a bounded write. A path that does
        // not fit is skipped rather than overflowing the buffer.
        int len = snprintf(path, sizeof path, "%s/%s", basePath, dp->d_name);
        if (len < 0 || (size_t)len >= sizeof path)
            continue;

        listFilesRecursively(path, searchTerm);
    }

    closedir(dir);
}


// Entry point: expects exactly one argument, the search term, and scans the
// directory named by the HOME environment variable.
//
// Returns EXIT_FAILURE when the argument count is wrong or HOME is not set,
// EXIT_SUCCESS otherwise.
int main(int argc, const char * argv[]) {
    if (argc != 2) {
        // Usage errors belong on stderr and need a terminating newline.
        fprintf(stderr, "Please provide one argument\n");
        return EXIT_FAILURE;
    }

    // getenv() returns NULL when the variable is absent; do not pass that on.
    char * home = getenv("HOME");
    if (home == NULL) {
        fprintf(stderr, "HOME is not set\n");
        return EXIT_FAILURE;
    }

    listFilesRecursively(home, argv[1]);
    return EXIT_SUCCESS;
}

非常感谢任何反馈,谢谢!

感谢@KamilCuk 我能够解决我的问题。这是我的最终 listFilesRecursively 函数:

// Walk the tree rooted at basePath and print "<basePath>/<name>" for every
// entry whose name contains searchTerm (case-insensitive). A matching entry
// is printed but not descended into, so nothing below a match is listed.
// Non-matching entries are visited recursively.
//
// Entries that cannot be opened as a directory end the recursion silently.
//
// NOTE: strcasestr() is not ISO C. It is a BSD/GNU extension; with glibc it
// is declared only when _GNU_SOURCE is defined before including <string.h>.
static void listFilesRecursively(char * basePath, const char * searchTerm) {
    char path[1000];
    struct dirent * dp;
    DIR * dir = opendir(basePath);

    // Unable to open directory stream
    if (!dir)
        return;

    while ((dp = readdir(dir)) != NULL) {
        if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
            continue;

        if (strcasestr(dp->d_name, searchTerm)) {
            // Report the match and stop here. Recursing at this point would
            // use a path buffer that is uninitialised or still holds a
            // previously visited sibling.
            printf("%s/%s\n", basePath, dp->d_name);
            continue;
        }

        // Build "<basePath>/<name>" with a bounded write. A path that does
        // not fit is skipped rather than overflowing the buffer.
        int len = snprintf(path, sizeof path, "%s/%s", basePath, dp->d_name);
        if (len < 0 || (size_t)len >= sizeof path)
            continue;

        listFilesRecursively(path, searchTerm);
    }

    closedir(dir);
}
  • 我不确定自己是否理解了 display_info() 的逻辑
  • 在 listFilesRecursively() 中,不能在一次调用和下一次调用之间重复使用 path
  • main() 应该是代码中的第一个函数,也许可以放在单独的文件中

另一种选择

我将添加一个 C 示例,稍微更改一下您的 listFilesRecursively()...

  • 不再返回 void,这样出错时可以 return -1
  • 在循环开始处先检测 "." 和 "..",并直接使用 continue,可以让代码更易读
  • path 在局部用 malloc() 分配,并在递归调用返回后 free()
  • strstr_ign() 是 strstr() 的不区分大小写版本,用于模式搜索

更改后 list_files() 的代码

// Walk the tree rooted at base_path. An entry whose name contains pattern
// (case-insensitive, via strstr_ign()) is reported through display_info()
// and is not descended into. Every other entry is visited recursively.
//
// Returns 0 on success, or -1 when base_path cannot be opened as a directory
// or memory for a child path cannot be allocated.
int list_files(char* pattern, char* base_path)
{
    DIR* dir = opendir(base_path);
    if (!dir) return -1; // Unable to open directory stream

    int status = 0;
    struct dirent* dp;
    while ((dp = readdir(dir)) != NULL)
    {
        if (strcmp(dp->d_name, ".") == 0) continue;
        if (strcmp(dp->d_name, "..") == 0) continue;

        if (strstr_ign(dp->d_name, pattern) != NULL)
        {
            display_info(base_path, dp->d_name);
            continue;
        }

        // base_path + '/' + name + NUL
        size_t size = strlen(base_path) + 1 + strlen(dp->d_name) + 1;
        char* path = malloc(size);
        if (path == NULL)
        {
            status = -1;
            break;
        }
        snprintf(path, size, "%s/%s", base_path, dp->d_name);

        // The result is ignored on purpose: -1 is the normal outcome when
        // the entry is a plain file rather than a directory.
        list_files(pattern, path);
        free(path);
    }
    closedir(dir);
    return status;
}

代码 strstr_ign()

我不喜欢 strstr() 的参数顺序,但这里保持不变,只是为了让两者一致。这样一来,可以把 strstr_ign() 当作 strstr() 的直接替代品,而无需改变参数的顺序。我认为 needle 应该排在第一位 :) 用日常语言来说:"在干草堆里找一根针"要比"搜遍干草堆去找针"常见得多。但 Ken 和 Dennis 以这种方式编写 strstr() 自有他们的理由……

//
// strstr() ignoring case
//
// Returns a pointer to the first position in haystack where needle occurs,
// comparing byte by byte through tolower(), or NULL when there is no match.
// A NULL haystack or needle yields NULL. As with strstr(), an empty needle
// matches at the start of haystack.
//
char*       strstr_ign(const char* haystack, const char* needle)
{
    if (needle == NULL) return NULL;
    if (haystack == NULL) return NULL;

    size_t needle_len = strlen(needle);
    if (needle_len == 0) return (char*)haystack;

    size_t hay_len = strlen(haystack);
    // Checked first so the unsigned subtraction below cannot wrap around.
    if (needle_len > hay_len) return NULL;

    size_t limit = hay_len - needle_len;
    for (size_t x = 0; x <= limit; x += 1)
    {   // search for needle at position 'x' of 'haystack'
        size_t y = 0;
        // tolower() needs a value representable as unsigned char (or EOF),
        // so cast before the call to stay defined for bytes >= 0x80.
        while (y < needle_len &&
               tolower((unsigned char)haystack[x + y]) ==
               tolower((unsigned char)needle[y]))
        {
            y += 1;
        }
        if (y == needle_len) return (char*)(haystack + x);
    }
    return NULL;
}

一个新的display_info()

已更改为:对匹配搜索模式(不区分大小写)的文件夹显示最后访问时间,对匹配的常规文件显示文件大小。以下是文件和文件夹的输出示例。请注意行首类似 ls -l 输出的 '-' 和 'd' 标记。

    - "./hms.c" [size: 1546]
    d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
// Print one line describing the entry "<base>/<file>", based on stat():
//   directory    -> "\td \"<path>\"\t[last access: <time>]"
//   regular file -> "\t- \"<path>\"\t[size: <bytes>]"
//   anything else-> "is somthing else"
//
// Returns 0 when the entry was examined, or -1 when the path buffer cannot
// be allocated or stat() fails. Both failures are reported through perror().
int display_info(const char* base, const char* file)
{
    struct  stat Stats;
    char    atime[40];

    // base + '/' + file + NUL
    size_t  size = strlen(base) + 1 + strlen(file) + 1;
    char*   path = malloc(size);
    if (path == NULL)
    {
        perror("Inside display_info()");
        return -1;
    }
    snprintf(path, size, "%s/%s", base, file);

    if ( stat(path, &Stats) < 0)
    {
        perror("Inside display_info()");
        free(path);
        return -1;
    }

    if ( S_ISDIR(Stats.st_mode) )
    {
        // localtime() may return NULL and strftime() may return 0; in both
        // cases print an empty timestamp instead of indeterminate bytes.
        const struct tm* when = localtime(&Stats.st_atime);
        if ( when == NULL ||
             strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", when ) == 0 )
        {
            atime[0] = '\0';
        }
        printf("\td \"%s\"\t[last access: %s]\n", path, atime );
    }
    else if ( S_ISREG(Stats.st_mode) )
    {
        // st_size is off_t, which is not guaranteed to be long; widen it
        // explicitly so the conversion specifier matches the argument.
        printf("\t- \"%s\"\t[size: %lld]\n", path, (long long)Stats.st_size );
    }
    else
    {
        printf("is somthing else\n");
    }

    free(path);
    return 0;
}

示例输出

Search pattern is "hms" (case is ignored) 
    - "./hms"   [size: 16848]
    - "./hms-soma.c"    [size: 1379]
    - "./hms.c" [size: 1546]
    d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
    d "./sub/2/xyzHMS"  [last access: Sat Apr 24 12:21:11 2021]
    d "./sub/hMs"   [last access: Sat Apr 24 12:21:11 2021]

此测试的 C 代码

最小测试:)

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>


// Forward declarations of the helpers that are defined after main().
int         display_info(const char* base, const char* file);
int         list_files(char* pattern, char* base_path);
char*       strstr_ign(const char* haystack, const char* needle);

 // Entry point: lower-cases the pattern given as the first argument, prints
 // it, and searches the tree below the current directory with list_files().
 //
 // Returns EXIT_FAILURE when no pattern is given, when the pattern does not
 // fit in the local buffer, or when list_files() reports an error;
 // EXIT_SUCCESS otherwise.
 int main(int argc, const char * argv[])
 {
     char    search_term[80];

     if (argc < 2)
     {
         printf("Please provide pattern to search for.\n");
         return EXIT_FAILURE;
     }

     size_t term_len = strlen(argv[1]);
     if ( term_len > (sizeof(search_term)-1) )
     {
         // size_t is unsigned, so the matching conversion is %zu.
         printf("Size of substring (%zu) must not be greater than %zu\n",
         term_len, sizeof(search_term)-1 );
         return EXIT_FAILURE;
     }

     // '<=' on purpose: the terminating NUL is copied as well.
     for ( size_t i = 0; i <= term_len; i += 1 )
     {
         search_term[i] = (char)tolower((unsigned char)argv[1][i]);
     }

     printf("Search pattern is \"%s\" (case is ignored) \n", search_term );
     if ( list_files(search_term,".") != 0 ) return EXIT_FAILURE;
     return EXIT_SUCCESS;
 }


// Print one line describing the entry "<base>/<file>", based on stat():
//   directory    -> "\td \"<path>\"\t[last access: <time>]"
//   regular file -> "\t- \"<path>\"\t[size: <bytes>]"
//   anything else-> "is somthing else"
//
// Returns 0 when the entry was examined, or -1 when the path buffer cannot
// be allocated or stat() fails. Both failures are reported through perror().
int display_info(const char* base, const char* file)
{
    struct  stat Stats;
    char    atime[40];

    // base + '/' + file + NUL
    size_t  size = strlen(base) + 1 + strlen(file) + 1;
    char*   path = malloc(size);
    if (path == NULL)
    {
        perror("Inside display_info()");
        return -1;
    }
    snprintf(path, size, "%s/%s", base, file);

    if ( stat(path, &Stats) < 0)
    {
        perror("Inside display_info()");
        free(path);
        return -1;
    }

    if ( S_ISDIR(Stats.st_mode) )
    {
        // localtime() may return NULL and strftime() may return 0; in both
        // cases print an empty timestamp instead of indeterminate bytes.
        const struct tm* when = localtime(&Stats.st_atime);
        if ( when == NULL ||
             strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", when ) == 0 )
        {
            atime[0] = '\0';
        }
        printf("\td \"%s\"\t[last access: %s]\n", path, atime );
    }
    else if ( S_ISREG(Stats.st_mode) )
    {
        // st_size is off_t, which is not guaranteed to be long; widen it
        // explicitly so the conversion specifier matches the argument.
        printf("\t- \"%s\"\t[size: %lld]\n", path, (long long)Stats.st_size );
    }
    else
    {
        printf("is somthing else\n");
    }

    free(path);
    return 0;
}


// Walk the tree rooted at base_path. An entry whose name contains pattern
// (case-insensitive, via strstr_ign()) is reported through display_info()
// and is not descended into. Every other entry is visited recursively.
//
// Returns 0 on success, or -1 when base_path cannot be opened as a directory
// or memory for a child path cannot be allocated.
int list_files(char* pattern, char* base_path)
{
    DIR* dir = opendir(base_path);
    if (!dir) return -1; // Unable to open directory stream

    int status = 0;
    struct dirent* dp;
    while ((dp = readdir(dir)) != NULL)
    {
        if (strcmp(dp->d_name, ".") == 0) continue;
        if (strcmp(dp->d_name, "..") == 0) continue;

        if (strstr_ign(dp->d_name, pattern) != NULL)
        {
            display_info(base_path, dp->d_name);
            continue;
        }

        // base_path + '/' + name + NUL
        size_t size = strlen(base_path) + 1 + strlen(dp->d_name) + 1;
        char* path = malloc(size);
        if (path == NULL)
        {
            status = -1;
            break;
        }
        snprintf(path, size, "%s/%s", base_path, dp->d_name);

        // The result is ignored on purpose: -1 is the normal outcome when
        // the entry is a plain file rather than a directory.
        list_files(pattern, path);
        free(path);
    }
    closedir(dir);
    return status;
}

//
// strstr() ignoring case
//
// Returns a pointer to the first position in haystack where needle occurs,
// comparing byte by byte through tolower(), or NULL when there is no match.
// A NULL haystack or needle yields NULL. As with strstr(), an empty needle
// matches at the start of haystack.
//
char*       strstr_ign(const char* haystack, const char* needle)
{
    if (needle == NULL) return NULL;
    if (haystack == NULL) return NULL;

    size_t needle_len = strlen(needle);
    if (needle_len == 0) return (char*)haystack;

    size_t hay_len = strlen(haystack);
    // Checked first so the unsigned subtraction below cannot wrap around.
    if (needle_len > hay_len) return NULL;

    size_t limit = hay_len - needle_len;
    for (size_t x = 0; x <= limit; x += 1)
    {   // search for needle at position 'x' of 'haystack'
        size_t y = 0;
        // tolower() needs a value representable as unsigned char (or EOF),
        // so cast before the call to stay defined for bytes >= 0x80.
        while (y < needle_len &&
               tolower((unsigned char)haystack[x + y]) ==
               tolower((unsigned char)needle[y]))
        {
            y += 1;
        }
        if (y == needle_len) return (char*)(haystack + x);
    }
    return NULL;
}