如何在 C 中浏览任意长度的字符串数组?

How can I navigate through an array of strings of any length in C?

了解在 C 中处理直接指针

这是一个适用于固定项目数和固定行长的字符串数组的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXNAMELEN 100
#define MAXLINELEN 100
#define MAXITEMS 1000

/*
 * qsort() comparator for the rows of a char[MAXITEMS][MAXLINELEN] table.
 * Each element is itself a NUL-terminated string, so the element address
 * handed in by qsort() is the address of the string to compare.
 */
static int compare_rows(const void *lhs, const void *rhs) {
    return strcmp((const char *)lhs, (const char *)rhs);
}

/*
 * Prompts for a source file, reads at most MAXITEMS lines of at most
 * MAXLINELEN-1 characters each into a fixed-size table, sorts them with
 * strcmp() ordering and writes them to a destination file that is also
 * prompted for.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if a filename cannot be
 * read or a file cannot be opened or written.
 */
int main(int argc, char ** argv) {

    FILE *infile, *outfile;
    char name[MAXNAMELEN];
    char line[MAXLINELEN];
    char lines[MAXITEMS][MAXLINELEN];
    int i, items = 0;

    (void)argc;
    (void)argv;

    printf("Enter a source filename: ");
    if (!fgets(name, sizeof(name), stdin)) {
        fprintf(stderr, "could not read source filename\n");
        return EXIT_FAILURE;
    }
    // strcspn() is safe even when the input has no trailing newline
    // (or is empty), unlike indexing with strlen(name)-1.
    name[strcspn(name, "\n")] = '\0'; // strip newline
    infile = fopen(name, "r");
    if (!infile) {
        perror(name);
        return EXIT_FAILURE;
    }
    // Stop at MAXITEMS so the fixed-size table cannot be overrun.
    while (items < MAXITEMS && fgets(line, sizeof(line), infile)) {
        strcpy(lines[items], line);
        items++;
    }
    fclose(infile);

    qsort(lines, items, MAXLINELEN, compare_rows);

    printf("Enter a destination filename: ");
    if (!fgets(name, sizeof(name), stdin)) {
        fprintf(stderr, "could not read destination filename\n");
        return EXIT_FAILURE;
    }
    name[strcspn(name, "\n")] = '\0'; // strip newline
    outfile = fopen(name, "w");
    if (!outfile) {
        perror(name);
        return EXIT_FAILURE;
    }
    // Lines still carry their own '\n' from fgets(), so fputs() is enough.
    for (i = 0; i < items; i++) {
        fputs(lines[i], outfile);
    }

    // fclose() flushes buffered output; a failure here means data was lost.
    if (fclose(outfile) != 0) {
        perror(name);
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}

问题描述及代码

如果我读取的 input.txt 文件没有超出 MAXLINELEN 和 MAXITEMS 的限制,程序运行正常。现在假设我要逐行读取一个更大的输入文件 "inputfile",其中每行的最大长度可以是任意值,那么我就必须使用字符指针 (char*) 来读取输入,例如:char* linesptr[MAXITEMS];

这是我的代码,我试图在其中逐行读取由换行符分隔的输入文件。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#define MAXNAMELEN 1000
#define MAXLINELEN 1000
#define MAXITEMS 100000

char* linesptr[MAXITEMS];

/*
 * Question code, kept deliberately defective: reads a file line by line,
 * records a pointer per line in the global linesptr table, sorts the table
 * and writes the result to a second file.
 *
 * Known defects (left in place because they are the subject of the
 * question):
 *   - every linesptr[] entry stores the address of the one local `line`
 *     buffer, so all entries refer to the same storage;
 *   - qsort() is told the element size is MAXLINELEN and is given strcmp
 *     directly, although the elements are char * pointers.
 */
int
main(int argc, char ** argv) {

 FILE * infile, * outfile;
 char name[MAXNAMELEN];
 char line[MAXLINELEN];

 int i, items = 0;

 printf("Enter a source filename: ");
 fgets(name, MAXNAMELEN, stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 printf("%s infile \n",name);
 infile = fopen(name, "r");
 while (fgets(line, MAXLINELEN, infile)) {
    int length = strlen(line);
    line[length-1] = '\0';
    linesptr[items] = line; // <- I am writing to the same mem location
    printf("the input string %d is : %s \n",items,  linesptr[items]);
        items++;
 }

 // NOTE(review): element size and comparator do not match an array of
 // char * (see the defect list above).
 qsort(linesptr, items, MAXLINELEN, strcmp); 
 printf("Enter a destination filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 outfile = fopen(name, "w");
 for (i=0; i<items; i++) {
    fputs(linesptr[i], outfile);
 }

 fclose(infile);
 fclose(outfile);
}

问题

我把指针地址复制到数组 linesptr 的第 n 个元素中,其中 n 等于 items 的值(对应代码中的这一行:linesptr[items] = line;)。因此在打印最终结果时,数组的每个元素引用的都是同一个缓冲区 line 的内存地址,而 line 中始终只保存最近一次 fgets() 读到的内容。我明白错误的原因,但不知道如何解决。如果能帮我修复代码中的这个错误,我将不胜感激。

将该行复制到动态分配的字符串。

// Give every input line its own heap buffer so that each linesptr[] entry
// refers to distinct storage instead of the shared `line` buffer.
// The loop also stops at MAXITEMS so the linesptr table cannot be overrun.
while (items < MAXITEMS && fgets(line, MAXLINELEN, infile)) {
    size_t length = strlen(line);
    // Only strip the last character when it really is a newline; the final
    // line of a file (or an over-long line) may not end with one.
    if (length > 0 && line[length-1] == '\n') {
        line[length-1] = '\0';
        length--;
    }
    char *linecopy = malloc(length+1); // +1 for the terminating NUL
    if (linecopy == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    strcpy(linecopy, line);
    linesptr[items] = linecopy;
    printf("the input string %d is : %s \n",items,  linesptr[items]);
    items++;
}

如果你想处理超过 MAXITEMS 行,还应该使用 malloc() 动态分配 linesptr;当行数达到 linesptr 的当前容量时,可以使用 realloc() 将其扩大。详细代码见 “Read unknown number of lines from stdin, C”。

有关对字符串指针数组进行排序的正确方法,请参阅 How to qsort an array of pointers to char in C?

这是读取文件(大数据)、排序并将其写入文件的完整工作解决方案:

#include <stdio.h>
#include <string.h> 
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#define MAXNAMELEN 1000
#define MAXLINELEN 5000
#define MAXITEMS 100000

char* linesptr[MAXITEMS];

/*
 * qsort() comparator for an array of char * strings.
 *
 * name1 and name2 each point at an array element (a pointer to a string),
 * so they are dereferenced once before being handed to strcmp().
 * Returns the strcmp() result: negative, zero or positive.
 */
int compare_function(const void *name1, const void *name2)
{
  const char *const *lhs = name1;
  const char *const *rhs = name2;

  return strcmp(*lhs, *rhs);
}

int
main(int argc, char ** argv) 
{

 FILE * infile, * outfile;
 char name[MAXNAMELEN];
 char line[MAXLINELEN];

 int i, items = 0;

 printf("Enter a source filename: ");
 fgets(name, MAXNAMELEN, stdin);
 name[strlen(name)-1] = '[=10=]'; // strip newline
 infile = fopen(name, "r");
 while (fgets(line, MAXLINELEN, infile)) {
    int length = strlen(line);
    line[length-1] = '[=10=]';
    char *linecopy = malloc(length);
    strcpy(linecopy, line);
    linesptr[items] = linecopy;
    items++;
 }

 qsort(linesptr, items, sizeof(char *), compare_function);

 printf("Enter a destination filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '[=10=]'; // strip newline
 outfile = fopen(name, "w");
 for (i=0; i<items; i++) {
    fprintf(outfile, "%s\n", linesptr[i]);
 }
 fclose(infile);
 fclose(outfile);
}

你要举个例子,这里是:

以下建议代码:

  1. 为可读性而写
  2. 检查并处理错误情况
  3. 利用了 getline() 和 realloc()
  4. 效率不如预期,因为它会为源文件中的每一行调用 realloc()
  5. 正确且安全地使用 strcspn() 删除任何(可能存在的)尾随换行符
  6. 本可以通过将 'cleanup' 提取到子函数来简化代码,而不是在遇到错误时重复相同的 'cleanup' 代码。
  7. 使用 size_t 而不是 int 作为数组索引以避免隐式转换
  8. 尽可能缩小变量的作用域(scope)
  9. 将适当的第三个参数传递给 qsort()
  10. qsort()
  11. 正确实现 compare() 辅助函数

现在,建议的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>


#define MAXNAMELEN 1024

// prototypes
int compare(const void *, const void *);


// Frees the first `count` strings stored in `lines`, then the array itself.
// Safe to call with lines == NULL and count == 0.
static void free_lines( char **lines, size_t count )
{
    for( size_t i = 0; i < count; i++ )
    {
        free( lines[i] );
    }
    free( lines );
}


/*
 * Prompts for a source file, reads every line with getline() into a
 * dynamically grown array, sorts the array with compare() and writes the
 * lines to a destination file that is also prompted for.
 *
 * Lines keep the trailing newline that getline() returns; a line that has
 * none (the last line of a file without a final newline) gets one added on
 * output so it cannot be fused with the line that follows it after sorting.
 *
 * Exits with EXIT_FAILURE on any I/O or allocation failure, after releasing
 * whatever was allocated up to that point.
 */
int main( void )
{
    printf("Enter a source filename: ");
    char name[ MAXNAMELEN ];
    if( !fgets(name, sizeof( name ), stdin) )
    {
        perror( "fgets for input file name failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, fgets for input file name successful

    name[strcspn( name, "\n" ) ] = '\0'; // strip newline
    printf("%s infile \n",name);

    FILE *fp_in = fopen(name, "r");
    if( !fp_in )
    {
        perror( "fopen for input file failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, fopen for input file successful

    char **linesarray = NULL;
    size_t numLines   = 0;

    char   *line      = NULL;
    size_t  lineLen   = 0;

    while( getline( &line, &lineLen, fp_in ) != -1 )
    {
        char ** temp = realloc( linesarray, (numLines+1) * sizeof( char* ) );
        if( !temp )
        {
            perror( "realloc failed" );
            fclose( fp_in );
            // `line` has not been stored in linesarray yet, so it must be
            // released separately.
            free( line );
            free_lines( linesarray, numLines );
            exit( EXIT_FAILURE );
        }

        // implied else, realloc successful

        linesarray = temp;
        linesarray[ numLines ] = line; // the array now owns this buffer
        numLines++;

        // prep for next iteration: make getline() allocate a fresh buffer
        line = NULL;
        lineLen = 0;
    }

    // getline() may have allocated a buffer even on the failing final call
    free( line );
    fclose( fp_in );

    qsort( linesarray, numLines, sizeof( char * ), compare );

    printf("Enter a destination filename: ");

    if( !fgets(name, sizeof(name), stdin) )
    {
        perror( "fgets for output file name failed" );
        free_lines( linesarray, numLines );
        exit( EXIT_FAILURE );
    }

    // implied else, fgets() for output file name successful

    name[strcspn( name, "\n" ) ] = '\0'; // strip newline

    FILE *fp_out = fopen(name, "w");
    if( !fp_out )
    {
        perror( "fopen for output file failed" );
        free_lines( linesarray, numLines );
        exit( EXIT_FAILURE );
    }

    // implied else, fopen for output file successful

    for( size_t i = 0; i < numLines; i++ )
    {
        const char *text = linesarray[i];
        size_t textLen   = strlen( text );
        int missingNewline = ( textLen == 0 || text[ textLen-1 ] != '\n' );

        if( fputs( text, fp_out ) == EOF
         || ( missingNewline && fputc( '\n', fp_out ) == EOF ) )
        {
            perror( "fputs failed" );
            fclose( fp_out );
            free_lines( linesarray, numLines );
            exit( EXIT_FAILURE );
        }
    }

    // fclose() flushes buffered output; a failure here means data was lost
    if( fclose( fp_out ) != 0 )
    {
        perror( "fclose for output file failed" );
        free_lines( linesarray, numLines );
        exit( EXIT_FAILURE );
    }

    free_lines( linesarray, numLines );
    return EXIT_SUCCESS;
}


/*
 * qsort() comparator for an array of char * strings.
 *
 * ls and rs each point at an array element, i.e. at a pointer to a string;
 * the pointed-to strings are compared with strcmp() and its result
 * (negative, zero or positive) is returned unchanged.
 */
int compare(const void *ls, const void *rs )
{
    const char *const *left  = ls;
    const char *const *right = rs;

    return strcmp( *left, *right );
}