将行拆分为单词并使用 strtok 将它们放入 char 数组中

Question

我写了这个简单的函数，用来把一行文本解析成若干标记（token）……但我好像漏掉了什么。

/*
 * Split `line` on single spaces with strtok() and copy each token into a
 * newly malloc'd 256-byte buffer stored in successive slots of `words`.
 * Returns wordc, the number of slots of `words` that were assigned.
 *
 * NOTE(review): both strcpy() calls can receive a NULL `word`, because
 * strtok() returns NULL once no token is left. Passing NULL to strcpy() is
 * undefined behaviour; the loop below reaches that state on its last pass.
 */
int parse_line(char *line,char **words){

   int wordc=0;

   /* get the first token */
   char *word = strtok(line, " ");
   words[wordc]=(char*)malloc(256*sizeof(char));
   /* word is NULL here when line holds no token at all */
   strcpy(words[wordc++],word );

   /* walk through other tokens */
    while( word != NULL ) {
        word = strtok(NULL, " ");
        words[wordc]=(char*)malloc(256*sizeof(char));
        /* word is copied before the loop condition tests it for NULL again */
        strcpy(words[wordc++],word );
    }

    return wordc;
}

运行时出现了段错误！我传入的第一个参数是 char[256] 类型的 line，第二个参数当然是 char** 类型的 words，不过我事先已经用 malloc 为它分配了内存，像这样：

  char **words = (char **)malloc(256 * sizeof(char *));

main:
.
.
.
char buffer[256];
char **words = (char **)malloc(256 * sizeof(char *));
.
.
.
n = read(stdin, buffer, 255);
if (n < 0){
   perror("ERROR");
   break;
}

parse_line(buffer,words);

当程序执行到 parse_line 时，它就因段错误而退出。

我找到了段错误发生的位置，就在这一行：

strcpy(words[wordc++],word );

确切地说是第一个 strcpy，此时甚至还没有进入 while 循环。

Answer 1

while( word != NULL ) {
    word = strtok(NULL, " ");
    words[wordc]=(char*)malloc(256*sizeof(char));
    /* word may have just become NULL, yet it is still passed to strcpy() */
    strcpy(words[wordc++],word );
}

当解析到输入行的末尾时，strtok 会返回 NULL，因此 word 必然被置为 NULL（这符合预期），于是 strcpy(words[wordc++],word ) 就成了未定义行为（很可能导致崩溃）。

您需要重新组织循环，以免尝试复制 NULL 字符串。

@jxh 建议用下面的方案，避免 word 在任何一次 strcpy 调用中为 NULL：

/* get the first token */
char *word = strtok(line, " ");

/* word is tested before every copy, so a NULL token never reaches strcpy() */
while( word != NULL ) {
    words[wordc]=(char*)malloc(256*sizeof(char));
    strcpy(words[wordc++],word );
    /* fetch the next token; a NULL result ends the loop */
    word = strtok(NULL, " ");
}

我会这样做（使用更少的内存）

/* get the first token */
char *word = strtok(line, " ");

while( word != NULL ) {
    /* strdup() allocates a buffer sized to the token and copies it in */
    words[wordc++] = strdup(word);
    /* fetch the next token; a NULL result ends the loop */
    word = strtok(NULL, " ");
}

Answer 2

你的答案是正确的！但是由于 read 调用，我又遇到了段错误！我之前没有注意到，运行程序时它并没有停下来等待输入，而是直接跳了过去。我把 read 换成了 fgets，再加上你的修改，程序就正常了！有人可以给我解释一下吗？为什么程序不会停在 read 函数处？

Answer 3

以下建议代码：

干净地编译
执行所需的功能
正确检查错误
向用户显示结果
没有把所有已分配的内存传递给 free()，因此存在大量内存泄漏

现在建议的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// avoid 'magic' numbers in code
#define MAX_WORDS 256
#define MAX_LINE_LEN 256


/*
 * Split `line` on single spaces and store a heap-allocated copy of each
 * token in successive slots of `words`.
 *
 * line  - NUL-terminated string; strtok() modifies it in place.
 * words - array with room for at least MAX_WORDS pointers.
 *
 * Returns the number of words stored, at most MAX_WORDS. Each stored
 * pointer comes from strdup(). If strdup() fails, the function reports it
 * with perror() and terminates the process.
 */
int parse_line( char *line, char **words )
{
    int wordc;
    char *token;

    /* stop when the table is full or strtok() has no more tokens */
    for( wordc = 0, token = strtok( line, " " );
         wordc < MAX_WORDS && token != NULL;
         token = strtok( NULL, " " ) )
    {
        char **slot = &words[wordc];

        *slot = strdup( token );
        if( *slot == NULL )
        {
            perror( "strdup failed" );
            exit( EXIT_FAILURE );
        }

        wordc++;
    }

    return wordc;
}



int main( void )
{
    char buffer[ MAX_LINE LENGTH ];

    // fix another problem with OPs code
    char **words = calloc( MAX_WORDS, sizeof( char* ) );
    if( ! words )
    {
        perror( "calloc failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, calloc successful

    // note: would be much better to use 'fgets()' rather than 'read()'
    ssize_t n = read( 0, buffer, sizeof( buffer ) );
    if (n <= 0)
    {
       perror("read failed");
       exit( EXIT_FAILURE );
    }

    // implied else, read successful

    // note: 'read()' does not NUL terminate the data
    buffer[ n ] = '[=10=]';   

    int count = parse_line( buffer, words );

    for( int i = 0; i < count; i++ )
    {   
        printf( "%s\n", words[i] );
    } 
}

下面是该程序的一次典型运行：

hello old friend  <-- user entered line
hello
old
friend

将行拆分为单词并使用 strtok 将它们放入 char 数组中

Split line into words and put them in char array using strtok

c

malloc

strtok

segmentation-fault