打印字符串数组会产生错误的输出

Question

我正在尝试解决一个难题，但我不知道我的代码有什么问题！

挑战是：

创建一个将字符串拆分为单词的函数。
分隔符是空格、制表符和换行符。
此函数返回一个数组，数组的每个元素保存一个字符串的地址，每个字符串对应一个单词。该数组的最后一个元素应等于 0，以标记数组的结尾。
数组中不能有任何空字符串，请据此得出必要的结论。不得修改给定的字符串。
注意：唯一允许的函数是malloc()

bug/problem：我遇到了这个问题，我试图解决它，但我无法确定问题出在哪里。我创建了一个名为 split_whitespaces() 的函数来完成这项工作。当我在 split_whitespaces 函数中打印字符串数组时，我得到以下输出：

Inside the function:
arr_str[0] = This
arr_str[1] = is
arr_str[2] = just
arr_str[3] = a
arr_str[4] = test!

当我在 main 函数中打印字符串数组时，我得到以下输出：

Inside the main function:
arr_str[0] = @X@?~
arr_str[1] = `X@?~
arr_str[2] = just
arr_str[3] = a
arr_str[4] = test!

我创建了一个函数 word_count 来计算输入字符串中的单词数，这样我就可以使用 malloc 和 word_count + 1（空指针）分配内存。

/*
 * Count the words in str. A word is a maximal run of characters for which
 * iswhitespace() returns 0.
 *
 * str: NUL-terminated string to scan; it is only read.
 * Returns the number of words (0 for an empty or all-whitespace string).
 */
int word_count(char *str) {
    const char *cursor;
    int total;
    int in_word; /* 1 while the scan is inside a word, 0 between words */

    total = 0;
    in_word = 0;
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (iswhitespace(*cursor)) {
            in_word = 0;
            continue;
        }
        if (!in_word) {
            /* First character of a new word. */
            total++;
            in_word = 1;
        }
    }
    return (total);
}

另一个名为 strdup_w 的函数模仿 strdup 的行为，但仅针对单个单词：

/*
 * Duplicate the single word of str that begins at offset *index.
 *
 * The word extends from str[*index] up to, but not including, the next
 * character for which iswhitespace() is true, or the terminating NUL.
 *
 * str:   NUL-terminated source string; it is only read.
 * index: on entry, offset of the word's first character; on success it is
 *        advanced to the offset just past the word. It is left unchanged
 *        when allocation fails.
 *
 * Returns a malloc()ed, NUL-terminated copy of the word that the caller
 * must free(), or NULL if malloc() fails.
 */
char *strdup_w(char *str, int *index) {
    char *word;
    int len;
    int i;

    /* Measure the word first so the buffer can be sized exactly. */
    len = 0;
    while (str[*index + len] && !iswhitespace(str[*index + len])) {
        len++;
    }
    word = malloc(len + 1);
    if (!word)
        return (NULL);
    /* Copied by hand: the exercise allows no library call except malloc(). */
    for (i = 0; i < len; i++) {
        word[i] = str[*index + i];
    }
    word[len] = '\0';
    *index += len;
    return (word);
}

这是我的完整代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Split str into an array of malloc()ed words ending with a null entry. */
char **split_whitespaces(char *str);
/* Copy the word that starts at str[*index] and advance *index past it. */
char *strdup_w(char *str, int *index);
/* Number of whitespace-separated words in str. */
int word_count(char *str);
/* 1 if c is a space, tab, newline or carriage return; 0 otherwise. */
int iswhitespace(char c);

/*
 * Demo driver: splits a fixed sentence with split_whitespaces() and prints
 * each entry of the returned array until the terminating null entry.
 */
int main(void) {
    char *sentence = "This is just a test!";
    char **words = split_whitespaces(sentence);
    int idx;

    printf("\nOutside the function:\n");
    for (idx = 0; words[idx]; idx++) {
        printf("arr_str[%d] = %s\n", idx, words[idx]);
    }
    return (0);
}

/*
 * Split str into its whitespace-separated words.
 *
 * Counts the words with word_count(), allocates the result array, then
 * stores one strdup_w() copy per word, printing each entry as it is stored.
 * The slot at index `words` is set to 0 to mark the end of the array.
 *
 * Returns the array, or NULL if malloc() for the array itself fails.
 *
 * NOTE(review): malloc() takes a size in bytes, so the call below requests
 * only words + 1 bytes rather than room for words + 1 pointers; the stores
 * into arr_str then run past the allocation. This is the defect the answers
 * diagnose, so the code is kept exactly as posted.
 */
char **split_whitespaces(char *str) {
    char **arr_str;
    int i;
    int words;
    int w_i;

    i = 0;
    w_i = 0;
    words = word_count(str);
    arr_str = (char **)malloc(words + 1);
    if (!arr_str)
        return (NULL);
    printf("Inside the function:\n");
    while (w_i < words) {
        /* Skip the whitespace that precedes the next word. */
        while (iswhitespace(str[i]) && str[i])
            if (!str[i++])
                break;
        /* NOTE(review): a NULL from strdup_w() is stored and printed unchecked. */
        arr_str[w_i] = strdup_w(str, &i);
        printf("arr_str[%d] = %s\n", w_i, arr_str[w_i]);
        w_i++;
    }
    /* Null entry marking the end of the array. */
    arr_str[words] = 0;
    return (arr_str);
}

/*
 * Duplicate the single word of str that begins at offset *index.
 *
 * The word extends from str[*index] up to, but not including, the next
 * character for which iswhitespace() is true, or the terminating NUL.
 *
 * str:   NUL-terminated source string; it is only read.
 * index: on entry, offset of the word's first character; on success it is
 *        advanced to the offset just past the word. It is left unchanged
 *        when allocation fails.
 *
 * Returns a malloc()ed, NUL-terminated copy of the word that the caller
 * must free(), or NULL if malloc() fails.
 */
char *strdup_w(char *str, int *index) {
    char *word;
    int len;
    int i;

    /* Measure the word first so the buffer can be sized exactly. */
    len = 0;
    while (str[*index + len] && !iswhitespace(str[*index + len])) {
        len++;
    }
    word = malloc(len + 1);
    if (!word)
        return (NULL);
    /* Copied by hand: the exercise allows no library call except malloc(). */
    for (i = 0; i < len; i++) {
        word[i] = str[*index + i];
    }
    word[len] = '\0';
    *index += len;
    return (word);
}

/*
 * Count the words in str. A word is a maximal run of characters for which
 * iswhitespace() returns 0.
 *
 * str: NUL-terminated string to scan; it is only read.
 * Returns the number of words (0 for an empty or all-whitespace string).
 */
int word_count(char *str) {
    const char *cursor;
    int total;
    int in_word; /* 1 while the scan is inside a word, 0 between words */

    total = 0;
    in_word = 0;
    for (cursor = str; *cursor != '\0'; cursor++) {
        if (iswhitespace(*cursor)) {
            in_word = 0;
            continue;
        }
        if (!in_word) {
            /* First character of a new word. */
            total++;
            in_word = 1;
        }
    }
    return (total);
}

/*
 * Tell whether c is one of the word separators.
 * Returns 1 for space, tab, newline or carriage return; 0 for anything else.
 */
int iswhitespace(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return (1);
    default:
        return (0);
    }
}

抱歉，这是我第一次尝试寻求帮助。

Answer 1

代码中存在多个问题：

大小不正确 arr_str = (char **)malloc(words + 1); 您必须将元素的数量乘以元素的大小：
```
  arr_str = malloc(sizeof(*arr_str) * (words + 1));
```
使用后在main()函数中释放数组是一种很好的风格。
测试 while (iswhitespace(str[i]) && str[i]) 是多余的：如果 w_count 计算正确，就不必再测试 str[i]。您应该使用 strspn() 跳过空白字符，用 strcspn() 跳过单词字符。
if (!str[i++]) break; 在循环内是完全多余的：str[i] 已经过测试，不为空。
while (str[i] && !iswhitespace(str[i])) len++, i++;; 是糟糕的风格。如果循环体中有多个简单语句，请使用大括号。
strdup_w中的最后一个循环比较复杂，你可以简单地使用memcpy(word, str + *index, len); *index += len;

这是修改后的版本：

#include <stdio.h>
#include <stdlib.h>

/* Split str into an array of malloc()ed words ending with a NULL entry. */
char **split_whitespaces(const char *str);
/* Copy the word that starts at str[*index] and advance *index past it. */
char *strdup_w(const char *str, int *index);
/* Number of whitespace-separated words in str. */
int word_count(const char *str);
/* 1 if c is a space, tab, newline or carriage return; 0 otherwise. */
int iswhitespace(char c);

/*
 * Demo driver: splits a fixed sentence, prints every word of the result,
 * then frees each word and the array itself. Does nothing but return 0
 * when split_whitespaces() reports failure with NULL.
 */
int main(void) {
    const char *sentence = "This is just a test!";
    char **words = split_whitespaces(sentence);
    int count;

    if (!words)
        return 0;
    printf("\nOutside the function:\n");
    for (count = 0; words[count]; count++) {
        printf("arr_str[%d] = %s\n", count, words[count]);
    }
    /* Release the words from last to first, then the array. */
    while (count > 0) {
        count--;
        free(words[count]);
    }
    free(words);
    return 0;
}

/*
 * Split str into its whitespace-separated words.
 *
 * str: NUL-terminated string; it is only read.
 *
 * Returns a malloc()ed array of malloc()ed word copies, or NULL if the
 * array itself cannot be allocated. The slot after the last word is NULL.
 * If copying a word fails, filling stops there; the NULL stored in that
 * slot ends the array early and the partial array is still returned.
 * Each stored word is printed as it is produced.
 */
char **split_whitespaces(const char *str) {
    int pos = 0;
    int total = word_count(str);
    char **result = malloc(sizeof(*result) * (total + 1));
    int filled;

    if (!result)
        return NULL;
    printf("Inside the function:\n");
    for (filled = 0; filled < total; filled++) {
        /* Advance to the first character of the next word. */
        while (iswhitespace(str[pos])) {
            pos++;
        }
        result[filled] = strdup_w(str, &pos);
        if (!result[filled])
            break;
        printf("arr_str[%d] = %s\n", filled, result[filled]);
    }
    result[total] = NULL;
    return result;
}

/*
 * Duplicate the single word of str that begins at offset *index.
 *
 * The word extends from str[*index] up to, but not including, the next
 * character for which iswhitespace() is true, or the terminating NUL.
 *
 * str:   NUL-terminated source string; it is only read.
 * index: on entry, offset of the word's first character; on return it holds
 *        the offset just past the word, even when allocation fails.
 *
 * Returns a malloc()ed, NUL-terminated copy of the word that the caller
 * must free(), or NULL if malloc() fails.
 */
char *strdup_w(const char *str, int *index) {
    char *word;
    int start;
    int len;
    int i;

    start = *index;
    i = start;
    while (str[i] && !iswhitespace(str[i])) {
        i++;
    }
    /* Publish the new position before allocating, as callers resume from it. */
    *index = i;
    len = i - start;
    word = malloc(len + 1);
    if (!word)
        return NULL;
    /* Copied by hand: the exercise allows no library call except malloc(). */
    for (i = 0; i < len; i++) {
        word[i] = str[start + i];
    }
    word[len] = '\0';
    return word;
}

/*
 * Count the words in str. A word is a maximal run of characters for which
 * iswhitespace() returns 0.
 *
 * str: NUL-terminated string to scan; it is only read.
 * Returns the number of words (0 for an empty or all-whitespace string).
 */
int word_count(const char *str) {
    const char *cursor;
    int total = 0;
    int in_word = 0; /* 1 while the scan is inside a word, 0 between words */

    for (cursor = str; *cursor != '\0'; cursor++) {
        if (iswhitespace(*cursor)) {
            in_word = 0;
        } else if (!in_word) {
            /* First character of a new word. */
            total++;
            in_word = 1;
        }
    }
    return total;
}

/*
 * Tell whether c is one of the word separators.
 * Returns 1 for space, tab, newline or carriage return; 0 for anything else.
 */
int iswhitespace(char c) {
    static const char separators[] = { ' ', '\t', '\n', '\r' };
    unsigned k;

    for (k = 0; k < sizeof(separators); k++) {
        if (c == separators[k])
            return 1;
    }
    return 0;
}

Answer 2

来自我的最高评论...

在 split_whitespaces 中，尝试更改：

arr_str = (char **) malloc(words + 1);

改为：

arr_str = malloc(sizeof(*arr_str) * (words + 1));

正如你所说，words 是一个数量，而不是字节长度，所以你分配的空间不够，从而导致了未定义行为（UB）。

更新：

But watched some tutorials and they said that malloc takes one argument which is the size of the memory to be allocated (in bytes), that's why I allocated memory for 5 bytes! can you please tell my an alternative of using malloc without sizeof() function. I'll appreciate it. – Achraf EL Khnissi

不使用 sizeof 的话，确实没有干净的写法来指定这个大小。

sizeof 并不是函数[尽管语法看起来像]。它是一个编译器指令，会把其参数所占用的字节数作为编译时常量“返回”。

如果我们有char buf[5];，有5个字节，所以sizeof(buf)[或sizeof buf]是5.

如果我们有：int buf[5];，则有 5 个元素，每个元素是一个 int，其大小[通常]为 4 个字节，因此以字节为单位的总空间是 sizeof(int) * 5，即 4 * 5 = 20。

但是，int 可能因架构而异。在英特尔 8086 上 [大约 1980 年代]，int 是 2 个字节（即 16 位）。所以，上面的 4 * 5 是错误的。应该是 2 * 5.

如果我们使用 sizeof(int)，那么 sizeof(int) * 5 与架构无关。

类似地，在 32 位机器上，指针 [通常] 是 32 位。所以，sizeof(char *) 是 4 [字节]。在 64 位机器上，指针是 64 位，也就是 8 个字节。所以，sizeof(char *) 是 8。

因为arr_str是：char **arr_str，我们可以做到：

arr_str = malloc(sizeof(char *) * (words + 1));

但是，如果 arr_str 的定义日后发生变化（例如改为 struct string *arr_str;），而我们又忘记把分配语句改成下面这样，那么刚才的写法就会出错：

arr_str = malloc(sizeof(struct string) * (words + 1));

所以，做：

arr_str = malloc(sizeof(*arr_str) * (words + 1));

是首选的惯用写法，代码也更简洁。当定义改变时，更多的语句会自动随之调整，无需手动查找所有受影响的代码行。

更新#2：

You might just add why you removed the (char **) cast :) -- chqrlie

请注意，我删除了 (char **) 强制转换。参见：Do I cast the result of malloc?

强制转换只是添加了多余且不必要的“东西”，因为 malloc 返回的 void * 值可以赋给任何指针类型。

如果我们忘记写 #include <stdlib.h>，那么 malloc 就没有函数原型，编译器会默认它的返回类型为 int。

没有强制转换时，编译器会对该语句报错[这正是我们想要的]。

有了强制转换，这个错误在编译时[或多或少]被掩盖了。在 64 位机器上，编译器将使用截断为 32 位的值[因为它认为 malloc 返回的是一个 32 位值]，而不是 malloc 完整的 64 位返回值。

这种截断是一个“沉默的杀手”。应该标记为编译时错误的内容会产生运行时错误（可能是段错误或其他 UB），这更难调试。

打印字符串数组会产生错误的输出

Printing array of strings produces bad output

c

malloc