该功能仅适用于无用的 printf

The function only works with a useless printf

我通常会越来越努力地解决我在代码中发现的任何错误,但是这个错误对我来说完全不合逻辑。它适用于任何字符串和字符分隔符,但仅适用于函数 while 内无用的 printf ,否则它会打印

-> Lorem

然后

-> ▼

然后崩溃了。在此先感谢任何可以告诉我发生了什么的人。

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

char **strsep_(char *str, char ch) {
    // Sub-string length
    uint8_t len = 0;
    // The number of sub-strings found means the same as the position where it will be stored in the main pointer
    // Obviously, the number tends to increase over time, and at the end of the algorithm, it means the main pointer length too
    uint8_t pos = 0;
    // Storage for any found sub-strings and one more byte as the pointer is null-terminated
    char **arr = (char**)malloc(sizeof(char **) + 1);
    while (*str) {
        printf("Erase me and it will not work! :)\n");
        if (*str == ch) {
            // The allocated memory should be one step ahead of the current usage
            arr = realloc(arr, sizeof(char **) * pos + 1);
            // Allocates enough memory in the current main pointer position and the '[=12=]' byte
            arr[pos] = malloc(sizeof(char *) * len + 1);
            // Copies the sub-string size (based in the length number) into the previously allocated space
            memcpy(arr[pos], (str - len), len);
            // `-_("")_-k
            arr[pos][len] = '[=12=]';
            len = 0;
            pos++;
        } else {
            len++;
        }
        *str++;
    }
    // Is not needed to reallocate additional memory if no separator character was found
    if (pos > 0) arr = realloc(arr, sizeof(char **) * pos + 1);
    // The last chunk of characters after the last separator character is properly allocated
    arr[pos] = malloc(sizeof(char *) * len + 1);
    memcpy(arr[pos], (str - len), len);
    // To prevent undefined behavior while iterating over the pointer
    arr[++pos] = NULL;

    return arr;
}

void strsep_free_(char **arr) {
    char **aux = arr;
    while (*arr) {
        free(*arr);
        *arr = NULL;
        arr++;
    }
    // One more time to fully deallocate the null-terminated pointer
    free(*arr);
    *arr = NULL;
    arr++;
    // Clearing The pointer itself 
    free(aux);
    aux = NULL;
}

int main(void) {
    char **s = strsep_("Lorem ipsum four words", ' ');
    char **i = s;
    while (*i != NULL) {
        printf("-> %s\n", *i);
        i++;
    }
    strsep_free_(s);
}

崩溃的可能原因很可能是:realloc(arr, sizeof(char **) * pos + 1)

这与 realloc(arr, (sizeof(char **) * pos) + 1) 相同,它没有为您的 "array" 分配足够的 space。你需要做 realloc(arr, sizeof(char **) * (pos + 1)).

arr[pos] 的分配相同,您也需要在那里正确使用括号。

您的程序有未定义的行为,这意味着它可能会以意想不到的方式运行,但也可能偶然地按预期运行。添加额外的 printf 以一种似乎 纠正 错误的方式改变了行为,但这只是巧合。在不同的机器上,甚至在不同时间在同一台机器上,行为可能会再次改变。

您的程序中存在多个导致未定义行为的错误:

  • 您没有分配适当大小的数组:它应该有 space fpr pos + 1 指针,因此 sizeof(char **) * (pos + 1)。错误的语句是:char **arr = (char**)malloc(sizeof(char **) + 1);arr = realloc(arr, sizeof(char **) * pos + 1);.

  • 此外,为每个子串分配的space也是不正确的:arr[pos] = malloc(sizeof(char *) * len + 1);应该读作arr[pos] = malloc(sizeof(char) * len + 1);,根据定义是arr[pos] = malloc(len + 1);。这不会导致未定义的行为,您只是分配了太多内存。如果您的系统支持,分配和复制可以在对 strndup(str - len, len).

  • 的一次调用中合并
  • 你从不检查内存分配失败,在内存分配失败的情况下导致未定义的行为。

  • lenpos使用uint8_t是有风险的:如果子串的数量超过255怎么办? poslen 会默默地回绕到 0,产生意外的结果和内存泄漏。使用这么小的类型没有任何优势,请改用 intsize_t

这是更正后的版本:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char **strsep_(const char *str, char ch) {
    // Sub-string length
    int len = 0;
    // The number of sub-strings found, index where to store the NULL at the end of the array.
    int pos = 0;
    // return value: array of pointers to substrings with an extra slot for a NULL terminator.
    char **arr = (char**)malloc(sizeof(*arr) * (pos + 1));
    if (arr == NULL)
        return NULL;
    for (;;) {
        if (*str == ch || *str == '[=10=]') {
            // alocate the substring and reallocate the array
            char *p = malloc(len + 1);
            char **new_arr = realloc(arr, sizeof(*arr) * (pos + 2));
            if (new_arr == NULL || p == NULL) {
                // allocation failure: free the memory allocated so far
                free(p);
                if (new_arr)
                    arr = new_arr;
                while (pos-- > 0)
                    free(arr[pos]);
                free(arr);
                return NULL;
            }
            arr = new_arr;
            memcpy(p, str - len, len);
            p[len] = '[=10=]';
            arr[pos] = p;
            pos++;
            len = 0;
            if (*str == '[=10=]')
                break;
        } else {
            len++;
        }
        str++;
    }
    arr[pos] = NULL;
    return arr;
}

void strsep_free_(char **arr) {
    int i;
    // Free the array elements 
    for (i = 0; arr[i] != NULL; i++) {
        free(arr[i]);
        arr[i] = NULL;  // extra safety, not really needed
    }
    // Free The array itself 
    free(arr);
}

int main(void) {
    char **s = strsep_("Lorem ipsum four words", ' ');
    int i;
    for (i = 0; s[i] != NULL; i++) {
        printf("-> %s\n", s[i]);
    }
    strsep_free_(s);
    return 0;
}

输出:

-> Lorem
-> ipsum
-> four
-> words

@chqrlie 的回答很好。就我而言,我认为在复制之前计算所有内容会更好,这应该有助于避免重新分配。

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int count_chars(const char *str, const char ch)
{
    int i;
    int count;

    i = 0;
    count = 0;
    if (*str == ch)
        str++;

    while (str[i] != ch && str[i] != '[=10=]')
    {
        count++;
        i++;
    }

    return (count);
}

int count_delimeter(const char *str, const char ch)
{
    int i = 0;
    int count = 0;

    while (str[i])
    {
        if (str[i] == ch && str[i + 1] != ch)
            count++;
        i++;
    }

    return count;
}

char** strsep_(const char *str, const char ch)
{
    char **arr;
    int index = 0;
    int size = 0;
    int i = 0;

    size = count_delimeter(str, ch) + 1;

    if ((arr = malloc(sizeof(char *) * (size + 1))) == NULL)
        return (NULL);
    arr[size] = NULL;

    while (i < size)
    {
        if (str[index] == ch)
            index++;

        if (str[index] && str[index] == ch && str[index + 1] == ch)
        {
            while (str[index] && str[index] == ch && str[index + 1] == ch)
                index++;
            index++;
        }

        int len = count_chars(&str[index], ch);
        if ((arr[i] = malloc(sizeof(char) * (len + 1))) == NULL)
            return NULL;

        memcpy(arr[i], &str[index], len);
        index += len;
        arr[i++][len] = '[=10=]';
    }

    return arr;
}

int main(void)
{
    char *str = "Lorem   ipsum  ipsum Lorem lipsum gorem insum";
    char **s = strsep_(str, ' ');
    /* char *str = "Lorem + Ipsum"; */
    /* char **s = strsep_(str, '+'); */
    /* char *str = "lorem, torem, horem, lorem"; */
    /* char **s = strsep_(str, ','); */
    while (*s != NULL) {
        printf("-> [%s]\n", *s);
        s++;
    }

    /* dont forget to free */
    return 0;
}