如何有效地在 C 中预先附加一个字符串数组?

How to efficiently pre-append an array of strings in C?

我有一个用 C 语言编写的小型框架,允许用户使用 C 语言创建动态分配的字符串数组。该库有一个名为 init_string_vector 的函数允许用户创建一个动态分配的字符串数组,另一个函数 append_string_vector 允许用户一次向数组追加一个字符串。此外,我还创建了另一个名为 preappend_string_vector 的函数,它允许用户一次在数组中预先附加一个字符串。但是,preappend_string_vector 函数要求数组以迭代方式一次重新创建一个值,这可能很耗时。我曾尝试使用 memmove 和 memcpy 函数来更快地执行此操作,但到目前为止没有任何效果。谁能推荐一种在时域上更快的解决方案?解决方法如下图

vector.h

typedef enum
{
    FLOAT,
    DOUBLE,
    CHAR,
    INT,
    STRING
} dat_type;
// --------------------------------------------------------------------------------

typedef struct
{
    char **array;
    size_t len;
    int elem;
    dat_type dat;
} StringVector;
// --------------------------------------------------------------------------------

int string_vector_mem_alloc(StringVector *array, size_t num_indices);
// --------------------------------------------------------------------------------

StringVector init_string_vector();
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------

int preappend_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------

vector.c

int string_vector_mem_alloc(StringVector *array, size_t num_indices) {
    // Determine the total memory allocation and assign to pointer
    void *pointer;
    pointer = malloc(num_indices * array->elem);

    // If memory is full fail gracefully
    if (pointer == NULL) {
        printf("Unable to allocate memory, exiting.\n");
        free(pointer);
        return 0;
    }
    // Allocate resources and instantiate Array
    else {
        array->array = pointer;
        array->len = 0;
        return 1;
    }
}
// --------------------------------------------------------------------------------

StringVector init_string_vector() {
    StringVector array;
    array.dat = STRING;
    array.elem = sizeof(char);
    string_vector_mem_alloc(&array, array.elem);
    return array;
}
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *s, char *value) {
    value = strdup(value);
    if (!value) {
        return -1;
    }
    s->len++;
    char **resized = realloc(s->array, sizeof(char *)*s->len);
    if (!resized) {
        free(value);
        return -1;
    }
    resized[s->len-1] = value;
    s->array = resized;
    return 0;
}
// --------------------------------------------------------------------------------

int preappend_string_vector(StringVector *s, char *value) {
    if (!value) {
        return -1;
    }
    s->len++;
    char **resized = malloc(sizeof(char *)*s->len);
    //char **extra_resized = realloc(s->array, sizeof(char *)*s->len);
    if (!resized) {
        free(value);
        return -1;
    }
    resized[0] = value;
    for (int i = 1; i < s->len; i++) {
        resized[i] = s->array[i - 1];
    }
    s->array = resized;
    return 0;
}

main.c

#include <stdio.h>
#include "vector.h"

int main(int argc, const char * argv[]) {
    StringVector arr_test = init_string_vector();
    char value[] = "First Value";
    char new_value[] = "Second Value";
    append_string_vector(&arr_test, value);
    preappend_string_vector(&arr_test, new_value);
    printf("%s\n", arr_test.array[0]);
    // yields "Second Value"
}

一个元素添加到动态分配的数组中:

  • 为另一个元素分配空间(此内存从数组末尾开始扩展)
  • 将所有元素向数组末尾移动一位
  • 将新值放在第一个索引中
  • 增加存储的长度

在这个例子中,我复制了字符串输入,以匹配您的追加函数的行为。注意输入可以const限定。

请记住,memmove 允许源内存和目标内存重叠。

int prepend_string_vector(StringVector *s, const char *value) {
    char *str;

    if (!value || !(str = strdup(value)))
        return -1;

    void *mem = realloc(s->array, sizeof *s->array * (s->len + 1));

    if (!mem) {
        free(str);
        return -1;
    }

    s->array = mem;
    memmove(s->array + 1, s->array, sizeof *s->array * s->len);
    s->array[0] = str;
    s->len++;

    return 0;
}

这个操作是O(n)。如果您想要更快的前置,请考虑使用不同的数据结构(例如链表)。

这是我认为您应该做的事情的版本。结构中的数据类型char **适合字符串;它不适用于其他类型。字符串的正确大小是 sizeof(char *),而不是 sizeof(char)。 此代码通过跟踪分配的 space 数量和使用中的 space 数量来避免每次都重新分配数组。当它重新分配时,它大约使分配的 space 加倍。

它总是复制传入的字符串——你的代码不一致。函数 return 0 表示成功,-1 表示失败;您的代码不一致。不需要 inconsistently-named string_vector_mem_alloc() 函数。我可能 init_stringvector() 接受一个指向狭窄的指针,也许还有一个要预分配的大小。

/* vector.h */
#ifndef VECTOR_H_INCLUDED
#define VECTOR_H_INCLUDED

#include <stddef.h>     /* size_t */

typedef struct
{
    char **array;
    size_t len;     /* Number of elements in use */
    size_t max;     /* Number of elements allocated */
    size_t elem;    /* Size of each element */
} StringVector;

extern StringVector init_string_vector(void);

extern int append_string_vector(StringVector *s, const char *value);

extern int preappend_string_vector(StringVector *s, const char *value);

extern void destroy_string_vector(StringVector *s);

#endif /* VECTOR_H_INCLUDED */

/* vector.c */

//#include "vector.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

StringVector init_string_vector(void)
{
    StringVector array;
    array.len = 0;
    array.max = 0;
    array.elem = sizeof(char *);
    array.array = NULL;
    return array;
}

void destroy_string_vector(StringVector *s)
{
    if (s != NULL)
    {
        for (size_t i = 0; i < s->len; i++)
            free(s->array[i]);
        free(s->array);
        s->array = NULL;
        s->len = s->max = s->elem = 0;
    }
}

int append_string_vector(StringVector *s, const char *value)
{
    char *copy = strdup(value);
    if (copy == NULL)
        return -1;
    assert(s->len <= s->max);

    if (s->len == s->max)
    {
        size_t new_max = 2 * s->max + 2;
        void *new_ptr = realloc(s->array, new_max * s->elem);
        if (new_ptr == NULL)
        {
            free(copy);
            return -1;
        }
        s->max = new_max;
        s->array = new_ptr;
    }

    s->array[s->len++] = copy;
    return 0;
}

int preappend_string_vector(StringVector *s, const char *value)
{
    char *copy = strdup(value);
    if (!value)
        return -1;

    if (s->len == s->max)
    {
        size_t new_max = 2 * s->max + 2;
        void *new_ptr = realloc(s->array, new_max * s->elem);
        if (new_ptr == NULL)
        {
            free(copy);
            return -1;
        }
        s->max = new_max;
        s->array = new_ptr;
    }

    memmove(&s->array[1], &s->array[0], s->len * s->elem);
    s->len++;
    s->array[0] = copy;
    return 0;
}

static void dump_string_vector(const char *tag, const StringVector * s)
{
    printf("%s (%p):\n", tag, (void *)s);
    if (s != NULL)
    {
        printf("  len = %zu, max = %zu, elem = %zu\n", s->len, s->max, s->elem);
        for (size_t i = 0; i < s->len; i++)
            printf("  %zu [%s]\n", i, s->array[i]);
    }
}

/* main.c */

// #include <stdio.h>
// #include "vector.h"

int main(void)
{
    StringVector arr_test = init_string_vector();
    dump_string_vector("After initialization", &arr_test);
    char value[] = "First Value - appended";
    char new_value[] = "Second Value - prepended";
    append_string_vector(&arr_test, value);
    dump_string_vector("After append", &arr_test);
    printf("%s\n", arr_test.array[0]);
    preappend_string_vector(&arr_test, new_value);
    dump_string_vector("After prepend", &arr_test);
    printf("%s\n", arr_test.array[0]);
    printf("%s\n", arr_test.array[1]);

    size_t counter = 3;
    for (size_t i = 0; i < 4; i++)
    {
        char buffer[32];
        snprintf(buffer, sizeof(buffer), "%zu: appended", counter++);
        append_string_vector(&arr_test, buffer);
        snprintf(buffer, sizeof(buffer), "%zu: prepended", counter++);
        preappend_string_vector(&arr_test, buffer);
    }
    dump_string_vector("After loop", &arr_test);

    destroy_string_vector(&arr_test);
    return 0;
}

示例输出:

After initialization (0x7ff7b2bc52b0):
  len = 0, max = 0, elem = 8
After append (0x7ff7b2bc52b0):
  len = 1, max = 2, elem = 8
  0 [First Value - appended]
First Value - appended
After prepend (0x7ff7b2bc52b0):
  len = 2, max = 2, elem = 8
  0 [Second Value - prepended]
  1 [First Value - appended]
Second Value - prepended
First Value - appended
After loop (0x7ff7b2bc52b0):
  len = 10, max = 14, elem = 8
  0 [10: prepended]
  1 [8: prepended]
  2 [6: prepended]
  3 [4: prepended]
  4 [Second Value - prepended]
  5 [First Value - appended]
  6 [3: appended]
  7 [5: appended]
  8 [7: appended]
  9 [9: appended]