如何将 C 字符串数组缩减为唯一值

How to reduce a C array of strings to unique values

我正在研究动态分配数组的基本框架。在本例中,它是一个字符串数组。我正在尝试创建一个函数来从数组中删除所有 non-unique 字符串值并使用 Google 测试对其进行测试。当我测试标题为 unique_string_vec 的函数时,出现以下错误。然而,公平地说,代码似乎工作正常,编译器本身并没有抛出错误。相反,google 测试似乎抛出了一个错误。我正在尝试确定这是否是 google 测试的问题,或者我是否确实存在需要修复的内存管理问题。如有任何想法或建议,我们将不胜感激。

unit_tests: malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)

我将在下面发布框架的相关部分,如果有任何关于如何解决该问题的想法,我将不胜感激。

vector.h

#ifndef ARRAY_H
#define ARRAY_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

typedef enum
{
    FLOAT,
    DOUBLE,
    CHAR,
    INT,
    STRING
} dat_type;
// --------------------------------------------------------------------------------

typedef struct
{
    char **array;
    size_t len;
    int elem;
    dat_type dat;
} StringVector;
// --------------------------------------------------------------------------------

int string_vector_mem_alloc(StringVector *array, size_t num_indices);
// --------------------------------------------------------------------------------

StringVector init_string_vector();
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------

void pop_string_vector(StringVector *array, int index);
// --------------------------------------------------------------------------------

void unique_string_vec(StringVector *array);
// --------------------------------------------------------------------------------

void free_string_array(StringVector *array);
// --------------------------------------------------------------------------------
#endif /* ARRAY_H */

array.c

int string_vector_mem_alloc(StringVector *array, size_t num_indices) {
    // Determine the total memory allocation and assign to pointer
    void *pointer;
    pointer = malloc(num_indices * array->elem);

    // If memory is full fail gracefully
    if (pointer == NULL) {
        printf("Unable to allocate memory, exiting.\n");
        free(pointer);
        return 0;
    }
    // Allocate resources and instantiate Array
    else {
        array->array = pointer;
        array->len = 0;
        return 1;
    }
}
// --------------------------------------------------------------------------------

StringVector init_string_vector() {
    StringVector array;
    array.dat = STRING;
    array.elem = sizeof(char);
    string_vector_mem_alloc(&array, array.elem);
    return array;
}
// --------------------------------------------------------------------------------

int append_string_vector(StringVector *array, char *value) {
    value = strdup(value);
    if (!value) {
        return -1;
    }
    array->len++;
    char **resized = realloc(array->array, sizeof(char *)*array->len + 1);
    if (!resized) {
        free(value);
        return -1;
    }
    resized[array->len-1] = value;
    array->array = resized;
    return 0;
}
// --------------------------------------------------------------------------------

void pop_string_vector(StringVector *array, int index) {
    if (index >= array->len) {
        printf("Index %d out of bounds for pop_string_vector\n", index);
    }
    unsigned char **dst = (unsigned char **)array->array + index * array->elem;
    memmove(array->array + index, array->array + index + 1,  \
            sizeof *array->array * array->len -1);
    array->len -= 1;
}
// --------------------------------------------------------------------------------

void unique_string_vec(StringVector *array) {
    int repeat_status = 0;
    for (int i = 0; ; i++) {
        if (i >= array->len) break;
        for (int j = i + 1; ; j++) {
            if (j >= array->len) break;
            if (strcmp(array->array[i], array->array[j]) == 0){
                repeat_status = 1;
                pop_string_vector(array, j);
            }
        }
        if (repeat_status == 1) {
            pop_string_vector(array, i);
            repeat_status = 0;
            i -= 1;
        }
    }
}
// --------------------------------------------------------------------------------

void free_string_array(StringVector *array) {
    if (array != NULL) {
        for (int i = 0; i < array->len; i++) {
            free(array->array[i]);
        }
    }
    free(array->array);
    // Reset all variables in the struct
    array->array = NULL;
    array->len = 0;
    array->elem = 0;
}

test_vector.cpp

#include <gtest/gtest.h>

extern "C" {
#include "vector.h"
}

TEST(string, unique_string_vec) {
    StringVector arr_test = init_string_vector();
    char one[] = "Hello";
    char two[] = "World";
    char three[] = "Hello";
    char four[] = "Goodbye";
    append_string_vector(&arr_test, one);
    append_string_vector(&arr_test, two);
    append_string_vector(&arr_test, three);
    append_string_vector(&arr_test, four);
    // - Even though the code works correctly, I get an error thrown
    //   by google test when I apply this function
    unique_string_vec(&arr_test);
    // These produce the correct value
    printf("%s\n", arr_test.array[0]);
    printf("%s\n", arr_test.array[1]);
    printf("%d\n", arr_test.len);

//  EXPECT_EQ(arr_test.len, 2);
//  int result1 = strcmp(two, arr_test.array[0]);
//  int result2 = strcmp(four, arr_test.array[1]);

//  EXPECT_EQ(result1, 0);
//  EXPECT_EQ(result2, 0);
}

malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)

这个错误意味着:你有损坏的堆。堆损坏示例:

  • 写入已分配缓冲区的末尾,
  • free()未分配的内存,
  • free()两次分配一些内存,等等

通过检查很难发现堆损坏错误。幸运的是,有一些工具可以 直接 指出问题所在:Valgrind and Address Sanitizer.

以下是 Address Sanitizer 对您的程序的评价:

==148888==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x604000000031 at pc 0x7facb56f055e bp 0x7ffe2ddb46b0 sp 0x7ffe2ddb3e60
READ of size 31 at 0x604000000031 thread T0
    #0 0x7facb56f055d in __interceptor_memmove ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810
    #1 0x5621653f67f3 in pop_string_vector /tmp/array.c:54
    #2 0x5621653f695a in unique_string_vec /tmp/array.c:68
    #3 0x5621653f6e41 in main /tmp/array.c:106
    #4 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
    #5 0x5621653f61a9 in _start (/tmp/a.out+0x11a9)

0x604000000031 is located 0 bytes to the right of 33-byte region [0x604000000010,0x604000000031)
allocated by thread T0 here:
    #0 0x7facb5765b48 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164
    #1 0x5621653f65fe in append_string_vector /tmp/array.c:38
    #2 0x5621653f6e35 in main /tmp/array.c:103
    #3 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332

SUMMARY: AddressSanitizer: heap-buffer-overflow ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810 in __interceptor_memmove

以上错误不是损坏的原因(越界读取内存不会导致损坏),但它表明您没有正确使用堆。

修复第一个错误后,下一个(可能)将成为崩溃的根本原因。

事实证明,在本例中,问题出在 init_string_vector 函数中。标题为 array.elem 的变量被实例化为 sizeof(char),应该是 sizeof(char *)。这是一个简单的错误,但却导致了内存分配错误。