如何将 C 字符串数组缩减为唯一值
How to reduce a C array of strings to unique values
我正在研究动态分配数组的基本框架。在本例中,它是一个字符串数组。我正在尝试创建一个函数来从数组中删除所有 non-unique 字符串值并使用 Google 测试对其进行测试。当我测试标题为 unique_string_vec
的函数时,出现以下错误。然而,公平地说,代码似乎工作正常,编译器本身并没有抛出错误。相反,google 测试似乎抛出了一个错误。我正在尝试确定这是否是 google 测试的问题,或者我是否确实存在需要修复的内存管理问题。如有任何想法或建议,我们将不胜感激。
unit_tests: malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)
我将在下面发布框架的相关部分,如果有任何关于如何解决该问题的想法,我将不胜感激。
vector.h
#ifndef ARRAY_H
#define ARRAY_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
typedef enum
{
FLOAT,
DOUBLE,
CHAR,
INT,
STRING
} dat_type;
// --------------------------------------------------------------------------------
typedef struct
{
char **array;
size_t len;
int elem;
dat_type dat;
} StringVector;
// --------------------------------------------------------------------------------
int string_vector_mem_alloc(StringVector *array, size_t num_indices);
// --------------------------------------------------------------------------------
StringVector init_string_vector();
// --------------------------------------------------------------------------------
int append_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------
void pop_string_vector(StringVector *array, int index);
// --------------------------------------------------------------------------------
void unique_string_vec(StringVector *array);
// --------------------------------------------------------------------------------
void free_string_array(StringVector *array);
// --------------------------------------------------------------------------------
#endif /* ARRAY_H */
array.c
int string_vector_mem_alloc(StringVector *array, size_t num_indices) {
// Determine the total memory allocation and assign to pointer
void *pointer;
pointer = malloc(num_indices * array->elem);
// If memory is full fail gracefully
if (pointer == NULL) {
printf("Unable to allocate memory, exiting.\n");
free(pointer);
return 0;
}
// Allocate resources and instantiate Array
else {
array->array = pointer;
array->len = 0;
return 1;
}
}
// --------------------------------------------------------------------------------
StringVector init_string_vector() {
StringVector array;
array.dat = STRING;
array.elem = sizeof(char);
string_vector_mem_alloc(&array, array.elem);
return array;
}
// --------------------------------------------------------------------------------
int append_string_vector(StringVector *array, char *value) {
value = strdup(value);
if (!value) {
return -1;
}
array->len++;
char **resized = realloc(array->array, sizeof(char *)*array->len + 1);
if (!resized) {
free(value);
return -1;
}
resized[array->len-1] = value;
array->array = resized;
return 0;
}
// --------------------------------------------------------------------------------
void pop_string_vector(StringVector *array, int index) {
if (index >= array->len) {
printf("Index %d out of bounds for pop_string_vector\n", index);
}
unsigned char **dst = (unsigned char **)array->array + index * array->elem;
memmove(array->array + index, array->array + index + 1, \
sizeof *array->array * array->len -1);
array->len -= 1;
}
// --------------------------------------------------------------------------------
void unique_string_vec(StringVector *array) {
int repeat_status = 0;
for (int i = 0; ; i++) {
if (i >= array->len) break;
for (int j = i + 1; ; j++) {
if (j >= array->len) break;
if (strcmp(array->array[i], array->array[j]) == 0){
repeat_status = 1;
pop_string_vector(array, j);
}
}
if (repeat_status == 1) {
pop_string_vector(array, i);
repeat_status = 0;
i -= 1;
}
}
}
// --------------------------------------------------------------------------------
void free_string_array(StringVector *array) {
if (array != NULL) {
for (int i = 0; i < array->len; i++) {
free(array->array[i]);
}
}
free(array->array);
// Reset all variables in the struct
array->array = NULL;
array->len = 0;
array->elem = 0;
}
test_vector.cpp
#include <gtest/gtest.h>
extern "C" {
#include "vector.h"
}
TEST(string, unique_string_vec) {
StringVector arr_test = init_string_vector();
char one[] = "Hello";
char two[] = "World";
char three[] = "Hello";
char four[] = "Goodbye";
append_string_vector(&arr_test, one);
append_string_vector(&arr_test, two);
append_string_vector(&arr_test, three);
append_string_vector(&arr_test, four);
// - Even though the code works correctly, I get an error thrown
// by google test when I apply this function
unique_string_vec(&arr_test);
// These produce the correct value
printf("%s\n", arr_test.array[0]);
printf("%s\n", arr_test.array[1]);
printf("%d\n", arr_test.len);
// EXPECT_EQ(arr_test.len, 2);
// int result1 = strcmp(two, arr_test.array[0]);
// int result2 = strcmp(four, arr_test.array[1]);
// EXPECT_EQ(result1, 0);
// EXPECT_EQ(result2, 0);
}
malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)
这个错误意味着:你有损坏的堆。堆损坏示例:
- 写入已分配缓冲区的末尾,
free()
未分配的内存,
free()
两次分配一些内存,等等
通过检查很难发现堆损坏错误。幸运的是,有一些工具可以 直接 指出问题所在:Valgrind and Address Sanitizer.
以下是 Address Sanitizer 对您的程序的评价:
==148888==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x604000000031 at pc 0x7facb56f055e bp 0x7ffe2ddb46b0 sp 0x7ffe2ddb3e60
READ of size 31 at 0x604000000031 thread T0
#0 0x7facb56f055d in __interceptor_memmove ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810
#1 0x5621653f67f3 in pop_string_vector /tmp/array.c:54
#2 0x5621653f695a in unique_string_vec /tmp/array.c:68
#3 0x5621653f6e41 in main /tmp/array.c:106
#4 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
#5 0x5621653f61a9 in _start (/tmp/a.out+0x11a9)
0x604000000031 is located 0 bytes to the right of 33-byte region [0x604000000010,0x604000000031)
allocated by thread T0 here:
#0 0x7facb5765b48 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164
#1 0x5621653f65fe in append_string_vector /tmp/array.c:38
#2 0x5621653f6e35 in main /tmp/array.c:103
#3 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
SUMMARY: AddressSanitizer: heap-buffer-overflow ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810 in __interceptor_memmove
以上错误不是损坏的原因(越界读取内存不会导致损坏),但它表明您没有正确使用堆。
修复第一个错误后,下一个(可能)将成为崩溃的根本原因。
事实证明,在本例中,问题出在 init_string_vector
函数中。标题为 array.elem
的变量被实例化为 sizeof(char)
,应该是 sizeof(char *)
。这是一个简单的错误,但却导致了内存分配错误。
我正在研究动态分配数组的基本框架。在本例中,它是一个字符串数组。我正在尝试创建一个函数来从数组中删除所有 non-unique 字符串值并使用 Google 测试对其进行测试。当我测试标题为 unique_string_vec
的函数时,出现以下错误。然而,公平地说,代码似乎工作正常,编译器本身并没有抛出错误。相反,google 测试似乎抛出了一个错误。我正在尝试确定这是否是 google 测试的问题,或者我是否确实存在需要修复的内存管理问题。如有任何想法或建议,我们将不胜感激。
unit_tests: malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)
我将在下面发布框架的相关部分,如果有任何关于如何解决该问题的想法,我将不胜感激。
vector.h
#ifndef ARRAY_H
#define ARRAY_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
typedef enum
{
FLOAT,
DOUBLE,
CHAR,
INT,
STRING
} dat_type;
// --------------------------------------------------------------------------------
typedef struct
{
char **array;
size_t len;
int elem;
dat_type dat;
} StringVector;
// --------------------------------------------------------------------------------
int string_vector_mem_alloc(StringVector *array, size_t num_indices);
// --------------------------------------------------------------------------------
StringVector init_string_vector();
// --------------------------------------------------------------------------------
int append_string_vector(StringVector *s, char *value);
// --------------------------------------------------------------------------------
void pop_string_vector(StringVector *array, int index);
// --------------------------------------------------------------------------------
void unique_string_vec(StringVector *array);
// --------------------------------------------------------------------------------
void free_string_array(StringVector *array);
// --------------------------------------------------------------------------------
#endif /* ARRAY_H */
array.c
int string_vector_mem_alloc(StringVector *array, size_t num_indices) {
// Determine the total memory allocation and assign to pointer
void *pointer;
pointer = malloc(num_indices * array->elem);
// If memory is full fail gracefully
if (pointer == NULL) {
printf("Unable to allocate memory, exiting.\n");
free(pointer);
return 0;
}
// Allocate resources and instantiate Array
else {
array->array = pointer;
array->len = 0;
return 1;
}
}
// --------------------------------------------------------------------------------
StringVector init_string_vector() {
StringVector array;
array.dat = STRING;
array.elem = sizeof(char);
string_vector_mem_alloc(&array, array.elem);
return array;
}
// --------------------------------------------------------------------------------
int append_string_vector(StringVector *array, char *value) {
value = strdup(value);
if (!value) {
return -1;
}
array->len++;
char **resized = realloc(array->array, sizeof(char *)*array->len + 1);
if (!resized) {
free(value);
return -1;
}
resized[array->len-1] = value;
array->array = resized;
return 0;
}
// --------------------------------------------------------------------------------
void pop_string_vector(StringVector *array, int index) {
if (index >= array->len) {
printf("Index %d out of bounds for pop_string_vector\n", index);
}
unsigned char **dst = (unsigned char **)array->array + index * array->elem;
memmove(array->array + index, array->array + index + 1, \
sizeof *array->array * array->len -1);
array->len -= 1;
}
// --------------------------------------------------------------------------------
void unique_string_vec(StringVector *array) {
int repeat_status = 0;
for (int i = 0; ; i++) {
if (i >= array->len) break;
for (int j = i + 1; ; j++) {
if (j >= array->len) break;
if (strcmp(array->array[i], array->array[j]) == 0){
repeat_status = 1;
pop_string_vector(array, j);
}
}
if (repeat_status == 1) {
pop_string_vector(array, i);
repeat_status = 0;
i -= 1;
}
}
}
// --------------------------------------------------------------------------------
void free_string_array(StringVector *array) {
if (array != NULL) {
for (int i = 0; i < array->len; i++) {
free(array->array[i]);
}
}
free(array->array);
// Reset all variables in the struct
array->array = NULL;
array->len = 0;
array->elem = 0;
}
test_vector.cpp
#include <gtest/gtest.h>
extern "C" {
#include "vector.h"
}
TEST(string, unique_string_vec) {
StringVector arr_test = init_string_vector();
char one[] = "Hello";
char two[] = "World";
char three[] = "Hello";
char four[] = "Goodbye";
append_string_vector(&arr_test, one);
append_string_vector(&arr_test, two);
append_string_vector(&arr_test, three);
append_string_vector(&arr_test, four);
// - Even though the code works correctly, I get an error thrown
// by google test when I apply this function
unique_string_vec(&arr_test);
// These produce the correct value
printf("%s\n", arr_test.array[0]);
printf("%s\n", arr_test.array[1]);
printf("%d\n", arr_test.len);
// EXPECT_EQ(arr_test.len, 2);
// int result1 = strcmp(two, arr_test.array[0]);
// int result2 = strcmp(four, arr_test.array[1]);
// EXPECT_EQ(result1, 0);
// EXPECT_EQ(result2, 0);
}
malloc.c:2617: sysmalloc: Assertion (old_top == initial_top (av) && old_size == 0)
这个错误意味着:你有损坏的堆。堆损坏示例:
- 写入已分配缓冲区的末尾,
free()
未分配的内存,free()
两次分配一些内存,等等
通过检查很难发现堆损坏错误。幸运的是,有一些工具可以 直接 指出问题所在:Valgrind and Address Sanitizer.
以下是 Address Sanitizer 对您的程序的评价:
==148888==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x604000000031 at pc 0x7facb56f055e bp 0x7ffe2ddb46b0 sp 0x7ffe2ddb3e60
READ of size 31 at 0x604000000031 thread T0
#0 0x7facb56f055d in __interceptor_memmove ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810
#1 0x5621653f67f3 in pop_string_vector /tmp/array.c:54
#2 0x5621653f695a in unique_string_vec /tmp/array.c:68
#3 0x5621653f6e41 in main /tmp/array.c:106
#4 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
#5 0x5621653f61a9 in _start (/tmp/a.out+0x11a9)
0x604000000031 is located 0 bytes to the right of 33-byte region [0x604000000010,0x604000000031)
allocated by thread T0 here:
#0 0x7facb5765b48 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164
#1 0x5621653f65fe in append_string_vector /tmp/array.c:38
#2 0x5621653f6e35 in main /tmp/array.c:103
#3 0x7facb55017fc in __libc_start_main ../csu/libc-start.c:332
SUMMARY: AddressSanitizer: heap-buffer-overflow ../../../../src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc:810 in __interceptor_memmove
以上错误不是损坏的原因(越界读取内存不会导致损坏),但它表明您没有正确使用堆。
修复第一个错误后,下一个(可能)将成为崩溃的根本原因。
事实证明,在本例中,问题出在 init_string_vector
函数中。标题为 array.elem
的变量被实例化为 sizeof(char)
,应该是 sizeof(char *)
。这是一个简单的错误,但却导致了内存分配错误。