C中这种狡猾的内存泄漏的原因是什么
What is the cause of this sly memory leak in C
我有一个函数,它接受一个只读字符串数组,复制它并将新的一个传递给另一个函数,该函数根据某些条件将一些字符串放入一个链表中,该链表是然后返回到最初的调用者。我需要同时复制数组和其中的字符串,因为必须保留原始数组,并且必须以某种方式修改字符串,然后它们才会出现在列表中。
结果总是正确的,但 valgrind 抱怨内存泄漏,我注意到它的发生取决于 选择要放入列表的字符串 的标准,或者如果数组包含重复的字符串(实际上,这是这两个因素的混合,我无法理解为什么它会这样)。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
const char *str;
struct node *next;
} NODE;
typedef NODE *LIST;
LIST bar(char **arr, size_t n)
{
LIST l = NULL;
for (size_t i = 0; i < n; i++) {
/* FOOTNOTE #1 */
if (i == 3 || i == 4) {
NODE *n = malloc(sizeof(NODE));
n->str = arr[i];
n->next = l;
l = n;
}
}
return l;
}
LIST foo(const char **arr, size_t n)
{
char **copy = malloc(sizeof(char *) * n);
for (size_t i = 0; i < n; i++) {
/* each original string is copied into a new one, which is modified in some way before
* being saved in the new array. Here the original strings are saved directly in the
* array because the code that alters them doesn't affect the memory leak
*/
// the malloc at this line is the one that valgrind doesn't like
copy[i] = malloc(strlen(arr[i]) + 1);
strcpy(copy[i], arr[i]);
}
LIST l = bar(copy, n);
// checks which strings haven't been put in the list and frees them
for (size_t i = 0; i < n; i++) {
NODE *n = l;
while (n != NULL && strcmp(copy[i], n->str) != 0) {
n = n->next;
}
if (n == NULL) {
free((char *) copy[i]);
}
}
// frees the array, at this point only the strings in the list should be still allocated
free(copy);
return l;
}
int main(void)
{
/* FOOTNOTE #2 */
const char *arr[] = {"amet", "sit", "dolor", "sit", "amet"};
LIST l = foo(arr, sizeof(arr) / sizeof(arr[0]));
// for every node in the list prints the string in it and frees both the string and the node
while (l != NULL) {
printf("%s", l->str);
if (l->next != NULL) {
printf("%s", ", ");
}
NODE *tmp = l;
l = l->next;
free((char *) tmp->str);
free(tmp);
}
free(l);
puts("");
}
脚注 #1:这是选择将哪些字符串放入列表的标准。如果我使用 i == 3 || i == 4
并且脚注 #2 中的数组包含重复的字符串,valgrind 会发现泄漏。如果我使用 i % 2 == 1
没问题,但如果数组不包含重复项,那么使用 i == 3 || i == 4
也没有问题。打败我了。
这是上述代码的 valgrind 输出(i == 3 || i == 4
有重复):
$ gcc prog.c -g -o prog
$ valgrind --tool=memcheck --leak-check=full ./prog
==12118== Memcheck, a memory error detector
==12118== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==12118== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==12118== Command: ./prog
==12118==
amet, sit
==12118==
==12118== HEAP SUMMARY:
==12118== in use at exit: 9 bytes in 2 blocks
==12118== total heap usage: 9 allocs, 7 frees, 1,120 bytes allocated
==12118==
==12118== 9 bytes in 2 blocks are definitely lost in loss record 1 of 1
==12118== at 0x483980B: malloc (vg_replace_malloc.c:309)
==12118== by 0x40128E: foo (test2.c:38)
==12118== by 0x4013E1: main (test2.c:66)
==12118==
==12118== LEAK SUMMARY:
==12118== definitely lost: 9 bytes in 2 blocks
==12118== indirectly lost: 0 bytes in 0 blocks
==12118== possibly lost: 0 bytes in 0 blocks
==12118== still reachable: 0 bytes in 0 blocks
==12118== suppressed: 0 bytes in 0 blocks
==12118==
==12118== For lists of detected and suppressed errors, rerun with: -s
==12118== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
感谢 Avi Berger 提供的提示,找到了罪魁祸首。鉴于字符串是动态分配的,在 foo()
末尾 free
它们的正确方法是检查数组中的地址是否等于列表中的地址:
while (n != NULL && copy[i] != n->str) {
n = n->next;
}
那是因为如果列表中的字符串在数组中有重复项,像我之前做的那样比较 "by value" 和 strcmp
将留下所有重复项,这就是内存所在的位置泄漏来自。
我有一个函数,它接受一个只读字符串数组,复制它并将新的一个传递给另一个函数,该函数根据某些条件将一些字符串放入一个链表中,该链表是然后返回到最初的调用者。我需要同时复制数组和其中的字符串,因为必须保留原始数组,并且必须以某种方式修改字符串,然后它们才会出现在列表中。
结果总是正确的,但 valgrind 抱怨内存泄漏,我注意到它的发生取决于 选择要放入列表的字符串 的标准,或者如果数组包含重复的字符串(实际上,这是这两个因素的混合,我无法理解为什么它会这样)。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
const char *str;
struct node *next;
} NODE;
typedef NODE *LIST;
LIST bar(char **arr, size_t n)
{
LIST l = NULL;
for (size_t i = 0; i < n; i++) {
/* FOOTNOTE #1 */
if (i == 3 || i == 4) {
NODE *n = malloc(sizeof(NODE));
n->str = arr[i];
n->next = l;
l = n;
}
}
return l;
}
LIST foo(const char **arr, size_t n)
{
char **copy = malloc(sizeof(char *) * n);
for (size_t i = 0; i < n; i++) {
/* each original string is copied into a new one, which is modified in some way before
* being saved in the new array. Here the original strings are saved directly in the
* array because the code that alters them doesn't affect the memory leak
*/
// the malloc at this line is the one that valgrind doesn't like
copy[i] = malloc(strlen(arr[i]) + 1);
strcpy(copy[i], arr[i]);
}
LIST l = bar(copy, n);
// checks which strings haven't been put in the list and frees them
for (size_t i = 0; i < n; i++) {
NODE *n = l;
while (n != NULL && strcmp(copy[i], n->str) != 0) {
n = n->next;
}
if (n == NULL) {
free((char *) copy[i]);
}
}
// frees the array, at this point only the strings in the list should be still allocated
free(copy);
return l;
}
int main(void)
{
/* FOOTNOTE #2 */
const char *arr[] = {"amet", "sit", "dolor", "sit", "amet"};
LIST l = foo(arr, sizeof(arr) / sizeof(arr[0]));
// for every node in the list prints the string in it and frees both the string and the node
while (l != NULL) {
printf("%s", l->str);
if (l->next != NULL) {
printf("%s", ", ");
}
NODE *tmp = l;
l = l->next;
free((char *) tmp->str);
free(tmp);
}
free(l);
puts("");
}
脚注 #1:这是选择将哪些字符串放入列表的标准。如果我使用 i == 3 || i == 4
并且脚注 #2 中的数组包含重复的字符串,valgrind 会发现泄漏。如果我使用 i % 2 == 1
没问题,但如果数组不包含重复项,那么使用 i == 3 || i == 4
也没有问题。打败我了。
这是上述代码的 valgrind 输出(i == 3 || i == 4
有重复):
$ gcc prog.c -g -o prog
$ valgrind --tool=memcheck --leak-check=full ./prog
==12118== Memcheck, a memory error detector
==12118== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==12118== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==12118== Command: ./prog
==12118==
amet, sit
==12118==
==12118== HEAP SUMMARY:
==12118== in use at exit: 9 bytes in 2 blocks
==12118== total heap usage: 9 allocs, 7 frees, 1,120 bytes allocated
==12118==
==12118== 9 bytes in 2 blocks are definitely lost in loss record 1 of 1
==12118== at 0x483980B: malloc (vg_replace_malloc.c:309)
==12118== by 0x40128E: foo (test2.c:38)
==12118== by 0x4013E1: main (test2.c:66)
==12118==
==12118== LEAK SUMMARY:
==12118== definitely lost: 9 bytes in 2 blocks
==12118== indirectly lost: 0 bytes in 0 blocks
==12118== possibly lost: 0 bytes in 0 blocks
==12118== still reachable: 0 bytes in 0 blocks
==12118== suppressed: 0 bytes in 0 blocks
==12118==
==12118== For lists of detected and suppressed errors, rerun with: -s
==12118== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
感谢 Avi Berger 提供的提示,找到了罪魁祸首。鉴于字符串是动态分配的,在 foo()
末尾 free
它们的正确方法是检查数组中的地址是否等于列表中的地址:
while (n != NULL && copy[i] != n->str) {
n = n->next;
}
那是因为如果列表中的字符串在数组中有重复项,像我之前做的那样比较 "by value" 和 strcmp
将留下所有重复项,这就是内存所在的位置泄漏来自。