realloc() use-after-free address sanitizer 递归问题

realloc() use-after-free address sanitizer issue with recursion

写了一个中序遍历二叉树的小程序,想练习一下realloc,写了下面的代码,用二叉查找树的元素动态填充数组:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct TreeNode {
    int val;
    struct TreeNode *left;
    struct TreeNode *right;
};
 
void inorder(struct TreeNode *root, int *inorder_arr, int *size) {
    if (root) {
        inorder(root->left, inorder_arr, size);
        inorder_arr[(*size)-1] = root->val;
        (*size)++;
        inorder_arr = (int *)realloc(inorder_arr, (*size) * sizeof(int));
        inorder(root->right, inorder_arr, size);
    }
}

bool findTarget(struct TreeNode *root, int k) {
    if (!root) return false;
    int size =1;
    
    int *inorder_arr = (int *)malloc(sizeof(int));
    inorder(root, inorder_arr, &size);
    return false;
}

int main () {
    // this is for creating a binary search tree which its inorder traversal looks like this : 2,3,4,5,6,7
    struct TreeNode *root = (struct TreeNode *)malloc(sizeof(struct TreeNode));
    root->val = 5;
    root->left = (struct TreeNode *)calloc(1, sizeof(struct TreeNode));
    root->left->val = 3;
    root->right = (struct TreeNode *)calloc(1, sizeof(struct TreeNode));
    root->right->val = 6;
    root->right->right = (struct TreeNode *)calloc(1, sizeof(struct TreeNode));
    root->right->right->val = 7;
    root->left->right = (struct TreeNode *)calloc(1, sizeof(struct TreeNode));
    root->left->right->val = 4;
    root->left->left = (struct TreeNode *)calloc(1, sizeof(struct TreeNode));
    root->left->left->val = 2;
    printf("%d", FindTarget(root, 9));
}

~请忽略我的函数FindTarget总是returnfalse,这不是这个程序的全部目的,也不是目的我的问题。

另外,注意我的二叉搜索树如下所示可能很有用: binary search tree.png

~我的程序似乎可以运行,但是在使用地址清理器编译它时我遇到了一个大问题:

gcc -ggdb 2_sum_bst.c -fsanitize=address

当我 运行 我的程序“a.out”时,我得到以下错误:

=================================================================
==13399==ERROR: AddressSanitizer: heap-use-after-free on address 0x602000000014 at pc 0x55c7a8ef5d77 bp 0x7ffebafbf0c0 sp 0x7ffebafbf0b0
WRITE of size 4 at 0x602000000014 thread T0
    #0 0x55c7a8ef5d76 in inorder /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:15
    #1 0x55c7a8ef5cb8 in inorder /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:14
    #2 0x55c7a8ef5ef3 in findTarget /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:28
    #3 0x55c7a8ef628b in main /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:45
    #4 0x7f1186741b96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96)
    #5 0x55c7a8ef5b79 in _start (/home/yarin/my_dev/C_C++_learning/basics/a.out+0xb79)

0x602000000014 is located 0 bytes to the right of 4-byte region [0x602000000010,0x602000000014)
freed by thread T0 here:
    #0 0x7f1186beff30 in realloc (/usr/lib/x86_64-linux-gnu/libasan.so.4+0xdef30)
    #1 0x55c7a8ef5da6 in inorder /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:17
    #2 0x55c7a8ef5cb8 in inorder /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:14
    #3 0x55c7a8ef5cb8 in inorder /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:14
    #4 0x55c7a8ef5ef3 in findTarget /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:28
    #5 0x55c7a8ef628b in main /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:45
    #6 0x7f1186741b96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96)

previously allocated by thread T0 here:
    #0 0x7f1186befb40 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.4+0xdeb40)
    #1 0x55c7a8ef5ecf in findTarget /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:27
    #2 0x55c7a8ef628b in main /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:45
    #3 0x7f1186741b96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96)

SUMMARY: AddressSanitizer: heap-use-after-free /home/yarin/my_dev/C_C++_learning/basics/2_sum_bst.c:15 in inorder
Shadow bytes around the buggy address:
  0x0c047fff7fb0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x0c047fff7fc0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x0c047fff7fd0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x0c047fff7fe0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x0c047fff7ff0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
=>0x0c047fff8000: fa fa[fd]fa fa fa 00 fa fa fa fa fa fa fa fa fa
  0x0c047fff8010: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8020: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8030: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8040: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x0c047fff8050: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==13399==ABORTING

~ 正如我们所见,我得到了 heap-use-after-free 我很好奇并开始调试程序

~ 我开始调试程序并意识到我的错误,当我的函数“inorder”进行递归调用时它开始回溯指向我的数组的指针在调用堆栈中的每次调用都是唯一的那个特定的调用,但我使用了 realloc 并且 realloc 释放内存并分配新内存并且 returns 指针指向该调用中的那个指针而不是调用堆栈中的所有指针,运行解决这个问题这有点复杂我想我怎么能在不使用全局指针的情况下解决这个问题,我考虑过使用静态指针但是也没有用..

到目前为止,我想不出不使用全局指针如何解决这个问题,我想听听建议,因为我是 C 编程的初学者并且想知道这种情况的最佳解决方案是不使用全局指针或不遍历整棵树获取其大小然后使用 malloc 为所有节点获取足够的内存,正如您在这里看到的我的目的是在我的程序遍历树时这样做.

提前感谢您的阅读和帮助!

问题的评论里说的很清楚,主要问题 是因为在发布代码的 inorder() 函数中, 原始计数器(findTarget() 中的 size)实际上已更新,但 指向动态分配存储开始的原始指针 (inorder_arr in findTarget()) 不是。

下面的代码旨在以更简单的方式重现相同的问题 上下文(简单地附加一个整数,没有任何递归)。

extend_dyn_array_bad()功能类似于inorder() 函数,因为它的参数使它能够改变 计数器但不是指向动态开始的指针 分配的存储空间。

另一方面,extend_dyn_array()函数接收 按地址的指针参数以与计数器相同的方式 参数:

  • an int,以便通过地址传递给函数 可能会改变它,需要一个 int * 参数,
  • an int *,以便通过地址传递给函数 可能会改变它,需要一个 int ** 参数。 (我们只需将 * 添加到原始上下文中的类型即可)。

处理单个变量的多级间接寻址 不是很容易阅读,这就是为什么我建议摆脱 进入时尽快使用这种额外的间接级别 函数,然后在这些本地执行所有实际工作 变量,完成后改变原始参数 多亏了这种额外的间接级别(见三个评论 extend_dyn_array() 函数中的步骤。

另请注意,问题的findTarget()函数依赖于 永远不会释放的数组的动态分配 (free()).

/**
  gcc -std=c99 -o prog_c prog_c.c \
      -pedantic -Wall -Wextra -Wconversion \
      -Wwrite-strings -Wold-style-definition -Wvla \
      -g -O0 -UNDEBUG -fsanitize=address,undefined
**/

#include <stdio.h>
#include <stdlib.h>

void
extend_dyn_array_bad(int *values,
                     int *count)
{
  ++(*count);
  values=realloc(values, sizeof(*values)*(size_t)(*count));
  if(values==NULL)
  {
    abort();
  }
  // after realloc, values may point to a different address
  // which is known and usable by this function but that
  // is not know by the v variable in main().
  values[*count-1]=*count; // initialise last element
}

void
extend_dyn_array(int **inout_values,
                 int *inout_count)
{
  //-- load inout-parameters --
  int *values=*inout_values;
  int count=*inout_count;
  //-- perform actual work --
  ++count;
  values=realloc(values, sizeof(*values)*(size_t)count);
  if(values==NULL)
  {
    abort();
  }
  values[count-1]=count; // initialise last element
  //-- store out-parameters --
  *inout_values=values;
  *inout_count=count;
}

int
main(void)
{
  int *v=NULL;
  int n=0;
  for(int i=0; i<10000; ++i)
  {
    extend_dyn_array(&v, &n); // both v and n may be altered
    // extend_dyn_array_bad(v, &n); // only n may be altered
  }
  for(int i=0;   i<5; ++i) { printf("%d ", v[i]); } printf("... ");
  for(int i=n-5; i<n; ++i) { printf("%d ", v[i]); } printf("\n");
  free(v);
  return 0;
}