为双指针的顶层分配内存

Allocate memory for top level of a double pointer

几天前,我在社区里问了这个,很快就得到了小伙伴们的答复。该解决方案在 Valgrind 上也很棒。

并且根据his/her的建议,我为split_dataset编写了以下函数作为决策树算法的一部分。下面是我写的代码,

#include <stdio.h>
#include <stdlib.h>

void printArray(double array[], unsigned int size)
{
  for (unsigned int i = 0; i < size; ++i) {
    printf("%.3f  ", array[i]);
  }
  printf("\n");
}

typedef struct
{
    size_t length;
    double **designMatrix_Y;
} DecisionTreeData;

DecisionTreeData *split_dataset(int index,                    //var index
                                double value,                 //best cutoff
                                int row,                      //nrows of design matrix, number of variables
                                double **designMatrix_Y)      //design matrix (X) and response Y
{
    // Buffers to hold rows of data as we are distributing rows based on the split.
    double **leftDesignMatrix_Y        = calloc(row, sizeof *leftDesignMatrix_Y);
    double **rightDesignMatrix_Y       = calloc(row, sizeof *rightDesignMatrix_Y);

    size_t left_count = 0;
    size_t right_count = 0;

    for (size_t i = 0; i < row; ++i)
    {

        if (designMatrix_Y[index][i] <= value)
        {
            // Copy the row into the left half
            leftDesignMatrix_Y[left_count]    = designMatrix_Y[i];
            left_count++;

        }
        else
        {
            // Copy the row into the right half
            rightDesignMatrix_Y[right_count]    = designMatrix_Y[i];
            right_count++;

        }

        //realloc the memory for exact space
        leftDesignMatrix_Y  = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
        rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));

    }

    DecisionTreeData *data_split = malloc(sizeof(DecisionTreeData) * 2);

    data_split[0] = (DecisionTreeData){left_count, leftDesignMatrix_Y};
    data_split[1] = (DecisionTreeData){right_count, rightDesignMatrix_Y};

    return data_split;
}

double *alloc_dvector(unsigned int length)
{
  return malloc((length * (sizeof(double))));
}

void dealloc_dvector(double *array)
{
  free((char *) array);
}

double **alloc_2dArray(unsigned int row, unsigned int col)
{
  double **Array = (double **) malloc((size_t) ((row) * (sizeof(double *))));

  for (unsigned int i = 0; i < row; i++) {
    Array[i] = alloc_dvector(col);
  }

  return Array;
}

void dealloc_2dArray(double **Array, unsigned int row, unsigned int col)
{
  for (unsigned int i = 0; i < row; i++)
  {
    dealloc_dvector(Array[i]);
  }
  free((char *) Array);
}

int main()
{

  double **designMatrix_Y = alloc_2dArray(4,4);

  for(int i = 0; i<4; i++)
  {
    for(int j= 0; j<4; j++)
    {
      designMatrix_Y[i][j] = i*j;
    }
  }


  DecisionTreeData *dataSplits = split_dataset(2,
                                               2, 
                                               4,
                                               designMatrix_Y);

  //rename for convenience
  size_t leftSize  = (dataSplits[0]).length;
  double **designMatrix_Y_L = (dataSplits[0]).designMatrix_Y;
  double **designMatrix_Y_R = (dataSplits[1]).designMatrix_Y;

  for(int i = 0; i<leftSize; ++i)
    printArray(designMatrix_Y_L[i],4);

  free(designMatrix_Y_L);
  free(designMatrix_Y_R);
  free(dataSplits);

  dealloc_2dArray(designMatrix_Y,4,4);

  return 0;
  

}

它在我的 mac mini M1 中运行良好(不支持 Valgrind,看不到日志)但在 Linux 上崩溃(分段错误)。另外,我已将来自 Valgrind 的错误消息与上面的代码一起附上。

==4777== LEAK SUMMARY:
==4777==    definitely lost: 0 bytes in 0 blocks
==4777==    indirectly lost: 0 bytes in 0 blocks
==4777==      possibly lost: 0 bytes in 0 blocks
==4777==    still reachable: 224 bytes in 8 blocks
==4777==         suppressed: 0 bytes in 0 blocks
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
==4777==
==4777== 1 errors in context 1 of 4:
==4777== Invalid read of size 8
==4777==    at 0x4006C7: printArray (test.c:7)
==4777==    by 0x400A5E: main (test.c:117)
==4777==  Address 0x0 is not stack'd, malloc'd or (recently) free'd
==4777==
==4777==
==4777== 1 errors in context 2 of 4:
==4777== Use of uninitialised value of size 8
==4777==    at 0x4006C7: printArray (test.c:7)
==4777==    by 0x400A5E: main (test.c:117)
==4777==
==4777==
==4777== 1 errors in context 3 of 4:
==4777== Invalid write of size 8
==4777==    at 0x4007B4: split_dataset (test.c:36)
==4777==    by 0x400A06: main (test.c:106)
==4777==  Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4777==    at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4777==    by 0x400809: split_dataset (test.c:49)
==4777==    by 0x400A06: main (test.c:106)
==4777==
==4777==
==4777== 2 errors in context 4 of 4:
==4777== Invalid write of size 8
==4777==    at 0x4007E7: split_dataset (test.c:43)
==4777==    by 0x400A06: main (test.c:106)
==4777==  Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4777==    at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4777==    by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4777==    by 0x400828: split_dataset (test.c:50)
==4777==    by 0x400A06: main (test.c:106)
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
(END)

根据错误代码,我意识到错误可能来自printArray()。所以,我 运行 再一次没有 printArray() 的 for 循环。来自 Valgrind 的日志更改如下。

==4021== HEAP SUMMARY:
==4021==     in use at exit: 0 bytes in 0 blocks
==4021==   total heap usage: 15 allocs, 15 frees, 336 bytes allocated
==4021==
==4021== All heap blocks were freed -- no leaks are possible
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
==4021==
==4021== 1 errors in context 1 of 2:
==4021== Invalid write of size 8
==4021==    at 0x4007B4: split_dataset (test.c:36)
==4021==    by 0x400A06: main (test.c:106)
==4021==  Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4021==    at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4021==    by 0x400809: split_dataset (test.c:49)
==4021==    by 0x400A06: main (test.c:106)
==4021==
==4021==
==4021== 2 errors in context 2 of 2:
==4021== Invalid write of size 8
==4021==    at 0x4007E7: split_dataset (test.c:43)
==4021==    by 0x400A06: main (test.c:106)
==4021==  Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4021==    at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4021==    by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4021==    by 0x400828: split_dataset (test.c:50)
==4021==    by 0x400A06: main (test.c:106)
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
(END)

所以,有人可以帮助解决这个问题吗?有什么建议吗?感谢您提前抽出宝贵时间进行我冗长的描述。如果有任何不清楚的地方,请告诉我。

gcc 版本 9.3.0

这两个重新分配不属于它们所在的位置 中 for-loop。 for-loop 之后 left/ride 拆分的最终大小是固定的。通过在每次迭代中调整它们的大小,您将它们固定为当前大小,这意味着下一次迭代将保证 out-of-range 一个槽(无论目标是哪一侧)。地址清理程序报告将证实这一点。

双方都被故意过度分配以容纳 row 潜在的指针。那些只需要在拆分迭代完成后尘埃落定后进行调整修剪。

下面这个(标记为此处):

for (size_t i = 0; i < row; ++i)
{

    if (designMatrix_Y[index][i] <= value)
    {
        // Copy the row into the left half
        leftDesignMatrix_Y[left_count]    = designMatrix_Y[i];
        left_count++;

    }
    else
    {
        // Copy the row into the right half
        rightDesignMatrix_Y[right_count]    = designMatrix_Y[i];
        right_count++;

    }

    // HERE HERE HERE HERE HERE
    //realloc the memory for exact space
    leftDesignMatrix_Y  = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
    rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));

}

应该是这样的:

for (size_t i = 0; i < row; ++i)
{

    if (designMatrix_Y[index][i] <= value)
    {
        // Copy the row into the left half
        leftDesignMatrix_Y[left_count]    = designMatrix_Y[i];
        left_count++;

    }
    else
    {
        // Copy the row into the right half
        rightDesignMatrix_Y[right_count]    = designMatrix_Y[i];
        right_count++;

    }
}

//realloc the memory for exact space
leftDesignMatrix_Y  = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));