为双指针的顶层分配内存
Allocate memory for top level of a double pointer
几天前,我在社区里问了这个,很快就得到了小伙伴们的答复。该解决方案在 Valgrind 上也很棒。
并且根据his/her的建议,我为split_dataset
编写了以下函数作为决策树算法的一部分。下面是我写的代码,
#include <stdio.h>
#include <stdlib.h>
void printArray(double array[], unsigned int size)
{
for (unsigned int i = 0; i < size; ++i) {
printf("%.3f ", array[i]);
}
printf("\n");
}
typedef struct
{
size_t length;
double **designMatrix_Y;
} DecisionTreeData;
DecisionTreeData *split_dataset(int index, //var index
double value, //best cutoff
int row, //nrows of design matrix, number of variables
double **designMatrix_Y) //design matrix (X) and response Y
{
// Buffers to hold rows of data as we are distributing rows based on the split.
double **leftDesignMatrix_Y = calloc(row, sizeof *leftDesignMatrix_Y);
double **rightDesignMatrix_Y = calloc(row, sizeof *rightDesignMatrix_Y);
size_t left_count = 0;
size_t right_count = 0;
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
DecisionTreeData *data_split = malloc(sizeof(DecisionTreeData) * 2);
data_split[0] = (DecisionTreeData){left_count, leftDesignMatrix_Y};
data_split[1] = (DecisionTreeData){right_count, rightDesignMatrix_Y};
return data_split;
}
double *alloc_dvector(unsigned int length)
{
return malloc((length * (sizeof(double))));
}
void dealloc_dvector(double *array)
{
free((char *) array);
}
double **alloc_2dArray(unsigned int row, unsigned int col)
{
double **Array = (double **) malloc((size_t) ((row) * (sizeof(double *))));
for (unsigned int i = 0; i < row; i++) {
Array[i] = alloc_dvector(col);
}
return Array;
}
void dealloc_2dArray(double **Array, unsigned int row, unsigned int col)
{
for (unsigned int i = 0; i < row; i++)
{
dealloc_dvector(Array[i]);
}
free((char *) Array);
}
int main()
{
double **designMatrix_Y = alloc_2dArray(4,4);
for(int i = 0; i<4; i++)
{
for(int j= 0; j<4; j++)
{
designMatrix_Y[i][j] = i*j;
}
}
DecisionTreeData *dataSplits = split_dataset(2,
2,
4,
designMatrix_Y);
//rename for convenience
size_t leftSize = (dataSplits[0]).length;
double **designMatrix_Y_L = (dataSplits[0]).designMatrix_Y;
double **designMatrix_Y_R = (dataSplits[1]).designMatrix_Y;
for(int i = 0; i<leftSize; ++i)
printArray(designMatrix_Y_L[i],4);
free(designMatrix_Y_L);
free(designMatrix_Y_R);
free(dataSplits);
dealloc_2dArray(designMatrix_Y,4,4);
return 0;
}
它在我的 mac mini M1 中运行良好(不支持 Valgrind,看不到日志)但在 Linux 上崩溃(分段错误)。另外,我已将来自 Valgrind 的错误消息与上面的代码一起附上。
==4777== LEAK SUMMARY:
==4777== definitely lost: 0 bytes in 0 blocks
==4777== indirectly lost: 0 bytes in 0 blocks
==4777== possibly lost: 0 bytes in 0 blocks
==4777== still reachable: 224 bytes in 8 blocks
==4777== suppressed: 0 bytes in 0 blocks
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
==4777==
==4777== 1 errors in context 1 of 4:
==4777== Invalid read of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==4777==
==4777==
==4777== 1 errors in context 2 of 4:
==4777== Use of uninitialised value of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777==
==4777==
==4777== 1 errors in context 3 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007B4: split_dataset (test.c:36)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4777== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4777== by 0x400809: split_dataset (test.c:49)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777==
==4777== 2 errors in context 4 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007E7: split_dataset (test.c:43)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4777== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4777== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4777== by 0x400828: split_dataset (test.c:50)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
(END)
根据错误代码,我意识到错误可能来自printArray()
。所以,我 运行 再一次没有 printArray()
的 for 循环。来自 Valgrind 的日志更改如下。
==4021== HEAP SUMMARY:
==4021== in use at exit: 0 bytes in 0 blocks
==4021== total heap usage: 15 allocs, 15 frees, 336 bytes allocated
==4021==
==4021== All heap blocks were freed -- no leaks are possible
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
==4021==
==4021== 1 errors in context 1 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007B4: split_dataset (test.c:36)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4021== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4021== by 0x400809: split_dataset (test.c:49)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021==
==4021== 2 errors in context 2 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007E7: split_dataset (test.c:43)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4021== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4021== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4021== by 0x400828: split_dataset (test.c:50)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
(END)
所以,有人可以帮助解决这个问题吗?有什么建议吗?感谢您提前抽出宝贵时间进行我冗长的描述。如果有任何不清楚的地方,请告诉我。
gcc 版本 9.3.0
这两个重新分配不属于它们所在的位置 在 中 for-loop。 在 for-loop 之后 left/ride 拆分的最终大小是固定的。通过在每次迭代中调整它们的大小,您将它们固定为当前大小,这意味着下一次迭代将保证 out-of-range 一个槽(无论目标是哪一侧)。地址清理程序报告将证实这一点。
双方都被故意过度分配以容纳 row
潜在的指针。那些只需要在拆分迭代完成后尘埃落定后进行调整修剪。
下面这个(标记为此处):
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
// HERE HERE HERE HERE HERE
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
应该是这样的:
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
几天前,我在社区里问了这个
并且根据his/her的建议,我为split_dataset
编写了以下函数作为决策树算法的一部分。下面是我写的代码,
#include <stdio.h>
#include <stdlib.h>
void printArray(double array[], unsigned int size)
{
for (unsigned int i = 0; i < size; ++i) {
printf("%.3f ", array[i]);
}
printf("\n");
}
typedef struct
{
size_t length;
double **designMatrix_Y;
} DecisionTreeData;
DecisionTreeData *split_dataset(int index, //var index
double value, //best cutoff
int row, //nrows of design matrix, number of variables
double **designMatrix_Y) //design matrix (X) and response Y
{
// Buffers to hold rows of data as we are distributing rows based on the split.
double **leftDesignMatrix_Y = calloc(row, sizeof *leftDesignMatrix_Y);
double **rightDesignMatrix_Y = calloc(row, sizeof *rightDesignMatrix_Y);
size_t left_count = 0;
size_t right_count = 0;
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
DecisionTreeData *data_split = malloc(sizeof(DecisionTreeData) * 2);
data_split[0] = (DecisionTreeData){left_count, leftDesignMatrix_Y};
data_split[1] = (DecisionTreeData){right_count, rightDesignMatrix_Y};
return data_split;
}
double *alloc_dvector(unsigned int length)
{
return malloc((length * (sizeof(double))));
}
void dealloc_dvector(double *array)
{
free((char *) array);
}
double **alloc_2dArray(unsigned int row, unsigned int col)
{
double **Array = (double **) malloc((size_t) ((row) * (sizeof(double *))));
for (unsigned int i = 0; i < row; i++) {
Array[i] = alloc_dvector(col);
}
return Array;
}
void dealloc_2dArray(double **Array, unsigned int row, unsigned int col)
{
for (unsigned int i = 0; i < row; i++)
{
dealloc_dvector(Array[i]);
}
free((char *) Array);
}
int main()
{
double **designMatrix_Y = alloc_2dArray(4,4);
for(int i = 0; i<4; i++)
{
for(int j= 0; j<4; j++)
{
designMatrix_Y[i][j] = i*j;
}
}
DecisionTreeData *dataSplits = split_dataset(2,
2,
4,
designMatrix_Y);
//rename for convenience
size_t leftSize = (dataSplits[0]).length;
double **designMatrix_Y_L = (dataSplits[0]).designMatrix_Y;
double **designMatrix_Y_R = (dataSplits[1]).designMatrix_Y;
for(int i = 0; i<leftSize; ++i)
printArray(designMatrix_Y_L[i],4);
free(designMatrix_Y_L);
free(designMatrix_Y_R);
free(dataSplits);
dealloc_2dArray(designMatrix_Y,4,4);
return 0;
}
它在我的 mac mini M1 中运行良好(不支持 Valgrind,看不到日志)但在 Linux 上崩溃(分段错误)。另外,我已将来自 Valgrind 的错误消息与上面的代码一起附上。
==4777== LEAK SUMMARY:
==4777== definitely lost: 0 bytes in 0 blocks
==4777== indirectly lost: 0 bytes in 0 blocks
==4777== possibly lost: 0 bytes in 0 blocks
==4777== still reachable: 224 bytes in 8 blocks
==4777== suppressed: 0 bytes in 0 blocks
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
==4777==
==4777== 1 errors in context 1 of 4:
==4777== Invalid read of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==4777==
==4777==
==4777== 1 errors in context 2 of 4:
==4777== Use of uninitialised value of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777==
==4777==
==4777== 1 errors in context 3 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007B4: split_dataset (test.c:36)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4777== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4777== by 0x400809: split_dataset (test.c:49)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777==
==4777== 2 errors in context 4 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007E7: split_dataset (test.c:43)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4777== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4777== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4777== by 0x400828: split_dataset (test.c:50)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
(END)
根据错误代码,我意识到错误可能来自printArray()
。所以,我 运行 再一次没有 printArray()
的 for 循环。来自 Valgrind 的日志更改如下。
==4021== HEAP SUMMARY:
==4021== in use at exit: 0 bytes in 0 blocks
==4021== total heap usage: 15 allocs, 15 frees, 336 bytes allocated
==4021==
==4021== All heap blocks were freed -- no leaks are possible
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
==4021==
==4021== 1 errors in context 1 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007B4: split_dataset (test.c:36)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4021== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4021== by 0x400809: split_dataset (test.c:49)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021==
==4021== 2 errors in context 2 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007E7: split_dataset (test.c:43)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4021== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4021== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4021== by 0x400828: split_dataset (test.c:50)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
(END)
所以,有人可以帮助解决这个问题吗?有什么建议吗?感谢您提前抽出宝贵时间进行我冗长的描述。如果有任何不清楚的地方,请告诉我。
gcc 版本 9.3.0
这两个重新分配不属于它们所在的位置 在 中 for-loop。 在 for-loop 之后 left/ride 拆分的最终大小是固定的。通过在每次迭代中调整它们的大小,您将它们固定为当前大小,这意味着下一次迭代将保证 out-of-range 一个槽(无论目标是哪一侧)。地址清理程序报告将证实这一点。
双方都被故意过度分配以容纳 row
潜在的指针。那些只需要在拆分迭代完成后尘埃落定后进行调整修剪。
下面这个(标记为此处):
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
// HERE HERE HERE HERE HERE
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
应该是这样的:
for (size_t i = 0; i < row; ++i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count++;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count++;
}
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));