递归合并排序的分段错误
Segmentation fault on recursive merge sort
我正在尝试编写合并排序的完全递归版本
当我尝试订购超过 100k 条记录时,我在 merge
函数上遇到分段错误,我在网上看到这将是一个堆栈溢出问题,与 VLA 相关,但我无法弄清楚。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct _MBArray MBArray;
struct _MBArray {
void ** array;
int length;
};
void mergeSort(MBArray *mbarray, int start, int end, int (*compare)(void *, void *)) {
if (start == end) {
return;
} else {
int middle = (end + start) / 2;
mergeSort(mbarray, start, middle, compare);
mergeSort(mbarray, middle + 1, end, compare);
merge(mbarray, start, middle + 1, end, compare, (mbarray->array)[middle + 1]);
return;
}
}
void merge(MBArray *mbarray, int startA, int startB, int length, int (*compare)(void *, void *),void *elem) {
if (startA > startB - 1) {
return;
} else
if (startB > length) {
return;
} else
if ((*(compare))((mbarray->array)[startA], (mbarray->array)[startB])) {
merge(mbarray, startA + 1, startB, length, compare, (mbarray->array)[startB]);
return;
} else {
memcpy(mbarray->array + (startA + 1), mbarray->array + startA, (startB - startA) * sizeof(void *));
(mbarray->array)[startA] = elem;
merge(mbarray, startA + 1, startB + 1, length, compare, (mbarray->array)[startB + 1]);
return;
}
}
您的 merge
和 mergesort
函数不正确:
-
mergesort
函数被赋予数组中的索引值,使得 start
被包含并且 end
被排除在外。修复方法如下:
void mergeSort(MBArray *mbarray, int start, int end,
int (*compare)(void *, void *))
{
if (end - start > 1) {
int middle = start + (end - start) / 2;
mergeSort(mbarray, start, middle, compare);
mergeSort(mbarray, middle, end, compare);
merge(mbarray, start, middle, end, compare);
}
}
merge
函数应该被简化。如果你真的打算让这个函数递归而不是使用临时数组,你可能会递归 length/2
次,因此可能导致堆栈溢出比在合并函数中分配 VLA 更快。
这是一个基于 VLA 的 merge
函数:
void merge(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *))
{
int templen = middle - low;
void *temp[templen];
memcpy(temp, mbarray->array + low, sizeof(temp));
int i = 0, j = middle, k = low;
while (i < templen) {
if (j >= end || compare(temp[i], mbarray->array[j])) {
mbarray->array[k++] = temp[i++];
else
mbarray->array[k++] = mbarray->array[j++];
}
}
完全递归 版本应该保存元素并在将其设置到最终位置之前递归。这在堆栈 space 中比分配 VLA 的成本要高得多,因此 堆栈溢出 的可能性更大。使用 malloc()
分配一次临时数组并将其传递给 mergesort
更安全,也更高效。
这里是修改后的完全递归merge
函数:
void mergeRecur(MBArray *mbarray, int dest, int a0, int a1, int b0, int b1,
int (*compare)(void *, void *))
{
if (a0 < a1) {
if (b0 >= b1 || compare(mbarray->array[a0], mbarray->array[b0])) {
void *temp = mbarray->array[a0];
mergeRecur(mbarray, dest + 1, a0 + 1, a1, b0, b1, compare);
mbarray->array[dest] = temp;
} else {
void *temp = mbarray->array[b0];
mergeRecur(mbarray, dest + 1, a0, a1, b0 + 1, b1, compare);
mbarray->array[dest] = temp;
}
}
}
void merge(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *))
{
mergeRecur(mbarray, low, low, middle, middle, end, compare);
}
对于大型数组,我建议分配一次临时数组并调用使用临时数组而不是分配 VLA 的 mergesort
的递归版本:
#include <stdlib.h>
#include <string.h>
struct _MBArray {
void ** array;
int length;
};
void mergeTemp(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *), void **temp)
{
int templen = middle - low;
int i = 0, j = middle, k = low;
memcpy(temp, mbarray->array + low, sizeof(*temp) * templen);
while (i < templen) {
if (j >= end || compare(temp[i], mbarray->array[j])) {
mbarray->array[k++] = temp[i++];
else
mbarray->array[k++] = mbarray->array[j++];
}
}
void mergeSortTemp(MBArray *mbarray, int start, int end,
int (*compare)(void *, void *), void **temp)
{
if (end - start > 1) {
int middle = start + (end - start) / 2;
mergeSortTemp(mbarray, start, middle, compare, temp);
mergeSortTemp(mbarray, middle, end, compare, temp);
mergeTemp(mbarray, start, middle, end, compare, temp);
}
}
void mergeSort(MBArray *mbarray, int (*compare)(void *, void *)) {
if (mbarray->length > 1) {
void **temp = malloc(sizeof(*temp) * (mbarray->length / 2));
mergeSortTemp(mbarray, 0, mbarray->length, compare, temp);
free(temp);
}
}
问题很可能是由于 merge() 为每个合并的元素调用自身。最好有一个 mergeSort() 的入口函数,它一次性分配第二个数组(指针),然后将第二个数组作为参数传递给递归 MergeSortR(),并在两个数组之间来回合并两个数组。
我正在尝试编写合并排序的完全递归版本
当我尝试订购超过 100k 条记录时,我在 merge
函数上遇到分段错误,我在网上看到这将是一个堆栈溢出问题,与 VLA 相关,但我无法弄清楚。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct _MBArray MBArray;
struct _MBArray {
void ** array;
int length;
};
void mergeSort(MBArray *mbarray, int start, int end, int (*compare)(void *, void *)) {
if (start == end) {
return;
} else {
int middle = (end + start) / 2;
mergeSort(mbarray, start, middle, compare);
mergeSort(mbarray, middle + 1, end, compare);
merge(mbarray, start, middle + 1, end, compare, (mbarray->array)[middle + 1]);
return;
}
}
void merge(MBArray *mbarray, int startA, int startB, int length, int (*compare)(void *, void *),void *elem) {
if (startA > startB - 1) {
return;
} else
if (startB > length) {
return;
} else
if ((*(compare))((mbarray->array)[startA], (mbarray->array)[startB])) {
merge(mbarray, startA + 1, startB, length, compare, (mbarray->array)[startB]);
return;
} else {
memcpy(mbarray->array + (startA + 1), mbarray->array + startA, (startB - startA) * sizeof(void *));
(mbarray->array)[startA] = elem;
merge(mbarray, startA + 1, startB + 1, length, compare, (mbarray->array)[startB + 1]);
return;
}
}
您的 merge
和 mergesort
函数不正确:
-
mergesort
函数被赋予数组中的索引值,使得start
被包含并且end
被排除在外。修复方法如下:
void mergeSort(MBArray *mbarray, int start, int end,
int (*compare)(void *, void *))
{
if (end - start > 1) {
int middle = start + (end - start) / 2;
mergeSort(mbarray, start, middle, compare);
mergeSort(mbarray, middle, end, compare);
merge(mbarray, start, middle, end, compare);
}
}
merge
函数应该被简化。如果你真的打算让这个函数递归而不是使用临时数组,你可能会递归 length/2
次,因此可能导致堆栈溢出比在合并函数中分配 VLA 更快。
这是一个基于 VLA 的 merge
函数:
void merge(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *))
{
int templen = middle - low;
void *temp[templen];
memcpy(temp, mbarray->array + low, sizeof(temp));
int i = 0, j = middle, k = low;
while (i < templen) {
if (j >= end || compare(temp[i], mbarray->array[j])) {
mbarray->array[k++] = temp[i++];
else
mbarray->array[k++] = mbarray->array[j++];
}
}
完全递归 版本应该保存元素并在将其设置到最终位置之前递归。这在堆栈 space 中比分配 VLA 的成本要高得多,因此 堆栈溢出 的可能性更大。使用 malloc()
分配一次临时数组并将其传递给 mergesort
更安全,也更高效。
这里是修改后的完全递归merge
函数:
void mergeRecur(MBArray *mbarray, int dest, int a0, int a1, int b0, int b1,
int (*compare)(void *, void *))
{
if (a0 < a1) {
if (b0 >= b1 || compare(mbarray->array[a0], mbarray->array[b0])) {
void *temp = mbarray->array[a0];
mergeRecur(mbarray, dest + 1, a0 + 1, a1, b0, b1, compare);
mbarray->array[dest] = temp;
} else {
void *temp = mbarray->array[b0];
mergeRecur(mbarray, dest + 1, a0, a1, b0 + 1, b1, compare);
mbarray->array[dest] = temp;
}
}
}
void merge(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *))
{
mergeRecur(mbarray, low, low, middle, middle, end, compare);
}
对于大型数组,我建议分配一次临时数组并调用使用临时数组而不是分配 VLA 的 mergesort
的递归版本:
#include <stdlib.h>
#include <string.h>
struct _MBArray {
void ** array;
int length;
};
void mergeTemp(MBArray *mbarray, int low, int middle, int end,
int (*compare)(void *, void *), void **temp)
{
int templen = middle - low;
int i = 0, j = middle, k = low;
memcpy(temp, mbarray->array + low, sizeof(*temp) * templen);
while (i < templen) {
if (j >= end || compare(temp[i], mbarray->array[j])) {
mbarray->array[k++] = temp[i++];
else
mbarray->array[k++] = mbarray->array[j++];
}
}
void mergeSortTemp(MBArray *mbarray, int start, int end,
int (*compare)(void *, void *), void **temp)
{
if (end - start > 1) {
int middle = start + (end - start) / 2;
mergeSortTemp(mbarray, start, middle, compare, temp);
mergeSortTemp(mbarray, middle, end, compare, temp);
mergeTemp(mbarray, start, middle, end, compare, temp);
}
}
void mergeSort(MBArray *mbarray, int (*compare)(void *, void *)) {
if (mbarray->length > 1) {
void **temp = malloc(sizeof(*temp) * (mbarray->length / 2));
mergeSortTemp(mbarray, 0, mbarray->length, compare, temp);
free(temp);
}
}
问题很可能是由于 merge() 为每个合并的元素调用自身。最好有一个 mergeSort() 的入口函数,它一次性分配第二个数组(指针),然后将第二个数组作为参数传递给递归 MergeSortR(),并在两个数组之间来回合并两个数组。