递归合并排序的分段错误

Segmentation fault on recursive merge sort

我正在尝试编写合并排序的完全递归版本 当我尝试订购超过 100k 条记录时,我在 merge 函数上遇到分段错误,我在网上看到这将是一个堆栈溢出问题,与 VLA 相关,但我无法弄清楚。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct _MBArray MBArray;

struct _MBArray {
    void ** array;
    int length;
};

void mergeSort(MBArray *mbarray, int start, int end, int (*compare)(void *, void *)) {
    if (start == end) {
        return;
    } else {
        int middle = (end + start) / 2;
        mergeSort(mbarray, start, middle, compare);
        mergeSort(mbarray, middle + 1, end, compare);
        merge(mbarray, start, middle + 1, end, compare, (mbarray->array)[middle + 1]);
        return;
    }
}

void merge(MBArray *mbarray, int startA, int startB, int length, int (*compare)(void *, void *),void *elem) {
    if (startA > startB - 1) {
        return;
    } else
    if (startB > length) {
        return;
    } else
    if ((*(compare))((mbarray->array)[startA], (mbarray->array)[startB])) {
        merge(mbarray, startA + 1, startB, length, compare, (mbarray->array)[startB]);
        return;
    } else {
        memcpy(mbarray->array + (startA + 1), mbarray->array + startA, (startB - startA) * sizeof(void *));
        (mbarray->array)[startA] = elem;
        merge(mbarray, startA + 1, startB + 1, length, compare, (mbarray->array)[startB + 1]);
        return;
    }
}

您的 mergemergesort 函数不正确:

  • mergesort 函数被赋予数组中的索引值,使得 start 被包含并且 end 被排除在外。修复方法如下:
void mergeSort(MBArray *mbarray, int start, int end,
               int (*compare)(void *, void *))
{
    if (end - start > 1) {
        int middle = start + (end - start) / 2;
        mergeSort(mbarray, start, middle, compare);
        mergeSort(mbarray, middle, end, compare);
        merge(mbarray, start, middle, end, compare);
    }
}

merge 函数应该被简化。如果你真的打算让这个函数递归而不是使用临时数组,你可能会递归 length/2 次,因此可能导致堆栈溢出比在合并函数中分配 VLA 更快。

这是一个基于 VLA 的 merge 函数:

void merge(MBArray *mbarray, int low, int middle, int end,
           int (*compare)(void *, void *))
{
    int templen = middle - low;
    void *temp[templen];
    memcpy(temp, mbarray->array + low, sizeof(temp));
    int i = 0, j = middle, k = low;
    while (i < templen) {
        if (j >= end || compare(temp[i], mbarray->array[j])) {
            mbarray->array[k++] = temp[i++];
        else
            mbarray->array[k++] = mbarray->array[j++];
    }
}

完全递归 版本应该保存元素并在将其设置到最终位置之前递归。这在堆栈 space 中比分配 VLA 的成本要高得多,因此 堆栈溢出 的可能性更大。使用 malloc() 分配一次临时数组并将其传递给 mergesort 更安全,也更高效。

这里是修改后的完全递归merge函数:

void mergeRecur(MBArray *mbarray, int dest, int a0, int a1, int b0, int b1,
                int (*compare)(void *, void *))
{
    if (a0 < a1) {
        if (b0 >= b1 || compare(mbarray->array[a0], mbarray->array[b0])) {
            void *temp = mbarray->array[a0];
            mergeRecur(mbarray, dest + 1, a0 + 1, a1, b0, b1, compare);
            mbarray->array[dest] = temp;
        } else {
            void *temp = mbarray->array[b0];
            mergeRecur(mbarray, dest + 1, a0, a1, b0 + 1, b1, compare);
            mbarray->array[dest] = temp;
        }
    }
}

void merge(MBArray *mbarray, int low, int middle, int end,
           int (*compare)(void *, void *))
{
    mergeRecur(mbarray, low, low, middle, middle, end, compare);
}

对于大型数组,我建议分配一次临时数组并调用使用临时数组而不是分配 VLA 的 mergesort 的递归版本:

#include <stdlib.h>
#include <string.h>

struct _MBArray {
    void ** array;
    int length;
};

void mergeTemp(MBArray *mbarray, int low, int middle, int end,
               int (*compare)(void *, void *), void **temp)
{
    int templen = middle - low;
    int i = 0, j = middle, k = low;

    memcpy(temp, mbarray->array + low, sizeof(*temp) * templen);
    while (i < templen) {
        if (j >= end || compare(temp[i], mbarray->array[j])) {
            mbarray->array[k++] = temp[i++];
        else
            mbarray->array[k++] = mbarray->array[j++];
    }
}

void mergeSortTemp(MBArray *mbarray, int start, int end,
                   int (*compare)(void *, void *), void **temp)
{
    if (end - start > 1) {
        int middle = start + (end - start) / 2;
        mergeSortTemp(mbarray, start, middle, compare, temp);
        mergeSortTemp(mbarray, middle, end, compare, temp);
        mergeTemp(mbarray, start, middle, end, compare, temp);
    }
}

void mergeSort(MBArray *mbarray, int (*compare)(void *, void *)) {
    if (mbarray->length > 1) {
        void **temp = malloc(sizeof(*temp) * (mbarray->length / 2));
        mergeSortTemp(mbarray, 0, mbarray->length, compare, temp);
        free(temp);
    }
}

问题很可能是由于 merge() 为每个合并的元素调用自身。最好有一个 mergeSort() 的入口函数,它一次性分配第二个数组(指针),然后将第二个数组作为参数传递给递归 MergeSortR(),并在两个数组之间来回合并两个数组。