对大型数组进行排序(快速排序)- 分段错误

Sorting large arrays (QuickSort) - segmentation fault

对于这个程序,我创建了一个随机整数数组,将该数组分成两个或四个部分,对每个部分进行排序,然后将它们组合回一个排序数组。该程序的插入排序部分适用于我需要的数组大小。问题在于快速排序。它仅适用于大小不超过 300 万个整数的数组。我需要它来处理大小高达 1 亿整数的数组。目前超过 300 万,它给我一个 "segmentation fault(core dumped)" 错误。低于 300 万似乎有效。有人看到这个问题吗?我假设有些东西溢出了。如果你看下面,你可以看到几个 malloc 声明,我试图解决这个问题。好像不行。

编辑:我做了一些调试并注释掉了代码的 "if (s_type == 'Q')" 部分的内容。它仍然给我一个大数组的分段错误。

void insertion_sort (int ar[], int size) {
    int c, d, t;
    for (c = 1; c <= size - 1; c++){
        d = c;

        while(d > 0 && ar[d] < ar[d - 1]) {
            t = ar[d];
            ar[d] = ar[d - 1];
            ar[d - 1] = t;

            d--;
        }
    }
}

void quick_sort (int *a, int n) {
    int i, j, p, t;
    if(n < 2)
        return;
    p = a[n / 2];
    for (i = 0, j = n - 1;; i++, j--) {
        while (a[i] < p)
            i++;
        while (p < a[j])
            j--;
        if (i >= j)
            break;
        t = a[i];
        a[i] = a[j];
        a[j] = t;
        }
        quick_sort(a, i);
        quick_sort(a + i, n - i);
}

void check_sort (int ara[], int size_t) {
    int b;
    int c_i;

    c_i = 0;

    for  (b = 1; b < size_t; b++) {
        if (ara[b - 1] > ara[b]) {
            printf("Array is not sorted correctly\n");  
            break;
        } else {
            c_i++;
        }
    }

    if (c_i == size_t - 1) {
        printf("Array is sorted correctly\n");
    }
}

void combine_array(int a_ar[], int b_ar[], int c_ar[], int size_1, int size_2) {
    int i, j, k;
    i = 0;
    j = 0;
    k = 0;

    while (i < size_1 && j < size_2) {
        if (a_ar[i] < b_ar[j]) {
            c_ar[k] = a_ar[i];
            i++;
        } else {
            c_ar[k] = b_ar[j];
            j++;
        }
        k++;
    }

    if (i >= size_1) {
        while (j < size_2) {
            c_ar[k] = b_ar[j];
            j++;
            k++;
        }
    }

    if (j >= size_2) {
        while (i < size_1) {
            c_ar[k] = a_ar[i];
            i++;
            k++;
        }
    }
}

long gRefTime;

long GetMilliSecondTime(struct timeb timeBuf) {
    long mliScndTime;
    mliScndTime = timeBuf.time;
    mliScndTime *= 1000;
    mliScndTime += timeBuf.millitm;
    return mliScndTime;
}

long GetCurrentTime(void) {
    long crntTime=0;
    struct timeb timeBuf;
    ftime(&timeBuf);
    crntTime = GetMilliSecondTime(timeBuf);
    return crntTime;
}

void SetTime(void) {
    gRefTime = GetCurrentTime();
}

long GetTime(void) {
    long crntTime = GetCurrentTime();
    return (crntTime - gRefTime);
}

int main (int argc, char *argv[]) {
    int a_size, t_num;
    char s_type;
    int i, j, k; 
    int two_s[1];
    int four_s[3];

    a_size = atoi(argv[1]);
    t_num = atoi(argv[2]);
    s_type = argv[3][0];

    pthread_t tid[t_num];
    pthread_attr_t attr;    

    struct sort_2 {
        int array_ss[(a_size/2)];
        int arr_s;
    };

    struct sort_2 firstS;
    struct sort_2 firstS1;

    int *array_m = malloc(a_size * sizeof(*array_m));

    for (i = 0; i < a_size; i++) {
        array_m[i] = rand();
    }

    //for (i = 0; i < a_size; i++) {
        //printf("%d \n", array_m[i]);
    //}

    printf("\n");

    if (t_num == 2) {
        two_s[0] = ((a_size/2));
        two_s[1] = (a_size);
        int *array_s1 = malloc(two_s[0] * sizeof(*array_s1));
        int *array_s2 = malloc(two_s[0] * sizeof(*array_s2));

        printf("First half \n");

        for (j = 0; j < two_s[0]; j ++) {
            array_s1[j] = array_m[j];
            //printf("%d \n", array_s1[j]);
        }

        printf("Second half \n");

        for (k = two_s[0]; k < two_s[1]; k++) {
            array_s2[k - two_s[0]] = array_m[k];
            //printf("%d \n", array_s2[k - two_s[0]]);
        }

    printf("\n");

    check_sort(array_m, a_size);

    if (s_type == 'I') { //Insertion sort

        SetTime();

        insertion_sort(array_s1, two_s[0]);
        insertion_sort(array_s2, two_s[0]);

        printf("Sorted first half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s1[i]);
        }

        printf("Sorted second half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s2[i]);
        }       

        combine_array(array_s1, array_s2, array_m, two_s[0], two_s[0]);

        printf("Time to sort and combine: %f \n", (GetTime()));

        printf("\n");

        printf("Combined and sorted sequentially via Insertion Sort \n");

        for (i = 0; i < a_size; i++) {
            //printf("%d \n", array_m[i]);
        }

        check_sort(array_m, a_size);

        //Start of thread section

        for (i = 0; i < a_size; i++) {
            array_m[i] = rand();
        }

        printf("First half \n");

        for (j = 0; j < two_s[0]; j ++) {
            array_s1[j] = array_m[j];
            firstS.array_ss[j] = array_s1[j];
        }

        firstS.arr_s = two_s[0];

        printf("Second half \n");

        for (k = two_s[0]; k < two_s[1]; k++) {
            array_s2[k - two_s[0]] = array_m[k];
            firstS1.array_ss[k] = array_s2[k - two_s[0]];
        }

        firstS1.arr_s = two_s[0];   

        //pthread_attr_init(&attr);
        //pthread_create(&tid, &attr, insertion_sort, *firstS);         
    }

    if (s_type == 'Q') { //Quick sort

        SetTime();

        quick_sort(array_s1, two_s[0]);
        quick_sort(array_s2, two_s[0]);

        printf("Sorted first half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s1[i]);
        }

        printf("Sorted second half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s2[i]);
        }       

        combine_array(array_s1, array_s2, array_m, two_s[0], two_s[0]);

        printf("Time to sort and combine: %f \n", (GetTime()));

        printf("\n");

        printf("Combined and sorted sequentially via Quick Sort \n");

        for (i = 0; i < a_size; i++) {
            //printf("%d \n", array_m[i]);
        }

        check_sort(array_m, a_size);

        for (i = 0; i < a_size; i++) {
            array_m[i] = rand();
        }       
    }
    }

    //Four part array

    if (t_num == 4) {
        two_s[0] = ((a_size/2));
        two_s[1] = (a_size);
        four_s[0] = ((a_size/4));
        //two_s[1] = (a_size);
        int *array_s14 = malloc(four_s[0] * sizeof(array_s14));
        int *array_s24 = malloc(four_s[0] * sizeof(array_s24));
        int *array_s34 = malloc(four_s[0] * sizeof(array_s34));
        int *array_s44 = malloc(four_s[0] * sizeof(array_s44));
        int *array_14 = malloc(two_s[0] * sizeof(array_14));
        int *array_24 = malloc(two_s[0] * sizeof(array_24));

        printf("First quarter \n");

        for (j = 0; j < four_s[0]; j++) {
            array_s14[j] = array_m[j];
            //printf("%d \n", array_s14[j]);
        }

        printf("Second quarter \n");

        for (k = 0; k < four_s[0]; k++) {
            array_s24[k] = array_m[k + four_s[0]];
            //printf("%d \n", array_s24[k]);
        }

        printf("Third quarter \n");

        for (j = 0; j < four_s[0]; j++) {
            array_s34[j] = array_m[j + (2 * four_s[0])];
            //printf("%d \n", array_s34[j]);
        }

        printf("Fourth quarter \n");

        for (k = 0; k < four_s[0]; k++) {
            array_s44[k] = array_m[k + (3 * four_s[0])];
            //printf("%d \n", array_s44[k]);
        }

    printf("\n");

    check_sort(array_m, a_size);

    if (s_type == 'I') { //Insertion sort

        SetTime();

        insertion_sort(array_s14, four_s[0]);
        printf("Sorted first quarter \n");
        insertion_sort(array_s24, four_s[0]);
        printf("Sorted second quarter \n");
        insertion_sort(array_s34, four_s[0]);
        printf("Sorted third quarter \n");
        insertion_sort(array_s44, four_s[0]);
        printf("Sorted fourth quater \n");      

        //printf("Sorted first half \n");

        //for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s1[i]);
        //}

        //printf("Sorted second half \n");

        //for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s2[i]);
        //}     

        combine_array(array_s14, array_s24, array_14, four_s[0], four_s[0]);
        combine_array(array_s34, array_s44, array_24, four_s[0], four_s[0]);
        combine_array(array_14, array_24, array_m, two_s[0], two_s[0]); 

        printf("Time to sort and combine: %f \n", (GetTime()));

        printf("\n");

        printf("Combined and sorted sequentially via Insertion Sort \n");

        for (i = 0; i < a_size; i++) {
            //printf("%d \n", array_m[i]);
        }

        check_sort(array_m, a_size);

        //Start of thread section

/*      for (i = 0; i < a_size; i++) {
            array_m[i] = rand();
        }

        printf("First half \n");

        for (j = 0; j < two_s[0]; j ++) {
            array_s1[j] = array_m[j];
            firstS.array_ss[j] = array_s1[j];
        }

        firstS.arr_s = two_s[0];

        printf("Second half \n");

        for (k = two_s[0]; k < two_s[1]; k++) {
            array_s2[k - two_s[0]] = array_m[k];
            firstS1.array_ss[k] = array_s2[k - two_s[0]];
        }

        firstS1.arr_s = two_s[0];    */

        //pthread_attr_init(&attr);
        //pthread_create(&tid, &attr, insertion_sort, *firstS);         
    }

    if (s_type == 'Q') { //Quick sort

        SetTime();

        quick_sort(array_s14, four_s[0]);
        printf("Sorted first quarter \n");
        quick_sort(array_s24, four_s[0]);
        printf("Sorted second quarter \n");
        quick_sort(array_s34, four_s[0]);
        printf("Sorted third quarter \n");
        quick_sort(array_s44, four_s[0]);
        printf("Sorted fourth quarter \n");     

/*      printf("Sorted first half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s1[i]);
        }

        printf("Sorted second half \n");

        for (i = 0; i < two_s[0]; i++) {
            //printf("%d \n", array_s2[i]);
        }    */ 

        combine_array(array_s14, array_s24, array_14, four_s[0], four_s[0]);
        combine_array(array_s34, array_s44, array_24, four_s[0], four_s[0]);
        combine_array(array_14, array_24, array_m, two_s[0], two_s[0]);

        printf("Time to sort and combine: %f \n", (GetTime()));

        printf("\n");

        printf("Combined and sorted sequentially via Quick Sort \n");

        for (i = 0; i < a_size; i++) {
            //printf("%d \n", array_m[i]);
        }

        check_sort(array_m, a_size);

        for (i = 0; i < a_size; i++) {
            array_m[i] = rand();
        }       
    }
    }   

}

我在这里看到两个错误,第一个: 在使用 argv 之前不检查 argc 的值。如果你没有给你的程序提供参数,你最终会在这里向 atoi 发送未定义的地址:

a_size = atoi(argv[1]);
t_num = atoi(argv[2]);

第二个:

    a_size = atoi(argv[1]);

atoi() returns 一个不能优于 2147483647 (2^31) 的整数,否则它会溢出并最终小于 0。

    struct sort_2 {
    int array_ss[(a_size/2)];
    int arr_s;
};

struct sort_2 firstS;
struct sort_2 firstS1;

在这里,对于 a_size = 3 000 000,您要求堆栈上有 12MB 的 RAM,这会导致您的程序发生堆栈溢出,我建议您使用 malloc()。

如果此程序具有 32 位体系结构,则该程序可能无法在您的计算机上运行。我认为您不能在这样的机器中拥有超过 4Gb 的连续阵列。在我的上它运行良好:

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>

#define N 10000000000UL

typedef int T;

int compare(const void *_a, const void *_b)
{
    const T *a = _a, *b = _b;
    if (*a > *b) return 1;
    if (*a < *b) return -1;
    return 0;
}

int main()
{
    T *b;
    int i;

    printf("Trying %lld array (%lld bytes)\n",
            (long long)N, (long long) sizeof(T) * N);
    assert(b = malloc(sizeof(T) * N));
    printf("b = %#p\n", b);
    printf("filling\n");
    for (i = 0; i < N; i++)
        b[i] = rand();
    printf("quicksorting\n");
    qsort(b, N, sizeof(T), compare);
    for (i = 0; i < N; i++)
        printf("a[%d] = %d\n", i, b[i]);
}

您可以使用 NT 的不同值。