如何解释我旨在找到插入排序和归并排序之间阈值的实验结果?

How to explain the result of my experiment aimed to find the threshold between insertion sort and merge sort?

当我阅读Introduction to Algorithms这本书时,我遇到了这个问题。你知道小数组的插入排序比归并排序快。但是这本书问我们如何在实践中选择两种排序算法之间的阈值。所以我设计了下面的测试代码试图找到它:

#include <iostream>
#include <stdlib.h>
#include <vector>
#include <time.h>

#define N 10000
#define MAXSIZE 10000
#define RUN_TIME_FOR_TEST 1000

using std::vector;

// The parameters, begin and end, are the indices.
void insertion_sort(vector<int> &array_num, int begin, int end)
{
    for (int j = begin + 1; j <= end; j++)
    {
        int key = array_num[j];
        int i = j - 1;
        while (i > begin - 1 && array_num[i] > key)
        {
            array_num[i + 1] = array_num[i];
            i = i - 1;
        }
        array_num[i + 1] = key;
    }
}

void merge(vector<int> &array_num, int begin, int median, int end)
{
    vector<int> array_left, array_right;
    for (int i = begin; i <= median; i++) {
        array_left.push_back(array_num[i]);
    }
    for (int i = median + 1; i <= end; i++) {
        array_right.push_back(array_num[i]);
    }
    
    int i, j;
    i = j = 0;
    for (int k = begin; k <= end; k++) {
        if (i < median - begin + 1 && (j >= end - median || array_left[i] <= array_right[j])) {
            array_num[k] = array_left[i];
            i ++;
        }
        else if (j < end - median && (i >= median - begin + 1 || array_left[i] > array_right[j]))
        {
            array_num[k] = array_right[j];
            j ++;
        }
    }
}

// The parameters, begin and end, are the indices.
void merge_insertion_sort(vector<int> &array_num, int begin, int end, int threshold_merge_insert)
{
    if (begin < end) {
        int median = (begin + end) / 2;
        if (median - begin + 1 <= threshold_merge_insert) {
            insertion_sort(array_num, begin, median);
        }
        else
            merge_insertion_sort(array_num, begin, median, threshold_merge_insert);
        if (end - median <= threshold_merge_insert) {
            insertion_sort(array_num, median + 1, end);
        }
        else
            merge_insertion_sort(array_num, median + 1, end, threshold_merge_insert);
        merge(array_num, begin, median, end);
    }
    return;
}

int main()
{
    vector<int> array_num;
    for (int i = 0; i < N; i++) {
        int rand_num = rand() % MAXSIZE + 1;
        array_num.push_back(rand_num);
    }
    
//    std::cout << "The numbers in the array are: ";
//    for (int i = 0; i < N; i++) {
//        std::cout << array_num[i] << ' ';
//    }
    
//    insertion_sort(array_num, 0, N - 1);
    
    std::cout << "Test begins:";
    clock_t begin, end;
    for (int i = 0; i < N; i += 100) {
        vector<int> array_num_copy(array_num);
        begin = clock();
        for (int j = 0; j < RUN_TIME_FOR_TEST; j++) {
            merge_insertion_sort(array_num_copy, 0, N - 1, i);
        }
        end = clock();
        std::cout << "\nWhen the threshold between merge and insertion sort is " << i << ", the runtime is " << int(end - begin) / RUN_TIME_FOR_TEST << ".";
    }
    
//    std::cout << "\nAfter Merge Sort, they are: ";
//    for (int i = 0; i < N; i++) {
//        std::cout << array_num[i] << ' ';
//    }
    
    getchar();
}

然后我得到如下输出:

Test begins:
When the threshold between merge and insertion sort is 0, the runtime is 28639.
When the threshold between merge and insertion sort is 100, the runtime is 6509.
When the threshold between merge and insertion sort is 200, the runtime is 5286.
When the threshold between merge and insertion sort is 300, the runtime is 5322.
When the threshold between merge and insertion sort is 400, the runtime is 4264.
When the threshold between merge and insertion sort is 500, the runtime is 4263.
When the threshold between merge and insertion sort is 600, the runtime is 4267.
When the threshold between merge and insertion sort is 700, the runtime is 3365.
When the threshold between merge and insertion sort is 800, the runtime is 3366.
When the threshold between merge and insertion sort is 900, the runtime is 3367.
When the threshold between merge and insertion sort is 1000, the runtime is 3359.
When the threshold between merge and insertion sort is 1100, the runtime is 3368.
When the threshold between merge and insertion sort is 1200, the runtime is 3377.
When the threshold between merge and insertion sort is 1300, the runtime is 2527.
When the threshold between merge and insertion sort is 1400, the runtime is 2563.
When the threshold between merge and insertion sort is 1500, the runtime is 2528.
When the threshold between merge and insertion sort is 1600, the runtime is 2531.
When the threshold between merge and insertion sort is 1700, the runtime is 2583.
When the threshold between merge and insertion sort is 1800, the runtime is 2618.
When the threshold between merge and insertion sort is 1900, the runtime is 2543.
When the threshold between merge and insertion sort is 2000, the runtime is 2532.
When the threshold between merge and insertion sort is 2100, the runtime is 2539.
When the threshold between merge and insertion sort is 2200, the runtime is 2544.
When the threshold between merge and insertion sort is 2300, the runtime is 2533.
When the threshold between merge and insertion sort is 2400, the runtime is 2532.
When the threshold between merge and insertion sort is 2500, the runtime is 1738.
When the threshold between merge and insertion sort is 2600, the runtime is 1743.
When the threshold between merge and insertion sort is 2700, the runtime is 1842.
When the threshold between merge and insertion sort is 2800, the runtime is 1839.
When the threshold between merge and insertion sort is 2900, the runtime is 1792.
When the threshold between merge and insertion sort is 3000, the runtime is 1741.
When the threshold between merge and insertion sort is 3100, the runtime is 1740.
When the threshold between merge and insertion sort is 3200, the runtime is 1764.
When the threshold between merge and insertion sort is 3300, the runtime is 1762.
When the threshold between merge and insertion sort is 3400, the runtime is 1737.
When the threshold between merge and insertion sort is 3500, the runtime is 1743.
When the threshold between merge and insertion sort is 3600, the runtime is 1742.
When the threshold between merge and insertion sort is 3700, the runtime is 1741.
When the threshold between merge and insertion sort is 3800, the runtime is 1745.
When the threshold between merge and insertion sort is 3900, the runtime is 1755.
When the threshold between merge and insertion sort is 4000, the runtime is 1748.
When the threshold between merge and insertion sort is 4100, the runtime is 1742.
When the threshold between merge and insertion sort is 4200, the runtime is 1741.
When the threshold between merge and insertion sort is 4300, the runtime is 1746.
When the threshold between merge and insertion sort is 4400, the runtime is 1746.
When the threshold between merge and insertion sort is 4500, the runtime is 1742.
When the threshold between merge and insertion sort is 4600, the runtime is 1751.
When the threshold between merge and insertion sort is 4700, the runtime is 1741.
When the threshold between merge and insertion sort is 4800, the runtime is 1738.
When the threshold between merge and insertion sort is 4900, the runtime is 1746.
When the threshold between merge and insertion sort is 5000, the runtime is 1006.
When the threshold between merge and insertion sort is 5100, the runtime is 1019.
When the threshold between merge and insertion sort is 5200, the runtime is 997.
When the threshold between merge and insertion sort is 5300, the runtime is 1002.
When the threshold between merge and insertion sort is 5400, the runtime is 1000.
When the threshold between merge and insertion sort is 5500, the runtime is 1001.
When the threshold between merge and insertion sort is 5600, the runtime is 1000.
When the threshold between merge and insertion sort is 5700, the runtime is 998.
When the threshold between merge and insertion sort is 5800, the runtime is 1002.
When the threshold between merge and insertion sort is 5900, the runtime is 1003.
When the threshold between merge and insertion sort is 6000, the runtime is 1001.
When the threshold between merge and insertion sort is 6100, the runtime is 998.
When the threshold between merge and insertion sort is 6200, the runtime is 997.
When the threshold between merge and insertion sort is 6300, the runtime is 1001.
When the threshold between merge and insertion sort is 6400, the runtime is 1003.
When the threshold between merge and insertion sort is 6500, the runtime is 997.
When the threshold between merge and insertion sort is 6600, the runtime is 998.
When the threshold between merge and insertion sort is 6700, the runtime is 997.
When the threshold between merge and insertion sort is 6800, the runtime is 1003.
When the threshold between merge and insertion sort is 6900, the runtime is 1000.
When the threshold between merge and insertion sort is 7000, the runtime is 998.
When the threshold between merge and insertion sort is 7100, the runtime is 998.
When the threshold between merge and insertion sort is 7200, the runtime is 997.
When the threshold between merge and insertion sort is 7300, the runtime is 1003.
When the threshold between merge and insertion sort is 7400, the runtime is 1000.
When the threshold between merge and insertion sort is 7500, the runtime is 998.
When the threshold between merge and insertion sort is 7600, the runtime is 994.
When the threshold between merge and insertion sort is 7700, the runtime is 996.
When the threshold between merge and insertion sort is 7800, the runtime is 1003.
When the threshold between merge and insertion sort is 7900, the runtime is 1000.
When the threshold between merge and insertion sort is 8000, the runtime is 1002.
When the threshold between merge and insertion sort is 8100, the runtime is 998.
When the threshold between merge and insertion sort is 8200, the runtime is 998.
When the threshold between merge and insertion sort is 8300, the runtime is 1002.
When the threshold between merge and insertion sort is 8400, the runtime is 1000.
When the threshold between merge and insertion sort is 8500, the runtime is 999.
When the threshold between merge and insertion sort is 8600, the runtime is 999.
When the threshold between merge and insertion sort is 8700, the runtime is 998.
When the threshold between merge and insertion sort is 8800, the runtime is 1004.
When the threshold between merge and insertion sort is 8900, the runtime is 999.
When the threshold between merge and insertion sort is 9000, the runtime is 1002.
When the threshold between merge and insertion sort is 9100, the runtime is 1005.
When the threshold between merge and insertion sort is 9200, the runtime is 1001.
When the threshold between merge and insertion sort is 9300, the runtime is 1004.
When the threshold between merge and insertion sort is 9400, the runtime is 1003.
When the threshold between merge and insertion sort is 9500, the runtime is 1002.
When the threshold between merge and insertion sort is 9600, the runtime is 1000.
When the threshold between merge and insertion sort is 9700, the runtime is 997.
When the threshold between merge and insertion sort is 9800, the runtime is 1002.
When the threshold between merge and insertion sort is 9900, the runtime is 999.
Program ended with exit code: 0

从结果可以看出,当我们选择对更长的数组使用插入排序时,运行时间似乎变小了。我猜阈值可能更大,但是数组中的元素数量已经是10,000。有人可以帮我解释一下结果吗?

您的基准测试有一个主要问题:

  • array_num_copy 在内循环之外初始化:因此它在第一次迭代中未排序,但已经为该内循环的其余部分排序,因此有 99.9% 的时间。在排序的数组上重复 merge_insertion_sort 会引入强烈的偏差:insertion_sort 在排序的切片上是最优的,这解释了为什么使用大阈值可以获得更好的计时。在调用 merge_insertion_sort.
  • 之前,您应该始终复制原始数组

另请注意以下改进领域:

  • merge_insertion_sort 中的测试次优:您应该测试切片长度是否低于阈值并在整个切片上调用 insertion_sort

  • merge中的算法可以改进:你应该只保存切片的左半部分,直接将向量array_left分配到它的最终大小并使用array_left[i - begin] = array_num[i] 而不是更昂贵的 array_left.push_back(array_num[i]).

  • 你只测试一个数组大小和一个伪随机分布:对于不同的分布你会得到不同的结果找到一个好的阈值需要测试不同的分布并猜测它们在真实案例。缓存的影响也可能使结果产生偏差。

  • 您为阈值测试的值过于分散:您应该测试更多的小值和更少的大值。

这是一个修改版本,使用 end 作为第一个排除元素的索引,并对算法进行了各种改进:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <vector>

using std::vector;

// begin and end are the indices: begin is included, end is excluded
void insertion_sort(vector<int> &array_num, int begin, int end) {
    for (int j = begin + 1; j < end; j++) {
        int key = array_num[j];
        int i = j - 1;
        while (i >= begin && array_num[i] > key) {
            array_num[i + 1] = array_num[i];
            i = i - 1;
        }
        array_num[i + 1] = key;
    }
}

// begin, median and end are the indices:
// begin is included, median is the start of the right half, end is excluded
void merge(vector<int> &array_num, int begin, int median, int end) {
    int n1 = median - begin;
    vector<int> array_left(n1);

    for (int i = 0; i < n1; i++) {
        array_left[i] = array_num[begin + i];
    }

    for (int i = 0, j = median, k = begin; i < n1;) {
        if (j >= end || array_left[i] <= array_num[j]) {
            array_num[k++] = array_left[i++];
        } else {
            array_num[k++] = array_num[j++];
        }
    }
}

// begin and end are the indices: begin is included, end is excluded
void merge_insertion_sort(vector<int> &array_num, int begin, int end, int threshold_merge_insert)
{
    if (end - begin <= threshold_merge_insert) {
        insertion_sort(array_num, begin, end);
    } else {
        int median = begin + (end - begin) / 2;
        merge_insertion_sort(array_num, begin, median, threshold_merge_insert);
        merge_insertion_sort(array_num, median, end, threshold_merge_insert);
        merge(array_num, begin, median, end);
    }
}

#define N 10000
#define MAXVALUE 10000
#define MIN_THRESHOLD 1
#define MAX_THRESHOLD 10000
#define REPEAT_COUNT 100

int main() {
    vector<int> array_num;
    vector<int> array_num_rand(N);

    for (int i = 0; i < N; i++) {
        array_num_rand[i] = rand() % MAXVALUE + 1;
    }

    //    std::cout << "The numbers in the array are: ";
    //    for (int i = 0; i < N; i++) {
    //        std::cout << array_num[i] << ' ';
    //    }

    for (int i = MIN_THRESHOLD; i < MAX_THRESHOLD; i += (i + 9) / 10) {
        int sorted = 1;
        clock_t begin = clock();
        for (int j = 0; j < REPEAT_COUNT; j++) {
            array_num = array_num_rand;
            merge_insertion_sort(array_num, 0, N, i);
        }
        clock_t end = clock();
        for (int j = 1; j < N; j++) {
            if (array_num[j - 1] > array_num[j]) {
                printf("merge_insertion_sort(array_num, 0, %d, %d) -> sort error at %d\n",
                       N, i, j);
                sorted = 0;
                break;
            }
        }
        if (sorted) {
            printf("merge_insertion_sort(array_num, 0, %d, %d) -> %.0fus\n",
                   N, i, (end - begin) * 1000000. / CLOCKS_PER_SEC / REPEAT_COUNT);
        }
    }

    printf("Press enter to finish\n");
    getchar();
    return 0;
}

样本运行:

merge_insertion_sort(array_num, 0, 10000, 1) -> 1510us
merge_insertion_sort(array_num, 0, 10000, 2) -> 1217us
merge_insertion_sort(array_num, 0, 10000, 3) -> 1062us
merge_insertion_sort(array_num, 0, 10000, 4) -> 1054us
merge_insertion_sort(array_num, 0, 10000, 5) -> 927us
merge_insertion_sort(array_num, 0, 10000, 6) -> 899us
merge_insertion_sort(array_num, 0, 10000, 7) -> 876us
merge_insertion_sort(array_num, 0, 10000, 8) -> 896us
merge_insertion_sort(array_num, 0, 10000, 9) -> 828us
merge_insertion_sort(array_num, 0, 10000, 10) -> 706us
merge_insertion_sort(array_num, 0, 10000, 11) -> 680us
merge_insertion_sort(array_num, 0, 10000, 13) -> 687us
merge_insertion_sort(array_num, 0, 10000, 15) -> 709us
merge_insertion_sort(array_num, 0, 10000, 17) -> 685us
merge_insertion_sort(array_num, 0, 10000, 19) -> 680us
merge_insertion_sort(array_num, 0, 10000, 21) -> 627us
merge_insertion_sort(array_num, 0, 10000, 24) -> 612us
merge_insertion_sort(array_num, 0, 10000, 27) -> 653us
merge_insertion_sort(array_num, 0, 10000, 30) -> 591us
merge_insertion_sort(array_num, 0, 10000, 33) -> 641us
merge_insertion_sort(array_num, 0, 10000, 37) -> 622us
merge_insertion_sort(array_num, 0, 10000, 41) -> 531us
merge_insertion_sort(array_num, 0, 10000, 46) -> 562us
merge_insertion_sort(array_num, 0, 10000, 51) -> 538us
merge_insertion_sort(array_num, 0, 10000, 57) -> 572us
merge_insertion_sort(array_num, 0, 10000, 63) -> 560us
merge_insertion_sort(array_num, 0, 10000, 70) -> 542us
merge_insertion_sort(array_num, 0, 10000, 77) -> 566us
merge_insertion_sort(array_num, 0, 10000, 85) -> 526us
merge_insertion_sort(array_num, 0, 10000, 94) -> 532us
merge_insertion_sort(array_num, 0, 10000, 104) -> 533us
merge_insertion_sort(array_num, 0, 10000, 115) -> 561us
merge_insertion_sort(array_num, 0, 10000, 127) -> 536us
merge_insertion_sort(array_num, 0, 10000, 140) -> 538us
merge_insertion_sort(array_num, 0, 10000, 154) -> 543us
merge_insertion_sort(array_num, 0, 10000, 170) -> 576us
merge_insertion_sort(array_num, 0, 10000, 187) -> 570us
merge_insertion_sort(array_num, 0, 10000, 206) -> 571us
merge_insertion_sort(array_num, 0, 10000, 227) -> 630us
merge_insertion_sort(array_num, 0, 10000, 250) -> 574us
merge_insertion_sort(array_num, 0, 10000, 275) -> 611us
merge_insertion_sort(array_num, 0, 10000, 303) -> 596us
merge_insertion_sort(array_num, 0, 10000, 334) -> 680us
merge_insertion_sort(array_num, 0, 10000, 368) -> 703us
merge_insertion_sort(array_num, 0, 10000, 405) -> 708us
merge_insertion_sort(array_num, 0, 10000, 446) -> 675us
merge_insertion_sort(array_num, 0, 10000, 491) -> 709us
merge_insertion_sort(array_num, 0, 10000, 541) -> 690us
merge_insertion_sort(array_num, 0, 10000, 596) -> 675us
merge_insertion_sort(array_num, 0, 10000, 656) -> 1017us
merge_insertion_sort(array_num, 0, 10000, 722) -> 997us
merge_insertion_sort(array_num, 0, 10000, 795) -> 1000us
merge_insertion_sort(array_num, 0, 10000, 875) -> 1023us
merge_insertion_sort(array_num, 0, 10000, 963) -> 1002us
merge_insertion_sort(array_num, 0, 10000, 1060) -> 975us
merge_insertion_sort(array_num, 0, 10000, 1166) -> 990us
merge_insertion_sort(array_num, 0, 10000, 1283) -> 1594us
merge_insertion_sort(array_num, 0, 10000, 1412) -> 1578us
merge_insertion_sort(array_num, 0, 10000, 1554) -> 1561us
merge_insertion_sort(array_num, 0, 10000, 1710) -> 1636us
merge_insertion_sort(array_num, 0, 10000, 1881) -> 1712us
merge_insertion_sort(array_num, 0, 10000, 2070) -> 1583us
merge_insertion_sort(array_num, 0, 10000, 2277) -> 1558us
merge_insertion_sort(array_num, 0, 10000, 2505) -> 2896us
merge_insertion_sort(array_num, 0, 10000, 2756) -> 2867us
merge_insertion_sort(array_num, 0, 10000, 3032) -> 2868us
merge_insertion_sort(array_num, 0, 10000, 3336) -> 2825us
merge_insertion_sort(array_num, 0, 10000, 3670) -> 2915us
merge_insertion_sort(array_num, 0, 10000, 4037) -> 2864us
merge_insertion_sort(array_num, 0, 10000, 4441) -> 2928us
merge_insertion_sort(array_num, 0, 10000, 4886) -> 2968us
merge_insertion_sort(array_num, 0, 10000, 5375) -> 5561us
merge_insertion_sort(array_num, 0, 10000, 5913) -> 5688us
merge_insertion_sort(array_num, 0, 10000, 6505) -> 5615us
merge_insertion_sort(array_num, 0, 10000, 7156) -> 5576us
merge_insertion_sort(array_num, 0, 10000, 7872) -> 5542us
merge_insertion_sort(array_num, 0, 10000, 8660) -> 5543us
merge_insertion_sort(array_num, 0, 10000, 9526) -> 5554us
Press enter to finish

没有明显的最佳 阈值,但 30 到 200 之间的值会产生更好的计时。使用更大的阈值将进一步改善部分或完全排序的数组的计时。