即使使用中位数作为基准，快速排序也比合并排序慢 20 倍？

Question

我通过以 3 个元素的中位数为基准实现了这个版本的快速排序：

def rearrange_array(array, begin_index, end_index):
    # choose 3 random indexes in the array
    index1 = randint(begin_index, int(begin_index + (end_index - begin_index)/3))
    index2 = randint(int(begin_index + (end_index - begin_index)/3), int(begin_index + 2*(end_index - begin_index)/3))
    index3 = randint(int(begin_index + 2*(end_index - begin_index)/3), end_index)

    if array[index1] > array[index2]:
        tmp = array[index1]
        array[index1] = array[index2]
        array[index2] = tmp
    if array[index2] > array[index3]:
        tmp = array[index2]
        array[index2] = array[index3]
        array[index3] = tmp
    if array[index1] > array[index2]:
        tmp = array[index1]
        array[index1] = array[index2]
        array[index2] = tmp

    # swap index2 and begin_index
    tmp = array[index2]
    array[index2] = array[end_index]
    array[end_index] = tmp

def partition(array, start_index, end_index): # partition = conquer
    rearrange_array(array, start_index, end_index)
    pivot = array[end_index] # pivot is always the right most index
    i = start_index - 1
    # if an element is samller than pivot swap it in front
    for j in range(start_index, end_index):
        if array[j] <= pivot:
            i += 1
            # swap
            tmp = array[i]
            array[i] = array[j]
            array[j] = tmp
    # swap the pivot with the element after i
    # since i only goes forward once something smaller than the pivot is found, we know that
    # everything in front of i+1 will be smaller than pivot
    tmp = array[i+1]
    array[i+1] = array[end_index]
    array[end_index] = tmp
    return i + 1

def quicksort_recursive(array, start_index, end_index):
    if start_index < end_index:
        pivot_index = partition(array, start_index, end_index)
        quicksort_recursive(array, start_index, pivot_index - 1)
        quicksort_recursive(array, pivot_index, end_index)

为了测试排序算法，我将其用于 10 万个元素的数组，耗时约 16 秒

def get_array(length, maximum):
    array = []
    for _ in range(length):
        array.append(randint(0, maximum))
    return array
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
a = get_array(100000, 100)
t = time()
quicksort_recursive(a, 0, len(a) - 1)
print(time() - t)

然后我将它与这个版本的快速排序进行了比较，后者只用了大约 0.8-1 秒：

def merge(array, left, right):
    # the program will only get here once we call mergesort on
    # a one element array
    i = j = k = 0
    # put the smallest one in the array
    while i < len(left) and j < len(right):
        if left[i] < right[j]:
            array[k] = left[i]
            i += 1
        else:
            array[k] = right[j]
            j += 1
        k += 1
    # if one array still has stuff left:
    while i < len(left):
        array[k] = left[i]
        i += 1
        k += 1
    while j < len(right):
        array[k] = right[j]
        j += 1
        k += 1

def mergesort_recursive(array):
    # 1. devide the array in 2
    if len(array) > 1:
        # divide
        middle_index = int(len(array)/2)
        left = array[:middle_index]
        right = array[middle_index:]
        # conquer
        mergesort_recursive(left)
        mergesort_recursive(right)
        # merge
        merge(array, left, right)

我对此有点困惑，难道快速排序不应该比归并排序快吗？我已经在 100 个元素的数组上试过了，它确实按正确的顺序排列了元素，所以我不确定发生了什么

谢谢！

Answer 1

示例，未使用中位数 3。在我的系统上，快速排序 0.312 秒，自下而上合并排序 0.436 秒，自上而下合并排序 0.500 秒。请注意，对于这些类型的程序，python 比 C 或 C++ 等编译语言慢 50 多倍。

快速排序霍尔分区方案

import random
from time import time

def qsort(a, lo, hi):
    if(lo >= hi):
        return
    p = a[(lo + hi) // 2]       # pivot, any a[] except a[hi]
    i = lo - 1
    j = hi + 1
    while(1):
        while(1):               # while(a[++i] < p)
            i += 1
            if(a[i] >= p):
                break
        while(1):               # while(a[--j] < p)
            j -= 1
            if(a[j] <= p):
                break
        if(i >= j):
            break
        a[i],a[j] = a[j],a[i]
    qsort(a, lo, j)
    qsort(a, j+1, hi)

# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
s = time()
qsort(a, 0, len(a)-1)
e = time()
print e - s

# check to see if data was sorted
f = 0
for i in range (1 ,len(a)):
    if(a[i-1] > a[i]):
        f = 1
        break
if(f == 0):
    print("sorted")
else:
    print("error")

自下而上归并排序

import random
from time import time

def sort(a):
    if(len(a) < 2):                     # if nothing to do, return
        return
    b = [0] * len(a)                    # allocate b
    mrgsrt(a, b, len(a))

def mrgsrt(a, b, n):
    s = 1                               # assume even pass count
    if((passcnt(n) & 1) == 1):          #  if odd count
        while(s < n):                   #   swap pairs in place
            if(a[s] < a[s-1]):
                a[s-1],a[s] = a[s],a[s-1]
            s = s + 2
        s = 2
    while(s < n):
        ee = 0                          # reset end index
        while(ee < n):                  # setup for next pair of runs
            ll = ee
            rr = ll + s
            if(rr >= n):                #  if only left run copy it
                b[ll:n] = a[ll:n]
                break
            ee = rr + s
            if(ee > n):
                ee = n
            mrg(a, b, ll, rr, ee)
        a,b = b,a                       # swap(a, b)
        s = s << 1                      # double run size

def mrg(a, b, ll, rr, ee):              # merge a pair of runs from a to b
    o = ll                              # o = b[]        index
    l = ll                              # l = a[] left   index
    r = rr                              # r = a[] right  index
    while True:
        if(a[l] <= a[r]):               # if a[l] <= a[r]
            b[o] = a[l]                 #   copy a[l]
            o += 1
            l += 1
            if(l < rr):                 #   if not end of left run
                continue                #     continue (back to while)
            b[o:ee] = a[r:ee]           #   else copy rest of right run
            return                      #     and return
        else:                           # else a[l] > a[r]
            b[o] = a[r]                 #   copy a[r]
            o += 1
            r += 1
            if(r < ee):                 #   if not end of right run
                continue                #     continue (back to while)
            b[o:ee] = a[l:rr]           #   else copy rest of left run
            return                      #     and return

def passcnt(n):                         # return # passes
    i = 0
    s = 1
    while(s < n):
        s = s << 1
        i = i + 1
    return(i)

# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:]                                # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s

# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
    if(a[i] != c[i]):
        f = 1
        break
if(f == 0):
    print("sorted")
else:
    print("error")

自顶向下归并排序

import random
from time import time

def sort(a):
    if(len(a) < 2):                     # if nothing to do, return
        return
    b = [0] * len(a)                    # allocate b
    msa2a(a, b, 0, len(a))              # merge sort a to a

def msa2a(a, b, low, end):              # merge sort a to a
    if((end - low) < 2):                # if < 2 elements
        return                          #   return
    mid = (low+end)//2                  # set mid point
    msa2b(a, b, low, mid)               # merge sort left  half to b
    msa2b(a, b, mid, end)               # merge sort right half to b
    mrg(b, a, low, mid, end)            # merge halves   from b to a

def msa2b(a, b, low, end):              # merge sort a to b
    if((end - low) < 2):                # if < 2 elements
        b[low] = a[low]                 #   copy 1 element from a to b
        return                          #   return
    mid = (low+end)//2                  # set mid point
    msa2a(a, b, low, mid)               # merge sort left  half to a
    msa2a(a, b, mid, end)               # merge sort right half to a
    mrg(a, b, low, mid, end)            # merge halves   from a to b

def mrg(a, b, ll, rr, ee):              # merge a pair of runs from a to b
    o = ll                              # o = b[]        index
    l = ll                              # l = a[] left   index
    r = rr                              # r = a[] right  index
    while True:
        if(a[l] <= a[r]):               # if a[l] <= a[r]
            b[o] = a[l]                 #   copy a[l]
            o += 1
            l += 1
            if(l < rr):                 #   if not end of left run
                continue                #     continue (back to while)
            b[o:ee] = a[r:ee]           #   else copy rest of right run
            return                      #     and return
        else:                           # else a[l] > a[r]
            b[o] = a[r]                 #   copy a[r]
            o += 1
            r += 1
            if(r < ee):                 #   if not end of right run
                continue                #     continue (back to while)
            b[o:ee] = a[l:rr]           #   else copy rest of left run
            return                      #     and return

# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:]                                # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s

# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
    if(a[i] != c[i]):
        f = 1
        break
if(f == 0):
    print("sorted")
else:
    print("error")

即使使用中位数作为基准，快速排序也比合并排序慢 20 倍？

quicksort ~20 times slower than mergesort even with median used as pivot?

python

sorting

algorithm

mergesort

quicksort