即使使用中位数作为基准,快速排序也比合并排序慢 20 倍?
quicksort ~20 times slower than mergesort even with median used as pivot?
我通过以 3 个元素的中位数为基准实现了这个版本的快速排序:
def rearrange_array(array, begin_index, end_index):
# choose 3 random indexes in the array
index1 = randint(begin_index, int(begin_index + (end_index - begin_index)/3))
index2 = randint(int(begin_index + (end_index - begin_index)/3), int(begin_index + 2*(end_index - begin_index)/3))
index3 = randint(int(begin_index + 2*(end_index - begin_index)/3), end_index)
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
if array[index2] > array[index3]:
tmp = array[index2]
array[index2] = array[index3]
array[index3] = tmp
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
# swap index2 and begin_index
tmp = array[index2]
array[index2] = array[end_index]
array[end_index] = tmp
def partition(array, start_index, end_index): # partition = conquer
rearrange_array(array, start_index, end_index)
pivot = array[end_index] # pivot is always the right most index
i = start_index - 1
# if an element is samller than pivot swap it in front
for j in range(start_index, end_index):
if array[j] <= pivot:
i += 1
# swap
tmp = array[i]
array[i] = array[j]
array[j] = tmp
# swap the pivot with the element after i
# since i only goes forward once something smaller than the pivot is found, we know that
# everything in front of i+1 will be smaller than pivot
tmp = array[i+1]
array[i+1] = array[end_index]
array[end_index] = tmp
return i + 1
def quicksort_recursive(array, start_index, end_index):
if start_index < end_index:
pivot_index = partition(array, start_index, end_index)
quicksort_recursive(array, start_index, pivot_index - 1)
quicksort_recursive(array, pivot_index, end_index)
为了测试排序算法,我将其用于 10 万个元素的数组,耗时约 16 秒
def get_array(length, maximum):
array = []
for _ in range(length):
array.append(randint(0, maximum))
return array
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
a = get_array(100000, 100)
t = time()
quicksort_recursive(a, 0, len(a) - 1)
print(time() - t)
然后我将它与这个版本的快速排序进行了比较,后者只用了大约 0.8-1 秒:
def merge(array, left, right):
# the program will only get here once we call mergesort on
# a one element array
i = j = k = 0
# put the smallest one in the array
while i < len(left) and j < len(right):
if left[i] < right[j]:
array[k] = left[i]
i += 1
else:
array[k] = right[j]
j += 1
k += 1
# if one array still has stuff left:
while i < len(left):
array[k] = left[i]
i += 1
k += 1
while j < len(right):
array[k] = right[j]
j += 1
k += 1
def mergesort_recursive(array):
# 1. devide the array in 2
if len(array) > 1:
# divide
middle_index = int(len(array)/2)
left = array[:middle_index]
right = array[middle_index:]
# conquer
mergesort_recursive(left)
mergesort_recursive(right)
# merge
merge(array, left, right)
我对此有点困惑,难道快速排序不应该比归并排序快吗?我已经在 100 个元素的数组上试过了,它确实按正确的顺序排列了元素,所以我不确定发生了什么
谢谢!
示例,未使用中位数 3。在我的系统上,快速排序 0.312 秒,自下而上合并排序 0.436 秒,自上而下合并排序 0.500 秒。请注意,对于这些类型的程序,python 比 C 或 C++ 等编译语言慢 50 多倍。
快速排序霍尔分区方案
import random
from time import time
def qsort(a, lo, hi):
if(lo >= hi):
return
p = a[(lo + hi) // 2] # pivot, any a[] except a[hi]
i = lo - 1
j = hi + 1
while(1):
while(1): # while(a[++i] < p)
i += 1
if(a[i] >= p):
break
while(1): # while(a[--j] < p)
j -= 1
if(a[j] <= p):
break
if(i >= j):
break
a[i],a[j] = a[j],a[i]
qsort(a, lo, j)
qsort(a, j+1, hi)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
s = time()
qsort(a, 0, len(a)-1)
e = time()
print e - s
# check to see if data was sorted
f = 0
for i in range (1 ,len(a)):
if(a[i-1] > a[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
自下而上归并排序
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
mrgsrt(a, b, len(a))
def mrgsrt(a, b, n):
s = 1 # assume even pass count
if((passcnt(n) & 1) == 1): # if odd count
while(s < n): # swap pairs in place
if(a[s] < a[s-1]):
a[s-1],a[s] = a[s],a[s-1]
s = s + 2
s = 2
while(s < n):
ee = 0 # reset end index
while(ee < n): # setup for next pair of runs
ll = ee
rr = ll + s
if(rr >= n): # if only left run copy it
b[ll:n] = a[ll:n]
break
ee = rr + s
if(ee > n):
ee = n
mrg(a, b, ll, rr, ee)
a,b = b,a # swap(a, b)
s = s << 1 # double run size
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
def passcnt(n): # return # passes
i = 0
s = 1
while(s < n):
s = s << 1
i = i + 1
return(i)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
自顶向下归并排序
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
msa2a(a, b, 0, len(a)) # merge sort a to a
def msa2a(a, b, low, end): # merge sort a to a
if((end - low) < 2): # if < 2 elements
return # return
mid = (low+end)//2 # set mid point
msa2b(a, b, low, mid) # merge sort left half to b
msa2b(a, b, mid, end) # merge sort right half to b
mrg(b, a, low, mid, end) # merge halves from b to a
def msa2b(a, b, low, end): # merge sort a to b
if((end - low) < 2): # if < 2 elements
b[low] = a[low] # copy 1 element from a to b
return # return
mid = (low+end)//2 # set mid point
msa2a(a, b, low, mid) # merge sort left half to a
msa2a(a, b, mid, end) # merge sort right half to a
mrg(a, b, low, mid, end) # merge halves from a to b
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
我通过以 3 个元素的中位数为基准实现了这个版本的快速排序:
def rearrange_array(array, begin_index, end_index):
# choose 3 random indexes in the array
index1 = randint(begin_index, int(begin_index + (end_index - begin_index)/3))
index2 = randint(int(begin_index + (end_index - begin_index)/3), int(begin_index + 2*(end_index - begin_index)/3))
index3 = randint(int(begin_index + 2*(end_index - begin_index)/3), end_index)
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
if array[index2] > array[index3]:
tmp = array[index2]
array[index2] = array[index3]
array[index3] = tmp
if array[index1] > array[index2]:
tmp = array[index1]
array[index1] = array[index2]
array[index2] = tmp
# swap index2 and begin_index
tmp = array[index2]
array[index2] = array[end_index]
array[end_index] = tmp
def partition(array, start_index, end_index): # partition = conquer
rearrange_array(array, start_index, end_index)
pivot = array[end_index] # pivot is always the right most index
i = start_index - 1
# if an element is samller than pivot swap it in front
for j in range(start_index, end_index):
if array[j] <= pivot:
i += 1
# swap
tmp = array[i]
array[i] = array[j]
array[j] = tmp
# swap the pivot with the element after i
# since i only goes forward once something smaller than the pivot is found, we know that
# everything in front of i+1 will be smaller than pivot
tmp = array[i+1]
array[i+1] = array[end_index]
array[end_index] = tmp
return i + 1
def quicksort_recursive(array, start_index, end_index):
if start_index < end_index:
pivot_index = partition(array, start_index, end_index)
quicksort_recursive(array, start_index, pivot_index - 1)
quicksort_recursive(array, pivot_index, end_index)
为了测试排序算法,我将其用于 10 万个元素的数组,耗时约 16 秒
def get_array(length, maximum):
array = []
for _ in range(length):
array.append(randint(0, maximum))
return array
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
a = get_array(100000, 100)
t = time()
quicksort_recursive(a, 0, len(a) - 1)
print(time() - t)
然后我将它与这个版本的快速排序进行了比较,后者只用了大约 0.8-1 秒:
def merge(array, left, right):
# the program will only get here once we call mergesort on
# a one element array
i = j = k = 0
# put the smallest one in the array
while i < len(left) and j < len(right):
if left[i] < right[j]:
array[k] = left[i]
i += 1
else:
array[k] = right[j]
j += 1
k += 1
# if one array still has stuff left:
while i < len(left):
array[k] = left[i]
i += 1
k += 1
while j < len(right):
array[k] = right[j]
j += 1
k += 1
def mergesort_recursive(array):
# 1. devide the array in 2
if len(array) > 1:
# divide
middle_index = int(len(array)/2)
left = array[:middle_index]
right = array[middle_index:]
# conquer
mergesort_recursive(left)
mergesort_recursive(right)
# merge
merge(array, left, right)
我对此有点困惑,难道快速排序不应该比归并排序快吗?我已经在 100 个元素的数组上试过了,它确实按正确的顺序排列了元素,所以我不确定发生了什么
谢谢!
示例,未使用中位数 3。在我的系统上,快速排序 0.312 秒,自下而上合并排序 0.436 秒,自上而下合并排序 0.500 秒。请注意,对于这些类型的程序,python 比 C 或 C++ 等编译语言慢 50 多倍。
快速排序霍尔分区方案
import random
from time import time
def qsort(a, lo, hi):
if(lo >= hi):
return
p = a[(lo + hi) // 2] # pivot, any a[] except a[hi]
i = lo - 1
j = hi + 1
while(1):
while(1): # while(a[++i] < p)
i += 1
if(a[i] >= p):
break
while(1): # while(a[--j] < p)
j -= 1
if(a[j] <= p):
break
if(i >= j):
break
a[i],a[j] = a[j],a[i]
qsort(a, lo, j)
qsort(a, j+1, hi)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
s = time()
qsort(a, 0, len(a)-1)
e = time()
print e - s
# check to see if data was sorted
f = 0
for i in range (1 ,len(a)):
if(a[i-1] > a[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
自下而上归并排序
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
mrgsrt(a, b, len(a))
def mrgsrt(a, b, n):
s = 1 # assume even pass count
if((passcnt(n) & 1) == 1): # if odd count
while(s < n): # swap pairs in place
if(a[s] < a[s-1]):
a[s-1],a[s] = a[s],a[s-1]
s = s + 2
s = 2
while(s < n):
ee = 0 # reset end index
while(ee < n): # setup for next pair of runs
ll = ee
rr = ll + s
if(rr >= n): # if only left run copy it
b[ll:n] = a[ll:n]
break
ee = rr + s
if(ee > n):
ee = n
mrg(a, b, ll, rr, ee)
a,b = b,a # swap(a, b)
s = s << 1 # double run size
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
def passcnt(n): # return # passes
i = 0
s = 1
while(s < n):
s = s << 1
i = i + 1
return(i)
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")
自顶向下归并排序
import random
from time import time
def sort(a):
if(len(a) < 2): # if nothing to do, return
return
b = [0] * len(a) # allocate b
msa2a(a, b, 0, len(a)) # merge sort a to a
def msa2a(a, b, low, end): # merge sort a to a
if((end - low) < 2): # if < 2 elements
return # return
mid = (low+end)//2 # set mid point
msa2b(a, b, low, mid) # merge sort left half to b
msa2b(a, b, mid, end) # merge sort right half to b
mrg(b, a, low, mid, end) # merge halves from b to a
def msa2b(a, b, low, end): # merge sort a to b
if((end - low) < 2): # if < 2 elements
b[low] = a[low] # copy 1 element from a to b
return # return
mid = (low+end)//2 # set mid point
msa2a(a, b, low, mid) # merge sort left half to a
msa2a(a, b, mid, end) # merge sort right half to a
mrg(a, b, low, mid, end) # merge halves from a to b
def mrg(a, b, ll, rr, ee): # merge a pair of runs from a to b
o = ll # o = b[] index
l = ll # l = a[] left index
r = rr # r = a[] right index
while True:
if(a[l] <= a[r]): # if a[l] <= a[r]
b[o] = a[l] # copy a[l]
o += 1
l += 1
if(l < rr): # if not end of left run
continue # continue (back to while)
b[o:ee] = a[r:ee] # else copy rest of right run
return # and return
else: # else a[l] > a[r]
b[o] = a[r] # copy a[r]
o += 1
r += 1
if(r < ee): # if not end of right run
continue # continue (back to while)
b[o:ee] = a[l:rr] # else copy rest of left run
return # and return
# test sort
a = [random.randint(0, 2147483647) for r in range(100*1024)]
c = a[:] # c = sorted copy of a
c.sort()
s = time()
sort(a)
e = time()
print e - s
# check to see if data was sorted properly
f = 0
for i in range (0, len(a)):
if(a[i] != c[i]):
f = 1
break
if(f == 0):
print("sorted")
else:
print("error")