if/else 内的全局范围对于 运行 中位数
Global scope inside if/else for a running median
我正在使用两个堆在 Python 中实现 运行 中值算法。但是,即使我向它们推送,堆的大小也不会增加...
我怀疑这与 if/else
语句中的范围界定有关。
我不太明白如何解决这个问题。
import os
import numpy
import functools
import heapq
from heapq import heapify, heappush, heappop
@functools.total_ordering
class ReverseCompare(object):
def __init__(self, obj):
self.obj = obj
def __eq__(self, other):
return isinstance(other, ReverseCompare) and self.obj == other.obj
def __le__(self, other):
return isinstance(other, ReverseCompare) and self.obj >= other.obj
def __str__(self):
return str(self.obj)
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, self.obj)
curMedian = 0
leftHeap = map(ReverseCompare, [])
rightHeap = []
heapq.heapify(rightHeap)
heapq.heapify(leftHeap)
def runningMed(n):
#importing the global variables
global curMedian
global leftHeap
global rightHeap
#The first time
if (curMedian == 0):
curMedian = n
return curMedian
#the second +... time
# print "debug"
# print rightHeap
# print leftHeap
# heapq.heappush(leftHeap, 3)
# heapq.heappush(rightHeap, 3)
# print rightHeap
print "length of heaps"
print len(rightHeap)
print len(leftHeap)
if (len(rightHeap) > len(leftHeap) + 2):
print "Debug long right heap"
if(n >= curMedian):
heapq.heappush(leftHeap, curMedian)
curMedian = heapq.heappop(rightHeap)
heappop.heappush(rightHeap, n)
else:
heapq.heappush(leftHeap, n)
elif (len(leftHeap) > len(rightHeap) + 2):
print "Debug long"
if(n <= curMedian):
heapq.heappush(rightHeap, curMedian)
curMedian = heapq.heappop(leftHeap)
heappop.heappush(leftHeap, n)
else:
heapq.heappush(rightHeap,n)
else:
print "Debug curMedian"
print n
print curMedian
if (n > curMedian):
heapq.heappush(rightHeap, n)
else:
heapq.heappush(leftHeap,n)
#TReturn the median:
if (len(leftHeap) == len(rightHeap)):
return curMedian
elif (len(leftHeap) > len(rightHeap)):
return (heapq.heappop(leftHeap) + curMedian)/2
else:
return (heapq.heappop(rightHeap) + curMedian)/2
if __name__ == "__main__":
#TODO: Input/output names must be changed
inputFile = open('numbers.txt', 'r')
outputFile = open('output.txt', 'w')
for line in inputFile:
num = int(line.rstrip('\n'))
med = runningMed(num)
outputFile.write(str(med) + '\n')
inputFile.close()
outputFile.close()
与作用域无关。堆不会增长,因为您在最后弹出新添加的元素:
return (heapq.heappop(leftHeap) + curMedian)/2
else:
return (heapq.heappop(rightHeap) + curMedian)/2
只看 max/min 元素而不弹出它:
return (leftHeap[0] + curMedian)/2
else:
return (rightHeap[0] + curMedian)/2
我在评论中提到的自己的版本:
from heapq import heappush, heappop
left, right = [], []
def runmed(n):
global left, right
if len(left) <= len(right):
heappush(left, -n)
else:
heappush(right, n)
if right and -left[0] > right[0]:
heappush(left, -heappop(right))
heappush(right, -heappop(left))
if len(left) > len(right):
return -left[0]
return (right[0] - left[0]) / 2.0
left
是较小一半数字的最大堆,包含取反以获得最大堆功能的那些数字。
right
是较大一半数字的最小堆。
left
始终与 right
大小相同或大一倍。
测试代码:
import random
numbers = []
for _ in range(15):
n = random.randrange(100)
numbers.append(n)
print '{:<4} is median of {}'.format(runmed(n), sorted(numbers))
输出:
38 is median of [38]
27.5 is median of [17, 38]
38 is median of [17, 38, 79]
27.5 is median of [4, 17, 38, 79]
17 is median of [4, 12, 17, 38, 79]
27.5 is median of [4, 12, 17, 38, 63, 79]
38 is median of [4, 12, 17, 38, 63, 69, 79]
35.0 is median of [4, 12, 17, 32, 38, 63, 69, 79]
38 is median of [4, 12, 17, 32, 38, 39, 63, 69, 79]
38.5 is median of [4, 12, 17, 32, 38, 39, 63, 69, 79, 82]
39 is median of [4, 12, 17, 32, 38, 39, 47, 63, 69, 79, 82]
38.5 is median of [4, 12, 17, 21, 32, 38, 39, 47, 63, 69, 79, 82]
38 is median of [4, 12, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
35.0 is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
38 is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 62, 63, 69, 79, 82]
Python 全局范围可能有点挑剔。如果将其包装在 class 中,每个实例都有自己的 leftHeap、rightHeap 和 curMedian,生活可能会更轻松。
我正在使用两个堆在 Python 中实现 运行 中值算法。但是,即使我向它们推送,堆的大小也不会增加...
我怀疑这与 if/else
语句中的范围界定有关。
我不太明白如何解决这个问题。
import os
import numpy
import functools
import heapq
from heapq import heapify, heappush, heappop
@functools.total_ordering
class ReverseCompare(object):
def __init__(self, obj):
self.obj = obj
def __eq__(self, other):
return isinstance(other, ReverseCompare) and self.obj == other.obj
def __le__(self, other):
return isinstance(other, ReverseCompare) and self.obj >= other.obj
def __str__(self):
return str(self.obj)
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, self.obj)
curMedian = 0
leftHeap = map(ReverseCompare, [])
rightHeap = []
heapq.heapify(rightHeap)
heapq.heapify(leftHeap)
def runningMed(n):
#importing the global variables
global curMedian
global leftHeap
global rightHeap
#The first time
if (curMedian == 0):
curMedian = n
return curMedian
#the second +... time
# print "debug"
# print rightHeap
# print leftHeap
# heapq.heappush(leftHeap, 3)
# heapq.heappush(rightHeap, 3)
# print rightHeap
print "length of heaps"
print len(rightHeap)
print len(leftHeap)
if (len(rightHeap) > len(leftHeap) + 2):
print "Debug long right heap"
if(n >= curMedian):
heapq.heappush(leftHeap, curMedian)
curMedian = heapq.heappop(rightHeap)
heappop.heappush(rightHeap, n)
else:
heapq.heappush(leftHeap, n)
elif (len(leftHeap) > len(rightHeap) + 2):
print "Debug long"
if(n <= curMedian):
heapq.heappush(rightHeap, curMedian)
curMedian = heapq.heappop(leftHeap)
heappop.heappush(leftHeap, n)
else:
heapq.heappush(rightHeap,n)
else:
print "Debug curMedian"
print n
print curMedian
if (n > curMedian):
heapq.heappush(rightHeap, n)
else:
heapq.heappush(leftHeap,n)
#TReturn the median:
if (len(leftHeap) == len(rightHeap)):
return curMedian
elif (len(leftHeap) > len(rightHeap)):
return (heapq.heappop(leftHeap) + curMedian)/2
else:
return (heapq.heappop(rightHeap) + curMedian)/2
if __name__ == "__main__":
#TODO: Input/output names must be changed
inputFile = open('numbers.txt', 'r')
outputFile = open('output.txt', 'w')
for line in inputFile:
num = int(line.rstrip('\n'))
med = runningMed(num)
outputFile.write(str(med) + '\n')
inputFile.close()
outputFile.close()
与作用域无关。堆不会增长,因为您在最后弹出新添加的元素:
return (heapq.heappop(leftHeap) + curMedian)/2
else:
return (heapq.heappop(rightHeap) + curMedian)/2
只看 max/min 元素而不弹出它:
return (leftHeap[0] + curMedian)/2
else:
return (rightHeap[0] + curMedian)/2
我在评论中提到的自己的版本:
from heapq import heappush, heappop
left, right = [], []
def runmed(n):
global left, right
if len(left) <= len(right):
heappush(left, -n)
else:
heappush(right, n)
if right and -left[0] > right[0]:
heappush(left, -heappop(right))
heappush(right, -heappop(left))
if len(left) > len(right):
return -left[0]
return (right[0] - left[0]) / 2.0
left
是较小一半数字的最大堆,包含取反以获得最大堆功能的那些数字。right
是较大一半数字的最小堆。left
始终与right
大小相同或大一倍。
测试代码:
import random
numbers = []
for _ in range(15):
n = random.randrange(100)
numbers.append(n)
print '{:<4} is median of {}'.format(runmed(n), sorted(numbers))
输出:
38 is median of [38]
27.5 is median of [17, 38]
38 is median of [17, 38, 79]
27.5 is median of [4, 17, 38, 79]
17 is median of [4, 12, 17, 38, 79]
27.5 is median of [4, 12, 17, 38, 63, 79]
38 is median of [4, 12, 17, 38, 63, 69, 79]
35.0 is median of [4, 12, 17, 32, 38, 63, 69, 79]
38 is median of [4, 12, 17, 32, 38, 39, 63, 69, 79]
38.5 is median of [4, 12, 17, 32, 38, 39, 63, 69, 79, 82]
39 is median of [4, 12, 17, 32, 38, 39, 47, 63, 69, 79, 82]
38.5 is median of [4, 12, 17, 21, 32, 38, 39, 47, 63, 69, 79, 82]
38 is median of [4, 12, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
35.0 is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 63, 69, 79, 82]
38 is median of [4, 12, 14, 17, 21, 25, 32, 38, 39, 47, 62, 63, 69, 79, 82]
Python 全局范围可能有点挑剔。如果将其包装在 class 中,每个实例都有自己的 leftHeap、rightHeap 和 curMedian,生活可能会更轻松。