在 python 中为 defaultdict 类型的集合实现二叉搜索树
Implementing binary search tree for defaultdict type of collection in python
我用谷歌搜索了一个代码,用于使用二进制搜索在 python defaultdict 中搜索键。我已将其实施为
import collections
import bisect
class MyDict(collections.Mapping):
def __init__(self, contents):
"contents must be a sequence of key/value pairs"
self._list = sorted(contents)
def __iter__(self):
return (k for (k, _) in self._list)
def __contains__(self, k):
i = bisect.bisect_left(self._list, (k, None))
return i < len(self._list) and self._list[i][0] == k
def __len__(self):
return len(self._list)
def __getitem__(self, k):
i = bisect.bisect_left(self._list, (k, None))
if i >= len(self._list): raise KeyError(k)
return self._list[i][1]
mydictionary = {'60': set(['US0596951063']), '61': set(['']), '63': set(['US3385171059']), '65': set(['US0936981085']), '67': set(['']), '69': set(['']), '24': set(['', 'US0191181082']), '25': set(['']), '26': set(['US0301371037']), '27': set(['US0295951059']), '20': set(['', 'US0320157037', 'US0320151097']), '21': set(['']), '22': set(['']), '23': set(['']), '29': set(['']), '2': set(['']), '6': set(['']), '8': set(['US0077371096']), '99': set(['']), '98': set(['']), '91': set(['US1104481072']), '15': set(['']), '93': set(['']), '92': set(['US0976981045']), '94': set(['', 'US1025651084']), '97': set(['US0885771016']), '96': set(['', 'US0557101070']), '11': set(['']), '10': set(['']), '13': set(['']), '12': set(['US9099121074']), '59': set(['US09247Q1067']), '58': set(['US0576652004']), '17': set(['']), '16': set(['']), '19': set(['IL0006320183']), '54': set(['US0405151087']), '51': set(['']), '50': set(['US05343P1093']), '53': set(['']), '18': set(['US0080153077']), '90': set(['US0682211000']), '88': set(['', 'US0826571079']), '82': set(['US0582641025']), '80': set(['US0571491069']), '81': set(['US0928281023']), '86': set(['', 'US1030431050']), '87': set(['US05564T1034']), '84': set(['US0565251081']), '46': set(['US0303711081']), '47': set(['', 'US00753P1030', 'US00163U1060']), '45': set(['US22149T1025', 'US2274781044']), '42': set(['US2947007033']), '40': set(['US61747Y6914']), '41': set(['', 'US8814482035']), '5': set(['US0245914066', 'US0245911096']), '7': set(['US0048161048']), '9': set(['US0063513081']), '76': set(['US0905722072']), '74': set(['LR000A0D81P7']), '72': set(['US0668491006']), '71': set(['CA08135F1071']), '102': set(['US1484111018']), '100': set(['US13971R3066']), '101': set(['US1330341082']), '79': set(['']), '78': set(['']), '33': set(['', 'US00215F1075', 'US0490792050']), '32': set(['', 'US0373471012']), '31': set(['']), '30': set([''])}
d = MyDict(mydictionary)
r = d.__getitem__('84')
print r
我是 python 和算法的新手,谁能帮帮我。
我收到以下错误
Traceback (most recent call last):
File "search.py", line 24, in <module>
i = d.__getitem__('84')
File "search.py", line 17, in __getitem__
if i >= len(self._list): raise KeyError(k)
KeyError: '84'
def __getitem__(self, k):
# i = bisect.bisect_left(self._list, (k, None))
i = bisect.bisect_left(self._list, k)
if i >= len(self._list): raise KeyError(k)
return self._list[i][1]
您正在搜索 self._list,这是一个键列表 ["84",...] 用于不存在的项目 ("84",None)。
问题是,当您将字典传递给 __init__
:
self._list = sorted(contents)
然后self._list
是keys的列表dict
:
In [5]: my_dict
Out[5]: {'a': 1, 'b': 2}
In [6]: list(my_dict)
Out[6]: ['a', 'b']
In [7]: sorted(my_dict)
Out[7]: ['a', 'b']
所以现在您的 self._list
属性是一个字符串列表,但是 __getitem__
:
i = bisect.bisect_left(self._list, (k, None))
你试图用一个元组将它一分为二,('84', None')
强制比较一个字符串和一个 tuple
,但是在 Python 2 中,不幸的是, 这个是允许的,并且总是将字符串计算为小于 tuple
。
来自Python 2 docs
CPython implementation detail: Objects of different types except
numbers are ordered by their type names; objects of the same types
that don’t support proper comparison are ordered by their address.
因此,考虑:
In [7]: sorted(my_dict)
Out[7]: ['a', 'b']
In [8]: import bisect
In [9]: _list = sorted(my_dict)
In [10]: bisect.bisect_left(_list, ('a', None))
Out[11]: 2
由于您的 tuple
自动评估为大于列表中的所有内容,因此它 returns len(self._list)
。从而触发你的条件 raise KeyError
.
要从字典中获取 key/value 对的序列,请使用:
mydictionary.items()
注意,在Python3中,一般不允许不同类型之间的顺序比较。相反,抛出 TypeError
,这会更有帮助:
<ipython-input-1-8101e6bd7098> in __getitem__(self, k)
14 return len(self._list)
15 def __getitem__(self, k):
---> 16 i = bisect.bisect_left(self._list, (k, None))
17 if i >= len(self._list): raise KeyError(k)
18 return self._list[i][1]
TypeError: '<' not supported between instances of 'str' and 'tuple'
我用谷歌搜索了一个代码,用于使用二进制搜索在 python defaultdict 中搜索键。我已将其实施为
import collections
import bisect
class MyDict(collections.Mapping):
def __init__(self, contents):
"contents must be a sequence of key/value pairs"
self._list = sorted(contents)
def __iter__(self):
return (k for (k, _) in self._list)
def __contains__(self, k):
i = bisect.bisect_left(self._list, (k, None))
return i < len(self._list) and self._list[i][0] == k
def __len__(self):
return len(self._list)
def __getitem__(self, k):
i = bisect.bisect_left(self._list, (k, None))
if i >= len(self._list): raise KeyError(k)
return self._list[i][1]
mydictionary = {'60': set(['US0596951063']), '61': set(['']), '63': set(['US3385171059']), '65': set(['US0936981085']), '67': set(['']), '69': set(['']), '24': set(['', 'US0191181082']), '25': set(['']), '26': set(['US0301371037']), '27': set(['US0295951059']), '20': set(['', 'US0320157037', 'US0320151097']), '21': set(['']), '22': set(['']), '23': set(['']), '29': set(['']), '2': set(['']), '6': set(['']), '8': set(['US0077371096']), '99': set(['']), '98': set(['']), '91': set(['US1104481072']), '15': set(['']), '93': set(['']), '92': set(['US0976981045']), '94': set(['', 'US1025651084']), '97': set(['US0885771016']), '96': set(['', 'US0557101070']), '11': set(['']), '10': set(['']), '13': set(['']), '12': set(['US9099121074']), '59': set(['US09247Q1067']), '58': set(['US0576652004']), '17': set(['']), '16': set(['']), '19': set(['IL0006320183']), '54': set(['US0405151087']), '51': set(['']), '50': set(['US05343P1093']), '53': set(['']), '18': set(['US0080153077']), '90': set(['US0682211000']), '88': set(['', 'US0826571079']), '82': set(['US0582641025']), '80': set(['US0571491069']), '81': set(['US0928281023']), '86': set(['', 'US1030431050']), '87': set(['US05564T1034']), '84': set(['US0565251081']), '46': set(['US0303711081']), '47': set(['', 'US00753P1030', 'US00163U1060']), '45': set(['US22149T1025', 'US2274781044']), '42': set(['US2947007033']), '40': set(['US61747Y6914']), '41': set(['', 'US8814482035']), '5': set(['US0245914066', 'US0245911096']), '7': set(['US0048161048']), '9': set(['US0063513081']), '76': set(['US0905722072']), '74': set(['LR000A0D81P7']), '72': set(['US0668491006']), '71': set(['CA08135F1071']), '102': set(['US1484111018']), '100': set(['US13971R3066']), '101': set(['US1330341082']), '79': set(['']), '78': set(['']), '33': set(['', 'US00215F1075', 'US0490792050']), '32': set(['', 'US0373471012']), '31': set(['']), '30': set([''])}
d = MyDict(mydictionary)
r = d.__getitem__('84')
print r
我是 python 和算法的新手,谁能帮帮我。
我收到以下错误
Traceback (most recent call last):
File "search.py", line 24, in <module>
i = d.__getitem__('84')
File "search.py", line 17, in __getitem__
if i >= len(self._list): raise KeyError(k)
KeyError: '84'
def __getitem__(self, k):
# i = bisect.bisect_left(self._list, (k, None))
i = bisect.bisect_left(self._list, k)
if i >= len(self._list): raise KeyError(k)
return self._list[i][1]
您正在搜索 self._list,这是一个键列表 ["84",...] 用于不存在的项目 ("84",None)。
问题是,当您将字典传递给 __init__
:
self._list = sorted(contents)
然后self._list
是keys的列表dict
:
In [5]: my_dict
Out[5]: {'a': 1, 'b': 2}
In [6]: list(my_dict)
Out[6]: ['a', 'b']
In [7]: sorted(my_dict)
Out[7]: ['a', 'b']
所以现在您的 self._list
属性是一个字符串列表,但是 __getitem__
:
i = bisect.bisect_left(self._list, (k, None))
你试图用一个元组将它一分为二,('84', None')
强制比较一个字符串和一个 tuple
,但是在 Python 2 中,不幸的是, 这个是允许的,并且总是将字符串计算为小于 tuple
。
来自Python 2 docs
CPython implementation detail: Objects of different types except numbers are ordered by their type names; objects of the same types that don’t support proper comparison are ordered by their address.
因此,考虑:
In [7]: sorted(my_dict)
Out[7]: ['a', 'b']
In [8]: import bisect
In [9]: _list = sorted(my_dict)
In [10]: bisect.bisect_left(_list, ('a', None))
Out[11]: 2
由于您的 tuple
自动评估为大于列表中的所有内容,因此它 returns len(self._list)
。从而触发你的条件 raise KeyError
.
要从字典中获取 key/value 对的序列,请使用:
mydictionary.items()
注意,在Python3中,一般不允许不同类型之间的顺序比较。相反,抛出 TypeError
,这会更有帮助:
<ipython-input-1-8101e6bd7098> in __getitem__(self, k)
14 return len(self._list)
15 def __getitem__(self, k):
---> 16 i = bisect.bisect_left(self._list, (k, None))
17 if i >= len(self._list): raise KeyError(k)
18 return self._list[i][1]
TypeError: '<' not supported between instances of 'str' and 'tuple'