如何在 python OrderedDict 上使用字符串键而不是整数进行切片?
How can you slice with string keys instead of integers on a python OrderedDict?
由于 OrderedDict
具有列表(具有有序元素)和字典(具有键而不是索引)的特性,因此您可以使用键进行切片似乎很自然。
>>> from collections import OrderedDict
>>> cities = OrderedDict((('san francisco', 650), ('new york', 212), ('shanghai', 8621), ('barcelona', 42423)))
>>> test['shanghai':] # I want all the cities from shanghai to the end of the list
TypeError: unhashable type
有趣的是,这不是您因 OrderedDictionary.__getslice__
未实施而看到的错误。我尝试将自己的 __getslice__
方法添加到 OrderedDict
,但我将 运行 保留在这个 TypeError 问题中。似乎 Python 正在做某种类型检查以强制切片键只是整数,甚至在它们被传递给 __getslice__
函数之前,多么不符合 Python!
>>> class BetterOrderedDict(OrderedDict):
def __getslice__(self, start=None, end=None, step=1):
return 'potato'
>>> test = BetterOrderedDict((('one', 1), ('two', 2), ('three', 3), ('four', 4)))
>>> print test[1:4]
'potato' # ok this makes sense so far
>>> test['one':'four']
TypeError: unhashable type # WTF, strings are hashable!
所以我的问题是,为什么我不能实现非 int 切片,什么样的类型检查会阻止切片键甚至到达我的 __getslice__
函数,我可以通过实现来覆盖它吗我的 BetterOrderedDict
在 C 中有绑定?
__getslice__
是不推荐使用的实现切片的方式。相反,您应该使用 __getitem__
:
处理 slice
个对象
from collections import OrderedDict
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
return 'potato({},{},{})'.format(key.start, key.stop, key.step)
return super(SlicableDict, self).__getitem__(key)
>>> s = SlicableDict(a=1, b=2, c=3)
>>> s
SlicableDict([('a', 1), ('c', 3), ('b', 2)])
>>> s['a']
1
>>> s['a':'c']
'potato(a,c,None)'
如果你需要的不仅仅是土豆,你还可以通过以下方式实现所有三个切片操作:
def _key_slice_to_index_slice(items, key_slice):
try:
if key_slice.start is None:
start = None
else:
start = next(idx for idx, (key, value) in enumerate(items)
if key == key_slice.start)
if key_slice.stop is None:
stop = None
else:
stop = next(idx for idx, (key, value) in enumerate(items)
if key == key_slice.stop)
except StopIteration:
raise KeyError
return slice(start, stop, key_slice.step)
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
return SlicableDict(items[index_slice])
return super(SlicableDict, self).__getitem__(key)
def __setitem__(self, key, value):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
items[index_slice] = value.items()
self.clear()
self.update(items)
return
return super(SlicableDict, self).__setitem__(key, value)
def __delitem__(self, key):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
del items[index_slice]
self.clear()
self.update(items)
return
return super(SlicableDict, self).__delitem__(key)
试试这个(非常丑陋的)实现
class SliceOrdered(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
tmp = OrderedDict()
i_self = iter(self)
for k in i_self:
if key.start <= k <= key.stop:
tmp[k] = self[k]
if key.step is not None and key.step > 1:
for _ in range(key.step-1):
try:
next(i_self)
except StopIteration:
break
return tmp
else:
return super(SliceOrdered, self).__getitem__(key)
演示版 (Python3.4)
>>> s = SliceOrdered([('a',2), ('b',2), ('c',3), ('d',4)])
>>> s['a':'c']
OrderedDict([('a', 2), ('b', 2), ('c', 3)])
>>> s['a':'d':2]
OrderedDict([('a', 2), ('c', 3)])
N.B. 这可能只有效,因为在这个例子中, OrderedDict
不仅被订购, 也是排序的。在未排序的字典中,切片 'a':'c'
不一定包含 'b'
,因此我的 if key.start <= k <= key.stop
逻辑可能会失败。以下代码应尊重这一点:
class SliceOrdered(OrderedDict):
def __getitem__(self, key):
if not isinstance(key, slice):
return super(SliceOrdered,self).__getitem__(key)
tmp = OrderedDict()
step = key.step or 1
accumulating = False
i_self = iter(self)
for k in i_self:
if k == key.start:
accumulating = True
if accumulating:
tmp[k] = self[k]
for _ in range(step-1):
next(i_self)
if k == key.stop:
accumulating = False
break
return tmp
这是您期望的切片功能的实际实现。
OrderedDict
内部以双向链表的形式维护key的顺序。 Quoting the actual comment from Python 2.7.9,
# The internal self.__map dict maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
现在,为了切片字典,我们需要迭代双向链表,__root
,它实际上是一个私有变量,受name mangling mechanism.
保护。
注意: 这涉及到使用 OrderedDict
的内部数据结构的 hacky name unmangling。
from collections import OrderedDict
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
# Unmangle `__root` to access the doubly linked list
root = getattr(self, "_OrderedDict__root")
# By default, make `start` as the first element, `end` as the last
start, end = root[1][2], root[0][2]
start = key.start or start
end = key.stop or end
step = key.step or 1
curr, result, begun, counter = root[1], [], False, 0
# Begin iterating
curr, result, begun = root[1], [], False
while curr is not root:
# If the end value is reached, `break` and `return`
if curr[2] == end:
break
# If starting value is matched, start appending to `result`
if curr[2] == start:
begun = True
if begun:
if counter % step == 0:
result.append((curr[2], self[curr[2]]))
counter += 1
# Make the `curr` point to the next element
curr = curr[1]
return result
return super(SlicableDict, self).__getitem__(key)
少量样本运行:
>>> s = SlicableDict(a=1, b=2, c=3, d=4)
>>> s
SlicableDict([('a', 1), ('c', 3), ('b', 2), ('e', 5), ('d', 4), ('f', 6)])
>>> s['a':'c']
[('a', 1)]
>>> s['a':]
[('a', 1), ('c', 3), ('b', 2), ('e', 5), ('d', 4)]
>>> s[:'a']
[]
>>> s['a':'f':2]
[('a', 1), ('b', 2), ('d', 4)]
由于 OrderedDict
具有列表(具有有序元素)和字典(具有键而不是索引)的特性,因此您可以使用键进行切片似乎很自然。
>>> from collections import OrderedDict
>>> cities = OrderedDict((('san francisco', 650), ('new york', 212), ('shanghai', 8621), ('barcelona', 42423)))
>>> test['shanghai':] # I want all the cities from shanghai to the end of the list
TypeError: unhashable type
有趣的是,这不是您因 OrderedDictionary.__getslice__
未实施而看到的错误。我尝试将自己的 __getslice__
方法添加到 OrderedDict
,但我将 运行 保留在这个 TypeError 问题中。似乎 Python 正在做某种类型检查以强制切片键只是整数,甚至在它们被传递给 __getslice__
函数之前,多么不符合 Python!
>>> class BetterOrderedDict(OrderedDict):
def __getslice__(self, start=None, end=None, step=1):
return 'potato'
>>> test = BetterOrderedDict((('one', 1), ('two', 2), ('three', 3), ('four', 4)))
>>> print test[1:4]
'potato' # ok this makes sense so far
>>> test['one':'four']
TypeError: unhashable type # WTF, strings are hashable!
所以我的问题是,为什么我不能实现非 int 切片,什么样的类型检查会阻止切片键甚至到达我的 __getslice__
函数,我可以通过实现来覆盖它吗我的 BetterOrderedDict
在 C 中有绑定?
__getslice__
是不推荐使用的实现切片的方式。相反,您应该使用 __getitem__
:
slice
个对象
from collections import OrderedDict
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
return 'potato({},{},{})'.format(key.start, key.stop, key.step)
return super(SlicableDict, self).__getitem__(key)
>>> s = SlicableDict(a=1, b=2, c=3)
>>> s
SlicableDict([('a', 1), ('c', 3), ('b', 2)])
>>> s['a']
1
>>> s['a':'c']
'potato(a,c,None)'
如果你需要的不仅仅是土豆,你还可以通过以下方式实现所有三个切片操作:
def _key_slice_to_index_slice(items, key_slice):
try:
if key_slice.start is None:
start = None
else:
start = next(idx for idx, (key, value) in enumerate(items)
if key == key_slice.start)
if key_slice.stop is None:
stop = None
else:
stop = next(idx for idx, (key, value) in enumerate(items)
if key == key_slice.stop)
except StopIteration:
raise KeyError
return slice(start, stop, key_slice.step)
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
return SlicableDict(items[index_slice])
return super(SlicableDict, self).__getitem__(key)
def __setitem__(self, key, value):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
items[index_slice] = value.items()
self.clear()
self.update(items)
return
return super(SlicableDict, self).__setitem__(key, value)
def __delitem__(self, key):
if isinstance(key, slice):
items = self.items()
index_slice = _key_slice_to_index_slice(items, key)
del items[index_slice]
self.clear()
self.update(items)
return
return super(SlicableDict, self).__delitem__(key)
试试这个(非常丑陋的)实现
class SliceOrdered(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
tmp = OrderedDict()
i_self = iter(self)
for k in i_self:
if key.start <= k <= key.stop:
tmp[k] = self[k]
if key.step is not None and key.step > 1:
for _ in range(key.step-1):
try:
next(i_self)
except StopIteration:
break
return tmp
else:
return super(SliceOrdered, self).__getitem__(key)
演示版 (Python3.4)
>>> s = SliceOrdered([('a',2), ('b',2), ('c',3), ('d',4)])
>>> s['a':'c']
OrderedDict([('a', 2), ('b', 2), ('c', 3)])
>>> s['a':'d':2]
OrderedDict([('a', 2), ('c', 3)])
N.B. 这可能只有效,因为在这个例子中, OrderedDict
不仅被订购, 也是排序的。在未排序的字典中,切片 'a':'c'
不一定包含 'b'
,因此我的 if key.start <= k <= key.stop
逻辑可能会失败。以下代码应尊重这一点:
class SliceOrdered(OrderedDict):
def __getitem__(self, key):
if not isinstance(key, slice):
return super(SliceOrdered,self).__getitem__(key)
tmp = OrderedDict()
step = key.step or 1
accumulating = False
i_self = iter(self)
for k in i_self:
if k == key.start:
accumulating = True
if accumulating:
tmp[k] = self[k]
for _ in range(step-1):
next(i_self)
if k == key.stop:
accumulating = False
break
return tmp
这是您期望的切片功能的实际实现。
OrderedDict
内部以双向链表的形式维护key的顺序。 Quoting the actual comment from Python 2.7.9,
# The internal self.__map dict maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
现在,为了切片字典,我们需要迭代双向链表,__root
,它实际上是一个私有变量,受name mangling mechanism.
注意: 这涉及到使用 OrderedDict
的内部数据结构的 hacky name unmangling。
from collections import OrderedDict
class SlicableDict(OrderedDict):
def __getitem__(self, key):
if isinstance(key, slice):
# Unmangle `__root` to access the doubly linked list
root = getattr(self, "_OrderedDict__root")
# By default, make `start` as the first element, `end` as the last
start, end = root[1][2], root[0][2]
start = key.start or start
end = key.stop or end
step = key.step or 1
curr, result, begun, counter = root[1], [], False, 0
# Begin iterating
curr, result, begun = root[1], [], False
while curr is not root:
# If the end value is reached, `break` and `return`
if curr[2] == end:
break
# If starting value is matched, start appending to `result`
if curr[2] == start:
begun = True
if begun:
if counter % step == 0:
result.append((curr[2], self[curr[2]]))
counter += 1
# Make the `curr` point to the next element
curr = curr[1]
return result
return super(SlicableDict, self).__getitem__(key)
少量样本运行:
>>> s = SlicableDict(a=1, b=2, c=3, d=4)
>>> s
SlicableDict([('a', 1), ('c', 3), ('b', 2), ('e', 5), ('d', 4), ('f', 6)])
>>> s['a':'c']
[('a', 1)]
>>> s['a':]
[('a', 1), ('c', 3), ('b', 2), ('e', 5), ('d', 4)]
>>> s[:'a']
[]
>>> s['a':'f':2]
[('a', 1), ('b', 2), ('d', 4)]