从 pydantic 对象列表中删除重复项
Removing duplicates from the list of pydantic objects
我试图从 pydantic 对象列表中删除重复项,但遇到了一个我无法解决的问题:唯一可行的方法速度很慢。
有没有比我的方法更快的删除重复项的方法?
代码:
Pydantic 模型 (a.py)
from pydantic import BaseModel


class Photo(BaseModel):
    # NOTE: pydantic's BaseModel provides field-based __eq__ but no
    # __hash__, so Photo instances are unhashable (cannot be put into a
    # set or used as dict keys) — this is the root cause of the question.
    title: str
    url: str
主文件(b.py)
from collections import OrderedDict
from a import Photo

# 3 objects, 2 duplicates (a_obj and c_obj carry identical field values)
a_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}
b_obj = {
    'title': 'SOME TITLE v2',
    'url': 'http://different.url'
}
c_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}

# Creating list of pydantic objects
pd_obj_list = list()
pd_obj_list += [Photo(**a_obj)]
pd_obj_list += [Photo(**b_obj)]
pd_obj_list += [Photo(**c_obj)]

# My Attempts to Remove Duplicates

# Using OrderedDict.fromkeys -- needs hashable keys
final_list_0 = list(OrderedDict.fromkeys(pd_obj_list))
# returns TypeError: unhashable type: 'Photo'

# Using Set -- also needs hashable elements
final_list_1 = list(set(pd_obj_list))
# returns TypeError: unhashable type: 'Photo'

# Using enumerate -- O(n^2): each membership test rescans the list prefix
final_list_2 = [i for n, i in enumerate(pd_obj_list) if i not in pd_obj_list[:n]]
# It works but too slow when I have ~10k objects in the list
使用:
pd_obj_list = [Photo(**a_obj), Photo(**b_obj), Photo(**c_obj)]

# Key each photo by its field tuple: duplicate keys collapse while the
# first-insertion position is preserved; the surviving values are the
# deduplicated photos, in original order.
deduped = OrderedDict()
for photo in pd_obj_list:
    deduped[(photo.title, photo.url)] = photo
final_list_0 = list(deduped.values())
print(final_list_0)
输出
[Photo(title='SOME TITLE v1', url='http://some.url'), Photo(title='SOME TITLE v2', url='http://different.url')]
如果 Photo 对象是不可变的(字段创建后不再修改),你可以如下定义 `__hash__`:
from collections import OrderedDict
from pydantic import BaseModel


class Photo(BaseModel):
    """A photo record, made hashable so it can be deduplicated via dict/set."""

    title: str
    url: str

    def __hash__(self):
        # BaseModel supplies a field-based __eq__ but no __hash__; hashing
        # the field tuple keeps hash() consistent with equality, which is
        # what dict/set deduplication requires.
        return hash((self.title, self.url))


# 3 objects, 2 duplicates
a_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}
b_obj = {
    'title': 'SOME TITLE v2',
    'url': 'http://different.url'
}
c_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}

# Build the models, then let OrderedDict.fromkeys keep the first occurrence
# of each distinct (by hash/eq) Photo while preserving insertion order.
pd_obj_list = [Photo(**payload) for payload in (a_obj, b_obj, c_obj)]
final_list_0 = list(OrderedDict.fromkeys(pd_obj_list))
print(final_list_0)
输出
[Photo(title='SOME TITLE v1', url='http://some.url'), Photo(title='SOME TITLE v2', url='http://different.url')]
我试图从 pydantic 对象列表中删除重复项,但遇到了一个我无法解决的问题:唯一可行的方法速度很慢。
有没有比我的方法更快的删除重复项的方法?
代码:
Pydantic 模型 (a.py)
from pydantic import BaseModel


class Photo(BaseModel):
    # NOTE: pydantic's BaseModel provides field-based __eq__ but no
    # __hash__, so Photo instances are unhashable (cannot be put into a
    # set or used as dict keys) — this is the root cause of the question.
    title: str
    url: str
主文件(b.py)
from collections import OrderedDict
from a import Photo

# 3 objects, 2 duplicates (a_obj and c_obj carry identical field values)
a_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}
b_obj = {
    'title': 'SOME TITLE v2',
    'url': 'http://different.url'
}
c_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}

# Creating list of pydantic objects
pd_obj_list = list()
pd_obj_list += [Photo(**a_obj)]
pd_obj_list += [Photo(**b_obj)]
pd_obj_list += [Photo(**c_obj)]

# My Attempts to Remove Duplicates

# Using OrderedDict.fromkeys -- needs hashable keys
final_list_0 = list(OrderedDict.fromkeys(pd_obj_list))
# returns TypeError: unhashable type: 'Photo'

# Using Set -- also needs hashable elements
final_list_1 = list(set(pd_obj_list))
# returns TypeError: unhashable type: 'Photo'

# Using enumerate -- O(n^2): each membership test rescans the list prefix
final_list_2 = [i for n, i in enumerate(pd_obj_list) if i not in pd_obj_list[:n]]
# It works but too slow when I have ~10k objects in the list
使用:
pd_obj_list = [Photo(**a_obj), Photo(**b_obj), Photo(**c_obj)]

# Key each photo by its field tuple: duplicate keys collapse while the
# first-insertion position is preserved; the surviving values are the
# deduplicated photos, in original order.
deduped = OrderedDict()
for photo in pd_obj_list:
    deduped[(photo.title, photo.url)] = photo
final_list_0 = list(deduped.values())
print(final_list_0)
输出
[Photo(title='SOME TITLE v1', url='http://some.url'), Photo(title='SOME TITLE v2', url='http://different.url')]
如果 Photo 对象是不可变的(字段创建后不再修改),你可以如下定义 `__hash__`:
from collections import OrderedDict
from pydantic import BaseModel


class Photo(BaseModel):
    """A photo record, made hashable so it can be deduplicated via dict/set."""

    title: str
    url: str

    def __hash__(self):
        # BaseModel supplies a field-based __eq__ but no __hash__; hashing
        # the field tuple keeps hash() consistent with equality, which is
        # what dict/set deduplication requires.
        return hash((self.title, self.url))


# 3 objects, 2 duplicates
a_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}
b_obj = {
    'title': 'SOME TITLE v2',
    'url': 'http://different.url'
}
c_obj = {
    'title': 'SOME TITLE v1',
    'url': 'http://some.url'
}

# Build the models, then let OrderedDict.fromkeys keep the first occurrence
# of each distinct (by hash/eq) Photo while preserving insertion order.
pd_obj_list = [Photo(**payload) for payload in (a_obj, b_obj, c_obj)]
final_list_0 = list(OrderedDict.fromkeys(pd_obj_list))
print(final_list_0)
输出
[Photo(title='SOME TITLE v1', url='http://some.url'), Photo(title='SOME TITLE v2', url='http://different.url')]