搜索功能太慢 recordclass python 优化
search function is too slow recordclass python optimization
`dict()` 很耗内存,所以我尝试了其他方式:原先占用 6 GB,改用 dataobject 之后降到了 700 MB。但是,一到搜索环节,我自己实现的查找速度就非常慢。
我知道无法与 Python 内置的 dict 相比,但至少想让它快一些。
如果你有什么想法,请告诉我(使用的是 CPython)。
首先:我尝试了用链表把节点串起来,但仍然很慢
from recordclass import dataobject
# One link of a singly linked key/value chain searched by find()/find1().
class node(dataobject):
    elt1: tuple  # key compared during the search
    elt2: list  # value returned when the key matches
    # Following node in the chain, or None at the tail.  Annotated as
    # `object` because the builder code stores node instances (or None) here,
    # never a string.
    _next: object
def find(n1, elt1):
    """Return the ``elt2`` of the first node whose ``elt1`` equals *elt1*.

    The chain starting at *n1* is followed through each node's ``_next``
    attribute until a match is found or the chain ends.

    Args:
        n1: Head node of the chain, or ``None`` for an empty chain.
        elt1: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        The matching node's ``elt2``, or ``None`` when no node matches.
    """
    # Walk the chain with a loop rather than recursing: one Python call frame
    # per node is slow and raises RecursionError once the chain is longer
    # than the interpreter's recursion limit.
    current = n1
    while current is not None:
        if current.elt1 == elt1:
            return current.elt2
        current = current._next
    return None
#or
def find1(n1, elt1):
    """Iteratively search a linked chain for the key *elt1*.

    Args:
        n1: Head node of the chain, or ``None`` for an empty chain.
        elt1: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        The ``elt2`` of the first matching node, or ``None`` if the end of
        the chain is reached without a match.
    """
    while n1 is not None:
        if n1.elt1 == elt1:
            return n1.elt2
        # No match here: advance to the following node (None ends the loop).
        n1 = n1._next
    return None
# Build the two structures being compared: a plain dict and a linked chain of
# `node` objects holding the same key -> value pairs.  Each new node is
# prepended, so `n1` always refers to the head of the chain.
daca = {}
n1 = None
for i in range(0, 100, 2):
    daca[i] = i + 1
    n1 = node(i, i + 1, n1)
# find(n1, 12) compared to daca[12]: the dictionary is 7 times faster than find
其次:我尝试将所有节点附加到列表中,但速度仍然很慢
from recordclass import dataobject
# Key/value record kept in a plain list (this variant has no `_next` link).
class node(dataobject):
    elt1:tuple  # key that find() compares against
    elt2:list  # value associated with the key
def find(n1, elt):
    """Return every node in *n1* whose ``elt1`` equals *elt*.

    Args:
        n1: Iterable of nodes (objects exposing an ``elt1`` attribute).
        elt: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        A new list with the matching nodes in their original order; empty
        when nothing matches.
    """
    # A comprehension avoids one lambda call per element, which is what
    # filter(lambda ...) pays on every node.
    return [item for item in n1 if item.elt1 == elt]
# Build the two structures being compared: a plain dict and a flat list of
# `node` records holding the same key -> value pairs.
daca = {}
n1 = []
for i in range(0, 100, 2):
    daca[i] = i + 1
    n1.append(node(i, i + 1))
# find(n1, 12) compared to daca[12]: the dictionary is 7 times faster than find
在按键查找值这方面,很难胜过 Python 的 dict。
Recordclass 库可以通过以下方式帮助减少内存占用。
from recordclass import make_arrayclass, litelist
from random import randint
tracemalloc 模块用于评估内存占用:
import tracemalloc
class Tracer:
    """Context manager that measures memory allocated inside its ``with`` body.

    After the block exits, ``allocated`` holds the current traced size and
    ``peak`` the peak traced size, both as returned by
    ``tracemalloc.get_traced_memory()``.  Both are ``None`` until then.

    Raises:
        ValueError: on entry, if tracemalloc is already tracing.
    """

    def __init__(self):
        self.allocated = None
        self.peak = None

    def __enter__(self):
        # tracemalloc keeps a single global tracing state, so an inner Tracer
        # would stop the outer measurement when it exits.
        if tracemalloc.is_tracing():
            raise ValueError('nesting tracemalloc is not allowed')
        self.allocated = None
        self.peak = None
        tracemalloc.start()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Read the counters before stopping: stop() discards collected traces.
        current, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        self.allocated = current
        self.peak = peak
        # Implicitly returns None, so exceptions from the body propagate.
首先估计dict的"weight"部分:
# Baseline measurement: a dict of 5,000,000 int keys, every key mapped to None.
with Tracer() as t0:
    d0 = {key: None for key in range(5_000_000)}
# Tracer.allocated comes from tracemalloc.get_traced_memory(); the integer
# division scales it down by one million for display.
print("dict:", t0.allocated // 1_000_000, 'Mb')
# Drop the big dict before the next measurement.
del d0, t0
结果是 307 Mb
其次,让我们估算具有 5_000_000 个条目的字典的内存占用量。键是随机整数的三元组,值是包含 6 个随机整数的列表。
# Number of dictionary entries to generate.  The snippet used N without ever
# defining it; 5_000_000 is the entry count stated in the accompanying text.
N = 5_000_000

# Regular Python objects: tuple keys of three random ints, list values.
with Tracer() as t1:
    d1 = {}
    for i in range(N):
        key = (randint(0, N), randint(0, N), randint(0, N))
        # Six values per entry, as the text states and as the recordclass
        # variant uses, so the two measurements are comparable.
        # NOTE(review): this line previously built 10-element lists; memory
        # figures quoted for this run may have been taken with that size, so
        # re-measure before comparing.
        val = [randint(0, N) for _ in range(6)]
        d1[key] = val
print("regular:", t1.allocated // 1_000_000, 'Mb')
# Drop the big dict before the next measurement.
del d1, t1
结果是 3387 Mb。可见 dict 结构本身只占其中的一小部分。
为了减少元组和列表的内存占用,可以使用 recordclass 库中的 make_arrayclass 和 litelist:
# Number of dictionary entries to generate.  The snippet used N without ever
# defining it; 5_000_000 is the entry count stated in the accompanying text.
N = 5_000_000

# Key type created by recordclass: requested with 3 items and hashable=True so
# that instances can serve as dict keys.
Triple = make_arrayclass("Triple", 3, hashable=True)

# Same workload as the regular tuple/list run, with Triple keys and litelist
# values instead.
with Tracer() as t2:
    d2 = {}
    for i in range(N):
        key = Triple(randint(0, N), randint(0, N), randint(0, N))
        # `_` keeps the comprehension from reusing the outer loop's name.
        val = litelist([randint(0, N) for _ in range(6)])
        d2[key] = val
print("recordclass:", t2.allocated // 1_000_000, 'Mb')
# Release the measured structures.
del d2, t2
结果是 2107 Mb。所以这节省了大约 1 Gb。
P.S.: Python 使用 3.7.
`dict()` 很耗内存,所以我尝试了其他方式:原先占用 6 GB,改用 dataobject 之后降到了 700 MB。但是,一到搜索环节,我自己实现的查找速度就非常慢。
我知道无法与 Python 内置的 dict 相比,但至少想让它快一些。
如果你有什么想法,请告诉我(使用的是 CPython)。
首先:我尝试了用链表把节点串起来,但仍然很慢
from recordclass import dataobject
# One link of a singly linked key/value chain searched by find()/find1().
class node(dataobject):
    elt1: tuple  # key compared during the search
    elt2: list  # value returned when the key matches
    # Following node in the chain, or None at the tail.  Annotated as
    # `object` because the builder code stores node instances (or None) here,
    # never a string.
    _next: object
def find(n1, elt1):
    """Return the ``elt2`` of the first node whose ``elt1`` equals *elt1*.

    The chain starting at *n1* is followed through each node's ``_next``
    attribute until a match is found or the chain ends.

    Args:
        n1: Head node of the chain, or ``None`` for an empty chain.
        elt1: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        The matching node's ``elt2``, or ``None`` when no node matches.
    """
    # Walk the chain with a loop rather than recursing: one Python call frame
    # per node is slow and raises RecursionError once the chain is longer
    # than the interpreter's recursion limit.
    current = n1
    while current is not None:
        if current.elt1 == elt1:
            return current.elt2
        current = current._next
    return None
#or
def find1(n1, elt1):
    """Iteratively search a linked chain for the key *elt1*.

    Args:
        n1: Head node of the chain, or ``None`` for an empty chain.
        elt1: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        The ``elt2`` of the first matching node, or ``None`` if the end of
        the chain is reached without a match.
    """
    while n1 is not None:
        if n1.elt1 == elt1:
            return n1.elt2
        # No match here: advance to the following node (None ends the loop).
        n1 = n1._next
    return None
# Build the two structures being compared: a plain dict and a linked chain of
# `node` objects holding the same key -> value pairs.  Each new node is
# prepended, so `n1` always refers to the head of the chain.
daca = {}
n1 = None
for i in range(0, 100, 2):
    daca[i] = i + 1
    n1 = node(i, i + 1, n1)
# find(n1, 12) compared to daca[12]: the dictionary is 7 times faster than find
其次:我尝试将所有节点附加到列表中,但速度仍然很慢
from recordclass import dataobject
# Key/value record kept in a plain list (this variant has no `_next` link).
class node(dataobject):
    elt1:tuple  # key that find() compares against
    elt2:list  # value associated with the key
def find(n1, elt):
    """Return every node in *n1* whose ``elt1`` equals *elt*.

    Args:
        n1: Iterable of nodes (objects exposing an ``elt1`` attribute).
        elt: Key compared (with ``==``) against each node's ``elt1``.

    Returns:
        A new list with the matching nodes in their original order; empty
        when nothing matches.
    """
    # A comprehension avoids one lambda call per element, which is what
    # filter(lambda ...) pays on every node.
    return [item for item in n1 if item.elt1 == elt]
# Build the two structures being compared: a plain dict and a flat list of
# `node` records holding the same key -> value pairs.
daca = {}
n1 = []
for i in range(0, 100, 2):
    daca[i] = i + 1
    n1.append(node(i, i + 1))
# find(n1, 12) compared to daca[12]: the dictionary is 7 times faster than find
在按键查找值这方面,很难胜过 Python 的 dict。
Recordclass 库可以通过以下方式帮助减少内存占用。
from recordclass import make_arrayclass, litelist
from random import randint
tracemalloc 模块用于评估内存占用:
import tracemalloc
class Tracer:
    """Context manager that measures memory allocated inside its ``with`` body.

    After the block exits, ``allocated`` holds the current traced size and
    ``peak`` the peak traced size, both as returned by
    ``tracemalloc.get_traced_memory()``.  Both are ``None`` until then.

    Raises:
        ValueError: on entry, if tracemalloc is already tracing.
    """

    def __init__(self):
        self.allocated = None
        self.peak = None

    def __enter__(self):
        # tracemalloc keeps a single global tracing state, so an inner Tracer
        # would stop the outer measurement when it exits.
        if tracemalloc.is_tracing():
            raise ValueError('nesting tracemalloc is not allowed')
        self.allocated = None
        self.peak = None
        tracemalloc.start()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Read the counters before stopping: stop() discards collected traces.
        current, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        self.allocated = current
        self.peak = peak
        # Implicitly returns None, so exceptions from the body propagate.
首先估计dict的"weight"部分:
# Baseline measurement: a dict of 5,000,000 int keys, every key mapped to None.
with Tracer() as t0:
    d0 = {key: None for key in range(5_000_000)}
# Tracer.allocated comes from tracemalloc.get_traced_memory(); the integer
# division scales it down by one million for display.
print("dict:", t0.allocated // 1_000_000, 'Mb')
# Drop the big dict before the next measurement.
del d0, t0
结果是 307 Mb
其次,让我们估算具有 5_000_000 个条目的字典的内存占用量。键是随机整数的三元组,值是包含 6 个随机整数的列表。
# Number of dictionary entries to generate.  The snippet used N without ever
# defining it; 5_000_000 is the entry count stated in the accompanying text.
N = 5_000_000

# Regular Python objects: tuple keys of three random ints, list values.
with Tracer() as t1:
    d1 = {}
    for i in range(N):
        key = (randint(0, N), randint(0, N), randint(0, N))
        # Six values per entry, as the text states and as the recordclass
        # variant uses, so the two measurements are comparable.
        # NOTE(review): this line previously built 10-element lists; memory
        # figures quoted for this run may have been taken with that size, so
        # re-measure before comparing.
        val = [randint(0, N) for _ in range(6)]
        d1[key] = val
print("regular:", t1.allocated // 1_000_000, 'Mb')
# Drop the big dict before the next measurement.
del d1, t1
结果是 3387 Mb。可见 dict 结构本身只占其中的一小部分。
为了减少元组和列表的内存占用,可以使用 recordclass 库中的 make_arrayclass 和 litelist:
# Number of dictionary entries to generate.  The snippet used N without ever
# defining it; 5_000_000 is the entry count stated in the accompanying text.
N = 5_000_000

# Key type created by recordclass: requested with 3 items and hashable=True so
# that instances can serve as dict keys.
Triple = make_arrayclass("Triple", 3, hashable=True)

# Same workload as the regular tuple/list run, with Triple keys and litelist
# values instead.
with Tracer() as t2:
    d2 = {}
    for i in range(N):
        key = Triple(randint(0, N), randint(0, N), randint(0, N))
        # `_` keeps the comprehension from reusing the outer loop's name.
        val = litelist([randint(0, N) for _ in range(6)])
        d2[key] = val
print("recordclass:", t2.allocated // 1_000_000, 'Mb')
# Release the measured structures.
del d2, t2
结果是 2107 Mb。所以这节省了大约 1 Gb。
P.S.: Python 使用 3.7.