Django - 如何制作复杂的数学注释(k 最近邻)
Django - how to make a complex math annotation (k Nearest Neighbors)
我有这个型号:
class Image(models.Model):
title = models.CharField(max_length=200)
image = models.ImageField(upload_to='img/')
signature = models.TextField(null = True)
签名是以json编码的numpy一维向量。为了进行查询,我必须将每个对象签名解码为 nparray,并在每个对象的签名和给定向量之间生成点积,然后在每个原始数据旁边注释为浮点(名为 "score")字段。最后我必须从最大到最小订购。
我在 view.py
中试过了
def image_sorted(request):
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
image_list = Image.objects.annotate(score=np.dot(
JSONVectConverter.json_to_vect(F('signature')), query_signature.T
).astype(float)).order_by('score') #JSONVectConverter is a class of mine
return render(request, 'images/sorted.html', {'image_sorted': image_list})
当然不行。我认为 "F()" 运算符超出范围...
如果您想知道,我正在为我的大学论文编写图像检索网络应用程序。
谢谢。
编辑:
我发现 this 是完全相同的问题(他使用 postgres 而不是 MySQL)
EDIT2:我现在只记得我采用的最后一个解决方案是什么!首先,我从数据库中取出每个向量并将其保存在 RAM 中,然后我进行一些简单的计算以找到 K 最近邻。然后,我使用索引(主键)从数据库中检索相应的图像。所以我将此任务与 Django ORM 分离。这是代码(来自 Rest API)
def query_over_db(query_signature, page):
query_signature = np.array(query_signature)
t0 = time.time()
descriptor_matrix = cache.get('descriptor_matrix')
id_vector = cache.get('id_vector')
if not descriptor_matrix:
id_vector = []
descriptor_matrix = []
images_dict = Image.objects.all().values('id', 'signature')
for image in images_dict:
s = image['signature']
descriptor = np.array(s)
descriptor_matrix.append(descriptor)
id_vector.append(image['id'])
cache.set('id_vector', id_vector)
cache.set('descriptor_matrix', descriptor_matrix)
t1 = time.time()
print("time to pull out the descriptors : " + str(t1 - t0))
t1 = time.time()
#result = np.abs(np.dot(descriptor_matrix, query_signature.T))
#result = np.sum((descriptor_matrix - query_signature)**2, axis=1)
result = ne.evaluate('sum((descriptor_matrix - query_signature)**2, axis=1)')
t2 = time.time()
print("time to calculate similarity: " + str(t2 - t1))
perm = np.argsort(result)[(page - 1) * 30:page * 30]
print(perm.shape)
print(len(id_vector))
perm_id = np.array(id_vector)[perm]
print(len(perm_id))
print("printing sort")
print(np.sort(result)[0])
t4 = time.time()
print("time to order the result: " + str(t4 - t2))
qs = Image.objects.defer('signature').filter(id__in=perm_id.tolist())
qs_new = []
for i in range(len(perm_id)):
qs_new.append(qs.get(id=perm_id[i]))
t3 = time.time()
print("time to get the results from the DB : " + str(t3 - t2))
print("total time : " + str(t3 - t0))
print(result[perm])
return qs_new
我没有尝试过这么复杂的东西,但是我在这里解决了一个类似的问题:
我没试过这个,但你可以试一试:
from django.db.models import Case, When, FloatField
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
value_dict = {}
for image in Image.objects.all():
value_dict[image.signature] = np.dot(
JSONVectConverter.json_to_vect(image.signature),
query_signature.T
).astype(float)
whens = [
When(signature=k, then=v) for k, v in value_dict.items()
]
qs = Image.objects.all().annotate(
score=Case(
*whens,
default=0,
output_field=FloatField()
)
).order_by('score')
希望对您有所帮助
这就是最终的工作代码:
def image_sorted(request):
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/001_accordion_image_0001.jpg") # a NParray object
#query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
value_dict = {}
for image in Image.objects.all():
S = image.signature
value_dict[image.signature] = np.dot(
JSONVectConverter.json_to_vect(S),
query_signature.T
).astype(float)
whens = [
When(signature=k, then=v) for k, v in value_dict.items()
]
qs = Image.objects.all().annotate(
score=Case(
*whens,
default=0,
output_field=FloatField()
)
).order_by('-score')
for image in qs:
print(image.score)
return render(request, 'images/sorted.html', {'image_sorted': qs})
感谢奥马尔帮助我!当然如果有更好的解决方案我还在。
我有这个型号:
class Image(models.Model):
title = models.CharField(max_length=200)
image = models.ImageField(upload_to='img/')
signature = models.TextField(null = True)
签名是以json编码的numpy一维向量。为了进行查询,我必须将每个对象签名解码为 nparray,并在每个对象的签名和给定向量之间生成点积,然后在每个原始数据旁边注释为浮点(名为 "score")字段。最后我必须从最大到最小订购。
我在 view.py
中试过了def image_sorted(request):
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
image_list = Image.objects.annotate(score=np.dot(
JSONVectConverter.json_to_vect(F('signature')), query_signature.T
).astype(float)).order_by('score') #JSONVectConverter is a class of mine
return render(request, 'images/sorted.html', {'image_sorted': image_list})
当然不行。我认为 "F()" 运算符超出范围...
如果您想知道,我正在为我的大学论文编写图像检索网络应用程序。
谢谢。
编辑: 我发现 this 是完全相同的问题(他使用 postgres 而不是 MySQL)
EDIT2:我现在只记得我采用的最后一个解决方案是什么!首先,我从数据库中取出每个向量并将其保存在 RAM 中,然后我进行一些简单的计算以找到 K 最近邻。然后,我使用索引(主键)从数据库中检索相应的图像。所以我将此任务与 Django ORM 分离。这是代码(来自 Rest API)
def query_over_db(query_signature, page):
query_signature = np.array(query_signature)
t0 = time.time()
descriptor_matrix = cache.get('descriptor_matrix')
id_vector = cache.get('id_vector')
if not descriptor_matrix:
id_vector = []
descriptor_matrix = []
images_dict = Image.objects.all().values('id', 'signature')
for image in images_dict:
s = image['signature']
descriptor = np.array(s)
descriptor_matrix.append(descriptor)
id_vector.append(image['id'])
cache.set('id_vector', id_vector)
cache.set('descriptor_matrix', descriptor_matrix)
t1 = time.time()
print("time to pull out the descriptors : " + str(t1 - t0))
t1 = time.time()
#result = np.abs(np.dot(descriptor_matrix, query_signature.T))
#result = np.sum((descriptor_matrix - query_signature)**2, axis=1)
result = ne.evaluate('sum((descriptor_matrix - query_signature)**2, axis=1)')
t2 = time.time()
print("time to calculate similarity: " + str(t2 - t1))
perm = np.argsort(result)[(page - 1) * 30:page * 30]
print(perm.shape)
print(len(id_vector))
perm_id = np.array(id_vector)[perm]
print(len(perm_id))
print("printing sort")
print(np.sort(result)[0])
t4 = time.time()
print("time to order the result: " + str(t4 - t2))
qs = Image.objects.defer('signature').filter(id__in=perm_id.tolist())
qs_new = []
for i in range(len(perm_id)):
qs_new.append(qs.get(id=perm_id[i]))
t3 = time.time()
print("time to get the results from the DB : " + str(t3 - t2))
print("total time : " + str(t3 - t0))
print(result[perm])
return qs_new
我没有尝试过这么复杂的东西,但是我在这里解决了一个类似的问题:
我没试过这个,但你可以试一试:
from django.db.models import Case, When, FloatField
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
value_dict = {}
for image in Image.objects.all():
value_dict[image.signature] = np.dot(
JSONVectConverter.json_to_vect(image.signature),
query_signature.T
).astype(float)
whens = [
When(signature=k, then=v) for k, v in value_dict.items()
]
qs = Image.objects.all().annotate(
score=Case(
*whens,
default=0,
output_field=FloatField()
)
).order_by('score')
希望对您有所帮助
这就是最终的工作代码:
def image_sorted(request):
query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/001_accordion_image_0001.jpg") # a NParray object
#query_signature = extract_feat(settings.MEDIA_ROOT + "/cache" + "/003_ant_image_0003.jpg") # a NParray object
value_dict = {}
for image in Image.objects.all():
S = image.signature
value_dict[image.signature] = np.dot(
JSONVectConverter.json_to_vect(S),
query_signature.T
).astype(float)
whens = [
When(signature=k, then=v) for k, v in value_dict.items()
]
qs = Image.objects.all().annotate(
score=Case(
*whens,
default=0,
output_field=FloatField()
)
).order_by('-score')
for image in qs:
print(image.score)
return render(request, 'images/sorted.html', {'image_sorted': qs})
感谢奥马尔帮助我!当然如果有更好的解决方案我还在。