如何防止 Django 保存操作以避免对现有数据重复调用数据库
How to prevent Django save action to avoid repetitive DB calls on already existing data
我当前用于保存文章的代码会保存新文章,并在发现重复时打印 'The Post Already Exists!'。但它仍然会发起数据库调用,因此由于重复文章未能保存而产生的无效保存操作,我的文章表(table)中出现了 ID 空缺。为了保持文章 ID 的连续性,我该如何改进代码,在发现重复项时阻止保存操作?
# Create the post only when the existence check finds no matching row.
# NOTE(review): `slug` is the only field declared unique on Posts, yet this
# lookup also filters on `title`. A row with the same slug but a different
# title passes the check, and save() then raises IntegrityError. Filtering on
# slug alone avoids that.
if not Posts.objects.filter(title=post_title, slug=post_slug).exists():
    post = Posts(
        title=post_title,
        link=post_link,
        summary=post_summary,
        image_url=image_link,
        slug=post_slug,
        pub_date=date_published,
        guid=item.guid,
        feed_title=channel_feed_title,
        feed_link=channel_feed_link,
        feed_description=channel_feed_desc,
        source_id=selected_source_id,
        category_id=selected_category_id,
    )
    post.save()
    # Attach each tag after the post has been saved.
    for tag in article_list:
        post.tags.add(tag)
else:
    print("The Post Already Exists! Skipping...")
我一直收到这样的错误:
django.db.utils.IntegrityError: duplicate key value violates unique constraint "Posts_posts_slug_key" DETAIL: Key (slug)=() already exists.
我的模型
class Categories(models.Model):
    """A named category, with creation and last-modification dates."""

    category_name = models.CharField(verbose_name='Categories', max_length=500)
    # Set once when the row is first created.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    # Refreshed on every save.
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    class Meta:
        verbose_name_plural = 'Categories'

    def __str__(self):
        """Return the category name as the display text."""
        return str(self.category_name)
class Source(models.Model):
    """A website and the URL of its RSS feed."""

    name = models.CharField(verbose_name='Website Name', max_length=500)
    feed_url = models.URLField(verbose_name='RSS Feed Link', max_length=500)
    # Set once when the row is first created.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    # Refreshed on every save.
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    class Meta:
        verbose_name_plural = 'Feed Sources'

    def __str__(self):
        """Return the website name as the display text."""
        return str(self.name)
class Posts(models.Model):
    """A feed post linked to one Source and one Categories row.

    ``slug`` is the only field declared unique on this model.
    """

    # Post content.
    title = models.CharField(verbose_name='Post Title', max_length=500)
    link = models.URLField(verbose_name='Post Link', max_length=500)
    summary = models.TextField(verbose_name='Post Summary')
    image_url = models.URLField(
        verbose_name='Post Image URL', max_length=500, null=True
    )
    slug = models.SlugField(max_length=500, unique=True)
    tags = TaggableManager()
    pub_date = models.DateTimeField(verbose_name='Date Published')
    guid = models.CharField(verbose_name='Guid Code', max_length=500, null=True)

    # Details of the feed channel the post came from.
    feed_title = models.CharField(verbose_name='Feed Channel Title', max_length=500)
    feed_link = models.URLField(verbose_name='Feed Channel Link', max_length=500)
    feed_description = models.TextField(verbose_name='Feed Channel Description')

    # Bookkeeping dates: set on creation / refreshed on every save.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    # Deleting the related row cascades to its posts.
    source = models.ForeignKey(
        Source, verbose_name='Source', on_delete=models.CASCADE
    )
    category = models.ForeignKey(
        Categories, verbose_name='Category', on_delete=models.CASCADE
    )

    class Meta:
        verbose_name_plural = 'Posts'

    def __str__(self):
        """Return the post title and feed title joined by ' - '."""
        return "{} - {}".format(self.title, self.feed_title)
由于只有 slug 是唯一字段,因此您必须检查是否已经存在具有相同 slug 的 post。如果没有,再创建它。
您现在遇到的问题是查询中还同时检查了标题(以 AND 方式组合)。当数据库中已有 slug 相同但标题不同的 post 时,这个查询会让您误以为该 post 不存在,但由于 slug 已经存在,您仍然无法将其添加到数据库中。
这意味着您只需删除查询中的标题:
# Look the post up by slug alone: slug is the only field declared unique.
Posts.objects.filter(slug=post_slug).exists()
作为对我之前回答的补充,根据 Django 文档(https://docs.djangoproject.com/en/4.0/ref/models/querysets/#get-or-create),这里还有一些解决问题的方法:
# Fetch the post by its slug; build and save a new one only when the
# lookup reports that no such row exists.
try:
    post = Posts.objects.get(slug=post_slug)
except Posts.DoesNotExist:
    post = Posts(
        title=post_title,
        link=...,
    )
    post.save()
此处,对于并发请求,可能会多次尝试使用相同参数保存 Post。为了避免这种竞争条件,可以使用 get_or_create() 重写上面的示例,如下所示:
# Field values applied only when a new post has to be created.
creation_defaults = {'title': post_title, 'link': ...}

# Look the post up by slug; `created` reports whether a new row was made.
obj, created = Posts.objects.get_or_create(
    slug=post_slug,
    defaults=creation_defaults,
)
我当前用于保存文章的代码会保存新文章,并在发现重复时打印 'The Post Already Exists!'。但它仍然会发起数据库调用,因此由于重复文章未能保存而产生的无效保存操作,我的文章表(table)中出现了 ID 空缺。为了保持文章 ID 的连续性,我该如何改进代码,在发现重复项时阻止保存操作?
# Create the post only when the existence check finds no matching row.
# NOTE(review): `slug` is the only field declared unique on Posts, yet this
# lookup also filters on `title`. A row with the same slug but a different
# title passes the check, and save() then raises IntegrityError. Filtering on
# slug alone avoids that.
if not Posts.objects.filter(title=post_title, slug=post_slug).exists():
    post = Posts(
        title=post_title,
        link=post_link,
        summary=post_summary,
        image_url=image_link,
        slug=post_slug,
        pub_date=date_published,
        guid=item.guid,
        feed_title=channel_feed_title,
        feed_link=channel_feed_link,
        feed_description=channel_feed_desc,
        source_id=selected_source_id,
        category_id=selected_category_id,
    )
    post.save()
    # Attach each tag after the post has been saved.
    for tag in article_list:
        post.tags.add(tag)
else:
    print("The Post Already Exists! Skipping...")
我一直收到这样的错误:
django.db.utils.IntegrityError: duplicate key value violates unique constraint "Posts_posts_slug_key" DETAIL: Key (slug)=() already exists.
我的模型
class Categories(models.Model):
    """A named category, with creation and last-modification dates."""

    category_name = models.CharField(verbose_name='Categories', max_length=500)
    # Set once when the row is first created.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    # Refreshed on every save.
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    class Meta:
        verbose_name_plural = 'Categories'

    def __str__(self):
        """Return the category name as the display text."""
        return str(self.category_name)
class Source(models.Model):
    """A website and the URL of its RSS feed."""

    name = models.CharField(verbose_name='Website Name', max_length=500)
    feed_url = models.URLField(verbose_name='RSS Feed Link', max_length=500)
    # Set once when the row is first created.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    # Refreshed on every save.
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    class Meta:
        verbose_name_plural = 'Feed Sources'

    def __str__(self):
        """Return the website name as the display text."""
        return str(self.name)
class Posts(models.Model):
    """A feed post linked to one Source and one Categories row.

    ``slug`` is the only field declared unique on this model.
    """

    # Post content.
    title = models.CharField(verbose_name='Post Title', max_length=500)
    link = models.URLField(verbose_name='Post Link', max_length=500)
    summary = models.TextField(verbose_name='Post Summary')
    image_url = models.URLField(
        verbose_name='Post Image URL', max_length=500, null=True
    )
    slug = models.SlugField(max_length=500, unique=True)
    tags = TaggableManager()
    pub_date = models.DateTimeField(verbose_name='Date Published')
    guid = models.CharField(verbose_name='Guid Code', max_length=500, null=True)

    # Details of the feed channel the post came from.
    feed_title = models.CharField(verbose_name='Feed Channel Title', max_length=500)
    feed_link = models.URLField(verbose_name='Feed Channel Link', max_length=500)
    feed_description = models.TextField(verbose_name='Feed Channel Description')

    # Bookkeeping dates: set on creation / refreshed on every save.
    date_created = models.DateField(verbose_name='Date Created', auto_now_add=True)
    last_modified = models.DateField(verbose_name='Last Modified', auto_now=True)

    # Deleting the related row cascades to its posts.
    source = models.ForeignKey(
        Source, verbose_name='Source', on_delete=models.CASCADE
    )
    category = models.ForeignKey(
        Categories, verbose_name='Category', on_delete=models.CASCADE
    )

    class Meta:
        verbose_name_plural = 'Posts'

    def __str__(self):
        """Return the post title and feed title joined by ' - '."""
        return "{} - {}".format(self.title, self.feed_title)
由于只有 slug 是唯一字段,因此您必须检查是否已经存在具有相同 slug 的 post。如果没有,再创建它。
您现在遇到的问题是查询中还同时检查了标题(以 AND 方式组合)。当数据库中已有 slug 相同但标题不同的 post 时,这个查询会让您误以为该 post 不存在,但由于 slug 已经存在,您仍然无法将其添加到数据库中。
这意味着您只需删除查询中的标题:
# Look the post up by slug alone: slug is the only field declared unique.
Posts.objects.filter(slug=post_slug).exists()
作为对我之前回答的补充,根据 Django 文档(https://docs.djangoproject.com/en/4.0/ref/models/querysets/#get-or-create),这里还有一些解决问题的方法:
# Fetch the post by its slug; build and save a new one only when the
# lookup reports that no such row exists.
try:
    post = Posts.objects.get(slug=post_slug)
except Posts.DoesNotExist:
    post = Posts(
        title=post_title,
        link=...,
    )
    post.save()
此处,对于并发请求,可能会多次尝试使用相同参数保存 Post。为了避免这种竞争条件,可以使用 get_or_create() 重写上面的示例,如下所示:
# Field values applied only when a new post has to be created.
creation_defaults = {'title': post_title, 'link': ...}

# Look the post up by slug; `created` reports whether a new row was made.
obj, created = Posts.objects.get_or_create(
    slug=post_slug,
    defaults=creation_defaults,
)