如果 slug 已存在于 postgresql 数据库中,则跳过保存行 - python
Skip saving row if slug already exists in postgresql database - python
我正在我的 Django
视图中设置一个调用 API 并将数据保存到我的 Postgresql
数据库中的函数。在我得到一个 IntegrityError slugkey 已经存在之前,一切都工作正常,所以我试图找到一种方法来跳过或忽略该行,如果 slugify slug 已经存在。
我整天都被这个问题困住了,我需要一个解决方案来遵守预期的时间表..
这是我的 Django 模型:
class Product(models.Model):
destination = models.CharField(max_length=255, default='')
title = models.CharField(max_length=255, default='')
slug = models.SlugField(unique=True, max_length=255, default='')
description = models.TextField(max_length=2047, default='')
link = models.TextField(max_length=500, default='')
ptags = TaggableManager()
image = models.ImageField(max_length=500, default='images/zero-image-found.png')
timestamp = models.DateTimeField(auto_now=True)
def _ptags(self):
return [t.name for t in self.ptags.all()]
def get_absolute_url(self):
return reverse('experience',
kwargs={'slug': self.slug})
def save(self, *args, **kwargs):
if not self.id:
self.slug = slugify(self.title)
super(Product, self).save(*args, **kwargs)
def __str__(self):
return self.destination
这是我在 Views 中的功能:
def api_data(request):
if request.GET.get('mybtn'): # to improve, == 'something':
resp_1 = requests.get(
"https://www.headout.com/api/public/v1/product/listing/list-by/city?language=fr&cityCode=ROME&limit=5000¤cyCode=CAD",
headers={
"Headout-Auth": HEADOUT_PRODUCTION_API_KEY
})
resp_1_data = resp_1.json()
base_url_2 = "https://www.headout.com/api/public/v1/product/get/"
for item in resp_1_data['items']:
print('parsing, translating and saving item {}'.format(item['id']))
# concat ID to the URL string
url = '{}{}'.format(base_url_2, item['id'] + '?language=fr')
# make the HTTP request
resp_2 = requests.get(
url,
headers={
"Headout-Auth": HEADOUT_PRODUCTION_API_KEY
})
resp_2_data = resp_2.json()
try:
descriptiontxt = resp_2_data['contentListHtml'][0]['html'][0:2040] + ' ...'
except (IndexError, KeyError) as e:
continue
#Parsing the description to get only the text in <p>
soup = BeautifulSoup(descriptiontxt, 'lxml')
try:
parsed = soup.find('p').text
except AttributeError:
continue
if len(parsed) == 0:
continue
#Translation
translation = Translator().translate(text=parsed, dest='fr').text
titlename = item['name']
titlefr = Translator().translate(text=titlename, dest='fr').text
destinationname = item['city']['name']
destinationfr = Translator().translate(text=destinationname, dest='fr').text
Product.objects.get_or_create(
title=titlefr,
destination=destinationfr,
description=translation,
link=item['canonicalUrl'],
image=item['image']['url'],
)
time.sleep(2)
return render(request, "form.html")
否则我会不断收到 IntegrityError,我需要再次 运行 我的脚本,这需要很长时间。
那么我该如何解决这个问题?
请帮忙。
编辑
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 473, in get_or_create
return self.get(**lookup), False
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 385, in get
self.model._meta.object_name
search.models.DoesNotExist: Product matching query does not exist.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL: Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\exception.py", line 39, in inner
response = get_response(request)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 187, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 185, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\views.py", line 170, in api_data
brandlogo='https://cdn-imgix-open.headout.com/logo/www-desktop-8743256.png?w=300&h=50&fit=fill'
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 475, in get_or_create
return self._create_object_from_params(lookup, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 513, in _create_object_from_params
six.reraise(*exc_info)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 686, in reraise
raise value
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 505, in _create_object_from_params
obj = self.create(**params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 399, in create
obj.save(force_insert=True, using=self.db)
File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\models.py", line 55, in save
super(Product, self).save(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 796, in save
force_update=force_update, update_fields=update_fields)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 824, in save_base
updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 908, in _save_table
result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 947, in _do_insert
using=using, raw=raw)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 1045, in _insert
return query.get_compiler(using=using).execute_sql(return_id)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1054, in execute_sql
cursor.execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 79, in execute
return super(CursorDebugWrapper, self).execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\utils.py", line 94, in __exit__
six.reraise(dj_exc_type, dj_exc_value, traceback)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 685, in reraise
raise value.with_traceback(tb)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL: Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.
您需要在保存前检查 slug 是否已经存在
def save(self, *args, **kwargs):
if not self.id:
if Product.objects.filter(slug=slugify(self.title)).exists():
self.slug = slugify("f{self.title}-{Product.objects.filter(slug__startswith=slugify(self.title).count + 1}" )
else:
self.slug = slugify(self.title)
super(Product, self).save(*args, **kwargs)
get_or_create uses 所有给定的值(defaults
关键字除外)来查找重复项。
将您的代码更改为此
Product.objects.get_or_create(
title=titlefr,
slug=slugify(titlefr),
defaults={
'destination': destinationfr,
'description': translation,
'link': item['canonicalUrl'],
'image': item['image']['url'],
}
)
因此只有 title
和 slug
将用于查找可能的重复项。 defaults
中的所有其他值将不会用于过滤,但将用于创建。
此外,我建议您将 slug
字段初始化移动到 clean_fields()
method。
我正在我的 Django
视图中设置一个调用 API 并将数据保存到我的 Postgresql
数据库中的函数。在我得到一个 IntegrityError slugkey 已经存在之前,一切都工作正常,所以我试图找到一种方法来跳过或忽略该行,如果 slugify slug 已经存在。
我整天都被这个问题困住了,我需要一个解决方案来遵守预期的时间表..
这是我的 Django 模型:
class Product(models.Model):
destination = models.CharField(max_length=255, default='')
title = models.CharField(max_length=255, default='')
slug = models.SlugField(unique=True, max_length=255, default='')
description = models.TextField(max_length=2047, default='')
link = models.TextField(max_length=500, default='')
ptags = TaggableManager()
image = models.ImageField(max_length=500, default='images/zero-image-found.png')
timestamp = models.DateTimeField(auto_now=True)
def _ptags(self):
return [t.name for t in self.ptags.all()]
def get_absolute_url(self):
return reverse('experience',
kwargs={'slug': self.slug})
def save(self, *args, **kwargs):
if not self.id:
self.slug = slugify(self.title)
super(Product, self).save(*args, **kwargs)
def __str__(self):
return self.destination
这是我在 Views 中的功能:
def api_data(request):
if request.GET.get('mybtn'): # to improve, == 'something':
resp_1 = requests.get(
"https://www.headout.com/api/public/v1/product/listing/list-by/city?language=fr&cityCode=ROME&limit=5000¤cyCode=CAD",
headers={
"Headout-Auth": HEADOUT_PRODUCTION_API_KEY
})
resp_1_data = resp_1.json()
base_url_2 = "https://www.headout.com/api/public/v1/product/get/"
for item in resp_1_data['items']:
print('parsing, translating and saving item {}'.format(item['id']))
# concat ID to the URL string
url = '{}{}'.format(base_url_2, item['id'] + '?language=fr')
# make the HTTP request
resp_2 = requests.get(
url,
headers={
"Headout-Auth": HEADOUT_PRODUCTION_API_KEY
})
resp_2_data = resp_2.json()
try:
descriptiontxt = resp_2_data['contentListHtml'][0]['html'][0:2040] + ' ...'
except (IndexError, KeyError) as e:
continue
#Parsing the description to get only the text in <p>
soup = BeautifulSoup(descriptiontxt, 'lxml')
try:
parsed = soup.find('p').text
except AttributeError:
continue
if len(parsed) == 0:
continue
#Translation
translation = Translator().translate(text=parsed, dest='fr').text
titlename = item['name']
titlefr = Translator().translate(text=titlename, dest='fr').text
destinationname = item['city']['name']
destinationfr = Translator().translate(text=destinationname, dest='fr').text
Product.objects.get_or_create(
title=titlefr,
destination=destinationfr,
description=translation,
link=item['canonicalUrl'],
image=item['image']['url'],
)
time.sleep(2)
return render(request, "form.html")
否则我会不断收到 IntegrityError,我需要再次 运行 我的脚本,这需要很长时间。
那么我该如何解决这个问题?
请帮忙。
编辑
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 473, in get_or_create
return self.get(**lookup), False
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 385, in get
self.model._meta.object_name
search.models.DoesNotExist: Product matching query does not exist.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL: Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\exception.py", line 39, in inner
response = get_response(request)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 187, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 185, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\views.py", line 170, in api_data
brandlogo='https://cdn-imgix-open.headout.com/logo/www-desktop-8743256.png?w=300&h=50&fit=fill'
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 475, in get_or_create
return self._create_object_from_params(lookup, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 513, in _create_object_from_params
six.reraise(*exc_info)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 686, in reraise
raise value
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 505, in _create_object_from_params
obj = self.create(**params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 399, in create
obj.save(force_insert=True, using=self.db)
File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\models.py", line 55, in save
super(Product, self).save(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 796, in save
force_update=force_update, update_fields=update_fields)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 824, in save_base
updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 908, in _save_table
result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 947, in _do_insert
using=using, raw=raw)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 1045, in _insert
return query.get_compiler(using=using).execute_sql(return_id)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1054, in execute_sql
cursor.execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 79, in execute
return super(CursorDebugWrapper, self).execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\utils.py", line 94, in __exit__
six.reraise(dj_exc_type, dj_exc_value, traceback)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 685, in reraise
raise value.with_traceback(tb)
File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL: Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.
您需要在保存前检查 slug 是否已经存在
def save(self, *args, **kwargs):
if not self.id:
if Product.objects.filter(slug=slugify(self.title)).exists():
self.slug = slugify("f{self.title}-{Product.objects.filter(slug__startswith=slugify(self.title).count + 1}" )
else:
self.slug = slugify(self.title)
super(Product, self).save(*args, **kwargs)
get_or_create uses 所有给定的值(defaults
关键字除外)来查找重复项。
将您的代码更改为此
Product.objects.get_or_create(
title=titlefr,
slug=slugify(titlefr),
defaults={
'destination': destinationfr,
'description': translation,
'link': item['canonicalUrl'],
'image': item['image']['url'],
}
)
因此只有 title
和 slug
将用于查找可能的重复项。 defaults
中的所有其他值将不会用于过滤,但将用于创建。
此外,我建议您将 slug
字段初始化移动到 clean_fields()
method。