如果 slug 已存在于 postgresql 数据库中,则跳过保存行 - python

Skip saving row if slug already exists in postgresql database - python

我正在 Django 视图中编写一个函数,它调用 API 并将数据保存到我的 PostgreSQL 数据库中。一切都运行正常,直到我遇到 IntegrityError(slug 键已存在)。因此我想找到一种方法:如果 slugify 生成的 slug 已经存在,就跳过或忽略该行。

我已经被这个问题困了一整天,我需要一个解决方案才能按预期的时间表完成。

这是我的 Django 模型:

class Product(models.Model):
    """Product record with a destination, title, unique slug, text, link, tags and image.

    The slug is derived from ``title`` the first time the row is saved and is
    declared ``unique``, so two products whose titles slugify to the same
    value cannot both be inserted.
    """

    destination = models.CharField(default='', max_length=255)
    title = models.CharField(default='', max_length=255)
    # Unique at the database level; populated by save() on first insert.
    slug = models.SlugField(default='', max_length=255, unique=True)
    description = models.TextField(default='', max_length=2047)
    link = models.TextField(default='', max_length=500)

    ptags = TaggableManager()

    image = models.ImageField(default='images/zero-image-found.png', max_length=500)
    # auto_now: refreshed on every save.
    timestamp = models.DateTimeField(auto_now=True)

    def _ptags(self):
        """Return the names of all tags attached to this product as a list."""
        return [tag.name for tag in self.ptags.all()]

    def get_absolute_url(self):
        """Return the URL of the ``experience`` route for this product's slug."""
        url_kwargs = {'slug': self.slug}
        return reverse('experience', kwargs=url_kwargs)

    def save(self, *args, **kwargs):
        """Save the product, deriving the slug from the title on first save only."""
        is_unsaved = not self.id
        if is_unsaved:
            self.slug = slugify(self.title)
        super(Product, self).save(*args, **kwargs)

    def __str__(self):
        """Use the destination as the human-readable representation."""
        return self.destination

这是我在 views 中的函数:

def api_data(request):
    """Import Headout products into ``Product`` when ``mybtn`` is submitted.

    When the ``mybtn`` GET parameter is truthy, the view fetches the product
    listing for city code ROME, then requests each product's detail page,
    extracts the text of the first ``<p>`` of its first HTML content block,
    translates description, title and destination to French, and stores the
    result through ``Product.objects.get_or_create``. Items without usable
    description text are skipped. The view always renders ``form.html``.
    """
    # Nothing to import unless the button value was submitted.
    if not request.GET.get('mybtn'):  # to improve, == 'something':
        return render(request, "form.html")

    auth_headers = {"Headout-Auth": HEADOUT_PRODUCTION_API_KEY}
    listing = requests.get(
        "https://www.headout.com/api/public/v1/product/listing/list-by/city?language=fr&cityCode=ROME&limit=5000&currencyCode=CAD",
        headers=auth_headers,
    ).json()
    detail_base = "https://www.headout.com/api/public/v1/product/get/"

    for item in listing['items']:
        print('parsing, translating and saving item {}'.format(item['id']))

        # Build the per-product detail URL from the item ID.
        detail_url = detail_base + (item['id'] + '?language=fr')
        detail = requests.get(detail_url, headers=auth_headers).json()

        # Skip products that carry no HTML content block.
        try:
            raw_html = detail['contentListHtml'][0]['html'][0:2040] + ' ...'
        except (IndexError, KeyError):
            continue

        # Keep only the text of the first <p>; skip when there is none.
        first_paragraph = BeautifulSoup(raw_html, 'lxml').find('p')
        if first_paragraph is None:
            continue
        paragraph_text = first_paragraph.text
        if not paragraph_text:
            continue

        # Translation
        description_fr = Translator().translate(text=paragraph_text, dest='fr').text

        title_src = item['name']
        title_fr = Translator().translate(text=title_src, dest='fr').text

        destination_src = item['city']['name']
        destination_fr = Translator().translate(text=destination_src, dest='fr').text

        Product.objects.get_or_create(
            title=title_fr,
            destination=destination_fr,
            description=description_fr,
            link=item['canonicalUrl'],
            image=item['image']['url'],
        )

        # Pause after each stored item before requesting the next one.
        time.sleep(2)

    return render(request, "form.html")

否则我会不断收到 IntegrityError,而且需要重新运行我的脚本,这要花很长时间。

那么我该如何解决这个问题?

请帮忙。

编辑

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 473, in get_or_create
    return self.get(**lookup), False
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 385, in get
    self.model._meta.object_name
search.models.DoesNotExist: Product matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL:  Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\exception.py", line 39, in inner
    response = get_response(request)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 187, in _get_response
    response = self.process_exception_by_middleware(e, request)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\core\handlers\base.py", line 185, in _get_response
    response = wrapped_callback(request, *callback_args, **callback_kwargs)
  File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\views.py", line 170, in api_data
    brandlogo='https://cdn-imgix-open.headout.com/logo/www-desktop-8743256.png?w=300&h=50&fit=fill'
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 475, in get_or_create
    return self._create_object_from_params(lookup, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 513, in _create_object_from_params
    six.reraise(*exc_info)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 686, in reraise
    raise value
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 505, in _create_object_from_params
    obj = self.create(**params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 399, in create
    obj.save(force_insert=True, using=self.db)
  File "C:\Users\loicq\Desktop\Coding\UVERGO_SEARCH\venv\src\search\models.py", line 55, in save
    super(Product, self).save(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 796, in save
    force_update=force_update, update_fields=update_fields)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 824, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 908, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\base.py", line 947, in _do_insert
    using=using, raw=raw)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\manager.py", line 85, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\query.py", line 1045, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1054, in execute_sql
    cursor.execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 79, in execute
    return super(CursorDebugWrapper, self).execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\utils.py", line 94, in __exit__
    six.reraise(dj_exc_type, dj_exc_value, traceback)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\utils\six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\loicq\desktop\coding\uvergo_search\venv\lib\site-packages\django\db\backends\utils.py", line 64, in execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "search_product_slug_key"
DETAIL:  Key (slug)=(skydive-dubai-parachutisme-en-tandem-a-palm-drop-zone-burj-khalifa-gratuit) already exists.

您需要在保存前检查 slug 是否已经存在

def save(self, *args, **kwargs):
    """Save the product, assigning a unique slug the first time it is stored.

    On first save the slug is ``slugify(self.title)``. If that slug is already
    taken, ``-2``, ``-3``, ... is appended until an unused value is found, so
    the insert does not violate the unique constraint on ``slug``. Existing
    rows keep their slug and are saved unchanged.
    """
    if not self.id:
        base_slug = slugify(self.title)
        # Leave room for the numeric suffix within the column's max_length.
        max_length = self._meta.get_field('slug').max_length
        candidate = base_slug
        suffix = 1
        # Probe for a free slug rather than counting rows: a count-based
        # suffix can still collide after deletions or with prefix matches.
        while Product.objects.filter(slug=candidate).exists():
            suffix += 1
            suffix_text = '-{}'.format(suffix)
            candidate = base_slug[:max_length - len(suffix_text)] + suffix_text
        self.slug = candidate

    # Must run for updates as well as inserts, hence outside the ``if``.
    super(Product, self).save(*args, **kwargs)

get_or_create 使用所有给定的值(defaults 关键字除外)来查找重复项。

将您的代码更改为此

# Values listed here are applied only when a new row is created; they are
# not part of the duplicate lookup.
creation_values = {
    'destination': destinationfr,
    'description': translation,
    'link': item['canonicalUrl'],
    'image': item['image']['url'],
}

# The lookup uses title and slug only.
Product.objects.get_or_create(
    title=titlefr,
    slug=slugify(titlefr),
    defaults=creation_values,
)

因此只有 title 和 slug 会用于查找可能的重复项。defaults 中的所有其他值不会用于过滤,但会在创建时使用。

此外,我建议您将 slug 字段的初始化移到 clean_fields() 方法中。