Django：使用 querysets/filtering 创建类别

Question

我正在尝试弄清楚是否可以使用自定义过滤器创建类别。

我正在构建一个电子商务应用程序，并且我已经使用 mptt 设置了我的类别模型。我正在导入一个 csv，它可以创建我的顶级类别，效果很好。问题是我需要有更具体的子类别，例如男士服装（顶级）> 牛仔裤。

csv 有几个字段，其中包含与每个产品相关的信息，例如描述："stone wash bootcut jeans"。理想情况下，我想检查这些字段中的关键字并将每个产品添加到正确的类别中。是否可以通过这种方式设置类别，或者是否有替代解决方案？

我是 django 的新手，非常感谢您的帮助。

models.py

from django.db import models
from mptt.models import MPTTModel, TreeForeignKey

class Category(MPTTModel):
    """Product category stored as a node of an MPTT tree.

    Categories nest through ``parent``; siblings are kept ordered by ``name``
    (see ``MPTTMeta``). A slug only has to be unique among the children of
    the same parent.
    """

    name = models.CharField(max_length=50, unique=True)
    parent = TreeForeignKey('self', null=True, blank=True, related_name='children', db_index=True, on_delete=models.CASCADE)
    slug = models.SlugField()

    class MPTTMeta:
        order_insertion_by = ['name']

    class Meta:
        unique_together = (('parent', 'slug',))
        verbose_name_plural = 'categories'

    def get_slug_list(self):
        """Return the cumulative slug paths along this node's ancestor chain.

        The chain is whatever ``get_ancestors(include_self=True)`` yields, in
        that order. For ancestor slugs ``['a', 'b', 'c']`` the result is
        ``['a', 'a/b', 'a/b/c']``.

        Returns:
            A list of ``/``-joined slug prefixes, or an empty list when the
            ancestors cannot be looked up.
        """
        try:
            ancestors = self.get_ancestors(include_self=True)
        except Exception:
            # Best effort: a node whose ancestors cannot be resolved has no
            # slug path. ``Exception`` (instead of a bare ``except``) keeps
            # KeyboardInterrupt / SystemExit from being swallowed here.
            return []

        slugs = [node.slug for node in ancestors]
        return ['/'.join(slugs[:depth]) for depth in range(1, len(slugs) + 1)]

    def __str__(self):
        """Use the category name as the human-readable representation."""
        return self.name

class Brands(models.Model):
    """A product brand, identified only by its name."""

    # Free-text brand name; an empty string when not provided.
    brand_name = models.CharField(default='', max_length=500)

    def __str__(self):
        """Use the brand name as the human-readable representation."""
        return self.brand_name


class Product(models.Model):
    """A single product; its fields mirror the columns read by the CSV import.

    Every text column is stored as a ``CharField`` defaulting to the empty
    string, the two price columns are nullable decimals, and the product
    links to one ``Brands`` row and (optionally) one ``Category`` node.
    """

    aw_deep_link = models.CharField(max_length=500, default='')
    description = models.CharField(max_length=500, default='')
    product_name = models.CharField(max_length=500, default='')
    aw_image_url = models.CharField(max_length=500, default='')
    search_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    merchant_name = models.CharField(max_length=500, default='')
    display_price = models.CharField(max_length=500, default='')
    # Brands is a plain (non-MPTT) model, so a regular ForeignKey is used.
    # TreeForeignKey is intended for relations that point at MPTT tree
    # models; its tree-aware form field relies on MPTT metadata that Brands
    # does not have.
    brand_name = models.ForeignKey('Brands', on_delete=models.CASCADE)
    colour = models.CharField(max_length=500, default='')
    rrp_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    # Category is an MPTT model, so the tree-aware foreign key applies here.
    category = TreeForeignKey('Category',null=True,blank=True, on_delete=models.CASCADE)
    slug = models.SlugField(default='')

    def __str__(self):
        """Use the product name as the human-readable representation."""
        return self.product_name

importCSV.py

import re
from products.models import Category, Brands
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command that imports products from a CSV file."""

    help = "Load some sample data into the db"

    def add_arguments(self, parser):
        """Register the ``--file`` option naming the CSV file to import."""
        parser.add_argument('--file', dest='file', help='File to load')

    def handle(self, **options):
        """Create one ``Product`` per data row of the file given via ``--file``.

        The first row is treated as a header and skipped. Column 10 names the
        category and column 7 the brand; both are looked up by name and
        created on first use. Nothing happens when no file was given.

        Raises:
            IndexError: if a non-empty row has fewer than 11 columns.
        """
        # Local import: only this block needs csv.
        import csv

        from products.models import Product

        if not options['file']:
            return

        print("Importing " + options['file'])

        linecount = 0
        # newline='' is what the csv module expects so that it can handle
        # line breaks inside quoted fields itself.
        with open(options['file'], newline='') as f:
            # csv.reader (rather than line.split(',')) copes with quoted
            # fields that contain commas and strips the line terminator from
            # the last column, which is the category name here.
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            for fields in reader:
                if not fields:
                    # Blank line in the file: nothing to import.
                    continue

                category, _ = Category.objects.get_or_create(name=fields[10])
                brand, _ = Brands.objects.get_or_create(brand_name=fields[7])

                data = {
                    'aw_deep_link': fields[0],
                    'description': fields[1],
                    'product_name': fields[2],
                    'aw_image_url': fields[3],
                    # The price fields are nullable decimals; an empty cell
                    # is passed as None because '' is not a valid decimal.
                    'search_price': fields[4] or None,
                    'merchant_name': fields[5],
                    'display_price': fields[6],
                    'brand_name': brand,
                    'colour': fields[8],
                    'rrp_price': fields[9] or None,
                    'category': category,
                }

                Product(**data).save()
                linecount += 1

        print("Added {0} products".format(linecount))

Answer 1

所以你有

- 手动预定义的子类别，每个子类别可能对应多个关键字
- 每个产品有多个文本字段，并且可以确保其中至少会出现一个关键字

从这个设置开始，我会首先尝试为每个子类别概括 "search term"，可能是通过正则表达式，这取决于您需要识别子类别的条件的复杂性。很可能一个同义词列表就足够了。将这样的字段添加到您的 Category 模型（这里是正则表达式解决方案）：

class Category(models.Model):
    # Search pattern for this category, stored as a regular expression.
    # Only needed for subcategories (the top-level categories come from the
    # csv), hence blank=True.
    # NOTE(review): the question's Category subclasses MPTTModel; presumably
    # that base class should be kept when adding this field -- confirm.
    regex = models.CharField(max_length=100, blank=True)
    ...

以您的示例为例，其中 trainers 和 runners 是同义词（据我对英语的了解，这两个词在这里只以复数形式出现，因此不必考虑单数形式的 trainer 或 runner），对应的正则表达式可以写成 r'trainers|runners'。

这是您需要手动定义的部分 - 我不羡慕您所涉及的繁琐工作 ;)

之后，您的导入循环需要在此处进行一些更改：

# Excerpt: only the beginning of handle() is shown here.
def handle(self, **options):
    from products.models import Product, Category
    all_categories = list(Category.objects.all())
    # Converted to a list so the queryset is evaluated once here and is not
    # queried again inside the loop below.

以及这里（在构建 data 字典之后）：

                data = ...
                # Look for the first leaf category whose pattern occurs in one
                # of the text fields; fields are tried in the order listed.
                # I suppose these are the two relevant fields to scan?
                subcat = None
                for textfield in ('description', 'product_name'):
                    for cat in all_categories:
                        # An empty pattern matches every string, so a category
                        # without a regex (the field is blank=True) must never
                        # be treated as a hit. Only nodes that have no
                        # children are considered.
                        if not cat.regex or not cat.is_leaf_node():
                            continue
                        if re.search(cat.regex, data[textfield]) is not None:
                            subcat = cat
                            break
                    if subcat is not None:
                        break
                # subcat is now the first matching subcategory (or None);
                # without a match the category already in data is kept.
                if subcat is not None:
                    data['category'] = subcat


                product = Product(**data)

完整代码：

import re
from products.models import Category, Brands
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command that imports products from a CSV file.

    Each product is assigned the category named in its CSV row, unless one of
    its text fields matches the regex of a leaf category, in which case that
    more specific category is used instead.
    """

    help = "Load some sample data into the db"

    def add_arguments(self, parser):
        """Register the ``--file`` option naming the CSV file to import."""
        parser.add_argument('--file', dest='file', help='File to load')

    @staticmethod
    def _find_subcategory(matchers, texts):
        """Return the category of the first pattern found in ``texts``, else None.

        ``matchers`` is a sequence of ``(compiled_pattern, category)`` pairs.
        Texts are tried in order, and for each text the patterns are tried in
        order, so earlier texts take precedence over earlier patterns.
        """
        for text in texts:
            for pattern, cat in matchers:
                if pattern.search(text) is not None:
                    return cat
        return None

    def handle(self, **options):
        """Create one ``Product`` per data row of the file given via ``--file``.

        The first row is treated as a header and skipped. Column 10 names the
        category and column 7 the brand; both are looked up by name and
        created on first use. Columns 1 (description) and 2 (product name)
        are scanned for a matching subcategory. Nothing happens when no file
        was given.

        Raises:
            IndexError: if a non-empty row has fewer than 11 columns.
            re.error: if a stored category regex is not a valid pattern.
        """
        # Local import: only this block needs csv.
        import csv

        from products.models import Product, Category

        if not options['file']:
            return

        # Built once, before the loop, so the categories are not queried and
        # the patterns not recompiled for every row. Two kinds of category
        # are left out:
        #   * those without a regex -- an empty pattern matches every string
        #     and would capture all products;
        #   * those with children -- only leaf nodes are considered.
        matchers = [
            (re.compile(cat.regex), cat)
            for cat in Category.objects.all()
            if cat.regex and cat.is_leaf_node()
        ]

        print("Importing " + options['file'])

        linecount = 0
        # newline='' is what the csv module expects so that it can handle
        # line breaks inside quoted fields itself.
        with open(options['file'], newline='') as f:
            # csv.reader (rather than line.split(',')) copes with quoted
            # fields that contain commas and strips the line terminator from
            # the last column, which is the category name here.
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            for fields in reader:
                if not fields:
                    # Blank line in the file: nothing to import.
                    continue

                category, _ = Category.objects.get_or_create(name=fields[10])
                brand, _ = Brands.objects.get_or_create(brand_name=fields[7])

                # I suppose these are the two relevant fields to scan?
                subcat = self._find_subcategory(matchers, (fields[1], fields[2]))

                data = {
                    'aw_deep_link': fields[0],
                    'description': fields[1],
                    'product_name': fields[2],
                    'aw_image_url': fields[3],
                    # The price fields are nullable decimals; an empty cell
                    # is passed as None because '' is not a valid decimal.
                    'search_price': fields[4] or None,
                    'merchant_name': fields[5],
                    'display_price': fields[6],
                    'brand_name': brand,
                    'colour': fields[8],
                    'rrp_price': fields[9] or None,
                    # Prefer the matched subcategory; otherwise keep the
                    # category named in the CSV row.
                    'category': subcat if subcat is not None else category,
                }

                Product(**data).save()
                linecount += 1

        print("Added {0} products".format(linecount))

Django：使用 querysets/filtering 创建类别

Django: Creating categories using querysets/filtering

python

django

csv

django-models

django-mptt