通过 Django ORM 创建 CASE 查询

Create Case query via Django ORM

使用 Django 1.8 和 Postgres 9.3 - 我有一个 Django 模型,如下所示,其中包含日志行。我想提取所有从未被允许访问的 domain_name 值。

# Model holding one row per log line (further fields are elided by <snip>).
class Logs(models.Model):
    date = DateTimeField(db_index=True)
    # Action code; 'a' and 'ae' appear to be the "allowed" actions -- TODO confirm.
    action = TextField(default='a', null=True, blank=True, db_index=True)
    url = TextField(null=True, blank=True)
    # JSON-typed column.
    stats = JsonField(null=True, blank=True)
    # Column the queries in this document group by.
    domain_name = TextField(null=True, blank=True, db_index=True)
    <snip>

这个 SQL 查询工作得很好,但我在将它转换为 Django ORM 查询集时遇到了问题。

-- List every domain_name that has no log row with action 'a' or 'ae'.
-- NOTE(review): the generated SQL shown later uses "reporter_logs"; confirm the table name.
select domain_name from reporter_log
GROUP BY domain_name

-- The CASE has no ELSE, so non-matching rows yield NULL, which COUNT ignores.
HAVING COUNT(CASE WHEN action = 'a' OR action = 'ae' then 1 END) = 0;

我原以为以下查询集会起作用:

LogLine.objects.annotate(  # no values() call first: the generated SQL groups by every column
     allowed=Count(Case(
        When(action='a', then=Value(1)),
        When(action='ae', then=Value(1)),
        default=Value(0),  # non-matching rows yield 0 rather than NULL
        output_field=IntegerField(),
    )
))

但我得到了回溯:

ProgrammingError                          Traceback (most recent call last)
<ipython-input-4-feb056328fe8> in <module>()
      4         When(action='ae', then=Value(1)),
      5         default=Value(0),
----> 6         output_field=IntegerField(),
      7     )
      8 ))

/usr/lib/python3/dist-packages/IPython/core/displayhook.py in __call__(self, result)
    245             self.start_displayhook()
    246             self.write_output_prompt()
--> 247             format_dict, md_dict = self.compute_format_data(result)
    248             self.write_format_data(format_dict, md_dict)
    249             self.update_user_ns(result)

/usr/lib/python3/dist-packages/IPython/core/displayhook.py in compute_format_data(self, result)
    155 
    156         """
--> 157         return self.shell.display_formatter.format(result)
    158 
    159     def write_format_data(self, format_dict, md_dict=None):

/usr/lib/python3/dist-packages/IPython/core/formatters.py in format(self, obj, include, exclude)
    150             md = None
    151             try:
--> 152                 data = formatter(obj)
    153             except:
    154                 # FIXME: log the exception

/usr/lib/python3/dist-packages/IPython/core/formatters.py in __call__(self, obj)
    478                 type_pprinters=self.type_printers,
    479                 deferred_pprinters=self.deferred_printers)
--> 480             printer.pretty(obj)
    481             printer.flush()
    482             return stream.getvalue()

/usr/lib/python3/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
    361                             if isinstance(meth, collections.Callable):
    362                                 return meth(obj, self, cycle)
--> 363             return _default_pprint(obj, self, cycle)
    364         finally:
    365             self.end_group()

/usr/lib/python3/dist-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
    481     if getattr(klass, '__repr__', None) not in _baseclass_reprs:
    482         # A user-provided repr.
--> 483         p.text(repr(obj))
    484         return
    485     p.begin_group(1, '<')

/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __repr__(self)
    136 
    137     def __repr__(self):
--> 138         data = list(self[:REPR_OUTPUT_SIZE + 1])
    139         if len(data) > REPR_OUTPUT_SIZE:
    140             data[-1] = "...(remaining elements truncated)..."

/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in __iter__(self)
    160                - Responsible for turning the rows into model objects.
    161         """
--> 162         self._fetch_all()
    163         return iter(self._result_cache)
    164 

/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in _fetch_all(self)
    963     def _fetch_all(self):
    964         if self._result_cache is None:
--> 965             self._result_cache = list(self.iterator())
    966         if self._prefetch_related_lookups and not self._prefetch_done:
    967             self._prefetch_related_objects()

/usr/local/lib/python3.4/dist-packages/django/db/models/query.py in iterator(self)
    236         # Execute the query. This will also fill compiler.select, klass_info,
    237         # and annotations.
--> 238         results = compiler.execute_sql()
    239         select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
    240                                                   compiler.annotation_col_map)

/usr/local/lib/python3.4/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
    838         cursor = self.connection.cursor()
    839         try:
--> 840             cursor.execute(sql, params)
    841         except Exception:
    842             cursor.close()

/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
     77         start = time()
     78         try:
---> 79             return super(CursorDebugWrapper, self).execute(sql, params)
     80         finally:
     81             stop = time()

/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
     62                 return self.cursor.execute(sql)
     63             else:
---> 64                 return self.cursor.execute(sql, params)
     65 
     66     def executemany(self, sql, param_list):

/usr/local/lib/python3.4/dist-packages/django/db/utils.py in __exit__(self, exc_type, exc_value, traceback)
     95                 if dj_exc_type not in (DataError, IntegrityError):
     96                     self.wrapper.errors_occurred = True
---> 97                 six.reraise(dj_exc_type, dj_exc_value, traceback)
     98 
     99     def __call__(self, func):

/usr/local/lib/python3.4/dist-packages/django/utils/six.py in reraise(tp, value, tb)
    656             value = tp()
    657         if value.__traceback__ is not tb:
--> 658             raise value.with_traceback(tb)
    659         raise value
    660 

/usr/local/lib/python3.4/dist-packages/django/db/backends/utils.py in execute(self, sql, params)
     62                 return self.cursor.execute(sql)
     63             else:
---> 64                 return self.cursor.execute(sql, params)
     65 
     66     def executemany(self, sql, param_list):

ProgrammingError: could not identify an equality operator for type json
LINE 1: ...e"."domain_name", "reporter_logs"."rule_type", "reporter_...

                                                         ^

这是失败的查询所生成的 SQL 输出。

[{'sql': 'SELECT "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence", COUNT(CASE WHEN "reporter_logs"."action" = \'a\' THEN 1 WHEN "reporter_logs"."action" = \'ae\' THEN 1 ELSE 0 END) AS "allowed" FROM "reporter_logs" GROUP BY "reporter_logs"."id", "reporter_logs"."date", "reporter_logs"."userip", "reporter_logs"."action", "reporter_logs"."url", "reporter_logs"."method", "reporter_logs"."status", "reporter_logs"."mimetype", "reporter_logs"."content_length", "reporter_logs"."pruned", "reporter_logs"."page_title", "reporter_logs"."user_agent", "reporter_logs"."domain_name", "reporter_logs"."rule_type", "reporter_logs"."tally_stats", "reporter_logs"."cat_stats", "reporter_logs"."grade_stats", "reporter_logs"."ignored", "reporter_logs"."category", "reporter_logs"."genre", "reporter_logs"."grade", "reporter_logs"."top_grade", "reporter_logs"."category_confidence", "reporter_logs"."grade_confidence" LIMIT 21', 'time': '0.002'}]

如有任何建议,我们将不胜感激。包括有关编写查询的替代方法的建议。

尝试不使用 Q 对象,改用多个独立的 When:

# Two separate When clauses instead of a single Q-based condition.
Logs.objects.annotate(
     allowed=Count(Case(
        When(action='a', then=1),
        When(action='ae', then=1),
        default=0,
        output_field=IntegerField()
    )
)).values('date')  # values() is applied after annotate() here

嗯,感谢 @knbk 和 @Gocht 的尝试。我终于找到了解决问题的方法。我必须指定只需要 domain_name 列——否则查询会按所有列(包括 json 列)分组,这正是导致回溯的原因。

此外,为了让所有没有匹配项的域返回 0,我不得不使用 Sum 而不是 Count。对于不匹配的值,Count 返回的是 1,因为 COUNT 会把 default 产生的 0 也当作非 NULL 值计入。

# values('domain_name') before annotate() limits the selected columns to
# domain_name, so the aggregate is computed per domain.
# NOTE(review): listing only never-allowed domains presumably still needs a
# .filter(allowed=0) on the result -- confirm.
Logs.objects.values('domain_name').annotate(
     # Sum of 1/0 flags: a domain with no 'a'/'ae' rows ends up with allowed == 0.
     allowed=Sum(Case(
        When(Q(action='a') |Q(action='ae'), then=Value(1)),
        default=Value(0),
        output_field=IntegerField()
    )
))