Wagtail 断言错误 "Unmatched tags: expected img, got p"

Wagtail AssertionError "Unmatched tags: expected img, got p"

我在 Docker 中运行 Wagtail 2.4,使用 Postgres 数据库,当我尝试在 Wagtail 中编辑博客 post 时出现错误。最初的 post 有效,我可以在网站上正常查看页面。但当我在 Wagtail 管理后台中编辑 post 时,它会抛出 500 错误:AssertionError "Unmatched tags: expected img, got p"

我最初认为问题是因为博客 post 最初是从另一个数据库(旧 Drupal 站点)导入的,并且 introbody 字段包含图像标签,即使介绍根本不应该有任何 HTML 标记。但是,我使用 Navicat 在数据库中删除了它们,错误仍然存在。不确定还可以尝试什么,但我可以确认并非所有 post 都会在编辑视图中出错,只有那些从 Drupal 导入的才会出错。之后在 Wagtail 管理后台中新添加的 post 都没有问题。

模型看起来像这样:

class NewsPage(Page):
    """News article page: lead image, category, plain intro and rich-text body."""

    main_image = models.ForeignKey(
        'wagtailimages.Image',
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name='+',
    )
    date = models.DateTimeField(default=datetime.datetime.today)
    category = models.CharField(max_length=50, choices=CATEGORIES)
    # Plain text field; only ``body`` is declared as rich text.
    intro = models.TextField()
    body = RichTextField(blank=True)
    search_keywords = models.TextField(blank=True, null=True)

    # Search index: everything Page indexes, plus intro and body.
    search_fields = Page.search_fields + [
        index.SearchField('intro'),
        index.SearchField('body'),
    ]

    # Panels shown on the "content" tab of the page editor.
    content_panels = Page.content_panels + [
        ImageChooserPanel('main_image'),
        FieldPanel('category'),
        FieldPanel('intro'),
        FieldPanel('body', classname='full'),
    ]

    # Panels shown on the "promote" tab.
    promote_panels = Page.promote_panels + [FieldPanel('search_keywords')]

    # Panels shown on the "settings" tab.
    settings_panels = Page.settings_panels + [FieldPanel('date')]

    @property
    def news_index_page(self):
        """Return the ``specific`` form of this page's parent page."""
        parent = self.get_parent()
        return parent.specific

    def get_context(self, request, *args, **kwargs):
        """Extend the base template context with this page and its index page."""
        context = super().get_context(request, *args, **kwargs)
        context.update(news_index_page=self.news_index_page, news=self)
        return context

回溯:

Traceback:

File "/usr/local/lib/python3.6/site-packages/django/core/handlers/exception.py" in inner
  34.             response = get_response(request)

File "/usr/local/lib/python3.6/site-packages/django/core/handlers/base.py" in _get_response
  115.                 response = self.process_exception_by_middleware(e, request)

File "/usr/local/lib/python3.6/site-packages/django/core/handlers/base.py" in _get_response
  113.                 response = wrapped_callback(request, *callback_args, **callback_kwargs)

File "/usr/local/lib/python3.6/site-packages/django/views/decorators/cache.py" in _wrapped_view_func
  44.         response = view_func(request, *args, **kwargs)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/urls/__init__.py" in wrapper
  102.             return view_func(request, *args, **kwargs)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/decorators.py" in decorated_view
  34.             return view_func(request, *args, **kwargs)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/views/pages.py" in edit
  545.         'has_unsaved_changes': has_unsaved_changes,

File "/usr/local/lib/python3.6/site-packages/django/shortcuts.py" in render
  36.     content = loader.render_to_string(template_name, context, request, using=using)

File "/usr/local/lib/python3.6/site-packages/django/template/loader.py" in render_to_string
  62.     return template.render(context, request)

File "/usr/local/lib/python3.6/site-packages/django/template/backends/django.py" in render
  61.             return self.template.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  171.                     return self._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  150.             return compiled_parent._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  150.             return compiled_parent._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  150.             return compiled_parent._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  62.                 result = block.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  62.                 result = block.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  987.             output = self.filter_expression.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  671.                 obj = self.var.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  796.             value = self._resolve_lookup(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _resolve_lookup
  858.                             current = current()

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render_form_content
  252.         return mark_safe(self.render_as_object() + self.render_missing_fields())

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render_as_object
  220.         return self.render()

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render
  324.             'self': self

File "/usr/local/lib/python3.6/site-packages/django/template/loader.py" in render_to_string
  62.     return template.render(context, request)

File "/usr/local/lib/python3.6/site-packages/django/template/backends/django.py" in render
  61.             return self.template.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  171.                     return self._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/defaulttags.py" in render
  209.                     nodelist.append(node.render_annotated(context))

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  987.             output = self.filter_expression.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  671.                 obj = self.var.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  796.             value = self._resolve_lookup(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _resolve_lookup
  858.                             current = current()

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render_as_object
  220.         return self.render()

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render
  324.             'self': self

File "/usr/local/lib/python3.6/site-packages/django/template/loader.py" in render_to_string
  62.     return template.render(context, request)

File "/usr/local/lib/python3.6/site-packages/django/template/backends/django.py" in render
  61.             return self.template.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  171.                     return self._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/defaulttags.py" in render
  209.                     nodelist.append(node.render_annotated(context))

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  987.             output = self.filter_expression.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  671.                 obj = self.var.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  796.             value = self._resolve_lookup(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _resolve_lookup
  858.                             current = current()

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/edit_handlers.py" in render_as_object
  482.             'field': self.bound_field,

File "/usr/local/lib/python3.6/site-packages/django/template/loader.py" in render_to_string
  62.     return template.render(context, request)

File "/usr/local/lib/python3.6/site-packages/django/template/backends/django.py" in render
  61.             return self.template.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  171.                     return self._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  188.             return template.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  173.                 return self._render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in _render
  163.         return self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/loader_tags.py" in render
  53.                 result = self.nodelist.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  937.                 bit = node.render_annotated(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render_annotated
  904.             return self.render(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in render
  987.             output = self.filter_expression.resolve(context)

File "/usr/local/lib/python3.6/site-packages/django/template/base.py" in resolve
  698.                 new_obj = func(obj, *arg_vals)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/templatetags/wagtailadmin_tags.py" in render_with_errors
  251.         return bound_field.as_widget()

File "/usr/local/lib/python3.6/site-packages/django/forms/boundfield.py" in as_widget
  93.             renderer=self.form.renderer,

File "/usr/local/lib/python3.6/site-packages/django/forms/widgets.py" in render
  241.         context = self.get_context(name, value, attrs)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/rich_text/editors/draftail/__init__.py" in get_context
  62.         context = super().get_context(name, value, attrs)

File "/usr/local/lib/python3.6/site-packages/django/forms/widgets.py" in get_context
  293.         context = super().get_context(name, value, attrs)

File "/usr/local/lib/python3.6/site-packages/django/forms/widgets.py" in get_context
  233.             'value': self.format_value(value),

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/rich_text/editors/draftail/__init__.py" in format_value
  59.         return self.converter.from_database_format(value)

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/rich_text/converters/contentstate.py" in from_database_format
  84.         self.html_to_contentstate_handler.feed(html)

File "/usr/local/lib/python3.6/html/parser.py" in feed
  111.         self.goahead(0)

File "/usr/local/lib/python3.6/html/parser.py" in goahead
  173.                     k = self.parse_endtag(i)

File "/usr/local/lib/python3.6/html/parser.py" in parse_endtag
  421.         self.handle_endtag(elem.lower())

File "/usr/local/lib/python3.6/site-packages/wagtail/admin/rich_text/converters/html_to_contentstate.py" in handle_endtag
  313.         assert name == expected_name, "Unmatched tags: expected %s, got %s" % (expected_name, name)

Exception Type: AssertionError at /admin/pages/2251/edit/
Exception Value: Unmatched tags: expected img, got p 

问题是 Wagtail 的解析器不能容忍格式错误的 HTML,而您从 Drupal 导入的 HTML 格式错误(例如,它包含未闭合的标签)。 Wagtail 编辑器会因此直接报错,这有点烦人,因为这样您就无法在编辑器中修复内容。

我过去的做法是在 shell 中使用像 Beautiful Soup 这样的 HTML 解析器来修复错误的 HTML。大致如下:

from bs4 import BeautifulSoup

# Round-trip each news page body through the html5lib parser and write the
# re-serialised markup back to the page.
for page in NewsPage.objects.all():
    parsed = BeautifulSoup(page.body, 'html5lib')
    # html5lib wraps the fragment in <html><body>; keep only the body's contents.
    page.body = parsed.body.decode_contents()
    page.save()

decode_contents() 将生成 Wagtail 的解析器不会抱怨的有效 HTML。

我最终成功的解决方案如下:

from bs4 import BeautifulSoup

# One-off cleanup: strip every <img> tag from each news page body, then store
# the re-serialised markup as a new revision and publish it.
for page in NewsPage.objects.all():
    html = page.body
    soup = BeautifulSoup(html, "html.parser")
    # find_all() handles bodies containing several images; ``soup.img`` only
    # ever refers to the first match, so a single decompose() leaves the rest.
    for img in soup.find_all("img"):
        img.decompose()

    # Assign text rather than the BeautifulSoup object itself.
    page.body = str(soup)
    page.save_revision().publish()