搜索多个单词 elasticsearch haystack
Search for multiple words elasticsearch haystack
我用过 django、haystack 和 elasticsearch。
我的search_index.py:
from haystack import indexes
from models import Advertisement
class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for published ``Advertisement`` objects."""

    # Main document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)

    # NOTE(review): these three fields declare no ``model_attr`` and no
    # ``prepare_*`` method is visible here -- confirm how they get populated.
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()

    # Fields copied from the model attribute of the same name.
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class this index covers."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the published advertisements, joining their related rows."""
        published = self.get_model().objects.filter(is_published=True)
        return published.select_related('make', 'section', 'subcategory')
搜索表单:
<!-- Search form: sends the text typed into "q" to /search as a GET request. -->
<form action="/search" method="get">
<!-- NOTE(review): "text-search" is not a standard input type; presumably browsers fall back to a plain text field - confirm. -->
<input type="text-search" name="q">
<input type="submit" value="">
</form>
模板:
{# Renders one group of paragraphs for every search result in page.object_list. #}
{% block content %}
{% for result in page.object_list %}
{# Every value below is read through result.object. #}
<p>{{ result.object.title }}</p>
<p>{{ result.object.content }}</p>
<p>{{ result.object.images }}</p>
<p>{{ result.object.make }}</p>
<p>{{ result.object.section }}</p>
<p>{{ result.object.subcategory }}</p>
{% empty %}
{# Rendered when page.object_list has no entries. #}
<p>No result.</p>
{% endfor %}
{% endblock %}
执行 curl -XGET "http://localhost:9200/_search?q=fender+boss" 时,
我得到了包含 "boss" 和 "fender" 的所有结果。
但是当我在搜索框中输入 "boss fender" 时,却没有得到任何结果。通过搜索表单,我只能得到单个单词(例如 "boss")的搜索结果。
如何实现多词搜索?
这个月我陷入了这个问题。
为了执行正确的查询,您需要覆盖一些 haystack 对象。我发现这篇文章非常有用 Extending Haystack’s Elasticsearch backend。一开始相当复杂,但一旦了解它是如何工作的......它就有效了:-)
博文教了如何实现elasticsearch的嵌套查询...嗯...我实现了一个基本的multi_match query.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)
from haystack.query import SearchQuerySet
class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable settings and ``multi_match`` support.

    Two Django settings are honoured when present and non-empty:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` replaces ``DEFAULT_SETTINGS``.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` replaces ``DEFAULT_ANALYZER``.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the optional overrides."""
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search kwargs, optionally swapping in a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded positionally to the
        parent implementation. When ``multi_match`` is truthy it must be a
        mapping with the keys ``query``, ``fields``, ``tie_breaker`` and
        ``minimum_should_match``; the ``query`` entry produced by the parent is
        then replaced by the corresponding ``multi_match`` clause.

        Raises:
            KeyError: if ``multi_match`` is given but lacks one of those keys.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset,
            end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            # Replaces whatever query the parent built.
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }
        return out

    def build_schema(self, fields):
        """Build the mapping, applying a per-field or default analyzer.

        Returns the ``(content_field_name, mapping)`` pair from the parent with
        the ``analyzer`` entry set on indexed string fields.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_class in fields.values():
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                # NOTE(review): this also overrides the analyzer of ngram /
                # edge_ngram fields -- confirm that ``or ... in`` (rather than
                # ``and not ... in``) is really intended here.
                if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            mapping[field_class.index_fieldname] = field_mapping

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``multi_match`` query and return the processed results.

        Args:
            query: the text to search for.
            fields: field names to search (may carry ``^boost`` suffixes).
            minimum_should_match: passed through to the ``multi_match`` query.
            tie_breaker: passed through to the ``multi_match`` query.

        Returns:
            Whatever ``self._process_results`` returns for the response.
        """
        # Imported lazily so elasticsearch_dsl is only needed when this runs.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        match_query = MultiMatch(
            query=u'{}'.format(query),
            fields=fields,
            minimum_should_match=minimum_should_match,
            tie_breaker=tie_breaker,
        )
        # Restrict the search to this backend's own index (``index_name`` is
        # set by the parent backend) instead of querying every index the
        # cluster holds.
        search = Search(using=self.conn, index=self.index_name).query(match_query)
        raw = search.execute()
        return self._process_results(raw)
class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and run a ``multi_match`` request."""

    # Class-level default so build_params() works on queries for which
    # add_multi_match_query() was never called (it used to raise
    # AttributeError in that case).
    multi_match_query = None

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the ``multi_match`` query now and cache results and hit count."""
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the ``multi_match`` parameters for later ``build_params`` calls."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search kwargs plus ``multi_match`` when one is set."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query
        return search_kwargs

    def _clone(self, klass=None, using=None):
        """Clone the query, carrying the ``multi_match`` parameters along.

        Without this a cloned query silently loses its ``multi_match`` request.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(klass=klass, using=using)
        clone.multi_match_query = self.multi_match_query
        return clone
class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet exposing a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone whose query has the ``multi_match`` registered and run.

        The parameters are first stored on the cloned query, then the query is
        executed immediately with the same parameters.
        """
        queryset = self._clone()
        params = (query, fields, minimum_should_match, tie_breaker)
        search_query = queryset.query
        search_query.add_multi_match_query(*params)
        search_query.multi_match(*params)
        return queryset
class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine that plugs in the custom query and backend classes."""

    query = ElasticsearchSearchQueryCustom
    backend = ElasticsearchEngineBackendCustom
如您所见,我使用 elasticsearch-dsl 来执行查询(MultiMatch)。下面这句话概括了那篇博文的思路:ElasticsearchSearchQuerySetCustom().multi_match(...) 的调用依赖于 ElasticsearchSearchQueryCustom,而后者又依赖于 ElasticsearchEngineBackendCustom。
然后将elasticsearch配置放入你的设置中,例如:
# Analyzer name the custom backend uses in place of its "snowball" default.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
# Index settings the custom backend uses in place of its DEFAULT_SETTINGS.
ELASTICSEARCH_INDEX_SETTINGS = {
"settings": {[...]}  # placeholder: replace [...] with the real index settings
}
您可以从 Elasticsearch 文档的 Language Analyzers 部分获取适用于您所用语言的 ELASTICSEARCH_INDEX_SETTINGS 配置。
您还需要覆盖 SearchForm
:
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from haystack.forms import SearchForm
from .backend import ElasticsearchSearchQuerySetCustom
class SearchFormCustom(SearchForm):
    """Search form that answers queries with a boosted ``multi_match`` search."""

    def search(self):
        """Return the search results for the submitted ``q`` value.

        Falls back to ``no_query_found()`` when the form is invalid or when the
        query is empty, either before or after cleaning.
        """
        # Validate first: cleaned_data only exists once validation has run.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            # Avoid handing None or '' to query.clean().
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # ``^`` boosts a field: title weighs far more than the document text.
        sqs = ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])
        return sqs
字段 title 和 text 必须存在于您的索引中;脱字符(^)用于为字段设置权重提升(boost)。
您需要覆盖 haystack url 模式才能使用自定义表单:
# Route the search root to Haystack's view, built with the custom form.
# A plain list is used instead of the deprecated ``patterns()`` helper; the
# 'search.views' prefix only applied to string view names, and the view here
# is a callable, so dropping it changes nothing.
urlpatterns = [
    url(r'^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
]
就是这样,HTH :-)
注意:不要使用 result.object.something,而应使用索引中的字段,例如 result.title,因为 result.object.title 会访问数据库!参见 Haystack Best Practices。
我用过 django、haystack 和 elasticsearch。
我的search_index.py:
from haystack import indexes
from models import Advertisement
class AdvertisementIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index for published ``Advertisement`` objects."""

    # Main document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)

    # NOTE(review): these three fields declare no ``model_attr`` and no
    # ``prepare_*`` method is visible here -- confirm how they get populated.
    make = indexes.CharField()
    section = indexes.CharField()
    subcategory = indexes.CharField()

    # Fields copied from the model attribute of the same name.
    content = indexes.CharField(model_attr='content')
    images = indexes.CharField(model_attr='images')

    def get_model(self):
        """Return the model class this index covers."""
        return Advertisement

    def index_queryset(self, using=None):
        """Return the published advertisements, joining their related rows."""
        published = self.get_model().objects.filter(is_published=True)
        return published.select_related('make', 'section', 'subcategory')
搜索表单:
<!-- Search form: sends the text typed into "q" to /search as a GET request. -->
<form action="/search" method="get">
<!-- NOTE(review): "text-search" is not a standard input type; presumably browsers fall back to a plain text field - confirm. -->
<input type="text-search" name="q">
<input type="submit" value="">
</form>
模板:
{# Renders one group of paragraphs for every search result in page.object_list. #}
{% block content %}
{% for result in page.object_list %}
{# Every value below is read through result.object. #}
<p>{{ result.object.title }}</p>
<p>{{ result.object.content }}</p>
<p>{{ result.object.images }}</p>
<p>{{ result.object.make }}</p>
<p>{{ result.object.section }}</p>
<p>{{ result.object.subcategory }}</p>
{% empty %}
{# Rendered when page.object_list has no entries. #}
<p>No result.</p>
{% endfor %}
{% endblock %}
执行 curl -XGET "http://localhost:9200/_search?q=fender+boss" 时,
我得到了包含 "boss" 和 "fender" 的所有结果。
但是当我在搜索框中输入 "boss fender" 时,却没有得到任何结果。通过搜索表单,我只能得到单个单词(例如 "boss")的搜索结果。如何实现多词搜索?
这个月我陷入了这个问题。
为了执行正确的查询,您需要覆盖一些 haystack 对象。我发现这篇文章非常有用 Extending Haystack’s Elasticsearch backend。一开始相当复杂,但一旦了解它是如何工作的......它就有效了:-)
博文教了如何实现elasticsearch的嵌套查询...嗯...我实现了一个基本的multi_match query.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
ElasticsearchSearchBackend, ElasticsearchSearchEngine, ElasticsearchSearchQuery)
from haystack.query import SearchQuerySet
class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable settings and ``multi_match`` support.

    Two Django settings are honoured when present and non-empty:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` replaces ``DEFAULT_SETTINGS``.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` replaces ``DEFAULT_ANALYZER``.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the optional overrides."""
        super(ElasticsearchEngineBackendCustom, self).__init__(connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search kwargs, optionally swapping in a ``multi_match`` query.

        All arguments except ``multi_match`` are forwarded positionally to the
        parent implementation. When ``multi_match`` is truthy it must be a
        mapping with the keys ``query``, ``fields``, ``tie_breaker`` and
        ``minimum_should_match``; the ``query`` entry produced by the parent is
        then replaced by the corresponding ``multi_match`` clause.

        Raises:
            KeyError: if ``multi_match`` is given but lacks one of those keys.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset,
            end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            # Replaces whatever query the parent built.
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }
        return out

    def build_schema(self, fields):
        """Build the mapping, applying a per-field or default analyzer.

        Returns the ``(content_field_name, mapping)`` pair from the parent with
        the ``analyzer`` entry set on indexed string fields.
        """
        content_field_name, mapping = super(ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_class in fields.values():
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                # NOTE(review): this also overrides the analyzer of ngram /
                # edge_ngram fields -- confirm that ``or ... in`` (rather than
                # ``and not ... in``) is really intended here.
                if not hasattr(field_class, 'facet_for') or field_class.field_type in ('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            mapping[field_class.index_fieldname] = field_mapping

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Execute a ``multi_match`` query and return the processed results.

        Args:
            query: the text to search for.
            fields: field names to search (may carry ``^boost`` suffixes).
            minimum_should_match: passed through to the ``multi_match`` query.
            tie_breaker: passed through to the ``multi_match`` query.

        Returns:
            Whatever ``self._process_results`` returns for the response.
        """
        # Imported lazily so elasticsearch_dsl is only needed when this runs.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        match_query = MultiMatch(
            query=u'{}'.format(query),
            fields=fields,
            minimum_should_match=minimum_should_match,
            tie_breaker=tie_breaker,
        )
        # Restrict the search to this backend's own index (``index_name`` is
        # set by the parent backend) instead of querying every index the
        # cluster holds.
        search = Search(using=self.conn, index=self.index_name).query(match_query)
        raw = search.execute()
        return self._process_results(raw)
class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and run a ``multi_match`` request."""

    # Class-level default so build_params() works on queries for which
    # add_multi_match_query() was never called (it used to raise
    # AttributeError in that case).
    multi_match_query = None

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Run the ``multi_match`` query now and cache results and hit count."""
        results = self.backend.multi_match_run(query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Remember the ``multi_match`` parameters for later ``build_params`` calls."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search kwargs plus ``multi_match`` when one is set."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(spelling_query, **kwargs)
        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query
        return search_kwargs

    def _clone(self, klass=None, using=None):
        """Clone the query, carrying the ``multi_match`` parameters along.

        Without this a cloned query silently loses its ``multi_match`` request.
        """
        clone = super(ElasticsearchSearchQueryCustom, self)._clone(klass=klass, using=using)
        clone.multi_match_query = self.multi_match_query
        return clone
class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet exposing a ``multi_match`` entry point."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone whose query has the ``multi_match`` registered and run.

        The parameters are first stored on the cloned query, then the query is
        executed immediately with the same parameters.
        """
        queryset = self._clone()
        params = (query, fields, minimum_should_match, tie_breaker)
        search_query = queryset.query
        search_query.add_multi_match_query(*params)
        search_query.multi_match(*params)
        return queryset
class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Search engine that plugs in the custom query and backend classes."""

    query = ElasticsearchSearchQueryCustom
    backend = ElasticsearchEngineBackendCustom
如您所见,我使用 elasticsearch-dsl 来执行查询(MultiMatch)。下面这句话概括了那篇博文的思路:ElasticsearchSearchQuerySetCustom().multi_match(...) 的调用依赖于 ElasticsearchSearchQueryCustom,而后者又依赖于 ElasticsearchEngineBackendCustom。
然后将elasticsearch配置放入你的设置中,例如:
# Analyzer name the custom backend uses in place of its "snowball" default.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'
# Index settings the custom backend uses in place of its DEFAULT_SETTINGS.
ELASTICSEARCH_INDEX_SETTINGS = {
"settings": {[...]}  # placeholder: replace [...] with the real index settings
}
您可以从 Elasticsearch 文档的 Language Analyzers 部分获取适用于您所用语言的 ELASTICSEARCH_INDEX_SETTINGS 配置。
您还需要覆盖 SearchForm
:
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from haystack.forms import SearchForm
from .backend import ElasticsearchSearchQuerySetCustom
class SearchFormCustom(SearchForm):
    """Search form that answers queries with a boosted ``multi_match`` search."""

    def search(self):
        """Return the search results for the submitted ``q`` value.

        Falls back to ``no_query_found()`` when the form is invalid or when the
        query is empty, either before or after cleaning.
        """
        # Validate first: cleaned_data only exists once validation has run.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            # Avoid handing None or '' to query.clean().
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # ``^`` boosts a field: title weighs far more than the document text.
        sqs = ElasticsearchSearchQuerySetCustom().multi_match(query, ['title^8', 'text^0.5'])
        return sqs
字段 title 和 text 必须存在于您的索引中;脱字符(^)用于为字段设置权重提升(boost)。
您需要覆盖 haystack url 模式才能使用自定义表单:
# Route the search root to Haystack's view, built with the custom form.
# A plain list is used instead of the deprecated ``patterns()`` helper; the
# 'search.views' prefix only applied to string view names, and the view here
# is a callable, so dropping it changes nothing.
urlpatterns = [
    url(r'^$', search_view_factory(form_class=SearchFormCustom), name='haystack-search'),
]
就是这样,HTH :-)
注意:不要使用 result.object.something,而应使用索引中的字段,例如 result.title,因为 result.object.title 会访问数据库!参见 Haystack Best Practices。