Elasticsearch:点击标签的搜索结果
Elasticsearch : search results on clicking on Hashtag
我有一些驼峰式写法的主题标签(hashtag),例如 #teamIndia。现在,当点击该主题标签时,应返回所有相关结果,并按以下顺序排列:首先是包含 "#teamIndia" 的结果,然后是包含 "teamIndia" 的结果,接着是包含 "team India" 的结果,最后是包含 "team" 或 "India" 的结果,依此类推。
我在做什么:
搜索文字:
“#teamIndia”、“#NEWYORK”、“#profession”、“#2016”
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
文档示例:-
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
** 搜索查询 **
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
预期结果:
"#teamIndia",
"teamIndia",
"team India",
"team",
"India",
与其他测试关键字类似。
原始帖子中的查询无法按预期工作的原因之一是 description.raw 字段是 not_analyzed 的。
因此,查询 #teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档,因为 description.raw 中保存的是未经分析的完整词项 Animals and Pets and #teamIndia,而不是 #teamIndia。
假设您拥有的文档类似于 OP 中的第二个示例。
示例:
{"id" : 2, "description" : "Animals and Pets and #teamIndia"}
或
{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}
那么您应该能够按以下顺序对文档进行排名:
1) 描述包含 "#teamIndia"
2) 描述包含 "teamIndia"
3) 描述包含 "team India"
4) 描述包含 "India"
方法是在 word_delimiter 过滤器中启用 preserve_original 和 catenate_all,如下例所示。
示例:
索引文件
PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\u200B=>",
"\u200C=>",
"\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}
POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}
POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}
POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}
POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
查询:
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}
结果:
"hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}
示例#dubai:
POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}
POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}
"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}
示例#professionalAndPunctual :
POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]
已编辑示例
示例:#TheBestAndTheBeast
POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}
#结果
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]
分析器示例 :
get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI
{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}
get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}
我有一些驼峰式写法的主题标签(hashtag),例如 #teamIndia。现在,当点击该主题标签时,应返回所有相关结果,并按以下顺序排列:首先是包含 "#teamIndia" 的结果,然后是包含 "teamIndia" 的结果,接着是包含 "team India" 的结果,最后是包含 "team" 或 "India" 的结果,依此类推。
我在做什么:
搜索文字: “#teamIndia”、“#NEWYORK”、“#profession”、“#2016”
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
文档示例:-
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
** 搜索查询 **
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
预期结果: "#teamIndia", "teamIndia", "team India", "team", "India",
与其他测试关键字类似。
原始帖子中的查询无法按预期工作的原因之一是 description.raw 字段是 not_analyzed 的。
因此,查询 #teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档,因为 description.raw 中保存的是未经分析的完整词项 Animals and Pets and #teamIndia,而不是 #teamIndia。
假设您拥有的文档类似于 OP 中的第二个示例。
示例:
{"id" : 2, "description" : "Animals and Pets and #teamIndia"}
或
{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}
那么您应该能够按以下顺序对文档进行排名:
1) 描述包含 "#teamIndia"
2) 描述包含 "teamIndia"
3) 描述包含 "team India"
4) 描述包含 "India"
方法是在 word_delimiter 过滤器中启用 preserve_original 和 catenate_all,如下例所示。
示例:
索引文件
PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\u200B=>",
"\u200C=>",
"\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}
POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}
POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}
POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}
POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
查询:
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}
结果:
"hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}
示例#dubai:
POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}
POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}
"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}
示例#professionalAndPunctual :
POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]
已编辑示例
示例:#TheBestAndTheBeast
POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}
#结果
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! <a href=\"search/clips?q=%23theProvider&source=hashtag\" ng-click=\"handleModalClick()\"> #theProvider </a> rioOlympic <a href=\"search/clips?q=%23DUBAI&source=hashtag\" ng-click=\"handleModalClick()\"> #DUBAI </a> <a href=\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\" ng-click=\"handleModalClick()\"> #TheBestAndTheBeast </a> <a href=\"search/clips?q=%23rioOlympic&source=hashtag\" ng-click=\"handleModalClick()\"> #rioOlympic </a>"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]
分析器示例 :
get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI
{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}
get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}