在 Elasticsearch 中应用 analyzer/filter 时搜索结果出错
search in elasticsearch errors when applying analyzer/filter
我下载了包含技能分类法的 onet 数据集,并将其上传到 Elasticsearch。该技能分类法中有 C++、.net、C# 这样的技能。我希望在搜索 c# 时,只得到技能为 c# 的结果。参考了一些链接之后,我为索引配置了如下所示的映射(mappings)和设置(settings)。
{
"onnet_taxonomy": {
"mappings": {
"text": {
"properties": {
"Occupation": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill Type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"keywords": {
"properties": {
"Occupation": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill": {
"type": "text",
"fields": {
"analyzed": {
"type": "text",
"analyzer": "analyzer_keyword",
"search_analyzer": "analyzer_shingle"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill Type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "onnet_taxonomy",
"creation_date": "1583114276039",
"analysis": {
"filter": {
"my_shingle_filter": {
"max_shingle_size": "8",
"min_shingle_size": "2",
"output_unigrams": "true",
"type": "shingle"
}
},
"analyzer": {
"analyzer_keyword": {
"filter": [
"lowercase"
],
"char_filter": [
"code_mapping"
],
"type": "custom",
"tokenizer": "keyword"
},
"analyzer_shingle": {
"filter": [
"lowercase",
"my_shingle_filter"
],
"char_filter": [
"code_mapping"
],
"tokenizer": "standard"
}
},
"char_filter": {
"code_mapping": {
"type": "mapping",
"mappings": [
"++ => plusplus",
"c# => csharp",
"C# => csharp",
"F# => fsharp",
"f# => fsharp",
".net => dotnet",
".Net => dotnet",
".NET => dotnet",
"( => map_lp",
") => map_rp",
"& => and",
"# => hash",
"+ => plus"
]
}
}
},
"number_of_replicas": "1",
"uuid": "LNf2frW1S8WmHSOJWVrvLA",
"version": {
"created": "5030399"
}
}
}
}
}
当我使用如下查询时
{
"query": {
"bool": {
"must": [
{
"match": {
"Skill": "c++"
}
}
]
}
},
"size": 10
}
我得到的是所有包含 'c' 的技能。
假设应用了分析器,当我使用如下查询时
{
"query": {
"bool": {
"must": [
{
"match": {
"Skill.analyzed": "c++"
}
}
]
}
},
"size": 10
}
我得到空输出。我是否正确包含了分析器,或者我的查询有误?
我简化了一下您的问题:为了简单起见,假设您只有一个名为 title 的字段,其中包含不同的语言名称,例如 c、c++、c#、f#。
下面是这个 title 字段所在索引的设置和映射。
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"my_analyzer": {
"filter": [
"lowercase"
],
"char_filter": [
"code_mapping"
],
"tokenizer": "standard" --> notice `standard`
}
},
"char_filter": {
"code_mapping": {
"type": "mapping",
"mappings": [
"++ => plusplus",
"c# => csharp",
"C# => csharp",
"F# => fsharp",
"f# => fsharp",
".net => dotnet",
".Net => dotnet",
".NET => dotnet",
"( => map_lp",
") => map_rp",
"& => and",
"# => hash",
"+ => plus"
]
}
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer" --> using custom analyzer created in settings
}
}
}
}
索引一些文档
POST /_doc/{doc-id}
{
"title": "c#"
}
{
"title": "c++"
}
{
"title": "c"
}
{
"title": "F#"
}
下面是您问题中给出的搜索查询(在您那里,它会返回所有包含 c 的记录):
{
"query": {
"bool": {
"must": [
{
"match": {
"title": "c++"
}
}
]
}
},
"size": 10
}
现在在我这里,它只返回包含 c++ 的文档,搜索 API 的返回结果如下所示。
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9808292,
"hits": [
{
"_index": "cplus",
"_type": "_doc",
"_id": "1",
"_score": 0.9808292,
"_source": {
"title": "c++"
}
}
]
}
我下载了包含技能分类法的 onet 数据集,并将其上传到 Elasticsearch。该技能分类法中有 C++、.net、C# 这样的技能。我希望在搜索 c# 时,只得到技能为 c# 的结果。参考了一些链接之后,我为索引配置了如下所示的映射(mappings)和设置(settings)。
{
"onnet_taxonomy": {
"mappings": {
"text": {
"properties": {
"Occupation": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill Type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"keywords": {
"properties": {
"Occupation": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill": {
"type": "text",
"fields": {
"analyzed": {
"type": "text",
"analyzer": "analyzer_keyword",
"search_analyzer": "analyzer_shingle"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Skill Type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "onnet_taxonomy",
"creation_date": "1583114276039",
"analysis": {
"filter": {
"my_shingle_filter": {
"max_shingle_size": "8",
"min_shingle_size": "2",
"output_unigrams": "true",
"type": "shingle"
}
},
"analyzer": {
"analyzer_keyword": {
"filter": [
"lowercase"
],
"char_filter": [
"code_mapping"
],
"type": "custom",
"tokenizer": "keyword"
},
"analyzer_shingle": {
"filter": [
"lowercase",
"my_shingle_filter"
],
"char_filter": [
"code_mapping"
],
"tokenizer": "standard"
}
},
"char_filter": {
"code_mapping": {
"type": "mapping",
"mappings": [
"++ => plusplus",
"c# => csharp",
"C# => csharp",
"F# => fsharp",
"f# => fsharp",
".net => dotnet",
".Net => dotnet",
".NET => dotnet",
"( => map_lp",
") => map_rp",
"& => and",
"# => hash",
"+ => plus"
]
}
}
},
"number_of_replicas": "1",
"uuid": "LNf2frW1S8WmHSOJWVrvLA",
"version": {
"created": "5030399"
}
}
}
}
}
当我使用如下查询时
{
"query": {
"bool": {
"must": [
{
"match": {
"Skill": "c++"
}
}
]
}
},
"size": 10
}
我得到的是所有包含 'c' 的技能。
假设分析器已被应用,当我使用如下查询时
{
"query": {
"bool": {
"must": [
{
"match": {
"Skill.analyzed": "c++"
}
}
]
}
},
"size": 10
}
我得到空输出。我是否正确包含了分析器,或者我的查询有误?
我简化了一下您的问题:为了简单起见,假设您只有一个名为 title 的字段,其中包含不同的语言名称,例如 c、c++、c#、f#。
下面是这个 title 字段所在索引的设置和映射。
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"my_analyzer": {
"filter": [
"lowercase"
],
"char_filter": [
"code_mapping"
],
"tokenizer": "standard" --> notice `standard`
}
},
"char_filter": {
"code_mapping": {
"type": "mapping",
"mappings": [
"++ => plusplus",
"c# => csharp",
"C# => csharp",
"F# => fsharp",
"f# => fsharp",
".net => dotnet",
".Net => dotnet",
".NET => dotnet",
"( => map_lp",
") => map_rp",
"& => and",
"# => hash",
"+ => plus"
]
}
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer" --> using custom analyzer created in settings
}
}
}
}
索引一些文档
POST /_doc/{doc-id}
{
"title": "c#"
}
{
"title": "c++"
}
{
"title": "c"
}
{
"title": "F#"
}
下面是您问题中给出的搜索查询(在您那里,它会返回所有包含 c 的记录):
{
"query": {
"bool": {
"must": [
{
"match": {
"title": "c++"
}
}
]
}
},
"size": 10
}
现在在我这里,它只返回包含 c++ 的文档,搜索 API 的返回结果如下所示。
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9808292,
"hits": [
{
"_index": "cplus",
"_type": "_doc",
"_id": "1",
"_score": 0.9808292,
"_source": {
"title": "c++"
}
}
]
}