如果在弹性搜索中错误地输入关键字,如何匹配相关数据
how to match a related data if incorrectly texted a keyword in elastic search
我有一份文件,标题为“努力工作与成功”。我需要搜索此文档。如果我输入“Hardwork”(没有空格),它不会返回任何值。但是如果我输入“hard work”,它就会返回文档。
这是我用过的查询:
const search = qObject.search;
const payload = {
from: skip,
size: limit,
_source: [
"id",
"title",
"thumbnailUrl",
"youtubeUrl",
"speaker",
"standards",
"topics",
"schoolDetails",
"uploadTime",
"schoolName",
"description",
"studentDetails",
"studentId"
],
query: {
bool: {
must: {
multi_match: {
fields: [
"title^2",
"standards.standard^2",
"speaker^2",
"schoolDetails.schoolName^2",
"hashtags^2",
"topics.topic^2",
"studentDetails.studentName^2",
],
query: search,
fuzziness: "AUTO",
},
},
},
},
};
如果我搜索标题“努力工作”(包含 space)
然后它 returns 数据是这样的:
"searchResults": [
{
"_id": "92",
"_score": 19.04531,
"_source": {
"standards": {
"standard": "3",
"categoryType": "STANDARD",
"categoryId": "S3"
},
"schoolDetails": {
"categoryType": "SCHOOL",
"schoolId": "TPS123",
"schoolType": "PUBLIC",
"logo": "91748922mn8bo9krcx71.png",
"schoolName": "Carmel CMI Public School"
},
"studentDetails": {
"studentId": 270,
"studentDp": "164646972124244.jpg",
"studentName": "Nelvin",
"about": "good student"
},
"topics": {
"categoryType": "TOPIC",
"topic": "Motivation",
"categoryId": "MY"
},
"youtubeUrl": "https://www.youtube.com/watch?v=wermQ",
"speaker": "Anna Maria Siby",
"description": "How hardwork leads to success - motivational talk by Anna",
"id": 92,
"uploadTime": "2022-03-17T10:59:59.400Z",
"title": "Hard work & Success",
}
},
]
如果我搜索关键字“Hardwork”(无空格),它不会检测到此数据。我需要在其中制作一个 space 或者我需要将相关数据与搜索关键字匹配。有什么解决办法吗,你能帮我解决这个问题吗?
我用 shingle 分析器做了一个例子。
映射:
{
"settings": {
"analysis": {
"filter": {
"shingle_filter": {
"type": "shingle",
"max_shingle_size": 4,
"min_shingle_size": 2,
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"shingle_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
}
}
现在我用你的术语测试了它。请注意,令牌“hardwork”已生成,但其他令牌也已生成,这对您来说可能是个问题。
GET idx-separator-words/_analyze
{
"analyzer": "shingle_analyzer",
"text": ["Hard work & Success"]
}
结果:
{
"tokens" : [
{
"token" : "hard",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "hardwork",
"start_offset" : 0,
"end_offset" : 9,
"type" : "shingle",
"position" : 0,
"positionLength" : 2
},
{
"token" : "hardworksuccess",
"start_offset" : 0,
"end_offset" : 19,
"type" : "shingle",
"position" : 0,
"positionLength" : 3
},
{
"token" : "work",
"start_offset" : 5,
"end_offset" : 9,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "worksuccess",
"start_offset" : 5,
"end_offset" : 19,
"type" : "shingle",
"position" : 1,
"positionLength" : 2
},
{
"token" : "success",
"start_offset" : 12,
"end_offset" : 19,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
我有一份文件,标题为“努力工作与成功”。我需要搜索此文档。如果我输入“Hardwork”(没有空格),它不会返回任何值。但是如果我输入“hard work”,它就会返回文档。
这是我用过的查询:
const search = qObject.search;
const payload = {
from: skip,
size: limit,
_source: [
"id",
"title",
"thumbnailUrl",
"youtubeUrl",
"speaker",
"standards",
"topics",
"schoolDetails",
"uploadTime",
"schoolName",
"description",
"studentDetails",
"studentId"
],
query: {
bool: {
must: {
multi_match: {
fields: [
"title^2",
"standards.standard^2",
"speaker^2",
"schoolDetails.schoolName^2",
"hashtags^2",
"topics.topic^2",
"studentDetails.studentName^2",
],
query: search,
fuzziness: "AUTO",
},
},
},
},
};
如果我搜索标题“努力工作”(包含 space) 然后它 returns 数据是这样的:
"searchResults": [
{
"_id": "92",
"_score": 19.04531,
"_source": {
"standards": {
"standard": "3",
"categoryType": "STANDARD",
"categoryId": "S3"
},
"schoolDetails": {
"categoryType": "SCHOOL",
"schoolId": "TPS123",
"schoolType": "PUBLIC",
"logo": "91748922mn8bo9krcx71.png",
"schoolName": "Carmel CMI Public School"
},
"studentDetails": {
"studentId": 270,
"studentDp": "164646972124244.jpg",
"studentName": "Nelvin",
"about": "good student"
},
"topics": {
"categoryType": "TOPIC",
"topic": "Motivation",
"categoryId": "MY"
},
"youtubeUrl": "https://www.youtube.com/watch?v=wermQ",
"speaker": "Anna Maria Siby",
"description": "How hardwork leads to success - motivational talk by Anna",
"id": 92,
"uploadTime": "2022-03-17T10:59:59.400Z",
"title": "Hard work & Success",
}
},
]
如果我搜索关键字“Hardwork”(无空格),它不会检测到此数据。我需要在其中制作一个 space 或者我需要将相关数据与搜索关键字匹配。有什么解决办法吗,你能帮我解决这个问题吗?
我用 shingle 分析器做了一个例子。
映射:
{
"settings": {
"analysis": {
"filter": {
"shingle_filter": {
"type": "shingle",
"max_shingle_size": 4,
"min_shingle_size": 2,
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"shingle_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
}
}
现在我用你的术语测试了它。请注意,令牌“hardwork”已生成,但其他令牌也已生成,这对您来说可能是个问题。
GET idx-separator-words/_analyze
{
"analyzer": "shingle_analyzer",
"text": ["Hard work & Success"]
}
结果:
{
"tokens" : [
{
"token" : "hard",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "hardwork",
"start_offset" : 0,
"end_offset" : 9,
"type" : "shingle",
"position" : 0,
"positionLength" : 2
},
{
"token" : "hardworksuccess",
"start_offset" : 0,
"end_offset" : 19,
"type" : "shingle",
"position" : 0,
"positionLength" : 3
},
{
"token" : "work",
"start_offset" : 5,
"end_offset" : 9,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "worksuccess",
"start_offset" : 5,
"end_offset" : 19,
"type" : "shingle",
"position" : 1,
"positionLength" : 2
},
{
"token" : "success",
"start_offset" : 12,
"end_offset" : 19,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}