如何使用 DSL 查询丢弃 ElasticSearch 中的重复值?
How to discard the Duplicate values in ElasticSearch using DSL Query?
我想根据查询词 CUSTOMER 获取 attribute_name。问题在于返回的 attribute_name 中有很多重复值,我希望把这些重复值去掉。有人可以帮我解决这个问题吗?我当前使用的查询如下:
{
"_source": [
"attribute_name"
],
"size": 500,
"query": {
"multi_match": {
"query": "CUSTOMER",
"fields": [
"hierarchy_name",
"attribute_name"
]
}
}
}
假设下面是我目前得到的输出,我想去掉其中重复的 attribute_name:
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "tD6WDnkBQTXQIneq8Ypr",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "3j6WDnkBQTXQIneq8Yps",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "nT6WDnkBQTXQIneqyonu",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "6D6WDnkBQTXQIneq8Yps",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
我期望的输出应该是这样的:
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "3j6WDnkBQTXQIneq8Yps",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "nT6WDnkBQTXQIneqyonu",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
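您可以使用 collapse 参数,根据某个字段的值折叠搜索结果,从而去除重复项。注意:用于折叠的字段必须是单值的 keyword 或数值类型字段(并启用 doc_values),因此下面的示例使用 attribute_name.keyword 子字段,而不是 text 类型的 attribute_name。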
下面给出一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例。
索引映射:
{
"mappings": {
"properties": {
"attribute_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
索引数据:
{
"attribute_name": "CUSTOMER-ALL"
}
{
"attribute_name": "CUSTOMER-ALL"
}
{
"attribute_name": "CUSTOMER"
}
{
"attribute_name": "CUSTOMER"
}
搜索查询:
{
"query": {
"multi_match": {
"query": "CUSTOMER",
"fields": [
"attribute_name"
]
}
},
"collapse": {
"field": "attribute_name.keyword"
}
}
搜索结果:
"hits": [
{
"_index": "67260491",
"_type": "_doc",
"_id": "1",
"_score": 0.12199639,
"_source": {
"attribute_name": "CUSTOMER"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER"
]
}
},
{
"_index": "67260491",
"_type": "_doc",
"_id": "3",
"_score": 0.09271726,
"_source": {
"attribute_name": "CUSTOMER-ALL"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER-ALL"
]
}
}
]
更新 1:
如果您只是想去除重复的数据(不附加任何查询条件,即对索引中的全部文档去重),可以运行下面的查询:
{
"collapse": {
"field": "attribute_name.keyword"
}
}
搜索结果将是
"hits": [
{
"_index": "67276433",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"attribute_name": "CUSTOMER"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER"
]
}
},
{
"_index": "67276433",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"attribute_name": "REGION"
},
"fields": {
"attribute_name.keyword": [
"REGION"
]
}
}
]
我想根据查询词 CUSTOMER 获取 attribute_name。问题在于返回的 attribute_name 中有很多重复值,我希望把这些重复值去掉。有人可以帮我解决这个问题吗?我当前使用的查询如下:
{
"_source": [
"attribute_name"
],
"size": 500,
"query": {
"multi_match": {
"query": "CUSTOMER",
"fields": [
"hierarchy_name",
"attribute_name"
]
}
}
}
假设下面是我目前得到的输出,我想去掉其中重复的 attribute_name:
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "tD6WDnkBQTXQIneq8Ypr",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "3j6WDnkBQTXQIneq8Yps",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "nT6WDnkBQTXQIneqyonu",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "6D6WDnkBQTXQIneq8Yps",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
我期望的输出应该是这样的:
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "3j6WDnkBQTXQIneq8Yps",
"_score": 2.5454113,
"_source": {
"attribute_name": "CUSTOMER"
}
},
{
"_index": "planlytx_records",
"_type": "_doc",
"_id": "nT6WDnkBQTXQIneqyonu",
"_score": 1.8101583,
"_source": {
"attribute_name": "REGION"
}
},
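您可以使用 collapse 参数,根据某个字段的值折叠搜索结果,从而去除重复项。注意:用于折叠的字段必须是单值的 keyword 或数值类型字段(并启用 doc_values),因此下面的示例使用 attribute_name.keyword 子字段,而不是 text 类型的 attribute_name。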
下面给出一个包含索引映射、索引数据、搜索查询和搜索结果的可运行示例。
索引映射:
{
"mappings": {
"properties": {
"attribute_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
索引数据:
{
"attribute_name": "CUSTOMER-ALL"
}
{
"attribute_name": "CUSTOMER-ALL"
}
{
"attribute_name": "CUSTOMER"
}
{
"attribute_name": "CUSTOMER"
}
搜索查询:
{
"query": {
"multi_match": {
"query": "CUSTOMER",
"fields": [
"attribute_name"
]
}
},
"collapse": {
"field": "attribute_name.keyword"
}
}
搜索结果:
"hits": [
{
"_index": "67260491",
"_type": "_doc",
"_id": "1",
"_score": 0.12199639,
"_source": {
"attribute_name": "CUSTOMER"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER"
]
}
},
{
"_index": "67260491",
"_type": "_doc",
"_id": "3",
"_score": 0.09271726,
"_source": {
"attribute_name": "CUSTOMER-ALL"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER-ALL"
]
}
}
]
更新 1:
如果您只是想去除重复的数据(不附加任何查询条件,即对索引中的全部文档去重),可以运行下面的查询:
{
"collapse": {
"field": "attribute_name.keyword"
}
}
搜索结果将是
"hits": [
{
"_index": "67276433",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"attribute_name": "CUSTOMER"
},
"fields": {
"attribute_name.keyword": [
"CUSTOMER"
]
}
},
{
"_index": "67276433",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"attribute_name": "REGION"
},
"fields": {
"attribute_name.keyword": [
"REGION"
]
}
}
]