Elasticsearch - 如何使用关键字字段对桶进行排序
Elasticsearch - How to order buckets using keyword field
我遇到了一个问题:我需要使用关键字(keyword)字段对存储桶进行排序,为此我尝试了两种方法。
- 我一直在尝试根据 top_hits 聚合的结果对聚合(桶)进行排序。我的 top_hits 只包含一个元素,即用户名(username):
"user_data": {
"top_hits": {
"_source": {
"includes": ["username"]
},
"size": 1
}
},
为了对桶进行排序,我尝试使用 bucket_sort,写法如下:
"sorting": {
"bucket_sort": {
"sort": [
{
"user_data>username": { ----> This is the error
"order": "desc"
}
}
],
"from": 0,
"size": 25
}
}
但是我收到一个语法错误,基本上是存储桶路径错误。
- 我尝试的另一种方法是针对用户名再添加一个 max 聚合来取最大值,像这样:
"to_sort" : {
"max": {
"field": "username"
}
}
并使用下面的bucket_sort
"sorting": {
"bucket_sort": {
"sort": [
{
"to_sort": {
"order": "desc"
}
}
],
"from": 0,
"size": 25
}
}
但 max 聚合基本上无法用于关键字字段。
有没有办法使用用户名对我的存储桶进行排序,用户名是关键字字段?
我聚合的父级是
"aggs": {
"CountryId": {
"terms": {
"field": "countryId",
"size": 10000
}
用户名的值在每个bucket之间是不同的
桶的结果是这样的
"buckets" : [
{
"key" : "11111",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "cccccc"
}
}
]
}
}
},
{
"key" : "33333",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "bbbbb"
}
}
]
}
}
},
{
"key" : "22222",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "aaaaa"
}
}
]
}
}
}
]
下面的桶结果是我想要的
"buckets" : [
{
"key" : "22222",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "aaaaa"
}
}
]
}
}
},
{
"key" : "33333",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "bbbbb"
}
}
]
}
}
},
{
"key" : "11111",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "ccccc"
}
}
]
}
}
}
]
可以看到,这些存储桶是按用户名排序的。
我遇到过类似的问题,但在网上没有找到任何答案,所以我尝试自己实现了一个方案,花了将近一周的时间 :/。由于为字符串生成有序哈希码存在限制,这个方案并不总是有效:您需要使用自己的 charset(字符集),并选定一个您认为足以用来排序的字符串前缀长度(我用的是 6)。请做一些测试,因为只能使用 long 类型的正数区间,否则根本行不通(以我的字符集长度,前缀长度最多可以取到 13)。基本上,我参照 here 中手动查找 top_hits 的做法,用 scripted_metric 计算出目标关键字的有序哈希码,并以此作为 bucket_sort 的排序指标。
下面是我的查询,其中按 sso.name 关键字对每个用户最后一次会话的 top_hits 进行排序;您应该可以比较容易地根据自己的问题进行调整。
{
"size": 0,
"timeout": "60s",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "user_id"
}
}
]
}
},
"aggregations": {
"by_user": {
"terms": {
"field": "user_id",
"size": 10000,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"my_top_hits_sso_ordered_hash": {
"scripted_metric": {
"init_script": "state.timestamp_latest = 0L; state.last_sso_ordered_hash = 0L",
"map_script": """
def current_date = doc['login_timestamp'].getValue().toInstant().toEpochMilli();
if (current_date > state.timestamp_latest) {
state.timestamp_latest = current_date;
state.last_sso_ordered_hash = 0L;
if(doc['sso.name'].size()>0) {
String charset = "abcdefghijklmnopqrstuvwxyz";
String ssoName = doc['sso.name'].value;
int length = charset.length();
for(int i = 0; i<Math.min(ssoName.length(), 6); i++) {
state.last_sso_ordered_hash = state.last_sso_ordered_hash*length + charset.indexOf(String.valueOf(ssoName.charAt(i))) + 1;
}
}
}
""",
"combine_script":"return state",
"reduce_script": """
def last_sso_ordered_hash = '';
def timestamp_latest = 0L;
for (s in states) {
if (s.timestamp_latest > (timestamp_latest)) {
timestamp_latest = s.timestamp_latest; last_sso_ordered_hash = s.last_sso_ordered_hash;
}
}
return last_sso_ordered_hash;
"""
}
},
"user_last_session": {
"top_hits": {
"from": 0,
"size": 1,
"sort": [
{
"login_timestamp": {
"order": "desc"
}
}
]
}
},
"pagination": {
"bucket_sort": {
"sort": [
{
"my_top_hits_sso_ordered_hash.value": {
"order": "desc"
}
}
],
"from": 0,
"size": 100
}
}
}
}
}
}
我遇到了一个问题:我需要使用关键字(keyword)字段对存储桶进行排序,为此我尝试了两种方法。
- 我一直在尝试根据 top_hits 聚合的结果对聚合(桶)进行排序。我的 top_hits 只包含一个元素,即用户名(username):
"user_data": {
"top_hits": {
"_source": {
"includes": ["username"]
},
"size": 1
}
},
为了对桶进行排序,我尝试使用 bucket_sort,写法如下:
"sorting": {
"bucket_sort": {
"sort": [
{
"user_data>username": { ----> This is the error
"order": "desc"
}
}
],
"from": 0,
"size": 25
}
}
但是我收到一个语法错误,基本上是存储桶路径错误。
- 我尝试的另一种方法是针对用户名再添加一个 max 聚合来取最大值,像这样:
"to_sort" : {
"max": {
"field": "username"
}
}
并使用下面的bucket_sort
"sorting": {
"bucket_sort": {
"sort": [
{
"to_sort": {
"order": "desc"
}
}
],
"from": 0,
"size": 25
}
}
但 max 聚合基本上无法用于关键字字段。有没有办法使用用户名对我的存储桶进行排序?用户名是一个关键字字段。
我聚合的父级是
"aggs": {
"CountryId": {
"terms": {
"field": "countryId",
"size": 10000
}
用户名的值在每个bucket之间是不同的
桶的结果是这样的
"buckets" : [
{
"key" : "11111",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "cccccc"
}
}
]
}
}
},
{
"key" : "33333",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "bbbbb"
}
}
]
}
}
},
{
"key" : "22222",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "aaaaa"
}
}
]
}
}
}
]
下面的桶结果是我想要的
"buckets" : [
{
"key" : "22222",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "aaaaa"
}
}
]
}
}
},
{
"key" : "33333",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "bbbbb"
}
}
]
}
}
},
{
"key" : "11111",
"doc_count" : 17,
"user_data" : {
"hits" : {
"total" : 10,
"max_score" : 11,
"hits" : [
{
"_index" : "index_name",
"_type" : "index_name",
"_id" : "101010",
"_score" : 0.0,
"_source" : {
"username" : "ccccc"
}
}
]
}
}
}
]
可以看到,这些存储桶是按用户名排序的。
我遇到过类似的问题,但在网上没有找到任何答案,所以我尝试自己实现了一个方案,花了将近一周的时间 :/。由于为字符串生成有序哈希码存在限制,这个方案并不总是有效:您需要使用自己的 charset(字符集),并选定一个您认为足以用来排序的字符串前缀长度(我用的是 6)。请做一些测试,因为只能使用 long 类型的正数区间,否则根本行不通(以我的字符集长度,前缀长度最多可以取到 13)。基本上,我参照 here 中手动查找 top_hits 的做法,用 scripted_metric 计算出目标关键字的有序哈希码,并以此作为 bucket_sort 的排序指标。
下面是我的查询,其中按 sso.name 关键字对每个用户最后一次会话的 top_hits 进行排序;您应该可以比较容易地根据自己的问题进行调整。
{
"size": 0,
"timeout": "60s",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "user_id"
}
}
]
}
},
"aggregations": {
"by_user": {
"terms": {
"field": "user_id",
"size": 10000,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"my_top_hits_sso_ordered_hash": {
"scripted_metric": {
"init_script": "state.timestamp_latest = 0L; state.last_sso_ordered_hash = 0L",
"map_script": """
def current_date = doc['login_timestamp'].getValue().toInstant().toEpochMilli();
if (current_date > state.timestamp_latest) {
state.timestamp_latest = current_date;
state.last_sso_ordered_hash = 0L;
if(doc['sso.name'].size()>0) {
String charset = "abcdefghijklmnopqrstuvwxyz";
String ssoName = doc['sso.name'].value;
int length = charset.length();
for(int i = 0; i<Math.min(ssoName.length(), 6); i++) {
state.last_sso_ordered_hash = state.last_sso_ordered_hash*length + charset.indexOf(String.valueOf(ssoName.charAt(i))) + 1;
}
}
}
""",
"combine_script":"return state",
"reduce_script": """
def last_sso_ordered_hash = '';
def timestamp_latest = 0L;
for (s in states) {
if (s.timestamp_latest > (timestamp_latest)) {
timestamp_latest = s.timestamp_latest; last_sso_ordered_hash = s.last_sso_ordered_hash;
}
}
return last_sso_ordered_hash;
"""
}
},
"user_last_session": {
"top_hits": {
"from": 0,
"size": 1,
"sort": [
{
"login_timestamp": {
"order": "desc"
}
}
]
}
},
"pagination": {
"bucket_sort": {
"sort": [
{
"my_top_hits_sso_ordered_hash.value": {
"order": "desc"
}
}
],
"from": 0,
"size": 100
}
}
}
}
}
}