如何在 Elasticsearch 中聚合略有不同的数据?
How do I aggregate slightly different data in Elasticsearch?
我有一个查询,用来计算对端点 /api/v1/blabla 的请求耗时的百分位数:
POST /filebeat-nginx-*/_search
{
"aggs": {
"hosts": {
"terms": {
"field": "host.name",
"size": 1000
},
"aggs": {
"url": {
"terms": {
"field": "nginx.access.url",
"size": 1000
},
"aggs": {
"time_duration_percentiles": {
"percentiles": {
"field": "nginx.access.time_duration",
"percents": [
50,
90
],
"keyed": true
}
}
}
}
}
}
},
"size": 0,
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"prefix": {
"nginx.access.url": "/api/v1/blabla"
}
}
]
}
},
{
"range": {
"@timestamp": {
"gte": "now-10m",
"lte": "now"
}
}
}
]
}
}
}
此端点还会接收一些查询参数,例如 /api/v1/blabla?lang=en&type=active 或 /api/v1/blabla/?lang=en&type=history 等。
因此,响应中会为每个这样的“独立”端点分别显示百分位数:
{
"key" : "/api/v1/blabla?lang=ru",
"doc_count" : 423,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.21199999749660492,
"90.0" : 0.29839999079704277
}
}
},
{
"key" : "/api/v1/blabla?lang=en&type=active",
"doc_count" : 31,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.21699999272823334,
"90.0" : 0.2510000020265579
}
}
},
{
"key" : "/api/v1/blabla?lang=en",
"doc_count" : 4,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.22700000554323196,
"90.0" : 0.24899999797344208
}
}
}
请问是否有办法将这些相似的端点聚合为同一个 /api/v1/blabla,并得到总体的百分位数?
像这样:
{
"key" : "/api/v1/blabla",
"doc_count" : 4,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.22700000554323196,
"90.0" : 0.24899999797344208
}
}
}
您可以尝试在脚本中拆分 nginx.access.url,但请记住,这可能会很慢:
{
"aggs": {
"hosts": {
"terms": {
"field": "host.name",
"size": 1000
},
"aggs": {
"url": {
"terms": {
"script": {
"source": "/\?/.split(doc['nginx.access.url'].value)[0]" <--- here
},
"size": 1000
},
"aggs": {
"time_duration_percentiles": {
"percentiles": {
"field": "nginx.access.time_duration",
"percents": [
50,
90
],
"keyed": true
}
}
}
}
}
}
},
...
}
顺便说一句,在索引文档之前先提取 URI 的主机名、路径、查询字符串等是一种很好的做法。您可以通过 URI parts pipeline 或其他机制来做到这一点。
感谢乔的建议。
我最终采用了这样的方案:
"aggs": {
"uri": {
"terms": {
"script": {
"source": "def uri = /(\/[^\?]+)\?.+/.matcher(doc['nginx.access.url'].value);
if (uri.matches()) {
return uri.group(1)
} else {
return 'no_match'
}"
}
}
}
}
我有一个查询,用来计算对端点 /api/v1/blabla 的请求耗时的百分位数:
POST /filebeat-nginx-*/_search
{
"aggs": {
"hosts": {
"terms": {
"field": "host.name",
"size": 1000
},
"aggs": {
"url": {
"terms": {
"field": "nginx.access.url",
"size": 1000
},
"aggs": {
"time_duration_percentiles": {
"percentiles": {
"field": "nginx.access.time_duration",
"percents": [
50,
90
],
"keyed": true
}
}
}
}
}
}
},
"size": 0,
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"prefix": {
"nginx.access.url": "/api/v1/blabla"
}
}
]
}
},
{
"range": {
"@timestamp": {
"gte": "now-10m",
"lte": "now"
}
}
}
]
}
}
}
此端点还会接收一些查询参数,例如 /api/v1/blabla?lang=en&type=active 或 /api/v1/blabla/?lang=en&type=history 等。因此,响应中会为每个这样的“独立”端点分别显示百分位数:
{
"key" : "/api/v1/blabla?lang=ru",
"doc_count" : 423,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.21199999749660492,
"90.0" : 0.29839999079704277
}
}
},
{
"key" : "/api/v1/blabla?lang=en&type=active",
"doc_count" : 31,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.21699999272823334,
"90.0" : 0.2510000020265579
}
}
},
{
"key" : "/api/v1/blabla?lang=en",
"doc_count" : 4,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.22700000554323196,
"90.0" : 0.24899999797344208
}
}
}
请问是否有办法将这些相似的端点聚合为同一个 /api/v1/blabla,并得到总体的百分位数?
像这样:
{
"key" : "/api/v1/blabla",
"doc_count" : 4,
"time_duration_percentiles" : {
"values" : {
"50.0" : 0.22700000554323196,
"90.0" : 0.24899999797344208
}
}
}
您可以尝试在脚本中拆分 nginx.access.url,但请记住,这可能会很慢:
{
"aggs": {
"hosts": {
"terms": {
"field": "host.name",
"size": 1000
},
"aggs": {
"url": {
"terms": {
"script": {
"source": "/\?/.split(doc['nginx.access.url'].value)[0]" <--- here
},
"size": 1000
},
"aggs": {
"time_duration_percentiles": {
"percentiles": {
"field": "nginx.access.time_duration",
"percents": [
50,
90
],
"keyed": true
}
}
}
}
}
}
},
...
}
顺便说一句,在索引文档之前先提取 URI 的主机名、路径、查询字符串等是一种很好的做法。您可以通过 URI parts pipeline 或其他机制来做到这一点。
感谢乔的建议。
我最终采用了这样的方案:
"aggs": {
"uri": {
"terms": {
"script": {
"source": "def uri = /(\/[^\?]+)\?.+/.matcher(doc['nginx.access.url'].value);
if (uri.matches()) {
return uri.group(1)
} else {
return 'no_match'
}"
}
}
}
}