嵌套 Terms 聚合,并按 Sum 聚合的结果排序
Nested Terms Aggregation with sort on Sum Aggregation
我有如下嵌套的类:
/// <summary>
/// A service entry document: the parent type that carries a list of
/// <see cref="ProjectT"/> items.
/// </summary>
/// <remarks>
/// Member names are deliberately left in lower case / snake_case; presumably
/// they are meant to match the Elasticsearch field names, so do not rename them
/// without checking the mapping and queries that reference them.
/// </remarks>
public class ServiceEntries
{
    /// <summary>Name of the service.</summary>
    public string servicename { get; set; }

    /// <summary>Timestamp of the entry.</summary>
    public DateTime timestamp { get; set; }

    /// <summary>Projects that belong to this entry.</summary>
    public List<ProjectT> projects { get; set; }
}
/// <summary>
/// A single project: the engineer responsible for it and the money attached to it.
/// </summary>
public class ProjectT
{
    /// <summary>Name of the engineer responsible for the project.</summary>
    public string project_engineer { get; set; }

    /// <summary>Amount of money associated with the project.</summary>
    public decimal project_money { get; set; }
}
我需要按 project_engineer 做 Terms 聚合,并按 project_money 的 Sum(求和)结果对聚合桶进行排序。
我尝试参照 https://nest.azurewebsites.net/nest/aggregations/nested.html 来实现,但没有成功,不确定应该把 .Terms 聚合放在哪里。
请帮忙。
谢谢
这是一个使用 NEST 1.8.0 和 Elasticsearch 1.7.4 的例子。
首先,让我们设置一个 ConnectionSettings,以便轻松查看请求和响应;这个示例是我在 LINQPad 中编写的,因此执行时输出会出现在底部窗格中:
// Connection settings for the demo. The status handler registered below writes
// every request and response to the console.
var settings = new ConnectionSettings(new Uri("http://localhost:9200"))
    .ExposeRawResponse(true)
    .PrettyJson()
    .SetDefaultIndex("entries")
    // Use servicename as the document id for ServiceEntries
    .MapIdPropertyFor<ServiceEntries>(e => e.servicename)
    // Fine for a demo, but logging **all** requests is not something
    // we want to do in production
    .SetConnectionStatusHandler(status =>
    {
        var verb = status.RequestMethod.ToUpperInvariant();

        // Request line, followed by the request body when one was sent
        if (status.Request == null)
        {
            Console.WriteLine("{0} {1}\n", verb, status.RequestUrl);
        }
        else
        {
            Console.WriteLine("{0} {1} \n{2}\n", verb, status.RequestUrl,
                Encoding.UTF8.GetString(status.Request));
        }

        var divider = new String('-', 30);

        // Status code, followed by the raw response body when there is one
        if (status.ResponseRaw == null)
        {
            Console.WriteLine("Status: {0}\n\n{1}\n", status.HttpStatusCode, divider);
        }
        else
        {
            Console.WriteLine("Status: {0}\n{1}\n\n{2}\n", status.HttpStatusCode,
                Encoding.UTF8.GetString(status.ResponseRaw), divider);
        }
    });

// The client used by all of the calls that follow
var client = new ElasticClient(settings);
现在我们有一个客户端可以使用,我们首先需要创建一个索引,其中包含适合手头问题的映射
// Create the "entries" index with an explicit mapping for ServiceEntries
client.CreateIndex("entries", index => index
    .AddMapping<ServiceEntries>(mapping => mapping
        .Properties(props => props
            // servicename is the string id on the POCO; no need to analyze it
            .String(field => field
                .Name(entry => entry.servicename)
                .Index(FieldIndexOption.NotAnalyzed)
            )
            // projects is mapped as a nested object of ProjectT
            .NestedObject<ProjectT>(nested => nested
                .Name(entry => entry.projects.First())
                .MapFromAttributes()
                .Properties(projectProps => projectProps
                    // Map engineer names as not analyzed so that the raw values
                    // are available to aggregate on. If they also need to be
                    // searchable, take a look at mapping them as a multi_field.
                    .String(field => field
                        .Name(project => project.project_engineer)
                        .Index(FieldIndexOption.NotAnalyzed)
                    )
                )
            )
        )
    )
);
有了索引,让我们生成一些数据来使用
// Four engineers; entry i is assigned to engineerNames[i % 4].
var engineerNames = new[] { "Paul", "John", "Ringo", "George" };

// Capture the base date once so that every timestamp is offset from the same
// day, even if the sequence happens to be enumerated across midnight UTC.
var baseDate = DateTime.UtcNow.Date;

// 100 entries, each carrying a single project whose money equals the entry number.
// ProjectT is referenced without the LINQPad-only "UserQuery." qualifier so that
// the snippet compiles both inside and outside LINQPad.
var entries = Enumerable.Range(1, 100).Select(i => new ServiceEntries
{
    servicename = i.ToString(),
    timestamp = baseDate.AddDays(i),
    projects = new List<ProjectT>
    {
        new ProjectT
        {
            project_engineer = engineerNames[i % 4],
            project_money = i
        }
    }
});

// Bulk index all ServiceEntries and refresh the index after indexing
// so we can search on it immediately
client.Bulk(b => b.IndexMany(entries, (bd, d) => bd.Document(d)).Refresh());
现在是有趣的部分!要对嵌套类型执行聚合,我们首先需要设置一个 Nested 聚合,其 path 指向该嵌套类型,然后把想要在嵌套类型上执行的聚合嵌套在它里面。最后,我们还要按某个子聚合的结果进行排序。
// Aggregation-only search (Size(0) asks for no hits back)
var response = client.Search<ServiceEntries>(search => search
    .Size(0)
    .Aggregations(aggs => aggs
        // Nested aggregation; its path points at the nested type
        .Nested("project", nested => nested
            .Path(entry => entry.projects)
            .Aggregations(nestedAggs => nestedAggs
                // One bucket per project engineer
                .Terms("project_engineers", terms => terms
                    .Field(entry => entry.projects.First().project_engineer)
                    // Order the engineer buckets by the descending sum of project money
                    .OrderDescending("project_money.value")
                    .Aggregations(termAggs => termAggs
                        // Sum of project money for each project engineer
                        .Sum("project_money", sum => sum
                            .Field(entry => entry.projects.First().project_money)
                        )
                    )
                )
            )
        )
    )
);
这会生成如下 JSON 请求:
POST http://localhost:9200/entries/serviceentries/_search?pretty=true
{
"size": 0,
"aggs": {
"project": {
"nested": {
"path": "projects"
},
"aggs": {
"project_engineers": {
"terms": {
"field": "projects.project_engineer",
"order": {
"project_money.value": "desc"
}
},
"aggs": {
"project_money": {
"sum": {
"field": "projects.project_money"
}
}
}
}
}
}
}
}
并产生以下结果
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 100,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"project" : {
"doc_count" : 100,
"project_engineers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ {
"key" : "Paul",
"doc_count" : 25,
"project_money" : {
"value" : 1300.0
}
}, {
"key" : "George",
"doc_count" : 25,
"project_money" : {
"value" : 1275.0
}
}, {
"key" : "Ringo",
"doc_count" : 25,
"project_money" : {
"value" : 1250.0
}
}, {
"key" : "John",
"doc_count" : 25,
"project_money" : {
"value" : 1225.0
}
} ]
}
}
}
}
我有如下嵌套的类:
/// <summary>
/// A service entry document: the parent type that carries a list of
/// <see cref="ProjectT"/> items.
/// </summary>
/// <remarks>
/// Member names are deliberately left in lower case / snake_case; presumably
/// they are meant to match the Elasticsearch field names, so do not rename them
/// without checking the mapping and queries that reference them.
/// </remarks>
public class ServiceEntries
{
    /// <summary>Name of the service.</summary>
    public string servicename { get; set; }

    /// <summary>Timestamp of the entry.</summary>
    public DateTime timestamp { get; set; }

    /// <summary>Projects that belong to this entry.</summary>
    public List<ProjectT> projects { get; set; }
}
/// <summary>
/// A single project: the engineer responsible for it and the money attached to it.
/// </summary>
public class ProjectT
{
    /// <summary>Name of the engineer responsible for the project.</summary>
    public string project_engineer { get; set; }

    /// <summary>Amount of money associated with the project.</summary>
    public decimal project_money { get; set; }
}
我需要按 project_engineer 做 Terms 聚合,并按 project_money 的 Sum(求和)结果对聚合桶进行排序。
我尝试参照 https://nest.azurewebsites.net/nest/aggregations/nested.html 来实现,但没有成功,不确定应该把 .Terms 聚合放在哪里。
请帮忙。
谢谢
这是一个使用 NEST 1.8.0 和 Elasticsearch 1.7.4 的例子。
首先,让我们设置一个 ConnectionSettings,以便轻松查看请求和响应;这个示例是我在 LINQPad 中编写的,因此执行时输出会出现在底部窗格中:
var settings = new ConnectionSettings(new Uri("http://localhost:9200"))
.ExposeRawResponse(true)
.PrettyJson()
.SetDefaultIndex("entries")
// Map servicename as the id for ServiceEntries types
.MapIdPropertyFor<ServiceEntries>(entry => entry.servicename)
// This is fine for the purposes of this demo, but we don't want
// to be logging **all** requests in production
.SetConnectionStatusHandler(r =>
{
// Log the request line, plus the request body when there is one
if (r.Request != null)
{
Console.WriteLine("{0} {1} \n{2}\n", r.RequestMethod.ToUpperInvariant(), r.RequestUrl,
Encoding.UTF8.GetString(r.Request));
}
else
{
Console.WriteLine("{0} {1}\n", r.RequestMethod.ToUpperInvariant(), r.RequestUrl);
}
// Log the status code, plus the raw response body when there is one,
// followed by a 30-dash separator line
if (r.ResponseRaw != null)
{
Console.WriteLine("Status: {0}\n{1}\n\n{2}\n", r.HttpStatusCode, Encoding.UTF8.GetString(r.ResponseRaw), new String('-', 30));
}
else
{
Console.WriteLine("Status: {0}\n\n{1}\n", r.HttpStatusCode, new String('-', 30));
}
});
// The client used by all of the calls that follow
var client = new ElasticClient(settings);
现在我们有一个客户端可以使用,我们首先需要创建一个索引,其中包含适合手头问题的映射
// Create the "entries" index with an explicit mapping for ServiceEntries
client.CreateIndex("entries", index => index
    .AddMapping<ServiceEntries>(mapping => mapping
        .Properties(props => props
            // servicename is the string id on the POCO; no need to analyze it
            .String(field => field
                .Name(entry => entry.servicename)
                .Index(FieldIndexOption.NotAnalyzed)
            )
            // projects is mapped as a nested object of ProjectT
            .NestedObject<ProjectT>(nested => nested
                .Name(entry => entry.projects.First())
                .MapFromAttributes()
                .Properties(projectProps => projectProps
                    // Map engineer names as not analyzed so that the raw values
                    // are available to aggregate on. If they also need to be
                    // searchable, take a look at mapping them as a multi_field.
                    .String(field => field
                        .Name(project => project.project_engineer)
                        .Index(FieldIndexOption.NotAnalyzed)
                    )
                )
            )
        )
    )
);
有了索引,让我们生成一些数据来使用
// Four engineers; entry i is assigned to engineerNames[i % 4].
var engineerNames = new[] { "Paul", "John", "Ringo", "George" };

// Capture the base date once so that every timestamp is offset from the same
// day, even if the sequence happens to be enumerated across midnight UTC.
var baseDate = DateTime.UtcNow.Date;

// 100 entries, each carrying a single project whose money equals the entry number.
// ProjectT is referenced without the LINQPad-only "UserQuery." qualifier so that
// the snippet compiles both inside and outside LINQPad.
var entries = Enumerable.Range(1, 100).Select(i => new ServiceEntries
{
    servicename = i.ToString(),
    timestamp = baseDate.AddDays(i),
    projects = new List<ProjectT>
    {
        new ProjectT
        {
            project_engineer = engineerNames[i % 4],
            project_money = i
        }
    }
});

// Bulk index all ServiceEntries and refresh the index after indexing
// so we can search on it immediately
client.Bulk(b => b.IndexMany(entries, (bd, d) => bd.Document(d)).Refresh());
现在是有趣的部分!要对嵌套类型执行聚合,我们首先需要设置一个 Nested 聚合,其 path 指向该嵌套类型,然后把想要在嵌套类型上执行的聚合嵌套在它里面。最后,我们还要按某个子聚合的结果进行排序。
// Aggregation-only search (Size(0) asks for no hits back)
var response = client.Search<ServiceEntries>(search => search
    .Size(0)
    .Aggregations(aggs => aggs
        // Nested aggregation; its path points at the nested type
        .Nested("project", nested => nested
            .Path(entry => entry.projects)
            .Aggregations(nestedAggs => nestedAggs
                // One bucket per project engineer
                .Terms("project_engineers", terms => terms
                    .Field(entry => entry.projects.First().project_engineer)
                    // Order the engineer buckets by the descending sum of project money
                    .OrderDescending("project_money.value")
                    .Aggregations(termAggs => termAggs
                        // Sum of project money for each project engineer
                        .Sum("project_money", sum => sum
                            .Field(entry => entry.projects.First().project_money)
                        )
                    )
                )
            )
        )
    )
);
这会生成如下 JSON 请求:
POST http://localhost:9200/entries/serviceentries/_search?pretty=true
{
"size": 0,
"aggs": {
"project": {
"nested": {
"path": "projects"
},
"aggs": {
"project_engineers": {
"terms": {
"field": "projects.project_engineer",
"order": {
"project_money.value": "desc"
}
},
"aggs": {
"project_money": {
"sum": {
"field": "projects.project_money"
}
}
}
}
}
}
}
}
并产生以下结果
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 100,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"project" : {
"doc_count" : 100,
"project_engineers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ {
"key" : "Paul",
"doc_count" : 25,
"project_money" : {
"value" : 1300.0
}
}, {
"key" : "George",
"doc_count" : 25,
"project_money" : {
"value" : 1275.0
}
}, {
"key" : "Ringo",
"doc_count" : 25,
"project_money" : {
"value" : 1250.0
}
}, {
"key" : "John",
"doc_count" : 25,
"project_money" : {
"value" : 1225.0
}
} ]
}
}
}
}