有条件地对数组中的元素进行分组和求和
Conditionally group and sum the elements of an array
我想按 AppName 分组,统计特定日期内每个应用程序的 PrestoBarImpression、PrestoKeyCountChange、PrestoTileImpression 各有多少(只需对 OrderCount 求和)。
这样我就可以用这些信息生成报告。我需要每个应用程序的 PrestoTileImpression 订单数、PrestoBarImpression 订单数和 PrestoTileClick 订单数。
下面是我的文档。
{
"ClientId": "XYZ123",
"location": {
"Name": "Hyderabad",
"Country": "India",
"Zip": "500084",
"Gps": {
"lat": "17.463607",
"lon": "78.344279"
}
},
"Network": {
"Operator": "Airtel",
"Type": "wifi",
"TowerID": "123",
"IP": "1.1.1.1"
},
"SessionTimeStamp": {
"Start": ISODate("2015-06-02T05:36:49.045 Z"),
"End": ISODate("2015-06-02T05:36:56.045 Z"),
"Duration": "7000"
},
"AppName": "WhatsApp",
"Text": "Key1 Key2 Key3 Key4",
"Actions": [{
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"duration": 100,
"OrderCount": 1
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"OrderCount": 2
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key2",
"prestoCount": 2,
"duration": 150,
"OrderCount": 3
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": "2",
"keyword": "key2",
"prestoCount": 2,
"OrderCount": 4
}, {
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key3",
"prestoCount": 2,
"duration": 200,
"OrderCount": 5
}, {
"Type": "PrestoTileImpression",
"CampaignId": 1,
"duration": 200,
"OrderCount": 6
}, {
"Type": "PrestoTileImpression",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"duration": 200,
"OrderCount": 7
}, {
"Type": "PrestoTileClick",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"OrderCount": 8
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key4",
"prestoCount": 2,
"duration": 150,
"OrderCount": 9
}]
}
参考 @Viswas 的回答,我编写了下面的查询,并得到了如下输出。
查询
// Aggregation pipeline used by the question author (mongo shell syntax).
[
// Stage 1: keep documents whose SessionTimeStamp.Start lies between the two dates (inclusive).
{
"$match":{
"SessionTimeStamp.Start":{
"$gte": ISODate("2015-06-01T18:30:00.000 Z"),
"$lte": ISODate("2015-06-04T18:29:59.000 Z")
}
}
},
// Stage 2: emit one document per entry of the Actions array.
{
"$unwind":"$Actions"
},
// Stage 3: keep only the unwound entries whose Type is one of the three listed values.
{
"$match":{
"Actions.Type":{
"$in":[
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]
}
}
},
// Stage 4: group by (AppName, Actions.Type) and sum Actions.OrderCount into "total".
{
"$group":{
"_id":{
"AppName":"$AppName",
"type":"$Actions.Type"
},
"total":{
"$sum":"$Actions.OrderCount"
}
}
},
// Stage 5: order the groups by "total", ascending.
{
"$sort":{
"total":1,
}
}
]
输出
{
"result":[
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"hike",
"type":"PrestoBarImpression"
},
"total":18
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoBarImpression"
},
"total":18
}
],
"ok":1.0000000000000000
}
我需要以下格式的输出
[
{
"AppName":"WhatsApp",
" PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 (This is how many times thee Application presents in document, because I need to find top 10 apps Need to sort the output by this count)"
},
{
"AppName":"Hike",
" PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 "
}
]
您应该使用 aggregation 来获得结果。
如果你想得到给定(特定)日期下按 ActionType 汇总的 OrderCount,那么需要先把 SessionTimeStamp.Start 匹配到您的日期,然后按 Actions.Type 对数据进行 $group 分组。
查询如下:
// Sum Actions.OrderCount per application and action type for the sessions
// that started at the given timestamp.
// Each result document has the fields: App, ActionType, OrderCount.
db.collection.aggregate([
    // Exact match on the session start time. To cover a whole day, replace
    // this with a { $gte: ..., $lt: ... } range on the same field.
    {
        $match: {
            "SessionTimeStamp.Start": ISODate("2015-06-02T05:36:49.045Z")
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    // Group on the AppName *field* ("$AppName", not the literal string
    // "AppName") together with the action type, so that totals belonging to
    // different applications are never merged into one row.
    {
        $group: {
            "_id": {
                "App": "$AppName",
                "ActionType": "$Actions.Type"
            },
            "OrderCount": {
                $sum: "$Actions.OrderCount"
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "_id": 0,
            "OrderCount": 1,
            "ActionType": "$_id.ActionType",
            "App": "$_id.App"
        }
    }
])
在问题作者评论之后的编辑:
参考作者的重复问题(Reference to Duplicate Question)。
请检查各 appName 对应文档中 count 字段的拼写,因为部分 appName 下该字段的拼写不一致(count、Count)。
您应该使用以下查询-
// Per (AppName, Actions.Type): sum the order counts and count the matching
// action entries, for sessions inside the given Start/End window.
// Each result document has the fields: AppName, Type, count, appCount.
db.collection.aggregate([
    // Keep sessions that start on/after the lower bound and end on/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": {
                $gte: ISODate("2015-06-02T05:36:49.045Z")
            },
            "SessionTimeStamp.End": {
                $lte: ISODate("2015-06-02T05:36:56.045Z")
            }
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    {
        $group: {
            "_id": {
                "AppName": "$AppName",
                "Type": "$Actions.Type"
            },
            // The sample document stores the value under "OrderCount";
            // summing a field that does not exist would always yield 0.
            "count": {
                "$sum": "$Actions.OrderCount"
            },
            // Number of unwound Actions entries that fell into this group.
            "appCount": {
                $sum: 1
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
])
如果您仍想把动态值用作键名,那么可以遍历游标 -
// Same aggregation as a per-(AppName, Type) summary, followed by a cursor
// walk that prints each row with the action type used as a dynamic key name.
db.collection.aggregate([
    // Keep sessions that start on/after the lower bound and end on/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": { $gte: ISODate("2015-06-02T05:36:49.045Z") },
            "SessionTimeStamp.End": { $lte: ISODate("2015-06-02T05:36:56.045Z") }
        }
    },
    // Emit one document per entry of the Actions array.
    { $unwind: "$Actions" },
    {
        $group: {
            "_id": { "AppName": "$AppName", "Type": "$Actions.Type" },
            // The sample document stores the value under "OrderCount";
            // summing a field that does not exist would always yield 0.
            "count": { "$sum": "$Actions.OrderCount" },
            // Number of unwound Actions entries that fell into this group.
            "appCount": { $sum: 1 }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
]).forEach(function (myDoc) {
    // Build one object per result row; the key for the total is the row's Type value.
    var feeType = {};
    feeType["AppName"] = myDoc.AppName;
    feeType[myDoc.Type] = myDoc.count;
    feeType["appCount"] = myDoc.appCount;
    printjson (feeType);
})
这其实就是对数组内容进行过滤,只让您需要的条目参与求和:
// Sum Actions.OrderCount per (AppName, day of SessionTimeStamp.End, Actions.Type),
// restricted to the three listed action types.
db.collection.aggregate([
// Document-level filter: keep only documents that have at least one Actions
// entry of a wanted type, so fewer documents reach $unwind.
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Emit one document per entry of the Actions array.
{ "$unwind": "$Actions" },
// Entry-level filter: the same condition again, because a matching document
// can still contain Actions entries of other types.
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Group by AppName, the year/month/day-of-month of SessionTimeStamp.End,
// and the action type; "total" is the sum of OrderCount within each group.
{ "$group": {
"_id": {
"AppName": "$AppName",
"day": {
"year": { "$year": "$SessionTimeStamp.End" },
"month": { "$month": "$SessionTimeStamp.End" },
"day": { "$dayOfMonth": "$SessionTimeStamp.End" }
},
"type": "$Actions.Type"
},
"total": { "$sum": "$Actions.OrderCount" }
}},
// Order by app name, day and type ascending, then by total descending;
// adjust to whatever ordering the report needs.
{ "$sort": {
"_id.AppName": 1,
"_id.day": 1,
"_id.type": 1,
"total": -1
}}
])
或者,如果您想要每个文档的所有这些字段,则在现有组之后添加:
// Additional pipeline stage, placed after a $group stage that produced
// _id.AppName, _id.day, _id.type and total. It regroups by (AppName, day)
// and turns each action type into its own field.
{ "$group": {
"_id": {
"AppName": "$_id.AppName",
"day": "$_id.day",
},
// For each field below: add "total" when the incoming row's type matches
// the field name, otherwise add 0.
"PrestoBarImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoBarImpression" ] },
"$total",
0
]
}},
"PrestoKeyCountChange": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoKeyCountChange" ] },
"$total",
0
]
}},
"PrestoTileImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoTileImpression" ] },
"$total",
0
]
}}
}}
这样就能把每个 "AppName" 和 "day" 组合下各字段的总数汇总到单个文档中。
您可能还需要在第一个 $match 管道阶段中加入日期范围("date range")条件,这样只统计指定日期之间的文档,而不是把集合中的所有内容都加起来。
我想按 AppName 分组,统计特定日期内每个应用程序的 PrestoBarImpression、PrestoKeyCountChange、PrestoTileImpression 各有多少(只需对 OrderCount 求和)。
这样我就可以用这些信息生成报告。我需要每个应用程序的 PrestoTileImpression 订单数、PrestoBarImpression 订单数和 PrestoTileClick 订单数。
下面是我的文档。
{
"ClientId": "XYZ123",
"location": {
"Name": "Hyderabad",
"Country": "India",
"Zip": "500084",
"Gps": {
"lat": "17.463607",
"lon": "78.344279"
}
},
"Network": {
"Operator": "Airtel",
"Type": "wifi",
"TowerID": "123",
"IP": "1.1.1.1"
},
"SessionTimeStamp": {
"Start": ISODate("2015-06-02T05:36:49.045 Z"),
"End": ISODate("2015-06-02T05:36:56.045 Z"),
"Duration": "7000"
},
"AppName": "WhatsApp",
"Text": "Key1 Key2 Key3 Key4",
"Actions": [{
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"duration": 100,
"OrderCount": 1
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"OrderCount": 2
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key2",
"prestoCount": 2,
"duration": 150,
"OrderCount": 3
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": "2",
"keyword": "key2",
"prestoCount": 2,
"OrderCount": 4
}, {
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key3",
"prestoCount": 2,
"duration": 200,
"OrderCount": 5
}, {
"Type": "PrestoTileImpression",
"CampaignId": 1,
"duration": 200,
"OrderCount": 6
}, {
"Type": "PrestoTileImpression",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"duration": 200,
"OrderCount": 7
}, {
"Type": "PrestoTileClick",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"OrderCount": 8
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key4",
"prestoCount": 2,
"duration": 150,
"OrderCount": 9
}]
}
参考 @Viswas 的回答,我编写了下面的查询,并得到了如下输出。
查询
// Aggregation pipeline used by the question author (mongo shell syntax).
[
// Stage 1: keep documents whose SessionTimeStamp.Start lies between the two dates (inclusive).
{
"$match":{
"SessionTimeStamp.Start":{
"$gte": ISODate("2015-06-01T18:30:00.000 Z"),
"$lte": ISODate("2015-06-04T18:29:59.000 Z")
}
}
},
// Stage 2: emit one document per entry of the Actions array.
{
"$unwind":"$Actions"
},
// Stage 3: keep only the unwound entries whose Type is one of the three listed values.
{
"$match":{
"Actions.Type":{
"$in":[
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]
}
}
},
// Stage 4: group by (AppName, Actions.Type) and sum Actions.OrderCount into "total".
{
"$group":{
"_id":{
"AppName":"$AppName",
"type":"$Actions.Type"
},
"total":{
"$sum":"$Actions.OrderCount"
}
}
},
// Stage 5: order the groups by "total", ascending.
{
"$sort":{
"total":1,
}
}
]
输出
{
"result":[
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"hike",
"type":"PrestoBarImpression"
},
"total":18
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoBarImpression"
},
"total":18
}
],
"ok":1.0000000000000000
}
我需要以下格式的输出
[
{
"AppName":"WhatsApp",
" PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 (This is how many times thee Application presents in document, because I need to find top 10 apps Need to sort the output by this count)"
},
{
"AppName":"Hike",
" PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 "
}
]
您应该使用 aggregation 来获得结果。
如果你想得到给定(特定)日期下按 ActionType 汇总的 OrderCount,那么需要先把 SessionTimeStamp.Start 匹配到您的日期,然后按 Actions.Type 对数据进行 $group 分组。
查询如下:
// Sum Actions.OrderCount per application and action type for the sessions
// that started at the given timestamp.
// Each result document has the fields: App, ActionType, OrderCount.
db.collection.aggregate([
    // Exact match on the session start time. To cover a whole day, replace
    // this with a { $gte: ..., $lt: ... } range on the same field.
    {
        $match: {
            "SessionTimeStamp.Start": ISODate("2015-06-02T05:36:49.045Z")
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    // Group on the AppName *field* ("$AppName", not the literal string
    // "AppName") together with the action type, so that totals belonging to
    // different applications are never merged into one row.
    {
        $group: {
            "_id": {
                "App": "$AppName",
                "ActionType": "$Actions.Type"
            },
            "OrderCount": {
                $sum: "$Actions.OrderCount"
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "_id": 0,
            "OrderCount": 1,
            "ActionType": "$_id.ActionType",
            "App": "$_id.App"
        }
    }
])
在问题作者评论之后的编辑:
参考作者的重复问题(Reference to Duplicate Question)。
请检查各 appName 对应文档中 count 字段的拼写,因为部分 appName 下该字段的拼写不一致(count、Count)。
您应该使用以下查询-
// Per (AppName, Actions.Type): sum the order counts and count the matching
// action entries, for sessions inside the given Start/End window.
// Each result document has the fields: AppName, Type, count, appCount.
db.collection.aggregate([
    // Keep sessions that start on/after the lower bound and end on/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": {
                $gte: ISODate("2015-06-02T05:36:49.045Z")
            },
            "SessionTimeStamp.End": {
                $lte: ISODate("2015-06-02T05:36:56.045Z")
            }
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    {
        $group: {
            "_id": {
                "AppName": "$AppName",
                "Type": "$Actions.Type"
            },
            // The sample document stores the value under "OrderCount";
            // summing a field that does not exist would always yield 0.
            "count": {
                "$sum": "$Actions.OrderCount"
            },
            // Number of unwound Actions entries that fell into this group.
            "appCount": {
                $sum: 1
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
])
如果您仍想把动态值用作键名,那么可以遍历游标 -
// Same aggregation as a per-(AppName, Type) summary, followed by a cursor
// walk that prints each row with the action type used as a dynamic key name.
db.collection.aggregate([
    // Keep sessions that start on/after the lower bound and end on/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": { $gte: ISODate("2015-06-02T05:36:49.045Z") },
            "SessionTimeStamp.End": { $lte: ISODate("2015-06-02T05:36:56.045Z") }
        }
    },
    // Emit one document per entry of the Actions array.
    { $unwind: "$Actions" },
    {
        $group: {
            "_id": { "AppName": "$AppName", "Type": "$Actions.Type" },
            // The sample document stores the value under "OrderCount";
            // summing a field that does not exist would always yield 0.
            "count": { "$sum": "$Actions.OrderCount" },
            // Number of unwound Actions entries that fell into this group.
            "appCount": { $sum: 1 }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
]).forEach(function (myDoc) {
    // Build one object per result row; the key for the total is the row's Type value.
    var feeType = {};
    feeType["AppName"] = myDoc.AppName;
    feeType[myDoc.Type] = myDoc.count;
    feeType["appCount"] = myDoc.appCount;
    printjson (feeType);
})
这其实就是对数组内容进行过滤,只让您需要的条目参与求和:
// Sum Actions.OrderCount per (AppName, day of SessionTimeStamp.End, Actions.Type),
// restricted to the three listed action types.
db.collection.aggregate([
// Document-level filter: keep only documents that have at least one Actions
// entry of a wanted type, so fewer documents reach $unwind.
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Emit one document per entry of the Actions array.
{ "$unwind": "$Actions" },
// Entry-level filter: the same condition again, because a matching document
// can still contain Actions entries of other types.
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Group by AppName, the year/month/day-of-month of SessionTimeStamp.End,
// and the action type; "total" is the sum of OrderCount within each group.
{ "$group": {
"_id": {
"AppName": "$AppName",
"day": {
"year": { "$year": "$SessionTimeStamp.End" },
"month": { "$month": "$SessionTimeStamp.End" },
"day": { "$dayOfMonth": "$SessionTimeStamp.End" }
},
"type": "$Actions.Type"
},
"total": { "$sum": "$Actions.OrderCount" }
}},
// Order by app name, day and type ascending, then by total descending;
// adjust to whatever ordering the report needs.
{ "$sort": {
"_id.AppName": 1,
"_id.day": 1,
"_id.type": 1,
"total": -1
}}
])
或者,如果您想要每个文档的所有这些字段,则在现有组之后添加:
// Additional pipeline stage, placed after a $group stage that produced
// _id.AppName, _id.day, _id.type and total. It regroups by (AppName, day)
// and turns each action type into its own field.
{ "$group": {
"_id": {
"AppName": "$_id.AppName",
"day": "$_id.day",
},
// For each field below: add "total" when the incoming row's type matches
// the field name, otherwise add 0.
"PrestoBarImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoBarImpression" ] },
"$total",
0
]
}},
"PrestoKeyCountChange": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoKeyCountChange" ] },
"$total",
0
]
}},
"PrestoTileImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoTileImpression" ] },
"$total",
0
]
}}
}}
这样就能把每个 "AppName" 和 "day" 组合下各字段的总数汇总到单个文档中。
您可能还需要在第一个 $match 管道阶段中加入日期范围("date range")条件,这样只统计指定日期之间的文档,而不是把集合中的所有内容都加起来。