有条件地对数组中的元素进行分组和求和

Conditionally group and sum the elements of an array

我想按 AppName 分组,并找出在特定日期内每个应用程序的 PrestoBarImpression、PrestoKeyCountChange、PrestoTileImpression 各有多少(只需对 OrderCount 求和)。

这样我就可以用这些信息生成报告。我需要每个应用程序的 PrestoTileImpression 订单数,PrestoBarImpression 订单数,PrestoTileClick 订单数。

下面是我的文档。

{
    "ClientId": "XYZ123",
    "location": {
    "Name": "Hyderabad",
    "Country": "India",
    "Zip": "500084",
    "Gps": {
        "lat": "17.463607",
        "lon": "78.344279"
    }
    },
    "Network": {
    "Operator": "Airtel",
    "Type": "wifi",
    "TowerID": "123",
    "IP": "1.1.1.1"
    },
    "SessionTimeStamp": {
    "Start": ISODate("2015-06-02T05:36:49.045        Z"),
    "End": ISODate("2015-06-02T05:36:56.045        Z"),
    "Duration": "7000"
    },
    "AppName": "WhatsApp",
    "Text": "Key1 Key2 Key3 Key4",
    "Actions": [{
    "Type": "PrestoBarImpression",
    "CampaignId": 1,
    "keyword": "key1",
    "prestoCount": 1,
    "duration": 100,
    "OrderCount": 1
    }, {
    "Type": "PrestoKeyCountChange",
    "CampaignId": 1,
    "keyword": "key1",
    "prestoCount": 1,
    "OrderCount": 2
    }, {
    "Type": "PrestoBarImpression",
    "CampaignId": 2,
    "keyword": "key2",
    "prestoCount": 2,
    "duration": 150,
    "OrderCount": 3
    }, {
    "Type": "PrestoKeyCountChange",
    "CampaignId": "2",
    "keyword": "key2",
    "prestoCount": 2,
    "OrderCount": 4
    }, {
    "Type": "PrestoBarImpression",
    "CampaignId": 1,
    "keyword": "key3",
    "prestoCount": 2,
    "duration": 200,
    "OrderCount": 5
    }, {
    "Type": "PrestoTileImpression",
    "CampaignId": 1,
    "duration": 200,
    "OrderCount": 6
    }, {
    "Type": "PrestoTileImpression",
    "AdditionalAction": "swipeRight",
    "CampaignId": 2,
    "duration": 200,
    "OrderCount": 7
    }, {
    "Type": "PrestoTileClick",
    "AdditionalAction": "swipeRight",
    "CampaignId": 2,
    "OrderCount": 8
    }, {
    "Type": "PrestoBarImpression",
    "CampaignId": 2,
    "keyword": "key4",
    "prestoCount": 2,
    "duration": 150,
    "OrderCount": 9
    }]
}

参考 @Viswas 的回答,我编写了下面的查询,并得到了如下输出。

查询

// Aggregation pipeline: total OrderCount per (AppName, action type) for
// sessions that started between 2015-06-01T18:30:00Z and 2015-06-04T18:29:59Z.
[
    // Keep only sessions whose start time falls inside the reporting window.
    {
        "$match": {
            "SessionTimeStamp.Start": {
                "$gte": ISODate("2015-06-01T18:30:00.000Z"),
                "$lte": ISODate("2015-06-04T18:29:59.000Z")
            }
        }
    },
    // Emit one document per entry of the Actions array.
    {
        "$unwind": "$Actions"
    },
    // Drop action entries whose type is not part of the report.
    {
        "$match": {
            "Actions.Type": {
                "$in": [
                    "PrestoBarImpression",
                    "PrestoKeyCountChange",
                    "PrestoTileImpression"
                ]
            }
        }
    },
    // Sum OrderCount for every (AppName, action type) pair.
    {
        "$group": {
            "_id": {
                "AppName": "$AppName",
                "type": "$Actions.Type"
            },
            "total": {
                "$sum": "$Actions.OrderCount"
            }
        }
    },
    // Smallest totals first.
    {
        "$sort": {
            "total": 1
        }
    }
]

输出

{  
"result":[  
    {  
        "_id":{  
            "AppName":"WhatsApp",
            "type":"PrestoKeyCountChange"
        },
        "total":6
    },
    {  
        "_id":{  
            "AppName":"hike",
            "type":"PrestoKeyCountChange"
        },
        "total":6
    },
    {  
        "_id":{  
            "AppName":"hike",
            "type":"PrestoTileImpression"
        },
        "total":13
    },
    {  
        "_id":{  
            "AppName":"WhatsApp",
            "type":"PrestoTileImpression"
        },
        "total":13
    },
    {  
        "_id":{  
            "AppName":"hike",
            "type":"PrestoBarImpression"
        },
        "total":18
    },
    {  
        "_id":{  
            "AppName":"WhatsApp",
            "type":"PrestoBarImpression"
        },
        "total":18
    }
],
"ok":1.0000000000000000

}

我需要以下格式的输出

[  
{  
    "AppName":"WhatsApp",
    " PrestoTileImpression":13,
    "PrestoKeyCountChange":6,
    "PrestoBarImpression":18,
    "count":"10 (This is how many times thee Application presents in document, because I need to find top 10 apps Need to sort the output by this count)"
},
{  
    "AppName":"Hike",
    " PrestoTileImpression":13,
    "PrestoKeyCountChange":6,
    "PrestoBarImpression":18,
    "count":"10 "
}

]

您应该使用 aggregation 来获得结果。

如果你想得到给定(特定)日期下按 Action 类型统计的 OrderCount,需要先用 SessionTimeStamp.Start 匹配你的日期,然后按 Actions.Type 对数据进行分组($group)。查询如下:

// Total OrderCount per application and action type for the matched sessions.
// Each result document has the fields: App, ActionType, OrderCount.
db.collection.aggregate([
    // Select the sessions that started at the requested timestamp.
    // NOTE(review): this is an exact-instant equality match; to cover a whole
    // day, replace it with $gte / $lt bounds on SessionTimeStamp.Start.
    {
        $match: {
            "SessionTimeStamp.Start": ISODate("2015-06-02T05:36:49.045Z")
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    // Group on the field path "$AppName" (not the literal string "AppName")
    // together with the action type, so totals are never merged across apps.
    {
        $group: {
            "_id": {
                "App": "$AppName",
                "ActionType": "$Actions.Type"
            },
            "OrderCount": {
                $sum: "$Actions.OrderCount"
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "_id": 0,
            "OrderCount": 1,
            "ActionType": "$_id.ActionType",
            "App": "$_id.App"
        }
    }
])

在问题作者的评论后编辑:

参见问题作者提出的重复问题(Duplicate Question)。

请核对 appName 和 count 的拼写,因为某些文档中的拼写不一致(例如 count 与 Count)。

您应该使用以下查询-

// Per (AppName, action type): the summed OrderCount ("count") and the number
// of action entries that contributed to it ("appCount").
db.collection.aggregate([
    // Keep sessions that start at/after the lower bound and end at/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": {
                $gte: ISODate("2015-06-02T05:36:49.045Z")
            },
            "SessionTimeStamp.End": {
                $lte: ISODate("2015-06-02T05:36:56.045Z")
            }
        }
    },
    // Emit one document per entry of the Actions array.
    {
        $unwind: "$Actions"
    },
    {
        $group: {
            "_id": {
                "AppName": "$AppName",
                "Type": "$Actions.Type"
            },
            // The action entries carry "OrderCount"; summing a field that does
            // not exist ("Count") would silently yield 0.
            "count": {
                "$sum": "$Actions.OrderCount"
            },
            // Number of unwound action entries in this group.
            "appCount": {
                $sum: 1
            }
        }
    },
    // Flatten the compound _id into top-level fields.
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
])

如果您仍想把动态值(例如 Action 类型)用作键名,可以遍历游标(cursor):

// Same aggregation as a per-(AppName, action type) summary, but each result is
// re-shaped client-side so that the action type becomes the key of its total.
db.collection.aggregate([
    // Keep sessions that start at/after the lower bound and end at/before the upper bound.
    {
        $match: {
            "SessionTimeStamp.Start": { $gte: ISODate("2015-06-02T05:36:49.045Z") },
            "SessionTimeStamp.End": { $lte: ISODate("2015-06-02T05:36:56.045Z") }
        }
    },
    // Emit one document per entry of the Actions array.
    { $unwind: "$Actions" },
    {
        $group: {
            "_id": { "AppName": "$AppName", "Type": "$Actions.Type" },
            // The action entries carry "OrderCount"; summing a field that does
            // not exist ("Count") would silently yield 0.
            "count": { "$sum": "$Actions.OrderCount" },
            // Number of unwound action entries in this group.
            "appCount": { $sum: 1 }
        }
    },
    {
        $project: {
            "AppName": "$_id.AppName",
            "Type": "$_id.Type",
            "count": 1,
            "appCount": 1,
            "_id": 0
        }
    }
]).forEach(function (myDoc) {
    // Build { AppName, <action type>: <total>, appCount } and print it.
    var row = {};
    row["AppName"] = myDoc.AppName;
    row[myDoc.Type] = myDoc.count;
    row["appCount"] = myDoc.appCount;
    printjson(row);
})

这其实就是先过滤数组内容,只让你想要的条目参与求和:

db.collection.aggregate([
    // Stage 1: skip documents that contain none of the wanted action types
    {
        $match: {
            "Actions.Type": {
                $in: [
                    "PrestoBarImpression",
                    "PrestoKeyCountChange",
                    "PrestoTileImpression"
                ]
            }
        }
    },

    // Stage 2: one document per entry of the Actions array
    {
        $unwind: "$Actions"
    },

    // Stage 3: after unwinding, keep only the entries of the wanted types
    {
        $match: {
            "Actions.Type": {
                $in: [
                    "PrestoBarImpression",
                    "PrestoKeyCountChange",
                    "PrestoTileImpression"
                ]
            }
        }
    },

    // Stage 4: sum OrderCount per application, calendar day of the session
    // end, and action type
    {
        $group: {
            _id: {
                AppName: "$AppName",
                day: {
                    year: { $year: "$SessionTimeStamp.End" },
                    month: { $month: "$SessionTimeStamp.End" },
                    day: { $dayOfMonth: "$SessionTimeStamp.End" }
                },
                type: "$Actions.Type"
            },
            total: { $sum: "$Actions.OrderCount" }
        }
    },

    // Stage 5: order the results; adjust the keys to whatever ordering you need
    {
        $sort: {
            "_id.AppName": 1,
            "_id.day": 1,
            "_id.type": 1,
            total: -1
        }
    }
])

或者,如果您希望每个结果文档都包含所有这些字段,可在现有的 $group 阶段之后再添加:

{ "$group": {
    "_id": {
        "AppName": "$_id.AppName",
        "day": "$_id.day",
     },
     "PrestoBarImpression": { "$sum": {
         "$cond": [
             { "$eq": [ "$_id.type", "PrestoBarImpression" ] },
             "$total",
             0
         ]
     }},
     "PrestoKeyCountChange": { "$sum": {
         "$cond": [
             { "$eq": [ "$_id.type", "PrestoKeyCountChange" ] },
             "$total",
             0
         ]
     }},
     "PrestoTileImpression": { "$sum": {
         "$cond": [
             { "$eq": [ "$_id.type", "PrestoTileImpression" ] },
             "$total",
             0
         ]
     }}
}}

这样会把每个 "AppName" 和 "day" 组合下各字段的总数汇总到同一个文档中。

您可能还需要在第一个 $match 阶段中加入 "date range"(日期范围)条件,这样就只统计指定日期之间的数据,而不是把集合中的所有内容都加起来。