MongoDB 查询以包括多个字段的最频繁值的计数
MongoDB query to include count of most frequent values for multiple fields
提前感谢您的帮助!
我有一个集合 QR
,其架构与此类似:
var qrSchema = new Schema({
qrId: { type: String, index: true },
owner: { type: Schema.Types.ObjectId, ref: 'User' },
qrName: { type: String },
qrCategory: { type: String, index: true },
shortUrl: { type: String}}
})
和集合Datas
类似这样:
var dataSchema = new Schema({
qrId: { type: String, index: true}
city: { type: String},
device: { type: String},
date: { type: Date, index:true},
})
QR和Datas之间是一对多的关系。
我有这样的聚合:
Model.QR.aggregate([
{ $match: {
$and: [
{ owner: mongoose.Types.ObjectId(user._id) },
{
$expr: {
$cond: [
{ $in: [ category, [ null, "", "undefined" ]] },
true,
{ $eq: [ "$qrCategory", category ] }
]
}
}
]
}
},
{ $lookup:
{
"from": "datas",
"localField": "qrId",
"foreignField": "qrId",
"as": "data"
}
},
{
$project: {
_id: 0,
qrId: 1,
qrName: 1,
qrCategory: 1,
shortUrl: 1,
data: {
$filter: {
input: "$data",
as: "item",
cond: {
$and: [
{ $gte: [ "$$item.date", date.start ] },
{ $lte: [ "$$item.date", date.end ] }
] }
}
}
}
},
{
$group: {
_id: { "qrId": "$qrId", "qrName": "$qrName", "qrCategory": "$qrCategory", "shortUrl": "$shortUrl" },
data: {
$push: {
dataItems: "$data",
count: {
$size: { '$ifNull': ['$data', []] }
}
}
}
}
},
{
$sort: {
"data.count": -1
}
},
{
$limit: 10,
}]).exec((err, results) => { })
返回的内容如下:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "AndroidOS",
"city": "San Antonio"
}
],
"count": 3
}
]
}
]
我试图在 dataItems
计数后的结果中包含最频繁的设备和城市,如下所示:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5,
"topDevice": "AndroidOS", // <---- trying to add this
"topLocation": "Beijing" // <---- trying to add this
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "android",
"city": "San Antonio"
}
],
"count": 3,
"topDevice": "iOS", // <---- trying to add this
"topLocation": "Singapore" // <---- trying to add this
}
]
}
]
这可能吗?
非常感谢您的帮助或提示!
方法一
使用$function
会更容易。 MongoDB 版本 >= 4.4
Sort function in js
db.collection.aggregate([
{
"$set": {
"data": {
"$map": {
"input": "$data",
"as": "d",
"in": {
"count": "$$d.count",
"dataItems": "$$d.dataItems",
"topDevice": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.device" ],
lang: "js"
}
},
"topLocation": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.city" ],
lang: "js"
}
}
}
}
}
}
}
])
方法二
db.qr.aggregate([
{
"$match": {
owner: {
"$in": [
"1",
"2"
]
}
}
},
{
"$lookup": {
"from": "data",
"localField": "qrId",
"foreignField": "qrId",
"as": "data",
"pipeline": [
{
"$match": {
"$and": [
{
"date": {
"$gte": ISODate("2021-09-01T01:23:25.184Z")
}
},
{
"date": {
"$lte": ISODate("2021-09-02T11:23:25.184Z")
}
}
]
}
},
{
"$facet": {
"deviceGroup": [
{
"$group": {
"_id": "$device",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"cityGroup": [
{
"$group": {
"_id": "$city",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"all": []
}
}
]
}
},
{
"$set": {
"data": {
"$first": "$data.all"
},
"topDevice": {
"$first": {
"$first": "$data.deviceGroup._id"
}
},
"topLocation": {
"$first": {
"$first": "$data.cityGroup._id"
}
}
}
},
{
$group: {
_id: {
"qrId": "$qrId",
"qrName": "$qrName",
"qrCategory": "$qrCategory",
"shortUrl": "$shortUrl"
},
data: {
$push: {
dataItems: "$data",
topDevice: "$topDevice",
topLocation: "$topLocation",
count: {
$size: {
"$ifNull": [
"$data",
[]
]
}
}
}
}
}
}
])
查询
- 添加你需要的匹配,我不明白匹配应该做什么
- 在
qrId
上查找
- 过滤器仅保留开始<=日期<=结束(替换 1 和 100)
- facet 组
all-documents
、topDevice
topLocation
$set
将这些数据从它们所在的嵌套位置中取出
- count 添加为
all-documents
的大小
*也许我遗漏了一些东西,但试试看(第一部分我认为它像玉婷的回答)
QR.aggregate(
[{"$lookup":
{"from":"Datas",
"localField":"qrId",
"foreignField":"qrId",
"pipeline":
[{"$match":{"$and":[{"date":{"$gte":1}}, {"date":{"$lte":100}}]}},
{"$facet":
{"dataItems":[],
"topDevice":
[{"$group":{"_id":"$device", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}],
"topLocation":
[{"$group":{"_id":"$city", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}]}}],
"as":"data"}},
{"$set":{"data":{"$arrayElemAt":["$data", 0]}}},
{"$set":
{"dataItems":"$data.dataItems",
"count":{"$size":"$data.dataItems"},
"topDevice":
{"$getField":
{"field":"_id", "input":{"$arrayElemAt":["$data.topDevice", 0]}}},
"topLocation":
{"$getField":
{"field":"_id",
"input":{"$arrayElemAt":["$data.topLocation", 0]}}},
"data":"$$REMOVE"}}])
提前感谢您的帮助!
我有一个集合 QR
,其架构与此类似:
var qrSchema = new Schema({
qrId: { type: String, index: true },
owner: { type: Schema.Types.ObjectId, ref: 'User' },
qrName: { type: String },
qrCategory: { type: String, index: true },
shortUrl: { type: String}}
})
和集合Datas
类似这样:
var dataSchema = new Schema({
qrId: { type: String, index: true}
city: { type: String},
device: { type: String},
date: { type: Date, index:true},
})
QR和Datas之间是一对多的关系。
我有这样的聚合:
Model.QR.aggregate([
{ $match: {
$and: [
{ owner: mongoose.Types.ObjectId(user._id) },
{
$expr: {
$cond: [
{ $in: [ category, [ null, "", "undefined" ]] },
true,
{ $eq: [ "$qrCategory", category ] }
]
}
}
]
}
},
{ $lookup:
{
"from": "datas",
"localField": "qrId",
"foreignField": "qrId",
"as": "data"
}
},
{
$project: {
_id: 0,
qrId: 1,
qrName: 1,
qrCategory: 1,
shortUrl: 1,
data: {
$filter: {
input: "$data",
as: "item",
cond: {
$and: [
{ $gte: [ "$$item.date", date.start ] },
{ $lte: [ "$$item.date", date.end ] }
] }
}
}
}
},
{
$group: {
_id: { "qrId": "$qrId", "qrName": "$qrName", "qrCategory": "$qrCategory", "shortUrl": "$shortUrl" },
data: {
$push: {
dataItems: "$data",
count: {
$size: { '$ifNull': ['$data', []] }
}
}
}
}
},
{
$sort: {
"data.count": -1
}
},
{
$limit: 10,
}]).exec((err, results) => { })
返回的内容如下:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "AndroidOS",
"city": "San Antonio"
}
],
"count": 3
}
]
}
]
我试图在 dataItems
计数后的结果中包含最频繁的设备和城市,如下所示:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5,
"topDevice": "AndroidOS", // <---- trying to add this
"topLocation": "Beijing" // <---- trying to add this
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "android",
"city": "San Antonio"
}
],
"count": 3,
"topDevice": "iOS", // <---- trying to add this
"topLocation": "Singapore" // <---- trying to add this
}
]
}
]
这可能吗?
非常感谢您的帮助或提示!
方法一
使用$function
会更容易。 MongoDB 版本 >= 4.4
Sort function in js
db.collection.aggregate([
{
"$set": {
"data": {
"$map": {
"input": "$data",
"as": "d",
"in": {
"count": "$$d.count",
"dataItems": "$$d.dataItems",
"topDevice": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.device" ],
lang: "js"
}
},
"topLocation": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.city" ],
lang: "js"
}
}
}
}
}
}
}
])
方法二
db.qr.aggregate([
{
"$match": {
owner: {
"$in": [
"1",
"2"
]
}
}
},
{
"$lookup": {
"from": "data",
"localField": "qrId",
"foreignField": "qrId",
"as": "data",
"pipeline": [
{
"$match": {
"$and": [
{
"date": {
"$gte": ISODate("2021-09-01T01:23:25.184Z")
}
},
{
"date": {
"$lte": ISODate("2021-09-02T11:23:25.184Z")
}
}
]
}
},
{
"$facet": {
"deviceGroup": [
{
"$group": {
"_id": "$device",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"cityGroup": [
{
"$group": {
"_id": "$city",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"all": []
}
}
]
}
},
{
"$set": {
"data": {
"$first": "$data.all"
},
"topDevice": {
"$first": {
"$first": "$data.deviceGroup._id"
}
},
"topLocation": {
"$first": {
"$first": "$data.cityGroup._id"
}
}
}
},
{
$group: {
_id: {
"qrId": "$qrId",
"qrName": "$qrName",
"qrCategory": "$qrCategory",
"shortUrl": "$shortUrl"
},
data: {
$push: {
dataItems: "$data",
topDevice: "$topDevice",
topLocation: "$topLocation",
count: {
$size: {
"$ifNull": [
"$data",
[]
]
}
}
}
}
}
}
])
查询
- 添加你需要的匹配,我不明白匹配应该做什么
- 在
qrId
上查找
- 过滤器仅保留开始<=日期<=结束(替换 1 和 100)
- facet 组
all-documents
、topDevice
topLocation
$set
将这些数据从它们所在的嵌套位置中取出- count 添加为
all-documents
的大小
*也许我遗漏了一些东西,但试试看(第一部分我认为它像玉婷的回答)
QR.aggregate(
[{"$lookup":
{"from":"Datas",
"localField":"qrId",
"foreignField":"qrId",
"pipeline":
[{"$match":{"$and":[{"date":{"$gte":1}}, {"date":{"$lte":100}}]}},
{"$facet":
{"dataItems":[],
"topDevice":
[{"$group":{"_id":"$device", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}],
"topLocation":
[{"$group":{"_id":"$city", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}]}}],
"as":"data"}},
{"$set":{"data":{"$arrayElemAt":["$data", 0]}}},
{"$set":
{"dataItems":"$data.dataItems",
"count":{"$size":"$data.dataItems"},
"topDevice":
{"$getField":
{"field":"_id", "input":{"$arrayElemAt":["$data.topDevice", 0]}}},
"topLocation":
{"$getField":
{"field":"_id",
"input":{"$arrayElemAt":["$data.topLocation", 0]}}},
"data":"$$REMOVE"}}])