MongoDB 多个嵌套数组的聚合
MongoDB Aggregation on multiple nested arrays
我正在研究如何查询具有两层嵌套数组的文档。
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
我希望能够搜索整个集合,并返回与所提供端口号匹配的所有子域。到目前为止,我已经尝试过(在 PyMongo 中):
# Attempted pipeline: keep documents whose nested ports contain `port`,
# then try to filter the nested ports down to the matching ones.
result = db.aggregate([{'$match': {'subdomains.shodan.ports.port': port}},
{'$project': {
'subdomains': {'$filter': {
# NOTE(review): this path traverses the `subdomains` array, so it presumably
# resolves to one `ports` array per subdomain (an array of arrays) rather
# than a flat list of port entries -- confirm against the server docs.
'input': '$subdomains.shodan.ports',
# NOTE(review): with the input above, `$$this` would be a whole `ports`
# array and `$$this.port` a list of numbers, so `$eq` against a single
# port number would never be true and the filtered result stays empty.
'cond': {'$eq': ['$$this.port', port]}
}}
}}])
当我运行这段代码时,根本得不到任何结果。我试过 $filter,
但始终无法得到任何结果。我用类似的聚合只在 subdomains
数组中查询时一切正常,只是在处理数组中的数组时遇到了困难,想知道是否需要换一种方法。
尝试下面的聚合管道:
// Return every document reduced to the subdomains that expose port 443.
db.collection.aggregate([
  {
    // Drop documents that have no subdomain with port 443 before unwinding.
    // As the first stage this $match can use an index on the dotted path,
    // and it avoids unwinding documents that could never match below.
    $match: {
      "subdomains.shodan.ports.port": 443
    }
  },
  {
    // Emit one document per element of the `subdomains` array.
    $unwind: "$subdomains"
  },
  {
    // Keep only the unwound subdomains whose `ports` array has an entry
    // with port 443. $elemMatch does not match a missing `ports` field,
    // so subdomains without Shodan port data are skipped here.
    $match: {
      "subdomains.shodan.ports": {
        $elemMatch: {
          port: 443
        },
        $ne: null
      }
    }
  },
  {
    // Reassemble one document per original _id, carrying over the
    // top-level fields and collecting the surviving subdomains.
    $group: {
      _id: "$_id",
      "org-name": {
        $last: "$org-name"
      },
      "domain": {
        $last: "$domain"
      },
      "subdomains": {
        $push: "$subdomains"
      }
    }
  }
])
给出输出:
[
{
"_id": ObjectId("5d7fb679d76f3bbf82ed952e"),
"domain": "shropscommunityhealth.nhs.uk",
"org-name": "Shropshire Community Health NHS Trust",
"subdomains": [
{
"a_rr": "195.49.146.9",
"asn": 21472,
"asn_org": "ServerHouse Ltd",
"city": "Portsmouth",
"country": "United Kingdom",
"data_retrieved": ISODate("2019-09-16T17:21:11.468Z"),
"firstSeen": "2015-10-17 01:10:00",
"name": "www.shropscommunityhealth.nhs.uk",
"shodan": {
"ports": [
{
"cpe": "cpe:/a:microsoft:internet_information_server:8.5",
"port": 443,
"product": "Microsoft IIS httpd"
},
{
"cpe": "cpe:/o:microsoft:windows",
"port": 80,
"product": "Microsoft HTTPAPI httpd"
}
],
"timestamp": ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
]
以下查询可以获得预期的输出:
// Return every document reduced to the subdomains that expose port 443,
// without unwinding the `subdomains` array.
db.collection.aggregate([
  {
    // Skip documents in which no subdomain exposes port 443; otherwise they
    // would be returned with an empty `subdomains` array.
    $match: {
      "subdomains.shodan.ports.port": 443
    }
  },
  {
    // $addFields replaces only `subdomains` and keeps all other fields
    // (`org-name`, `domain`). A $project listing only `subdomains` would
    // drop them, which contradicts the output shown for this query.
    $addFields: {
      "subdomains": {
        $filter: {
          "input": "$subdomains",
          "as": "subdomain",
          "cond": {
            $in: [
              443,
              {
                // `shodan.ports` is absent on some subdomains; $in requires
                // an array as its second argument, so fall back to [].
                $ifNull: [
                  "$$subdomain.shodan.ports.port",
                  []
                ]
              }
            ]
          }
        }
      }
    }
  }
]).pretty()
数据集:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
输出:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
我正在研究如何查询具有两层嵌套数组的文档。
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
我希望能够搜索整个集合,并返回与所提供端口号匹配的所有子域。到目前为止,我已经尝试过(在 PyMongo 中):
# Attempted pipeline: keep documents whose nested ports contain `port`,
# then try to filter the nested ports down to the matching ones.
result = db.aggregate([{'$match': {'subdomains.shodan.ports.port': port}},
{'$project': {
'subdomains': {'$filter': {
# NOTE(review): this path traverses the `subdomains` array, so it presumably
# resolves to one `ports` array per subdomain (an array of arrays) rather
# than a flat list of port entries -- confirm against the server docs.
'input': '$subdomains.shodan.ports',
# NOTE(review): with the input above, `$$this` would be a whole `ports`
# array and `$$this.port` a list of numbers, so `$eq` against a single
# port number would never be true and the filtered result stays empty.
'cond': {'$eq': ['$$this.port', port]}
}}
}}])
当我运行这段代码时,根本得不到任何结果。我试过 $filter,
但始终无法得到任何结果。我用类似的聚合只在 subdomains
数组中查询时一切正常,只是在处理数组中的数组时遇到了困难,想知道是否需要换一种方法。
尝试下面的聚合管道:
// Return every document reduced to the subdomains that expose port 443.
db.collection.aggregate([
  {
    // Drop documents that have no subdomain with port 443 before unwinding.
    // As the first stage this $match can use an index on the dotted path,
    // and it avoids unwinding documents that could never match below.
    $match: {
      "subdomains.shodan.ports.port": 443
    }
  },
  {
    // Emit one document per element of the `subdomains` array.
    $unwind: "$subdomains"
  },
  {
    // Keep only the unwound subdomains whose `ports` array has an entry
    // with port 443. $elemMatch does not match a missing `ports` field,
    // so subdomains without Shodan port data are skipped here.
    $match: {
      "subdomains.shodan.ports": {
        $elemMatch: {
          port: 443
        },
        $ne: null
      }
    }
  },
  {
    // Reassemble one document per original _id, carrying over the
    // top-level fields and collecting the surviving subdomains.
    $group: {
      _id: "$_id",
      "org-name": {
        $last: "$org-name"
      },
      "domain": {
        $last: "$domain"
      },
      "subdomains": {
        $push: "$subdomains"
      }
    }
  }
])
给出输出:
[
{
"_id": ObjectId("5d7fb679d76f3bbf82ed952e"),
"domain": "shropscommunityhealth.nhs.uk",
"org-name": "Shropshire Community Health NHS Trust",
"subdomains": [
{
"a_rr": "195.49.146.9",
"asn": 21472,
"asn_org": "ServerHouse Ltd",
"city": "Portsmouth",
"country": "United Kingdom",
"data_retrieved": ISODate("2019-09-16T17:21:11.468Z"),
"firstSeen": "2015-10-17 01:10:00",
"name": "www.shropscommunityhealth.nhs.uk",
"shodan": {
"ports": [
{
"cpe": "cpe:/a:microsoft:internet_information_server:8.5",
"port": 443,
"product": "Microsoft IIS httpd"
},
{
"cpe": "cpe:/o:microsoft:windows",
"port": 80,
"product": "Microsoft HTTPAPI httpd"
}
],
"timestamp": ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
]
以下查询可以获得预期的输出:
// Return every document reduced to the subdomains that expose port 443,
// without unwinding the `subdomains` array.
db.collection.aggregate([
  {
    // Skip documents in which no subdomain exposes port 443; otherwise they
    // would be returned with an empty `subdomains` array.
    $match: {
      "subdomains.shodan.ports.port": 443
    }
  },
  {
    // $addFields replaces only `subdomains` and keeps all other fields
    // (`org-name`, `domain`). A $project listing only `subdomains` would
    // drop them, which contradicts the output shown for this query.
    $addFields: {
      "subdomains": {
        $filter: {
          "input": "$subdomains",
          "as": "subdomain",
          "cond": {
            $in: [
              443,
              {
                // `shodan.ports` is absent on some subdomains; $in requires
                // an array as its second argument, so fall back to [].
                $ifNull: [
                  "$$subdomain.shodan.ports.port",
                  []
                ]
              }
            ]
          }
        }
      }
    }
  }
]).pretty()
数据集:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
输出:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}