MongoDB 多个嵌套数组的聚合

MongoDB Aggregation on multiple nested arrays

我正在研究如何查询具有两层嵌套数组的文档。

{
    "_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
    "org-name" : "Shropshire Community Health NHS Trust",
    "domain" : "shropscommunityhealth.nhs.uk",
    "subdomains" : [ 
        {
            "name" : "www.shropscommunityhealth.nhs.uk",
            "firstSeen" : "2015-10-17 01:10:00",
            "a_rr" : "195.49.146.9",
            "data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
            "asn" : 21472,
            "asn_org" : "ServerHouse Ltd",
            "city" : "Portsmouth",
            "country" : "United Kingdom",
            "shodan" : {
                "ports" : [ 
                    {
                        "port" : 443,
                        "cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
                        "product" : "Microsoft IIS httpd"
                    }, 
                    {
                        "port" : 80,
                        "cpe" : "cpe:/o:microsoft:windows",
                        "product" : "Microsoft HTTPAPI httpd"
                    }
                ],
                "timestamp" : ISODate("2019-09-16T17:21:12.659Z")
            }
        }, 
        {
            "name" : "www2.shropscommunityhealth.nhs.uk",
            "firstSeen" : "2017-06-23 16:55:00",
            "a_rr" : "80.175.25.17",
            "data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
            "asn" : 8607,
            "asn_org" : "Timico Limited",
            "city" : null,
            "country" : "United Kingdom",
            "shodan" : {
                "timestamp" : ISODate("2019-09-16T17:21:13.664Z")
            }
        }
    ]
}

我希望能够搜索集合,并返回与所提供的端口号匹配的所有子域。到目前为止,我已经尝试过(在 PyMongo 中):

# Attempt to return only the subdomains that expose `port`.
# `db` is used as the collection object here and `port` is defined elsewhere
# (neither is shown in this snippet).
# Stage 1 ($match) keeps documents in which any nested ports entry has this port.
result = db.aggregate([{'$match': {'subdomains.shodan.ports.port': port}},
                            # Stage 2 ($project) outputs only _id and the recomputed 'subdomains'.
                            {'$project': {
                                'subdomains': {'$filter': {
                                    # NOTE(review): '$subdomains.shodan.ports' resolves to an array of
                                    # arrays (one ports array per subdomain), so each '$$this' is a
                                    # whole ports array rather than a single port document.
                                    'input': '$subdomains.shodan.ports',
                                    # '$$this.port' is therefore an array of port numbers, which never
                                    # equals the scalar `port`; the filter keeps nothing.
                                    'cond': {'$eq': ['$$this.port', port]}
                                }}
                            }}])

当我运行这段代码时,根本得不到任何结果。我试过 $filter,但似乎还是无法得到任何结果。我正在使用类似的聚合仅在 subdomains 数组中进行查询,并且它工作正常;我只是在处理数组中的数组时遇到了困难,想知道是否需要换一种方法。

尝试下面的聚合管道:

// Returns each document with `subdomains` reduced to the entries that have
// at least one shodan port equal to 443 (replace 443 with the port you need).
db.collection.aggregate([
  // Discard documents with no matching port up front, so $unwind only
  // expands documents that can contribute to the result (and so an index
  // on this path, if one exists, can be used).
  {
    $match: {
      "subdomains.shodan.ports.port": 443
    }
  },
  // One output document per subdomain entry.
  {
    $unwind: "$subdomains"
  },
  // Keep only the subdomains whose ports array contains the port.
  // $elemMatch matches array fields only, so subdomains without
  // `shodan.ports` are excluded without an extra null check.
  {
    $match: {
      "subdomains.shodan.ports": {
        $elemMatch: {
          port: 443
        }
      }
    }
  },
  // Reassemble one document per original _id with the surviving subdomains.
  {
    $group: {
      _id: "$_id",
      "org-name": {
        $last: "$org-name"
      },
      "domain": {
        $last: "$domain"
      },
      "subdomains": {
        $push: "$subdomains"
      }
    }
  }
])

给出输出:

[
  {
    "_id": ObjectId("5d7fb679d76f3bbf82ed952e"),
    "domain": "shropscommunityhealth.nhs.uk",
    "org-name": "Shropshire Community Health NHS Trust",
    "subdomains": [
      {
        "a_rr": "195.49.146.9",
        "asn": 21472,
        "asn_org": "ServerHouse Ltd",
        "city": "Portsmouth",
        "country": "United Kingdom",
        "data_retrieved": ISODate("2019-09-16T17:21:11.468Z"),
        "firstSeen": "2015-10-17 01:10:00",
        "name": "www.shropscommunityhealth.nhs.uk",
        "shodan": {
          "ports": [
            {
              "cpe": "cpe:/a:microsoft:internet_information_server:8.5",
              "port": 443,
              "product": "Microsoft IIS httpd"
            },
            {
              "cpe": "cpe:/o:microsoft:windows",
              "port": 80,
              "product": "Microsoft HTTPAPI httpd"
            }
          ],
          "timestamp": ISODate("2019-09-16T17:21:12.659Z")
        }
      }
    ]
  }
]

以下查询可以获得预期的输出:

// Returns each matching document with `subdomains` reduced to the entries
// that have a shodan port equal to 443 (replace 443 with the port you need).
db.collection.aggregate([
    // Skip documents in which no subdomain exposes the port; otherwise they
    // would be returned with an empty `subdomains` array.
    {
        $match:{
            "subdomains.shodan.ports.port":443
        }
    },
    // $addFields overwrites `subdomains` while keeping every other field
    // ("org-name", "domain", ...); a $project listing only `subdomains`
    // would drop them from the output.
    {
        $addFields:{
            "subdomains":{
                $filter:{
                    "input":"$subdomains",
                    "as":"subdomain",
                    "cond":{
                        $in:[
                            443, 
                            { 
                                // "$$subdomain.shodan.ports.port" is the array of port
                                // numbers for this subdomain; $ifNull substitutes []
                                // when `shodan.ports` is absent so $in gets an array.
                                $ifNull:[
                                    "$$subdomain.shodan.ports.port",
                                    []
                                ] 
                            } 
                        ]
                    }
                }
            }
        }
    }
]).pretty()

数据集:

{
    "_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
    "org-name" : "Shropshire Community Health NHS Trust",
    "domain" : "shropscommunityhealth.nhs.uk",
    "subdomains" : [ 
        {
            "name" : "www.shropscommunityhealth.nhs.uk",
            "firstSeen" : "2015-10-17 01:10:00",
            "a_rr" : "195.49.146.9",
            "data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
            "asn" : 21472,
            "asn_org" : "ServerHouse Ltd",
            "city" : "Portsmouth",
            "country" : "United Kingdom",
            "shodan" : {
                "ports" : [ 
                    {
                        "port" : 443,
                        "cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
                        "product" : "Microsoft IIS httpd"
                    }, 
                    {
                        "port" : 80,
                        "cpe" : "cpe:/o:microsoft:windows",
                        "product" : "Microsoft HTTPAPI httpd"
                    }
                ],
                "timestamp" : ISODate("2019-09-16T17:21:12.659Z")
            }
        }, 
        {
            "name" : "www2.shropscommunityhealth.nhs.uk",
            "firstSeen" : "2017-06-23 16:55:00",
            "a_rr" : "80.175.25.17",
            "data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
            "asn" : 8607,
            "asn_org" : "Timico Limited",
            "city" : null,
            "country" : "United Kingdom",
            "shodan" : {
                "timestamp" : ISODate("2019-09-16T17:21:13.664Z")
            }
        }
    ]
}

输出:

{
    "_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
    "org-name" : "Shropshire Community Health NHS Trust",
    "domain" : "shropscommunityhealth.nhs.uk",
    "subdomains" : [
        {
            "name" : "www.shropscommunityhealth.nhs.uk",
            "firstSeen" : "2015-10-17 01:10:00",
            "a_rr" : "195.49.146.9",
            "data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
            "asn" : 21472,
            "asn_org" : "ServerHouse Ltd",
            "city" : "Portsmouth",
            "country" : "United Kingdom",
            "shodan" : {
                "ports" : [
                    {
                        "port" : 443,
                        "cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
                        "product" : "Microsoft IIS httpd"
                    },
                    {
                        "port" : 80,
                        "cpe" : "cpe:/o:microsoft:windows",
                        "product" : "Microsoft HTTPAPI httpd"
                    }
                ],
                "timestamp" : ISODate("2019-09-16T17:21:12.659Z")
            }
        }
    ]
}