对双层嵌套数组中的元素执行 $lookup

$lookup with double nested array elements

求助:我需要对第 2 层嵌套数组中的字段 pid 执行 $lookup/join 操作,关联条件为 coll2.string(pid)==coll1.a.p.pid。集合 coll1 相当大,并且在“a.p.pid”上建有索引,但我不确定如何对嵌套数组元素进行 $lookup,并希望最终能够避免 $unwind,同时利用“a.p.pid”上的索引...

coll1:

[
  {
  a:[
     {
      p:[
         {pid:"1" ,date:"2022-01-22"},
         {pid:"4", date:"2022-01-25"}
       ]
     }
   ,{
     p:[
        {pid:"3",date:"2022-01-27"}
      ]
    }
  ]
 }
]

coll2:

 [ 
  {
   pid:1
  }
  ,
 {
  pid:3
  }
 ]

预期结果:

 [
  {pid:1, date:"2022-01-22"},
  {pid:3, date:"2022-01-27"}
 ]

集合大小:

db.col2.count()
51171548
db.col1.count()
81039496

mongodb 版本 4.0

我不太清楚为什么一定要避免使用 $unwind,下面先给出使用 $unwind 的写法:

// Join col2 to col1 on the doubly nested field a.p.pid and expose the date of
// the matching a.p element on each col2 document.
db.col2.aggregate([
  // The sample data stores col2.pid as a number and col1.a.p.pid as a string,
  // so pid is cast to a string before joining.
  { $addFields: { pid: { $toString: "$pid" } } },
  {
    $lookup: {
      from: "col1",
      // Equality match on the path the question says is indexed.
      // NOTE(review): per the MongoDB $lookup documentation, combining
      // localField/foreignField with let/pipeline is only accepted from
      // server 5.0 onwards -- confirm against the target server version.
      localField: "pid",
      foreignField: "a.p.pid",
      let: { wantedPid: "$pid" },
      pipeline: [
        // Flatten both array levels so each a.p element becomes its own document...
        { $unwind: "$a" },
        { $unwind: "$a.p" },
        // ...then keep only the element whose pid equals the outer document's pid.
        { $match: { $expr: { $eq: [ "$$wantedPid", "$a.p.pid" ] } } }
      ],
      as: "date"
    }
  },
  {
    // Collapse the joined array to the first matching date and cast pid back
    // to an integer.
    $addFields: {
      date: { $arrayElemAt: [ "$date.a.p.date", 0 ] },
      pid: { $toInt: "$pid" }
    }
  }
])

mongoplayground


// Same join without any $unwind: after a plain $lookup, the joined documents
// are narrowed level by level with $filter until only the matching date remains.
db.col2.aggregate([
  // Cast pid to a string so it compares equal to col1's string a.p.pid values.
  { $addFields: { pid: { $toString: "$pid" } } },
  {
    $lookup: {
      from: "col1",
      localField: "pid",
      foreignField: "a.p.pid",
      as: "date"
    }
  },
  {
    // Keep only joined documents that contain at least one a[] element holding
    // at least one p[] element whose pid equals this document's pid.
    $addFields: {
      date: {
        $filter: {
          input: "$date",
          as: "doc",
          cond: {
            $gt: [
              {
                $size: {
                  $filter: {
                    input: "$$doc.a",
                    as: "outer",
                    cond: {
                      $gt: [
                        {
                          $size: {
                            $filter: {
                              input: "$$outer.p",
                              as: "inner",
                              cond: { $eq: [ "$$inner.pid", "$pid" ] }
                            }
                          }
                        },
                        0
                      ]
                    }
                  }
                }
              },
              0
            ]
          }
        }
      }
    }
  },
  // Descend one level: take the a[] array of the first remaining joined document.
  { $addFields: { date: { $arrayElemAt: [ "$date.a", 0 ] } } },
  {
    // Keep only the a[] elements that hold a p[] element with the wanted pid.
    $addFields: {
      date: {
        $filter: {
          input: "$date",
          as: "outer",
          cond: {
            $gt: [
              {
                $size: {
                  $filter: {
                    input: "$$outer.p",
                    as: "inner",
                    cond: { $eq: [ "$$inner.pid", "$pid" ] }
                  }
                }
              },
              0
            ]
          }
        }
      }
    }
  },
  // Descend again: take the p[] array of the first remaining a[] element.
  { $addFields: { date: { $arrayElemAt: [ "$date.p", 0 ] } } },
  {
    // Keep only the p[] elements whose pid matches.
    $addFields: {
      date: {
        $filter: {
          input: "$date",
          as: "inner",
          cond: { $eq: [ "$$inner.pid", "$pid" ] }
        }
      }
    }
  },
  {
    // Reduce to the first matching element's date and cast pid back to an integer.
    $addFields: {
      date: { $arrayElemAt: [ "$date.date", 0 ] },
      pid: { $toInt: "$pid" }
    }
  }
])

mongoplayground

以下是实际案例中实现的最终解决方案,确认在 v.4.0 中可以正常工作:

 // Final variant from the post: join first, then unwind the *joined* result
 // (not col1 itself) and keep only the a.p elements whose pid matches.
 db.col2.aggregate([
   // Keep _id and pid only, with pid cast to a string so it can be compared
   // with col1's string a.p.pid values.
   { $project: { pid: { $toString: "$pid" } } },
   {
     $lookup: {
       from: "col1",
       localField: "pid",
       foreignField: "a.p.pid",
       as: "date"
     }
   },
   {
     // Trim the joined documents down to the two nested fields used below.
     $project: {
       "date.a.p.date": 1,
       "date.a.p.pid": 1,
       _id: 1,
       pid: 1
     }
   },
   // Flatten the joined array and both nested array levels, giving one
   // document per a.p element.
   { $unwind: { path: "$date" } },
   { $unwind: { path: "$date.a" } },
   { $unwind: { path: "$date.a.p" } },
   {
     // Lift pid/date out of the nested element and flag whether the element's
     // pid equals the pid this document was joined on.
     // NOTE(review): pid is taken from col1 here, so it is emitted as a string,
     // whereas the expected result in the question shows a number -- confirm
     // this is acceptable.
     $project: {
       pid: "$date.a.p.pid",
       date: "$date.a.p.date",
       matched: { $eq: [ "$pid", "$date.a.p.pid" ] }
     }
   },
   // Drop the sibling a.p elements that merely shared a parent document.
   { $match: { matched: true } },
   // Final shape: pid and date only.
   { $project: { _id: 0, pid: 1, date: 1 } }
 ])

@YuTing:你的答案已被标记为已接受,因为在更高版本中它似乎是更好的方案!感谢 @YuTing 的帮助!

playground