Mongoose 在聚合(aggregate)之后进行填充(populate)

Mongoose Populate after Aggregate

我想在运行聚合管道之后得到特定结构的数据,然后再进行填充(populate),但还差一点。

最终想要的结果如下:

[
  {
    _accountId: "5beee0966d17bc42501f1234",
    name: "Company Name 1",
    contactEmail: "email1@email.com",
    contactName: "contact Name 1",
    reason: "Warranties",
    total: 1152,
    lineItems: [
      {
        _id: "5beee0966d17bc42501f5086",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf43929e7179a56e21382bc",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf4392fe7179a56e21382bd",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ]
  },
  {
    _accountId: "5beee0966d17bc42501f1235",
    name: "Company Name 2",
    contactEmail: "email2@email.com",
    contactName: "contact Name 2",
    reason: "Warranties",
    total: 1152,
    lineItems: [
      {
        _id: "5beee0966d17bc42501f5086",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf43929e7179a56e21382bc",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf4392fe7179a56e21382bd",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ]
  }
]

我正在从以下两个模型中收集这些数据:

保修

{
  _id: "5beee0966d17bc42501f5086",
  jobsiteAddressStreet: String,
  jobsiteAddressCity: String,
  jobsiteAddressState: String,
  jobsiteAddressZip: Number,
  warrantyFee: Number,
  _accountId: {
    type: Schema.Types.ObjectId,
    ref: "accounts"
  },
  payStatus: String
}

帐号

{
  _id: "5beee0966d17bc42501f1235",
  name: String,
  contactName: String,
  contactEmail: String
}

我当前的查询如下:

// Groups the warranties due on the next billing cycle by account, then asks the
// Account model to populate the grouped _id with the account's contact fields.
Warranty.aggregate([
    // Stage 1: keep only warranties whose payStatus is "Invoiced Next Billing Cycle".
    {
      $match: {
        payStatus: "Invoiced Next Billing Cycle"
      }
    },
    // Stage 2: one output document per _accountId; the grouping key becomes _id.
    {
      $group: {
        _id: "$_accountId",
        // Sum of warrantyFee over every warranty in the group.
        total: {
          $sum: "$warrantyFee"
        },
        // One entry per warranty: its _id, a single-string address and its fee.
        lineItems: {
          $push: {
            _id: "$_id",
            // NOTE(review): the schema sketch lists jobsiteAddressZip as Number, while
            // $concat accepts only strings — confirm the stored type of the zip field.
            jobsiteAddress: {
              $concat: [
                "$jobsiteAddressStreet",
                " ",
                "$jobsiteAddressCity",
                ", ",
                "$jobsiteAddressState",
                " ",
                "$jobsiteAddressZip"
              ]
            },
            warrantyFee: "$warrantyFee"
          }
        }
      }
    },
    // Stage 3: add the constant reason and pass total / lineItems through (_id is kept).
    {
      $project: {
        reason: "Warranties",
        total: "$total",
        lineItems: "$lineItems"
      }
    }
  ])
    .then(warranties => {
      console.log(warranties);
      // Replace each result's _id (an account id) with the selected Account fields.
      // NOTE(review): this inner promise is not returned, so the outer .catch() below
      // never sees its rejection; the rethrow in the inner .catch() is left unhandled.
      Account.populate(warranties, {
        path: "_id",
        select: "contactName contactEmail name"
      })
        .then(warranties => {
          res.send(warranties);
        })
        .catch(err => {
          // Respond with 422 and rethrow the populate error.
          res.status(422).send(err);
          throw err;
        });
    })
    .catch(err => {
      // Respond with 422 and rethrow the aggregation error.
      res.status(422).send(err);
      throw err;
    });

结果如下:

[
  {
    _id: {
      _id: "5bc39dfa331c0e2cb897b61e",
      name: "Company Name 1",
      contactEmail: "email1@email.com",
      contactName: "Contact Name 1"
    },
    reason: "Warranties",
    total: 1152,
    lineItems: [
      {
        _id: "5beee0966d17bc42501f5086",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf43929e7179a56e21382bc",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf4392fe7179a56e21382bd",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ]
  },
  {
    _id: {
      _id: "5bc39dfa331c0e2cb897b61e",
      name: "Company Name 2",
      contactEmail: "email2@email.com",
      contactName: "Contact Name 2"
    },
    reason: "Warranties",
    total: 1152,
    lineItems: [
      {
        _id: "5beee0966d17bc42501f5086",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf43929e7179a56e21382bc",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf4392fe7179a56e21382bd",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ]
  }
]

如您所见,结果已经非常接近了,只剩下一些小问题。

  1. 它显示的是 _id 而不是 _accountId。我之所以默认这样做,是因为每当我尝试在 $group 中返回 _accountId 时,它都会被标记为非累加器字段;而当我在 $project 中这样做时,它又不会显示出来。数据集必须按 Warranty 模型中的 _accountId 分组。
  2. 如果可能的话,我宁愿将附加的 (contactName, contactEmail, name) 字段添加到顶级对象,而不是创建子文档。这可能很简单,也可能是不可能的,因为我对 populate 不是很熟悉,但找不到任何东西来直接回答我的问题。

最终目标是拿到返回的对象数组,并用它在另一个集合中批量创建文档。

-- 对我的具体用例的回答--

// Builds one invoice-shaped document per account entirely inside the aggregation
// pipeline: group warranties by account, join the account, then reshape the output.
Warranty.aggregate([
    // Stage 1: keep only warranties whose payStatus is "Invoiced Next Billing Cycle".
    {
      $match: {
        payStatus: "Invoiced Next Billing Cycle"
      }
    },
    // Stage 2: one document per _accountId (which becomes _id), with the fee total
    // and one line item per warranty.
    {
      $group: {
        _id: "$_accountId",
        total: {
          $sum: "$warrantyFee"
        },
        lineItems: {
          $push: {
            _id: "$_id",
            // NOTE(review): the schema sketch lists jobsiteAddressZip as Number, while
            // $concat accepts only strings — confirm the stored type of the zip field.
            jobsiteAddress: {
              $concat: [
                "$jobsiteAddressStreet",
                " ",
                "$jobsiteAddressCity",
                ", ",
                "$jobsiteAddressState",
                " ",
                "$jobsiteAddressZip"
              ]
            },
            warrantyFee: "$warrantyFee"
          }
        }
      }
    },
    // Stage 3: join the account whose _id equals the grouping key; the matches are
    // placed in the "accounts" array.
    {
      $lookup: {
        from: Account.collection.name,
        localField: "_id",
        foreignField: "_id",
        as: "accounts"
      }
    },
    // Stage 4: turn the "accounts" array into a single embedded document per result.
    {
      $unwind: "$accounts"
    },
    // Stage 5: lift the account fields to the top level and add the constant fields.
    // `new Date()` is evaluated once, when this pipeline array is built.
    {
      $project: {
        lineItems: "$lineItems",
        reason: "Warranties",
        total: "$total",
        type: "Invoice",
        date: new Date(),
        company: "$accounts.name",
        contactName: "$accounts.contactName",
        contactEmail: "$accounts.contactEmail"
      }
    },
    // Stage 6: copy the grouping key into _accountId ...
    {
      $addFields: {
        _accountId: "$_id"
      }
    },
    // Stage 7: ... and then drop the original _id from the output.
    {
      $project: {
        _id: 0
      }
    }
  ])

这给了我结果:

[
  {
    lineItems: [
      {
        _id: "5be203eb3afd8098d4988152",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ],
    reason: "Warranties",
    total: 384,
    type: "Invoice",
    date: "2018-11-21T14:08:15.052Z",
    company: "Company Name 1",
    contactName: "Contact Name 1",
    contactEmail: "email1@email.com",
    _accountId: "5be203eb3afd8098d4988152",
    referenceNumber: 1542809296615
  },
  {
    lineItems: [
      {
        _id: "5beee0966d17bc42501f5086",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf43929e7179a56e21382bc",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      },
      {
        _id: "5bf4392fe7179a56e21382bd",
        jobsiteAddress: "1234 Street Southwest Sunnyville, Wyoming 12345",
        warrantyFee: 384
      }
    ],
    reason: "Warranties",
    total: 1152,
    type: "Invoice",
    date: "2018-11-21T14:08:15.052Z",
    company: "Company Name 2",
    contactName: "Contact Name 2",
    contactEmail: "email2@email.com",
    _accountId: "5bc39dfa331c0e2cb897b61e",
    referenceNumber: 1542809295680
  }
]
  1. 它显示 _id 而不是 _accountId,因为当您使用 $group 时, 结果按指定的 _accountId 分组,因此它变成了 文档的新 _id。
  2. 将 contactName、contactEmail 和 name 移动到顶层有两种可能的解决方案:
    • 在填充之后用 JavaScript 手动处理。为此,您可以使用 'map()' 函数。
    • 另一个解决方案是在聚合管道中使用 $lookup 在同一个 mongoDB 查询中填充文档,并且在 $lookup 之后,您必须再次使用 $project 根据需要构建输出文档。

因此,当您问到如何 "populate" 聚合结果时,您实际上忽略了这里的一些概念。通常这并不是实际该做的事,不过为了解释清楚,下面列出要点:

  1. aggregate() 的输出不同于 Model.find() 或类似的操作,因为这里的目的是 "reshape the results"。这基本上意味着您用作聚合源的模型不再被视为输出模型。如果您仍然在输出中保持完全相同的文档结构,这甚至是正确的,但在您的情况下,输出与源文档显然不同。

    无论如何,它不再是您作为数据来源的 Warranty 模型的实例,而只是一个普通对象。我们稍后会讲到如何解决这个问题。

  2. 大概这里的要点是populate()无论如何有点"old hat"。这实际上只是在实施初期添加到 Mongoose 的便利功能。它真正做的是对单独集合中的相关数据执行"another query",然后将内存中的结果合并到原始集合输出。

    出于很多原因,在大多数情况下,这样做效率不高,甚至并不可取。与普遍的误解相反,这实际上并不是 "join"。

    对于真正的 "join",您实际上应该使用 $lookup 聚合管道阶段,MongoDB 用它来返回另一个集合中匹配的项目。与 populate() 不同,这实际上是在对服务器的单个请求和单个响应中完成的。这避免了网络开销,通常速度更快,而且作为 "real join",它允许您做 populate() 做不到的事情。

改为使用 $lookup

简单来说,这里您缺少的是:不要在结果返回之后于 .then() 中尝试 populate(),而是把 $lookup 添加到管道中:

  // Join: for each grouped document, fetch the account whose _id equals the
  // document's _id and store the matches in the "accounts" array.
  { "$lookup": {
    "from": Account.collection.name,
    "localField": "_id",
    "foreignField": "_id",
    "as": "accounts"
  }},
  // Replace the "accounts" array with its single element.
  { "$unwind": "$accounts" },
  // Put the joined account document in _id and keep total / lineItems.
  { "$project": {
    "_id": "$accounts",
    "total": 1,
    "lineItems": 1
  }}

注意这里有一个约束:$lookup 的输出始终是一个数组,无论获取到的相关项是一个还是多个。该管道阶段会从当前文档中取出 "localField" 的值,并用它去匹配指定的 "foreignField" 中的值。在本例中,就是从聚合 $group 结果的 _id 匹配到外部集合的 _id。

因为如上所述输出始终是一个数组,对本例来说最有效的处理方式就是直接在 $lookup 之后添加一个 $unwind 阶段。它所做的只是为目标数组中返回的每个项目各返回一个新文档,而在本例中您期望只有一个。如果 _id 在外部集合中没有匹配项,那么没有匹配的结果将被丢弃。

请注意,这实际上是核心文档中 $lookup + $unwind Coalescence 一节所描述的优化模式。这里发生了一件特殊的事情:$unwind 指令实际上会以一种高效的方式被合并到 $lookup 操作中。您可以在那里阅读更多相关信息。

使用填充

从上面的内容您应该基本能明白为什么 populate() 在这里是错误的做法了。除了输出不再由 Warranty 模型对象组成这一基本事实之外,该模型实际上只知道 _accountId 属性所描述的外部项目,而这个属性在输出中根本就不存在。

现在您可以实际定义一个模型,该模型可用于将输出对象显式转换为定义的输出类型。一个简短的演示将涉及将代码添加到您的应用程序中,例如:

// Special models

// Schema describing the shape of the aggregation output, so that plain result
// objects can be cast to Mongoose documents. _id is declared as an ObjectId
// reference to "Account".
const outputSchema = new Schema({
  _id: { type: Schema.Types.ObjectId, ref: "Account" },
  total: Number,
  lineItems: [{ address: String }]
});

// The third argument names the collection this model is bound to ('dontuseme').
const Output = mongoose.model('Output', outputSchema, 'dontuseme');

然后可以使用这个新的 Output 模型来 "cast" 生成的普通 JavaScript 对象到 Mongoose 文档中,这样 Model.populate() 这样的方法就可以实际调用:

// excerpt
// Wrap every plain aggregation result object in an Output document.
result2 = result2.map(r => new Output(r));   // Cast to Output Mongoose Documents

// Call populate on the list of documents
// The path '_id' is the field that outputSchema declares as a reference to "Account".
result2 = await Output.populate(result2, { path: '_id' })
log(result2);

由于 Output 定义了一个模式,它知道其文档的 _id 字段上存在 "reference",因此 Model.populate() 知道自己需要做什么,并返回相应的项目。

请注意,因为这实际上会生成另一个查询。即:

Mongoose: warranties.aggregate([ { '$match': { payStatus: 'Invoiced Next Billing Cycle' } }, { '$group': { _id: '$_accountId', total: { '$sum': '$warrantyFee' }, lineItems: { '$push': { _id: '$_id', address: { '$trim': { input: { '$reduce': { input: { '$objectToArray': '$address' }, initialValue: '', in: { '$concat': [ '$$value', ' ', [Object] ] } } }, chars: ' ' } } } } } } ], {})
Mongoose: accounts.find({ _id: { '$in': [ ObjectId("5bf4b591a06509544b8cf75c"), ObjectId("5bf4b591a06509544b8cf75b") ] } }, { projection: {} })

第一行是聚合的输出,之后您需要再次联系服务器,才能返回相关的 Account 模型条目。

总结

所以这些就是您的选择,但应该很清楚:现代的做法是使用 $lookup 来获得真正的 "join",而这并不是 populate() 实际在做的事情。

下面附上一份完整的代码清单,演示这些方法在实践中各自是如何工作的。这里做了一些自由发挥(artistic license),因此其中的模型可能与您的并不完全一样,但足以用可重现的方式演示基本概念:

// Load mongoose and take Schema from it. Both bindings are declared explicitly:
// a chained `const { Schema } = mongoose = require(...)` would assign to an
// undeclared `mongoose`, creating an implicit global (a ReferenceError in strict mode).
const mongoose = require('mongoose');
const { Schema } = mongoose;

// Connection string and connection options for the demo database.
const uri = 'mongodb://localhost:27017/joindemo';
const opts = { useNewUrlParser: true };

// Sensible defaults
mongoose.Promise = global.Promise;
mongoose.set('debug', true);             // log each operation as a "Mongoose: ..." line
mongoose.set('useFindAndModify', false);
mongoose.set('useCreateIndex', true);

// Schema defs

// Returns a fresh path definition: an ObjectId that references the "Account" model.
const accountRef = () => ({ type: Schema.Types.ObjectId, ref: "Account" });

// Warranty documents: a nested address, the fee, the owning account and a pay status.
const warrantySchema = new Schema({
  address: { street: String, city: String, state: String, zip: Number },
  warrantyFee: Number,
  _accountId: accountRef(),
  payStatus: String
});

// Account documents: company name plus contact name and e-mail.
const accountSchema = new Schema({ name: String, contactName: String, contactEmail: String });

// Special models

// Shape of the grouped aggregation output; its _id is itself an Account reference,
// which is what lets Model.populate() resolve it.
const outputSchema = new Schema({
  _id: accountRef(),
  total: Number,
  lineItems: [{ address: String }]
});

// Registration order is Output, Warranty, Account.
// NOTE(review): the cleanup step iterates conn.models, so this order presumably
// determines the order of the logged deleteMany calls — confirm before reordering.
const Output = mongoose.model('Output', outputSchema, 'dontuseme');
const Warranty = mongoose.model('Warranty', warrantySchema);
const Account = mongoose.model('Account', accountSchema);


// log helper: write a value to stdout as JSON indented by two spaces
const log = (data) => {
  const pretty = JSON.stringify(data, null, 2);
  console.log(pretty);
};

// main
// Seeds the demo database, then builds the same per-account report in two ways:
//   1. a single aggregate that joins the account with $lookup;
//   2. an aggregate whose plain results are cast to Output documents and then
//      filled in with Model.populate(), which issues a second query.
(async function() {

  // Stages shared by both approaches: filter the warranties due on the next
  // billing cycle, then group them by account, summing the fee and collecting
  // one line item per warranty. A function is used so every aggregate call
  // receives its own fresh stage objects.
  const groupByAccountStages = () => [
    { "$match": {
      "payStatus": "Invoiced Next Billing Cycle"
    }},
    { "$group": {
      "_id": "$_accountId",
      "total": { "$sum": "$warrantyFee" },
      "lineItems": {
        "$push": {
          "_id": "$_id",
          // Join every value of the address sub-document with single spaces,
          // then trim the leading space left by the empty initial value.
          "address": {
            "$trim": {
              "input": {
                "$reduce": {
                  "input": { "$objectToArray": "$address" },
                  "initialValue": "",
                  "in": {
                    "$concat": [ "$$value", " ", { "$toString": "$$this.v" } ] }
                }
              },
              "chars": " "
            }
          }
        }
      }
    }}
  ];

  try {

    const conn = await mongoose.connect(uri, opts);

    // clean models
    await Promise.all(
      Object.values(conn.models).map(m => m.deleteMany())
    );

    // set up data: three accounts are inserted, only the first two get warranties
    const [first, second] = await Account.insertMany(
      [
        ['First Account', 'First Person', 'first@example.com'],
        ['Second Account', 'Second Person', 'second@example.com'],
        ['Third Account', 'Third Person', 'third@example.com']
      ].map(([name, contactName, contactEmail]) =>
        ({ name, contactName, contactEmail })
      )
    );

    await Warranty.insertMany(
      [
        {
          address: {
            street: '1 Some street',
            city: 'Somewhere',
            state: 'TX',
            zip: 1234
          },
          warrantyFee: 100,
          _accountId: first,
          payStatus: 'Invoiced Next Billing Cycle'
        },
        {
          address: {
            street: '2 Other street',
            city: 'Elsewhere',
            state: 'CA',
            zip: 5678
          },
          warrantyFee: 100,
          _accountId: first,
          payStatus: 'Invoiced Next Billing Cycle'
        },
        {
          // Different payStatus: excluded by the $match stage.
          address: {
            street: '3 Other street',
            city: 'Elsewhere',
            state: 'NY',
            zip: 1928
          },
          warrantyFee: 100,
          _accountId: first,
          payStatus: 'Invoiced Already'
        },
        {
          address: {
            street: '21 Jump street',
            city: 'Anywhere',
            state: 'NY',
            zip: 5432
          },
          warrantyFee: 100,
          _accountId: second,
          payStatus: 'Invoiced Next Billing Cycle'
        }
      ]
    );

    // Aggregate $lookup
    const result1 = await Warranty.aggregate([
      ...groupByAccountStages(),
      // Join the account whose _id equals the grouping key.
      { "$lookup": {
        "from": Account.collection.name,
        "localField": "_id",
        "foreignField": "_id",
        "as": "accounts"
      }},
      // $lookup output is an array; unwind it to one embedded document.
      { "$unwind": "$accounts" },
      // Put the joined account document in _id, keep total and lineItems.
      { "$project": {
        "_id": "$accounts",
        "total": 1,
        "lineItems": 1
      }}
    ]);

    log(result1);

    // Convert and populate
    const grouped = await Warranty.aggregate(groupByAccountStages());

    // Cast the plain objects to Output documents, then populate their _id.
    const result2 = await Output.populate(
      grouped.map(r => new Output(r)),
      { path: '_id' }
    );
    log(result2);

  } catch(e) {
    console.error(e);
    // Make a failed run visible to the caller: process.exit() without an
    // argument exits with process.exitCode when it has been set.
    process.exitCode = 1;
  } finally {
    process.exit();
  }

})();

完整输出:

Mongoose: dontuseme.deleteMany({}, {})
Mongoose: warranties.deleteMany({}, {})
Mongoose: accounts.deleteMany({}, {})
Mongoose: accounts.insertMany([ { _id: 5bf4b591a06509544b8cf75b, name: 'First Account', contactName: 'First Person', contactEmail: 'first@example.com', __v: 0 }, { _id: 5bf4b591a06509544b8cf75c, name: 'Second Account', contactName: 'Second Person', contactEmail: 'second@example.com', __v: 0 }, { _id: 5bf4b591a06509544b8cf75d, name: 'Third Account', contactName: 'Third Person', contactEmail: 'third@example.com', __v: 0 } ], {})
Mongoose: warranties.insertMany([ { _id: 5bf4b591a06509544b8cf75e, address: { street: '1 Some street', city: 'Somewhere', state: 'TX', zip: 1234 }, warrantyFee: 100, _accountId: 5bf4b591a06509544b8cf75b, payStatus: 'Invoiced Next Billing Cycle', __v: 0 }, { _id: 5bf4b591a06509544b8cf75f, address: { street: '2 Other street', city: 'Elsewhere', state: 'CA', zip: 5678 }, warrantyFee: 100, _accountId: 5bf4b591a06509544b8cf75b, payStatus: 'Invoiced Next Billing Cycle', __v: 0 }, { _id: 5bf4b591a06509544b8cf760, address: { street: '3 Other street', city: 'Elsewhere', state: 'NY', zip: 1928 }, warrantyFee: 100, _accountId: 5bf4b591a06509544b8cf75b, payStatus: 'Invoiced Already', __v: 0 }, { _id: 5bf4b591a06509544b8cf761, address: { street: '21 Jump street', city: 'Anywhere', state: 'NY', zip: 5432 }, warrantyFee: 100, _accountId: 5bf4b591a06509544b8cf75c, payStatus: 'Invoiced Next Billing Cycle', __v: 0 } ], {})
Mongoose: warranties.aggregate([ { '$match': { payStatus: 'Invoiced Next Billing Cycle' } }, { '$group': { _id: '$_accountId', total: { '$sum': '$warrantyFee' }, lineItems: { '$push': { _id: '$_id', address: { '$trim': { input: { '$reduce': { input: { '$objectToArray': '$address' }, initialValue: '', in: { '$concat': [ '$$value', ' ', [Object] ] } } }, chars: ' ' } } } } } }, { '$lookup': { from: 'accounts', localField: '_id', foreignField: '_id', as: 'accounts' } }, { '$unwind': '$accounts' }, { '$project': { _id: '$accounts', total: 1, lineItems: 1 } } ], {})
[
  {
    "total": 100,
    "lineItems": [
      {
        "_id": "5bf4b591a06509544b8cf761",
        "address": "21 Jump street Anywhere NY 5432"
      }
    ],
    "_id": {
      "_id": "5bf4b591a06509544b8cf75c",
      "name": "Second Account",
      "contactName": "Second Person",
      "contactEmail": "second@example.com",
      "__v": 0
    }
  },
  {
    "total": 200,
    "lineItems": [
      {
        "_id": "5bf4b591a06509544b8cf75e",
        "address": "1 Some street Somewhere TX 1234"
      },
      {
        "_id": "5bf4b591a06509544b8cf75f",
        "address": "2 Other street Elsewhere CA 5678"
      }
    ],
    "_id": {
      "_id": "5bf4b591a06509544b8cf75b",
      "name": "First Account",
      "contactName": "First Person",
      "contactEmail": "first@example.com",
      "__v": 0
    }
  }
]
Mongoose: warranties.aggregate([ { '$match': { payStatus: 'Invoiced Next Billing Cycle' } }, { '$group': { _id: '$_accountId', total: { '$sum': '$warrantyFee' }, lineItems: { '$push': { _id: '$_id', address: { '$trim': { input: { '$reduce': { input: { '$objectToArray': '$address' }, initialValue: '', in: { '$concat': [ '$$value', ' ', [Object] ] } } }, chars: ' ' } } } } } } ], {})
Mongoose: accounts.find({ _id: { '$in': [ ObjectId("5bf4b591a06509544b8cf75c"), ObjectId("5bf4b591a06509544b8cf75b") ] } }, { projection: {} })
[
  {
    "_id": {
      "_id": "5bf4b591a06509544b8cf75c",
      "name": "Second Account",
      "contactName": "Second Person",
      "contactEmail": "second@example.com",
      "__v": 0
    },
    "total": 100,
    "lineItems": [
      {
        "_id": "5bf4b591a06509544b8cf761",
        "address": "21 Jump street Anywhere NY 5432"
      }
    ]
  },
  {
    "_id": {
      "_id": "5bf4b591a06509544b8cf75b",
      "name": "First Account",
      "contactName": "First Person",
      "contactEmail": "first@example.com",
      "__v": 0
    },
    "total": 200,
    "lineItems": [
      {
        "_id": "5bf4b591a06509544b8cf75e",
        "address": "1 Some street Somewhere TX 1234"
      },
      {
        "_id": "5bf4b591a06509544b8cf75f",
        "address": "2 Other street Elsewhere CA 5678"
      }
    ]
  }
]