计算字段在 collection 中出现的次数,同时保持所有数据不重复
Count how many times a field shows up in a collection while keeping all data with no duplicates
假设我有这样一个 collection:
{
_id : 544e97123c9ef694fc68e21b,
title: "First Title",
notebook: {
title: "Misc",
slug: "misc"
}
}
{
_id: 54ab035a849788d0921d8eb2,
title: "Second Title",
notebook: {
title: "Personal",
slug: "personal"
}
}
{
_id: 544e97123c9ef694fc68e21b,
title: "Third Title",
notebook: {
title: "Misc",
slug: "misc"
}
}
在我的视图(view)中,我希望显示每个笔记本的标题及其被使用的次数,并用该笔记本的 slug 生成链接,顺序不限。例如:
<a href="/notebooks/misc">Misc(2)</a>
<a href="/notebooks/personal">Personal(1)</a>
我通过遍历每个文档实现了这一点,但问题是结果中有重复项,因为它遍历的是每一个文档。所以在我的视图中它看起来像这样:
<a href="/notebooks/misc">Misc(2)</a>
<a href="/notebooks/personal">Personal(1)</a>
<a href="/notebooks/misc">Misc(2)</a>
如何抓取notebook.title、notebook.slug,数一数,不重复?
这是我目前执行此操作的怪异方式(这会导致重复):
/**
 * Tally how many notes reference each notebook slug.
 *
 * @param {Array} notes - Notes to count; each entry is read at `notebook.slug`.
 * @returns {Object} Prototype-less map from slug to number of occurrences.
 */
function countNotebooks(notes) {
  // Prototype-less object so a slug can never collide with an inherited key.
  var tally = Object.create(null);
  var index = 0;
  while (index < notes.length) {
    var slug = notes[index].notebook.slug;
    // First sighting starts at 1; later sightings increment the running total.
    tally[slug] = tally[slug] === undefined ? 1 : tally[slug] + 1;
    index += 1;
  }
  return tally;
}
// GET /notebooks: load every note and render the notebooks index page.
app.get('/notebooks', function(req, res, next) {
  Note.find(function(err, notes) {
    if (err) {
      // Throwing inside this asynchronous callback would bypass Express's
      // error handling; hand the error to the error middleware instead.
      return next(err);
    }
    res.render('notebooks/index.html', {
      title: 'All Notebooks',
      jumbotron: 'Notebooks',
      // Every note is passed through to the template unchanged.
      notes: notes,
      // Map of notebook slug -> number of notes using that slug.
      notesTable: countNotebooks(notes)
    });
  });
});
notebooks/index.html:
{# Renders one entry per note, so a notebook shared by several notes is repeated. #}
{# The count shown is looked up in notesTable by the note's notebook slug. #}
{% for note in notes %}
<article class="note">
<h3 class="note-title">
<a href="/notebooks/{{ note.notebook.slug }}">{{ note.notebook.title }}</a> <span class="count">({{ notesTable[note.notebook.slug] }})</span>
</h3>
</article>
{% endfor %}
您可以通过按公共键(common key)对数据进行分组来实现此目的。MongoDB 的 aggregation framework 就是为这种聚合和操作而设计的。
首先,请允许我更正您的数据样本,因为其中有一个重复的 _id 值,这是不允许的。
{
"_id" : ObjectId("544e97123c9ef694fc68e21b"),
"title" : "First Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
},
{
"_id" : ObjectId("54ab035a849788d0921d8eb2"),
"title" : "Second Title",
"notebook" : {
"title" : "Personal",
"slug" : "personal"
}
},
{
"_id" : ObjectId("54ac074fa8a621d3fd49ac91"),
"title" : "Third Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
}
要为数据中出现的 "slug" 添加 "count" 值,您可以形成这样的管道:
// Attach a per-slug "count" to every note while keeping each note as its own document.
Note.aggregate([
// Group on the slug values and put other fields in an array
{ "$group": {
"_id": "$notebook.slug",
// Adds 1 for every grouped document, i.e. the number of notes sharing the slug
"count": { "$sum": 1 },
// Keep the original fields of each note so they can be restored after grouping
"docs": {
"$push": {
"_id": "$_id",
"title": "$title",
"notebook": "$notebook"
}
}
}},
// Unwind the created array elements
{ "$unwind": "$docs" },
// Re-structure back to original form
{ "$project": {
"_id": "$docs._id",
"title": "$docs.title",
"count": "$count",
"notebook": "$docs.notebook"
}},
// Sort in original order (or as desired)
{ "$sort": { "_id": 1 } }
],function(err,result) {
// NOTE(review): the callback body is empty in this example; `err` and `result` are not used here.
});
这会给你带来这样的结果:
{
"_id" : ObjectId("544e97123c9ef694fc68e21b"),
"count" : 2,
"title" : "First Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
},
{
"_id" : ObjectId("54ab035a849788d0921d8eb2"),
"count" : 1,
"title" : "Second Title",
"notebook" : {
"title" : "Personal",
"slug" : "personal"
}
},
{
"_id" : ObjectId("54ac074fa8a621d3fd49ac91"),
"count" : 2,
"title" : "Third Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
}
以上适用于您想要保留文档("retain the documents")的情况;但如果您只想要类似 "facet count" 的唯一 "slugs",那么只需使用第一个 $group 阶段,并用 $first 取笔记本标题,而不是用 $push 保存其他内容:
// Produce one result per distinct notebook slug, with its count and title.
Note.aggregate([
// Group on the slug values, counting the notes and keeping the first notebook title seen per slug
{ "$group": {
"_id": "$notebook.slug",
"count": { "$sum": 1 },
"title": { "$first": "$notebook.title" }
}},
],function(err,result) {
// NOTE(review): the callback body is empty in this example; `err` and `result` are not used here.
});
这应该是不言自明的,但还是总结一下。初始的 $group 按 "slug" 的值分组,并使用 $sum 运算符统计出现次数。为了保留文档的其余数据,使用 $push 将其放入该 "slug" 下的数组中。
分组之后,使用 $unwind 将数组反规范化(de-normalize)为单独的文档,然后使用 $project 将其重新构造回原始格式。由于在 $group 管道阶段顺序已被改变,最后的 $sort 操作用于恢复原始顺序,或按您想要的任何方式排序。
这不仅可以获得结果,还允许您使用 $limit 和 $skip 运算符对数据进行分页("page"),如果需要,甚至可以根据这些计数值对数据进行排序。
查看完整的 aggregation pipeline operator reference 以获得完整的描述和其他可以在这里完成的事情。
您需要:
- 按 notebook.slug 和 notebook.title 字段进行 Group(分组),以消除重复项。
- 由于显示顺序并不重要,您可以跳过此处的 sort 阶段。
- 对分组结果进行 Project(投影),得到所需的字段。
代码:
// Count notes per distinct (title, slug) notebook pair and flatten the result.
Model.aggregate([
// Group on the compound title/slug key so each notebook appears once; "count" adds 1 per note
{$group:{"_id":{"title":"$notebook.title",
"slug":"$notebook.slug"},
"count":{$sum:1}}},
// Lift title and slug out of the compound _id, keep count, and drop _id from the output
{$project:{"title":"$_id.title",
"slug":"$_id.slug",
"count":1,"_id":0}}
],function(err,data){
// handle response and store it in the `notes` variable to be
// used to display.
})
输出(o/p):
{ "count" : 1, "title" : "Personal", "slug" : "personal" }
{ "count" : 2, "title" : "Misc", "slug" : "misc" }
并显示为,
{# Expects `notes` to hold the aggregated rows, each exposing `slug`, `title` and `count`. #}
{% for note in notes %}
<article class="note">
<h3 class="note-title">
<a href="/notebooks/{{ note.slug }}">{{ note.title }}</a>
<span class="count">({{note.count}})</span>
</h3>
</article>
{% endfor %}
假设我有这样一个 collection:
{
_id : 544e97123c9ef694fc68e21b,
title: "First Title",
notebook: {
title: "Misc",
slug: "misc"
}
}
{
_id: 54ab035a849788d0921d8eb2,
title: "Second Title",
notebook: {
title: "Personal",
slug: "personal"
}
}
{
_id: 544e97123c9ef694fc68e21b,
title: "Third Title",
notebook: {
title: "Misc",
slug: "misc"
}
}
在我的视图(view)中,我希望显示每个笔记本的标题及其被使用的次数,并用该笔记本的 slug 生成链接,顺序不限。例如:
<a href="/notebooks/misc">Misc(2)</a>
<a href="/notebooks/personal">Personal(1)</a>
我通过遍历每个文档实现了这一点,但问题是结果中有重复项,因为它遍历的是每一个文档。所以在我的视图中它看起来像这样:
<a href="/notebooks/misc">Misc(2)</a>
<a href="/notebooks/personal">Personal(1)</a>
<a href="/notebooks/misc">Misc(2)</a>
如何抓取notebook.title、notebook.slug,数一数,不重复?
这是我目前执行此操作的怪异方式(这会导致重复):
/**
 * Count the notes that belong to each notebook, keyed by notebook slug.
 *
 * @param {Array} notes - Notes to inspect; only `notebook.slug` is read.
 * @returns {Object} Object with no prototype mapping each slug to its count.
 */
function countNotebooks(notes) {
  // Created without a prototype so only counted slugs exist as keys.
  var counts = Object.create(null);
  for (var n = 0; n < notes.length; n += 1) {
    var key = notes[n].notebook.slug;
    if (typeof counts[key] !== 'undefined') {
      // Slug already seen: bump its count and move on.
      counts[key] += 1;
      continue;
    }
    counts[key] = 1;
  }
  return counts;
}
// GET /notebooks: load every note and render the notebooks index page.
app.get('/notebooks', function(req, res, next) {
  Note.find(function(err, notes) {
    if (err) {
      // Throwing inside this asynchronous callback would bypass Express's
      // error handling; hand the error to the error middleware instead.
      return next(err);
    }
    res.render('notebooks/index.html', {
      title: 'All Notebooks',
      jumbotron: 'Notebooks',
      // Every note is passed through to the template unchanged.
      notes: notes,
      // Map of notebook slug -> number of notes using that slug.
      notesTable: countNotebooks(notes)
    });
  });
});
notebooks/index.html:
{# Renders one entry per note, so a notebook shared by several notes is repeated. #}
{# The count shown is looked up in notesTable by the note's notebook slug. #}
{% for note in notes %}
<article class="note">
<h3 class="note-title">
<a href="/notebooks/{{ note.notebook.slug }}">{{ note.notebook.title }}</a> <span class="count">({{ notesTable[note.notebook.slug] }})</span>
</h3>
</article>
{% endfor %}
您可以通过按公共键(common key)对数据进行分组来实现此目的。MongoDB 的 aggregation framework 就是为这种聚合和操作而设计的。
首先,请允许我更正您的数据样本,因为其中有一个重复的 _id 值,这是不允许的。
{
"_id" : ObjectId("544e97123c9ef694fc68e21b"),
"title" : "First Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
},
{
"_id" : ObjectId("54ab035a849788d0921d8eb2"),
"title" : "Second Title",
"notebook" : {
"title" : "Personal",
"slug" : "personal"
}
},
{
"_id" : ObjectId("54ac074fa8a621d3fd49ac91"),
"title" : "Third Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
}
要为数据中出现的 "slug" 添加 "count" 值,您可以形成这样的管道:
// Attach a per-slug "count" to every note while keeping each note as its own document.
Note.aggregate([
// Group on the slug values and put other fields in an array
{ "$group": {
"_id": "$notebook.slug",
// Adds 1 for every grouped document, i.e. the number of notes sharing the slug
"count": { "$sum": 1 },
// Keep the original fields of each note so they can be restored after grouping
"docs": {
"$push": {
"_id": "$_id",
"title": "$title",
"notebook": "$notebook"
}
}
}},
// Unwind the created array elements
{ "$unwind": "$docs" },
// Re-structure back to original form
{ "$project": {
"_id": "$docs._id",
"title": "$docs.title",
"count": "$count",
"notebook": "$docs.notebook"
}},
// Sort in original order (or as desired)
{ "$sort": { "_id": 1 } }
],function(err,result) {
// NOTE(review): the callback body is empty in this example; `err` and `result` are not used here.
});
这会给你带来这样的结果:
{
"_id" : ObjectId("544e97123c9ef694fc68e21b"),
"count" : 2,
"title" : "First Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
},
{
"_id" : ObjectId("54ab035a849788d0921d8eb2"),
"count" : 1,
"title" : "Second Title",
"notebook" : {
"title" : "Personal",
"slug" : "personal"
}
},
{
"_id" : ObjectId("54ac074fa8a621d3fd49ac91"),
"count" : 2,
"title" : "Third Title",
"notebook" : {
"title" : "Misc",
"slug" : "misc"
}
}
以上适用于您想要保留文档("retain the documents")的情况;但如果您只想要类似 "facet count" 的唯一 "slugs",那么只需使用第一个 $group 阶段,并用 $first 取笔记本标题,而不是用 $push 保存其他内容:
// Produce one result per distinct notebook slug, with its count and title.
Note.aggregate([
// Group on the slug values, counting the notes and keeping the first notebook title seen per slug
{ "$group": {
"_id": "$notebook.slug",
"count": { "$sum": 1 },
"title": { "$first": "$notebook.title" }
}},
],function(err,result) {
// NOTE(review): the callback body is empty in this example; `err` and `result` are not used here.
});
这应该是不言自明的,但还是总结一下。初始的 $group 按 "slug" 的值分组,并使用 $sum 运算符统计出现次数。为了保留文档的其余数据,使用 $push 将其放入该 "slug" 下的数组中。
分组之后,使用 $unwind 将数组反规范化(de-normalize)为单独的文档,然后使用 $project 将其重新构造回原始格式。由于在 $group 管道阶段顺序已被改变,最后的 $sort 操作用于恢复原始顺序,或按您想要的任何方式排序。
这不仅可以获得结果,还允许您使用 $limit 和 $skip 运算符对数据进行分页("page"),如果需要,甚至可以根据这些计数值对数据进行排序。
查看完整的 aggregation pipeline operator reference 以获得完整的描述和其他可以在这里完成的事情。
您需要:
- 按 notebook.slug 和 notebook.title 字段进行 Group(分组),以消除重复项。
- 由于显示顺序并不重要,您可以跳过此处的 sort 阶段。
- 对分组结果进行 Project(投影),得到所需的字段。
代码:
// Count notes per distinct (title, slug) notebook pair and flatten the result.
Model.aggregate([
// Group on the compound title/slug key so each notebook appears once; "count" adds 1 per note
{$group:{"_id":{"title":"$notebook.title",
"slug":"$notebook.slug"},
"count":{$sum:1}}},
// Lift title and slug out of the compound _id, keep count, and drop _id from the output
{$project:{"title":"$_id.title",
"slug":"$_id.slug",
"count":1,"_id":0}}
],function(err,data){
// handle response and store it in the `notes` variable to be
// used to display.
})
输出(o/p):
{ "count" : 1, "title" : "Personal", "slug" : "personal" }
{ "count" : 2, "title" : "Misc", "slug" : "misc" }
并显示为,
{# Expects `notes` to hold the aggregated rows, each exposing `slug`, `title` and `count`. #}
{% for note in notes %}
<article class="note">
<h3 class="note-title">
<a href="/notebooks/{{ note.slug }}">{{ note.title }}</a>
<span class="count">({{note.count}})</span>
</h3>
</article>
{% endfor %}