查询 MongoDB 中用户的评论
queries on users' comments in MongoDB
我是 MongoDB 的新手,我正在尝试为网站评论编写一些查询。
我有一个 collection 这样的:
{
"_id": "post1",
"comments" : [
{
"user" : "goodman",
"text" : "hello",
"replies" : [
{
"user" : "littleboy",
"text" : "hi"
}
]
},
{
"user" : "richpapa",
"text" : "haha"
},
{
"user" : "goodman",
"text" : "how is it going?!"
}
]
}
{
"_id": "post2",
"comments" : [
{
"user" : "katze",
"text" : "meow!",
"replies" : [
{
"user" : "kitten",
"text" : "miaw miaw!"
},
{
"user" : "littleboy",
"text" : "mioooooo"
},
{
"user" : "goodman",
"text" : "meme"
}
]
},
{
"user" : "kitten",
"text" : "ich bin katze",
"replies" : [
{
"user" : "sally",
"text" : "ja, du bist katze maw"
}
]
}
]
}
每个post可以有多个评论,每个评论也可以有多个回复。
以下是我未能做到的:
首先,我想打印回复自己评论的用户的姓名和评论。
其次,我想打印在网站上发送了一条以上消息的用户的姓名和数量(评论和回复都应该考虑)。
这是我对第一部分的尝试:
db.website.aggregate([{$group : {_id : "$comments.user",
repliers: {$addToSet : "$comments.replies.user"}}}])
如果我没记错的话,这会将发送评论的用户的姓名以及回复者的姓名作为数组给出。但是我无法用它来获得我想要的东西。
第一部分:
// Lists every reply whose author is the same user who wrote the parent comment.
db.collection.aggregate([
// Stage 1: emit one document per element of the comments array.
{
$unwind: "$comments"
},
// Stage 2: drop _id and keep, in items, only the replies written by the
// comment's own author.
{
$project: {
_id: 0,
items: {
$filter: {
input: "$comments.replies",
as: "item",
cond: {
$eq: [
"$$item.user",
"$comments.user"
]
}
}
}
}
},
// Stage 3: emit one document per matching reply. By default $unwind produces
// no output for a null, missing or empty items array, so comments without a
// self-reply disappear here.
{
$unwind: "$items"
},
// Stage 4: output the reply's author and the reply's text.
{
$project: {
"self_repling_user": "$items.user",
"self_repling_comment": "$items.text"
}
}
])
解释:
- 展开 comments 数组,使每条评论成为一个独立的文档。
- 只保留回复者与评论作者(comments.user)相同的回复。
- 展开筛选后的回复数组,使每条自我回复成为一个文档。
- 只输出自我回复的用户及其回复内容。
转至第二部分:
// Counts every message (top-level comment or reply) per user and returns the
// users who sent more than one message.
db.collection.aggregate([
  // One document per comment.
  {
    $unwind: "$comments"
  },
  // Build one array holding the comment's author plus the authors of all of
  // its replies. Unwinding "$comments.replies" instead would drop every
  // comment that has no replies and would repeat the comment's author once
  // per reply, so the counts would be wrong.
  {
    $project: {
      _id: 0,
      users: {
        $concatArrays: [
          ["$comments.user"],
          // "$comments.replies.user" resolves to the array of reply authors;
          // $ifNull substitutes [] when the comment has no replies field.
          { $ifNull: ["$comments.replies.user", []] }
        ]
      }
    }
  },
  // One document per message author.
  {
    $unwind: "$users"
  },
  // Number of messages per user.
  {
    $group: {
      _id: "$users",
      count: {
        $sum: 1
      }
    }
  },
  // Keep only users with more than one message.
  {
    $match: {
      count: {
        $gt: 1
      }
    }
  }
])
解释:
- 展开comments数组
- 把每条评论的作者和该评论下全部回复的作者合并成一个users数组
- 展开users数组
- 按用户分组以统计评论和回复的总数
- 只保留发送过一条以上消息的用户
除了“评论套评论”的嵌套结构之外,另一种做法是把结构展平,将每个条目(帖子或回复)都视为一个“post”,然后使用 $graphLookup 将它们组合在一起。原始帖子可以根据需要在文档中携带额外的信息,后续的回复可以携带得更少,但基本结构是一样的。考虑以下条目;缩进(制表符)仅用于在视觉上突出层次结构,请注意 _id 和 ref 之间的关系:
// Sample documents for the flattened design: every post or reply is its own
// document, and ref holds the _id of the document it replies to.
// NOTE(review): the queries that follow read from collection foo; presumably
// r is inserted there first — the insert is not shown.
var r = [
// target: root post of the first thread (no ref)
{"_id": "P1", from:"OP", text: "orig", ts:new ISODate("2020-01-01")}
// level 0: direct replies to P1
,{"_id": "P2", from:"A", ref:"P1", text: "not bad", ts:new ISODate("2020-01-01")}
,{"_id": "P3", from:"B", ref:"P1", text: "corn", ts:new ISODate("2020-01-01")}
// level 1: reply to P3
,{"_id": "P4", from:"C", ref:"P3", text: "maybe", ts:new ISODate("2020-01-02")}
// level 2: reply to P4
,{"_id": "P41", from:"D", ref:"P4", text: "hoo", ts:new ISODate("2020-01-08")}
// level 3: reply to P41
,{"_id": "P61", from:"A", ref:"P41", text: "concerned", ts:new ISODate("2020-01-02")}
// level 1 again: a second reply to P3
,{"_id": "P42", from:"E", ref:"P3", text: "hello", ts:new ISODate("2020-01-02")}
// root post of a second, separate thread (no ref)
,{"_id": "P5", from:"OP2", text: "orig2", ts:new ISODate("2020-01-01")}
// level 0 of the second thread: reply to P5
,{"_id": "P6", from:"C", ref:"P5", text: "another thing...", ts:new ISODate("2020-01-06")}
];
如果我们要查找以 P1 为起点的整个对话,可以这样做:
// Fetches P1 together with every document that replies to it, directly or
// through a chain of replies.
db.foo.aggregate([
// Start from the root post of the thread.
{$match: {"_id": "P1"}},
// Recursively search foo: first the documents whose ref equals P1's _id, then
// the documents whose ref equals the _id of any document already found.
{$graphLookup: {
from: "foo",
connectToField: "ref",
connectFromField: "_id",
startWith: "$_id", // usually value of connectFromField
depthField: "n", // each matched document gets n = its recursion depth (0 for direct replies)
as: "zz" // matched documents are returned in this array field
}}
]);
产生这个:
{
"_id" : "P1",
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"zz" : [
{
"_id" : "P42",
"from" : "E",
"ref" : "P3",
"text" : "hello",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(1)
},
{
"_id" : "P4",
"from" : "C",
"ref" : "P3",
"text" : "maybe",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(1)
},
{
"_id" : "P2",
"from" : "A",
"ref" : "P1",
"text" : "not bad",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
},
{
"_id" : "P41",
"from" : "D",
"ref" : "P4",
"text" : "hoo",
"ts" : ISODate("2020-01-08T00:00:00Z"),
"n" : NumberLong(2)
},
{
"_id" : "P61",
"from" : "A",
"ref" : "P41",
"text" : "concerned",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(3)
},
{
"_id" : "P3",
"from" : "B",
"ref" : "P1",
"text" : "corn",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
}
]
}
这种设计有一些有趣的优点:
- 无需担心嵌套结构带来的递归处理。无论评论-回复对话实际有多深,查询都是相同的,并且性能非常好。
- 跨多个帖子线程对其他内容(如日期、参与者、点赞/投票等)进行二次查找要容易得多,因为结构是扁平的。例如,在 from 上建立索引,就可以快速获得某个特定用户的所有发言。查找层次结构中的最新评论,或自您上次查看以来的新评论(当然,上次查看的时间需要另行保存),都变得非常简单。扫描不当用语同样简单,因为每个文档的相同位置都有一个 text 字段。
- 因为 $graphLookup 提供了 depthField(不过必须承认,返回结果并不按深度排序……),裁剪回复树变得很容易。例如,要只取 2 条第一级(0 级)回复,可以在 $graphLookup 之后添加此阶段:
,{$addFields: {zz: {$slice: [ {$filter: {
input: "$zz",
cond: {$eq:[0,"$$this.n"]}
}}, 0, 2] }
}}
请注意层次结构和日期是相互独立的。要在线程中找到最新的一条回复,我们可以使用以下阶段。请注意 2020-01-08 是文档 P41 上的时间,也是整个线程中最新的条目:
,{$addFields: {zz: {$reduce: {
input: "$zz",
initialValue: {ts:new ISODate("2000-01-01")}, // init to a REALLY old date
in: {$cond: [
{$gt:["$$this.ts","$$value.ts"]}, // if higher than prev
"$$this", // then use this one
"$$value" // else keep highest seen so far
]}
}}
}}
{
"_id" : "P1",
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"zz" : {
"_id" : "P41",
"from" : "D",
"ref" : "P4",
"text" : "hoo",
"ts" : ISODate("2020-01-08T00:00:00Z"),
"n" : NumberLong(2)
}
}
最后,有时您会希望把初始目标文档和 $graphLookup 得到的文档链放在一起处理。为此,只需用 $project 将顶层文档的信息与 $graphLookup 生成的数组串联起来:
,{$project: {zz: {$concatArrays: [
// Make array of 1. Call n -1 to identify it as the
// initial target doc:
[{_id: "$_id",n:-1,from:"$from",text:"$text",ts:"$ts"}], // make array of 1
"$zz"
]}
}}
{
"_id" : "P1",
"zz" : [
{
"_id" : "P1",
"n" : -1,
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z")
},
{
"_id" : "P2",
"from" : "A",
"ref" : "P1",
"text" : "not bad",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
},
....
我是 MongoDB 的新手,我正在尝试为网站评论编写一些查询。
我有一个 collection 这样的:
{
"_id": "post1",
"comments" : [
{
"user" : "goodman",
"text" : "hello",
"replies" : [
{
"user" : "littleboy",
"text" : "hi"
}
]
},
{
"user" : "richpapa",
"text" : "haha"
},
{
"user" : "goodman",
"text" : "how is it going?!"
}
]
}
{
"_id": "post2",
"comments" : [
{
"user" : "katze",
"text" : "meow!",
"replies" : [
{
"user" : "kitten",
"text" : "miaw miaw!"
},
{
"user" : "littleboy",
"text" : "mioooooo"
},
{
"user" : "goodman",
"text" : "meme"
}
]
},
{
"user" : "kitten",
"text" : "ich bin katze",
"replies" : [
{
"user" : "sally",
"text" : "ja, du bist katze maw"
}
]
}
]
}
每个post可以有多个评论,每个评论也可以有多个回复。 以下是我未能做到的:
首先,我想打印回复自己评论的用户的姓名和评论。 其次,我想打印在网站上发送了一条以上消息的用户的姓名和数量(评论和回复都应该考虑)。
这是我对第一部分的尝试:
db.website.aggregate([{$group : {_id : "$comments.user",
repliers: {$addToSet : "$comments.replies.user"}}}])
如果我没记错的话,这会将发送评论的用户的姓名以及回复者的姓名作为数组给出。但是我无法用它来获得我想要的东西。
第一部分:
// Lists every reply whose author is the same user who wrote the parent comment.
db.collection.aggregate([
// Stage 1: emit one document per element of the comments array.
{
$unwind: "$comments"
},
// Stage 2: drop _id and keep, in items, only the replies written by the
// comment's own author.
{
$project: {
_id: 0,
items: {
$filter: {
input: "$comments.replies",
as: "item",
cond: {
$eq: [
"$$item.user",
"$comments.user"
]
}
}
}
}
},
// Stage 3: emit one document per matching reply. By default $unwind produces
// no output for a null, missing or empty items array, so comments without a
// self-reply disappear here.
{
$unwind: "$items"
},
// Stage 4: output the reply's author and the reply's text.
{
$project: {
"self_repling_user": "$items.user",
"self_repling_comment": "$items.text"
}
}
])
解释:
- 展开 comments 数组,使每条评论成为一个独立的文档。
- 只保留回复者与评论作者(comments.user)相同的回复。
- 展开筛选后的回复数组,使每条自我回复成为一个文档。
- 只输出自我回复的用户及其回复内容。
转至第二部分:
// Counts every message (top-level comment or reply) per user and returns the
// users who sent more than one message.
db.collection.aggregate([
  // One document per comment.
  {
    $unwind: "$comments"
  },
  // Build one array holding the comment's author plus the authors of all of
  // its replies. Unwinding "$comments.replies" instead would drop every
  // comment that has no replies and would repeat the comment's author once
  // per reply, so the counts would be wrong.
  {
    $project: {
      _id: 0,
      users: {
        $concatArrays: [
          ["$comments.user"],
          // "$comments.replies.user" resolves to the array of reply authors;
          // $ifNull substitutes [] when the comment has no replies field.
          { $ifNull: ["$comments.replies.user", []] }
        ]
      }
    }
  },
  // One document per message author.
  {
    $unwind: "$users"
  },
  // Number of messages per user.
  {
    $group: {
      _id: "$users",
      count: {
        $sum: 1
      }
    }
  },
  // Keep only users with more than one message.
  {
    $match: {
      count: {
        $gt: 1
      }
    }
  }
])
解释:
- 展开comments数组
- 把每条评论的作者和该评论下全部回复的作者合并成一个users数组
- 展开users数组
- 按用户分组以统计评论和回复的总数
- 只保留发送过一条以上消息的用户
除了“评论套评论”的嵌套结构之外,另一种做法是把结构展平,将每个条目(帖子或回复)都视为一个“post”,然后使用 $graphLookup 将它们组合在一起。原始帖子可以根据需要在文档中携带额外的信息,后续的回复可以携带得更少,但基本结构是一样的。考虑以下条目;缩进(制表符)仅用于在视觉上突出层次结构,请注意 _id 和 ref 之间的关系:
// Sample documents for the flattened design: every post or reply is its own
// document, and ref holds the _id of the document it replies to.
// NOTE(review): the queries that follow read from collection foo; presumably
// r is inserted there first — the insert is not shown.
var r = [
// target: root post of the first thread (no ref)
{"_id": "P1", from:"OP", text: "orig", ts:new ISODate("2020-01-01")}
// level 0: direct replies to P1
,{"_id": "P2", from:"A", ref:"P1", text: "not bad", ts:new ISODate("2020-01-01")}
,{"_id": "P3", from:"B", ref:"P1", text: "corn", ts:new ISODate("2020-01-01")}
// level 1: reply to P3
,{"_id": "P4", from:"C", ref:"P3", text: "maybe", ts:new ISODate("2020-01-02")}
// level 2: reply to P4
,{"_id": "P41", from:"D", ref:"P4", text: "hoo", ts:new ISODate("2020-01-08")}
// level 3: reply to P41
,{"_id": "P61", from:"A", ref:"P41", text: "concerned", ts:new ISODate("2020-01-02")}
// level 1 again: a second reply to P3
,{"_id": "P42", from:"E", ref:"P3", text: "hello", ts:new ISODate("2020-01-02")}
// root post of a second, separate thread (no ref)
,{"_id": "P5", from:"OP2", text: "orig2", ts:new ISODate("2020-01-01")}
// level 0 of the second thread: reply to P5
,{"_id": "P6", from:"C", ref:"P5", text: "another thing...", ts:new ISODate("2020-01-06")}
];
如果我们要查找以 P1 为起点的整个对话,可以这样做:
// Fetches P1 together with every document that replies to it, directly or
// through a chain of replies.
db.foo.aggregate([
// Start from the root post of the thread.
{$match: {"_id": "P1"}},
// Recursively search foo: first the documents whose ref equals P1's _id, then
// the documents whose ref equals the _id of any document already found.
{$graphLookup: {
from: "foo",
connectToField: "ref",
connectFromField: "_id",
startWith: "$_id", // usually value of connectFromField
depthField: "n", // each matched document gets n = its recursion depth (0 for direct replies)
as: "zz" // matched documents are returned in this array field
}}
]);
产生这个:
{
"_id" : "P1",
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"zz" : [
{
"_id" : "P42",
"from" : "E",
"ref" : "P3",
"text" : "hello",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(1)
},
{
"_id" : "P4",
"from" : "C",
"ref" : "P3",
"text" : "maybe",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(1)
},
{
"_id" : "P2",
"from" : "A",
"ref" : "P1",
"text" : "not bad",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
},
{
"_id" : "P41",
"from" : "D",
"ref" : "P4",
"text" : "hoo",
"ts" : ISODate("2020-01-08T00:00:00Z"),
"n" : NumberLong(2)
},
{
"_id" : "P61",
"from" : "A",
"ref" : "P41",
"text" : "concerned",
"ts" : ISODate("2020-01-02T00:00:00Z"),
"n" : NumberLong(3)
},
{
"_id" : "P3",
"from" : "B",
"ref" : "P1",
"text" : "corn",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
}
]
}
这种设计有一些有趣的优点:
- 无需担心嵌套结构带来的递归处理。无论评论-回复对话实际有多深,查询都是相同的,并且性能非常好。
- 跨多个帖子线程对其他内容(如日期、参与者、点赞/投票等)进行二次查找要容易得多,因为结构是扁平的。例如,在 from 上建立索引,就可以快速获得某个特定用户的所有发言。查找层次结构中的最新评论,或自您上次查看以来的新评论(当然,上次查看的时间需要另行保存),都变得非常简单。扫描不当用语同样简单,因为每个文档的相同位置都有一个 text 字段。
- 因为 $graphLookup 提供了 depthField(不过必须承认,返回结果并不按深度排序……),裁剪回复树变得很容易。例如,要只取 2 条第一级(0 级)回复,可以在 $graphLookup 之后添加此阶段:
,{$addFields: {zz: {$slice: [ {$filter: {
input: "$zz",
cond: {$eq:[0,"$$this.n"]}
}}, 0, 2] }
}}
请注意层次结构和日期是相互独立的。要在线程中找到最新的一条回复,我们可以使用以下阶段。请注意 2020-01-08 是文档 P41 上的时间,也是整个线程中最新的条目:
,{$addFields: {zz: {$reduce: {
input: "$zz",
initialValue: {ts:new ISODate("2000-01-01")}, // init to a REALLY old date
in: {$cond: [
{$gt:["$$this.ts","$$value.ts"]}, // if higher than prev
"$$this", // then use this one
"$$value" // else keep highest seen so far
]}
}}
}}
{
"_id" : "P1",
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"zz" : {
"_id" : "P41",
"from" : "D",
"ref" : "P4",
"text" : "hoo",
"ts" : ISODate("2020-01-08T00:00:00Z"),
"n" : NumberLong(2)
}
}
最后,有时您会希望把初始目标文档和 $graphLookup 得到的文档链放在一起处理。为此,只需用 $project 将顶层文档的信息与 $graphLookup 生成的数组串联起来:
,{$project: {zz: {$concatArrays: [
// Make array of 1. Call n -1 to identify it as the
// initial target doc:
[{_id: "$_id",n:-1,from:"$from",text:"$text",ts:"$ts"}], // make array of 1
"$zz"
]}
}}
{
"_id" : "P1",
"zz" : [
{
"_id" : "P1",
"n" : -1,
"from" : "OP",
"text" : "orig",
"ts" : ISODate("2020-01-01T00:00:00Z")
},
{
"_id" : "P2",
"from" : "A",
"ref" : "P1",
"text" : "not bad",
"ts" : ISODate("2020-01-01T00:00:00Z"),
"n" : NumberLong(0)
},
....