将 MongoDB 聚合转换为 ArangoDB COLLECT
Converting a MongoDB aggregate into an ArangoDB COLLECT
我正在将数据从 Mongo 迁移到 Arango,我需要重现 $group 聚合。我已经成功地重现了结果,但我担心我的方法可能不是最优的。 AQL可以提高吗?
我有一组数据,如下所示:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
注意值可以是字符串、数组或不存在
在 Mongo 中,我使用以下代码聚合数据:
db.test.aggregate([
{
$unwind:{
path:"$department",
preserveNullAndEmptyArrays: true
}
},
{
$unwind:{
path:"$region",
preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
department:{ $ifNull: [ "$department", "null" ] },
region:{ $ifNull: [ "$region", "null" ] },
},
count:{$sum:1}
}
}
])
在 Arango 中,我使用以下 AQL:
FOR i IN test
LET FIELD1=(FOR a IN APPEND([],NOT_NULL(i.department,"null")) RETURN a)
LET FIELD2=(FOR a IN APPEND([],NOT_NULL(i.region,"null")) RETURN a)
FOR f1 IN FIELD1
FOR f2 IN FIELD2
COLLECT id={department:f1,region:f2} WITH COUNT INTO counter
RETURN {_id:id,count:counter}
编辑:
APPEND 用于将字符串值转换为数组
两者都产生如下所示的结果;
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
你的方法看起来不错。我建议使用 TO_ARRAY()
而不是 APPEND()
以使其更容易理解。
两个函数都跳过空值,因此不可避免地要提供一些占位符,或者显式测试 null 和 return 带有 的数组null 值(或任何最适合您的值):
FOR doc IN test
FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
COLLECT department = field1, region = field2
WITH COUNT INTO count
RETURN { _id: { department, region }, count }
Collection 测试:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
结果:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]
我正在将数据从 Mongo 迁移到 Arango,我需要重现 $group 聚合。我已经成功地重现了结果,但我担心我的方法可能不是最优的。 AQL可以提高吗?
我有一组数据,如下所示:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
注意值可以是字符串、数组或不存在
在 Mongo 中,我使用以下代码聚合数据:
db.test.aggregate([
{
$unwind:{
path:"$department",
preserveNullAndEmptyArrays: true
}
},
{
$unwind:{
path:"$region",
preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
department:{ $ifNull: [ "$department", "null" ] },
region:{ $ifNull: [ "$region", "null" ] },
},
count:{$sum:1}
}
}
])
在 Arango 中,我使用以下 AQL:
FOR i IN test
LET FIELD1=(FOR a IN APPEND([],NOT_NULL(i.department,"null")) RETURN a)
LET FIELD2=(FOR a IN APPEND([],NOT_NULL(i.region,"null")) RETURN a)
FOR f1 IN FIELD1
FOR f2 IN FIELD2
COLLECT id={department:f1,region:f2} WITH COUNT INTO counter
RETURN {_id:id,count:counter}
编辑: APPEND 用于将字符串值转换为数组
两者都产生如下所示的结果;
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
你的方法看起来不错。我建议使用 TO_ARRAY()
而不是 APPEND()
以使其更容易理解。
两个函数都跳过空值,因此不可避免地要提供一些占位符,或者显式测试 null 和 return 带有 的数组null 值(或任何最适合您的值):
FOR doc IN test
FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
COLLECT department = field1, region = field2
WITH COUNT INTO count
RETURN { _id: { department, region }, count }
Collection 测试:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
结果:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]