如何在 MongoDB 中计算用户连续活跃的天数?
How can I find the number of consecutive days each user has been active, using MongoDB?
我想找出每个用户(userId)截至今天为止最近一段连续活跃的天数。
userId(string) active_date(string) Note: Today (2022-02-20)
------------------------------------------
{ "userId": "DbdBve", "day": "2022-02-20" }
{ "userId": "DbdBve", "day": "2022-02-19" }
{ "userId": "DbdBve", "day": "2022-02-18" }
{ "userId": "DbdBve", "day": "2022-02-17" } <- Gap here | so user's been active for the last 3 days
{ "userId": "DbdBve", "day": "2022-02-15" }
userId(string) active_date(string)
------------------------------------------
{ "userId": "Gj6WEth", "day": "2022-02-20" }
{ "userId": "Gj6WEth", "day": "2022-02-15" } <- Gap here | so user's been active for the last 1 days
{ "userId": "Gj6WEth", "day": "2022-02-14" }
{ "userId": "Gj6WEth", "day": "2022-02-13" }
使用 MongoDB v5:
首先将日期转换为数值(毫秒时间戳,用到 $toLong),
然后使用 $setWindowFields
提取每个用户连续活跃的日期区间(active_days):
// Groups each user's activity into runs of consecutive days and emits one
// document per run:
//   { _id: { user_id, count }, active_days, from, to }
// `active_days` is the run length; `from` / `to` are epoch milliseconds.
// NOTE(review): the pipeline reads `user_id` and `active_date`; confirm these
// match the field names actually stored in the collection.
// Assumes at most one document per user per day — TODO confirm.
db.collection.aggregate([
  // 1. Convert the date to epoch milliseconds so days can be compared
  //    numerically. $toDate runs first because $toLong only parses base-10
  //    integer strings and would fail on a date string like "2022-02-20".
  {
    $addFields: {
      active_date: {
        $toLong: { $toDate: "$active_date" }
      }
    }
  },
  // 2. For each document, collect the user's dates that lie within the
  //    preceding 24 hours, the current date included.
  {
    $setWindowFields: {
      partitionBy: "$user_id",
      sortBy: { active_date: 1 },
      output: {
        days: {
          $push: "$active_date",
          window: {
            range: [-86400000, 0] // one day in milliseconds
          }
        }
      }
    }
  },
  // 3. Replace the collected dates with a "run start" flag: more than one
  //    date in the window means the previous day was active as well (0),
  //    otherwise this document opens a new run (1).
  {
    $set: {
      days: {
        $cond: [
          { $gt: [{ $size: "$days" }, 1] },
          0,
          1
        ]
      }
    }
  },
  // 4. A running total of the flags numbers the runs per user: every
  //    document of the same run ends up with the same `count`.
  {
    $setWindowFields: {
      partitionBy: "$user_id",
      sortBy: { active_date: 1 },
      output: {
        count: {
          $sum: "$days",
          window: {
            documents: ["unbounded", "current"]
          }
        }
      }
    }
  },
  // 5. Collapse each (user, run) pair into its length and date bounds.
  {
    $group: {
      _id: {
        user_id: "$user_id",
        count: "$count"
      },
      active_days: { $sum: 1 },
      to: { $max: "$active_date" },
      from: { $min: "$active_date" }
    }
  }
])
最后,在上述管道末尾追加以下两个阶段,即可得到每个用户最近一段连续活跃的天数:
{
"$sort": {
to: -1
}
},
{
"$group": {
"_id": "$_id.user_id",
"last_active_days": {
"$first": "$active_days"
}
}
}
对于不支持 $setWindowFields 的旧版本 MongoDB,可以改用 $reduce:
// Variant without $setWindowFields: gathers each user's dates in ascending
// order and folds them with $reduce into runs of consecutive days.
// Output: { _id: <user_id>, active_days: <length of the most recent run> }.
// NOTE(review): the pipeline reads `user_id` and `active_date`; confirm these
// match the field names actually stored in the collection.
// Assumes at most one document per user per day — TODO confirm.
db.collection.aggregate([
  // Ascending order so that $push / $first below see the dates oldest-first.
  {
    $sort: { active_date: 1 }
  },
  // One document per user holding every date as epoch milliseconds.
  // $toDate runs first because $toLong only parses base-10 integer strings
  // and would fail on a date string like "2022-02-20".
  {
    $group: {
      _id: "$user_id",
      dates: {
        $push: { $toLong: { $toDate: "$active_date" } }
      },
      from: {
        $first: { $toLong: { $toDate: "$active_date" } }
      }
    }
  },
  {
    $project: {
      active_days: {
        $let: {
          vars: {
            result: {
              $reduce: {
                input: "$dates",
                // State: `prev` = last date seen, `range` = run in progress,
                // `ranges` = runs already closed.
                initialValue: {
                  // Pretend the day before the first date was active so the
                  // first element extends the (still empty) initial run.
                  prev: { $subtract: ["$from", 86400000] },
                  range: { from: "$from", to: "$from", count: 0 },
                  ranges: []
                },
                in: {
                  $cond: [
                    // Exactly one day after the previous date?
                    {
                      $eq: [
                        { $subtract: ["$$this", "$$value.prev"] },
                        86400000
                      ]
                    },
                    // Yes: extend the current run up to this date.
                    {
                      prev: "$$this",
                      range: {
                        from: "$$value.range.from",
                        to: "$$this",
                        count: { $add: ["$$value.range.count", 1] }
                      },
                      ranges: "$$value.ranges"
                    },
                    // No: close the current run and open a new one here.
                    {
                      ranges: {
                        $concatArrays: [
                          "$$value.ranges",
                          [
                            {
                              from: "$$value.range.from",
                              to: "$$value.prev",
                              count: "$$value.range.count"
                            }
                          ]
                        ]
                      },
                      range: { from: "$$this", to: "$$this", count: 1 },
                      prev: "$$this"
                    }
                  ]
                }
              }
            }
          },
          // Closed runs followed by the run still in progress.
          in: {
            $concatArrays: ["$$result.ranges", ["$$result.range"]]
          }
        }
      }
    }
  },
  // Length of the last (most recent) run. $arrayElemAt with index -1 is used
  // instead of the $last array operator, which needs MongoDB 4.4+.
  {
    $project: {
      active_days: { $arrayElemAt: ["$active_days.count", -1] }
    }
  }
])
我想找出每个用户(userId)截至今天为止最近一段连续活跃的天数。
userId(string) active_date(string) Note: Today (2022-02-20)
------------------------------------------
{ "userId": "DbdBve", "day": "2022-02-20" }
{ "userId": "DbdBve", "day": "2022-02-19" }
{ "userId": "DbdBve", "day": "2022-02-18" }
{ "userId": "DbdBve", "day": "2022-02-17" } <- Gap here | so user's been active for the last 3 days
{ "userId": "DbdBve", "day": "2022-02-15" }
userId(string) active_date(string)
------------------------------------------
{ "userId": "Gj6WEth", "day": "2022-02-20" }
{ "userId": "Gj6WEth", "day": "2022-02-15" } <- Gap here | so user's been active for the last 1 days
{ "userId": "Gj6WEth", "day": "2022-02-14" }
{ "userId": "Gj6WEth", "day": "2022-02-13" }
使用 MongoDB v5:
首先将日期转换为数值(毫秒时间戳,用到 $toLong),
然后使用 $setWindowFields
提取每个用户连续活跃的日期区间(active_days):
// Groups each user's activity into runs of consecutive days and emits one
// document per run:
//   { _id: { user_id, count }, active_days, from, to }
// `active_days` is the run length; `from` / `to` are epoch milliseconds.
// NOTE(review): the pipeline reads `user_id` and `active_date`; confirm these
// match the field names actually stored in the collection.
// Assumes at most one document per user per day — TODO confirm.
db.collection.aggregate([
  // 1. Convert the date to epoch milliseconds so days can be compared
  //    numerically. $toDate runs first because $toLong only parses base-10
  //    integer strings and would fail on a date string like "2022-02-20".
  {
    $addFields: {
      active_date: {
        $toLong: { $toDate: "$active_date" }
      }
    }
  },
  // 2. For each document, collect the user's dates that lie within the
  //    preceding 24 hours, the current date included.
  {
    $setWindowFields: {
      partitionBy: "$user_id",
      sortBy: { active_date: 1 },
      output: {
        days: {
          $push: "$active_date",
          window: {
            range: [-86400000, 0] // one day in milliseconds
          }
        }
      }
    }
  },
  // 3. Replace the collected dates with a "run start" flag: more than one
  //    date in the window means the previous day was active as well (0),
  //    otherwise this document opens a new run (1).
  {
    $set: {
      days: {
        $cond: [
          { $gt: [{ $size: "$days" }, 1] },
          0,
          1
        ]
      }
    }
  },
  // 4. A running total of the flags numbers the runs per user: every
  //    document of the same run ends up with the same `count`.
  {
    $setWindowFields: {
      partitionBy: "$user_id",
      sortBy: { active_date: 1 },
      output: {
        count: {
          $sum: "$days",
          window: {
            documents: ["unbounded", "current"]
          }
        }
      }
    }
  },
  // 5. Collapse each (user, run) pair into its length and date bounds.
  {
    $group: {
      _id: {
        user_id: "$user_id",
        count: "$count"
      },
      active_days: { $sum: 1 },
      to: { $max: "$active_date" },
      from: { $min: "$active_date" }
    }
  }
])
最后,在上述管道末尾追加以下两个阶段,即可得到每个用户最近一段连续活跃的天数:
{
"$sort": {
to: -1
}
},
{
"$group": {
"_id": "$_id.user_id",
"last_active_days": {
"$first": "$active_days"
}
}
}
对于不支持 $setWindowFields 的旧版本 MongoDB,可以改用 $reduce:
// Variant without $setWindowFields: gathers each user's dates in ascending
// order and folds them with $reduce into runs of consecutive days.
// Output: { _id: <user_id>, active_days: <length of the most recent run> }.
// NOTE(review): the pipeline reads `user_id` and `active_date`; confirm these
// match the field names actually stored in the collection.
// Assumes at most one document per user per day — TODO confirm.
db.collection.aggregate([
  // Ascending order so that $push / $first below see the dates oldest-first.
  {
    $sort: { active_date: 1 }
  },
  // One document per user holding every date as epoch milliseconds.
  // $toDate runs first because $toLong only parses base-10 integer strings
  // and would fail on a date string like "2022-02-20".
  {
    $group: {
      _id: "$user_id",
      dates: {
        $push: { $toLong: { $toDate: "$active_date" } }
      },
      from: {
        $first: { $toLong: { $toDate: "$active_date" } }
      }
    }
  },
  {
    $project: {
      active_days: {
        $let: {
          vars: {
            result: {
              $reduce: {
                input: "$dates",
                // State: `prev` = last date seen, `range` = run in progress,
                // `ranges` = runs already closed.
                initialValue: {
                  // Pretend the day before the first date was active so the
                  // first element extends the (still empty) initial run.
                  prev: { $subtract: ["$from", 86400000] },
                  range: { from: "$from", to: "$from", count: 0 },
                  ranges: []
                },
                in: {
                  $cond: [
                    // Exactly one day after the previous date?
                    {
                      $eq: [
                        { $subtract: ["$$this", "$$value.prev"] },
                        86400000
                      ]
                    },
                    // Yes: extend the current run up to this date.
                    {
                      prev: "$$this",
                      range: {
                        from: "$$value.range.from",
                        to: "$$this",
                        count: { $add: ["$$value.range.count", 1] }
                      },
                      ranges: "$$value.ranges"
                    },
                    // No: close the current run and open a new one here.
                    {
                      ranges: {
                        $concatArrays: [
                          "$$value.ranges",
                          [
                            {
                              from: "$$value.range.from",
                              to: "$$value.prev",
                              count: "$$value.range.count"
                            }
                          ]
                        ]
                      },
                      range: { from: "$$this", to: "$$this", count: 1 },
                      prev: "$$this"
                    }
                  ]
                }
              }
            }
          },
          // Closed runs followed by the run still in progress.
          in: {
            $concatArrays: ["$$result.ranges", ["$$result.range"]]
          }
        }
      }
    }
  },
  // Length of the last (most recent) run. $arrayElemAt with index -1 is used
  // instead of the $last array operator, which needs MongoDB 4.4+.
  {
    $project: {
      active_days: { $arrayElemAt: ["$active_days.count", -1] }
    }
  }
])