Mongo DB + java 在选定日期范围内按字段对文档进行分组
Mongo DB + Java: grouping documents by field within a selected date range
我是 MongoDB 的新手,在 Java 中使用 mongoJavaDriver.jar。
我的collection里还有很多文档,下面是部分样例。
{
"_id" : ObjectId("57ee4767d782023f80bd4f97"),
"USER_NAME" : "abc123xgvdfbvdf@einrot.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.598Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.598Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f98"),
"USER_NAME" : "abc123xgvdfbvdf@fleckens.hu",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.601Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.601Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f99"),
"USER_NAME" : "prashanthtfxgvdfbvdf@gmail.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.603Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.603Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f9e"),
"USER_NAME" : "57ee4767d782023f80bd4f9f@jourrapide.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.608Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.608Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f9f"),
"USER_NAME" : "zxcxgvdfbvdf@jourrapide.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.609Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.609Z")
}
我能够运行查询来查找选定日期范围内的文档,并且它会返回一些结果。
我的代码:
// Window start: `days` days before today, with hours and minutes reset to zero.
Date today = new Date();
Date windowStart = new Date(today.getYear(), today.getMonth(), today.getDate() - days);
windowStart.setHours(0);
windowStart.setMinutes(0);
// Window end (exclusive): tomorrow, built from today's year/month/day.
Date windowEnd = new Date(today.getYear(), today.getMonth(), today.getDate() + 1);
// Restrict LAST_UPDATED to windowStart <= value < windowEnd.
BasicDBObject lastUpdatedRange = new BasicDBObject("$gte", windowStart).append("$lt", windowEnd);
BasicDBObject filter = new BasicDBObject("LAST_UPDATED", lastUpdatedRange);
// Print the total number of documents, then the number matching the date filter.
System.out.println(collection.find().count());
System.out.println(collection.find(filter).count());
此 collection 中的文档总数为 155。
根据所选日期计数为 37。
在这37篇文档中,username存在冗余值。我想根据 USER_NAME
字段对它们进行分组。这样这个计数就会小于 37。我该怎么做?
您可以使用聚合框架来完成所需的聚合。请考虑在 mongo shell 中运行以下聚合管道:
它先使用 $match 过滤器,根据给定的日期范围限制进入管道处理的文档,
然后使用 $group 按 USER_NAME 字段对文档进行分组,
并使用 $sum 统计每个不同 USER_NAME 值对应的文档数:
// Date window in local time: from midnight `days` days ago (inclusive)
// up to midnight tomorrow (exclusive).
// getFullYear() is required here: getYear() returns the year minus 1900,
// which the Date constructor would interpret as a year around 116 AD.
// Dates built from (year, month, day) are already at 00:00:00.000, so no
// further setHours/setMinutes calls are needed.
var now = new Date(),
    days = 4,
    start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - days),
    end = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1);
var pipeline = [
    // Keep only documents whose LAST_UPDATED lies in [start, end).
    { "$match": { "LAST_UPDATED": { "$gte": start, "$lt": end } } },
    // Emit one document per distinct USER_NAME with its number of matches.
    {
        "$group": {
            "_id": "$USER_NAME",
            "count": { "$sum": 1 }
        }
    }
];
db.collection.aggregate(pipeline);
将以上转换为 Java 变为:
/**
 * Groups the documents of a collection by {@code USER_NAME} and counts them,
 * considering only documents whose {@code LAST_UPDATED} falls inside a date
 * window that starts {@link #DAYS} days before today and ends at the start
 * of tomorrow (exclusive).
 */
public class JavaAggregation {
    /** Number of days before today at which the date window starts. */
    private static final int DAYS = 4;

    /**
     * Runs the {@code $match} + {@code $group} pipeline and prints every
     * resulting document to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws UnknownHostException declared by the {@code MongoClient}
     *         constructor call below
     */
    public static void main(String args[]) throws UnknownHostException {
        MongoClient mongo = new MongoClient();
        try {
            DB db = mongo.getDB("test"); // your database name
            DBCollection coll = db.getCollection("collectionName"); // your collection name

            // create the pipeline operations, first with the $match
            Date start = midnightOffsetFromToday(-DAYS);
            Date end = midnightOffsetFromToday(1);
            DBObject match = new BasicDBObject("$match",
                new BasicDBObject("LAST_UPDATED",
                    new BasicDBObject("$gte", start).append("$lt", end)
                )
            );

            // build the $group operations: one result per USER_NAME with its count
            DBObject groupFields = new BasicDBObject("_id", "$USER_NAME");
            groupFields.put("count", new BasicDBObject("$sum", 1));
            DBObject group = new BasicDBObject("$group", groupFields);

            List<DBObject> pipeline = Arrays.asList(match, group);
            AggregationOutput output = coll.aggregate(pipeline);
            for (DBObject result : output.results()) {
                System.out.println(result);
            }
        } finally {
            // Release the client's connections even if the aggregation fails.
            mongo.close();
        }
    }

    /**
     * Returns midnight (default time zone) of the day that is
     * {@code dayOffset} days away from today.
     *
     * @param dayOffset days to add to today; negative values go back in time
     * @return the start of the resulting day
     */
    private static Date midnightOffsetFromToday(int dayOffset) {
        // Fully-qualified names keep this block independent of the file's imports.
        java.util.Calendar cal = java.util.Calendar.getInstance();
        cal.set(java.util.Calendar.HOUR_OF_DAY, 0);
        cal.set(java.util.Calendar.MINUTE, 0);
        cal.set(java.util.Calendar.SECOND, 0);
        cal.set(java.util.Calendar.MILLISECOND, 0);
        cal.add(java.util.Calendar.DAY_OF_MONTH, dayOffset);
        return cal.getTime();
    }
}
我是 MongoDB 的新手,在 Java 中使用 mongoJavaDriver.jar。
我的collection里还有很多文档,下面是部分样例。
{
"_id" : ObjectId("57ee4767d782023f80bd4f97"),
"USER_NAME" : "abc123xgvdfbvdf@einrot.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.598Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.598Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f98"),
"USER_NAME" : "abc123xgvdfbvdf@fleckens.hu",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.601Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.601Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f99"),
"USER_NAME" : "prashanthtfxgvdfbvdf@gmail.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.603Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.603Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f9e"),
"USER_NAME" : "57ee4767d782023f80bd4f9f@jourrapide.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.608Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.608Z")
}
{
"_id" : ObjectId("57ee4767d782023f80bd4f9f"),
"USER_NAME" : "zxcxgvdfbvdf@jourrapide.com",
"LOGIN" : ISODate("2016-09-29T18:30:00Z"),
"LOGOUT" : ISODate("2016-09-30T11:07:19.609Z"),
"LONGITUDE" : "long",
"LATITUDE" : "lat",
"SOURCE" : "Web",
"LAST_UPDATED" : ISODate("2016-09-30T11:07:19.609Z")
}
我能够运行查询来查找选定日期范围内的文档,并且它会返回一些结果。
我的代码:
// Window start: `days` days before today, with hours and minutes reset to zero.
Date today = new Date();
Date windowStart = new Date(today.getYear(), today.getMonth(), today.getDate() - days);
windowStart.setHours(0);
windowStart.setMinutes(0);
// Window end (exclusive): tomorrow, built from today's year/month/day.
Date windowEnd = new Date(today.getYear(), today.getMonth(), today.getDate() + 1);
// Restrict LAST_UPDATED to windowStart <= value < windowEnd.
BasicDBObject lastUpdatedRange = new BasicDBObject("$gte", windowStart).append("$lt", windowEnd);
BasicDBObject filter = new BasicDBObject("LAST_UPDATED", lastUpdatedRange);
// Print the total number of documents, then the number matching the date filter.
System.out.println(collection.find().count());
System.out.println(collection.find(filter).count());
此 collection 中的文档总数为 155。
根据所选日期计数为 37。
在这37篇文档中,username存在冗余值。我想根据 USER_NAME
字段对它们进行分组。这样这个计数就会小于 37。我该怎么做?
您可以使用聚合框架来完成所需的聚合。请考虑在 mongo shell 中运行以下聚合管道:
它先使用 $match 过滤器,根据给定的日期范围限制进入管道处理的文档,
然后使用 $group 按 USER_NAME 字段对文档进行分组,
并使用 $sum 统计每个不同 USER_NAME 值对应的文档数:
// Date window in local time: from midnight `days` days ago (inclusive)
// up to midnight tomorrow (exclusive).
// getFullYear() is required here: getYear() returns the year minus 1900,
// which the Date constructor would interpret as a year around 116 AD.
// Dates built from (year, month, day) are already at 00:00:00.000, so no
// further setHours/setMinutes calls are needed.
var now = new Date(),
    days = 4,
    start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - days),
    end = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1);
var pipeline = [
    // Keep only documents whose LAST_UPDATED lies in [start, end).
    { "$match": { "LAST_UPDATED": { "$gte": start, "$lt": end } } },
    // Emit one document per distinct USER_NAME with its number of matches.
    {
        "$group": {
            "_id": "$USER_NAME",
            "count": { "$sum": 1 }
        }
    }
];
db.collection.aggregate(pipeline);
将以上转换为 Java 变为:
/**
 * Groups the documents of a collection by {@code USER_NAME} and counts them,
 * considering only documents whose {@code LAST_UPDATED} falls inside a date
 * window that starts {@link #DAYS} days before today and ends at the start
 * of tomorrow (exclusive).
 */
public class JavaAggregation {
    /** Number of days before today at which the date window starts. */
    private static final int DAYS = 4;

    /**
     * Runs the {@code $match} + {@code $group} pipeline and prints every
     * resulting document to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws UnknownHostException declared by the {@code MongoClient}
     *         constructor call below
     */
    public static void main(String args[]) throws UnknownHostException {
        MongoClient mongo = new MongoClient();
        try {
            DB db = mongo.getDB("test"); // your database name
            DBCollection coll = db.getCollection("collectionName"); // your collection name

            // create the pipeline operations, first with the $match
            Date start = midnightOffsetFromToday(-DAYS);
            Date end = midnightOffsetFromToday(1);
            DBObject match = new BasicDBObject("$match",
                new BasicDBObject("LAST_UPDATED",
                    new BasicDBObject("$gte", start).append("$lt", end)
                )
            );

            // build the $group operations: one result per USER_NAME with its count
            DBObject groupFields = new BasicDBObject("_id", "$USER_NAME");
            groupFields.put("count", new BasicDBObject("$sum", 1));
            DBObject group = new BasicDBObject("$group", groupFields);

            List<DBObject> pipeline = Arrays.asList(match, group);
            AggregationOutput output = coll.aggregate(pipeline);
            for (DBObject result : output.results()) {
                System.out.println(result);
            }
        } finally {
            // Release the client's connections even if the aggregation fails.
            mongo.close();
        }
    }

    /**
     * Returns midnight (default time zone) of the day that is
     * {@code dayOffset} days away from today.
     *
     * @param dayOffset days to add to today; negative values go back in time
     * @return the start of the resulting day
     */
    private static Date midnightOffsetFromToday(int dayOffset) {
        // Fully-qualified names keep this block independent of the file's imports.
        java.util.Calendar cal = java.util.Calendar.getInstance();
        cal.set(java.util.Calendar.HOUR_OF_DAY, 0);
        cal.set(java.util.Calendar.MINUTE, 0);
        cal.set(java.util.Calendar.SECOND, 0);
        cal.set(java.util.Calendar.MILLISECOND, 0);
        cal.add(java.util.Calendar.DAY_OF_MONTH, dayOffset);
        return cal.getTime();
    }
}