猫鼬游标减慢直到停止
mongoose cursor slows down until it stops
我有一个 node.js 应用程序,它使用 kue 作业通过 mongoose 游标遍历大型集合。
kue 作业配置:
function childJob(cid, name, dataset, type, link, line, market, done) {
var deferred = Q.defer();
mongoose.model(dataset).count({processed:{$ne:1}}, function(err, total){
console.log('total', total);
var job = jobs.create('child', {
type: 'CHILD',
cid: cid,
title: name,
dataset: dataset,
kind: type,
link: link,
line: line,
market: market,
current: 0,
total: total
});
job
.on('complete', function() {
deferred.resolve({
done: true,
job: job.data,
success: true
});
})
.on('failed', function() {
deferred.resolve({
done: true,
job: job.data,
success: false
});
})
job.save();
return deferred.promise;
});
}
每当子作业启动时:
jobs.process('child', 10, function(job, done) {
var count = 0;
mongoose.model(job.data.dataset).find({processed:{$ne:1}}).lean().batchSize(1000).cursor().eachAsync(function(record){
functions.workRecord(record, job.data.link, job.data.line, job.data.market, myStats[job.data.line+job.data.market], function(stats){
count++;
job.data.current = count;
job.update();
job.progress(count, job.data.total, record);
if( count == job.data.total ){
myStats[job.data.line + job.data.market] = stats;
done();
}
});
});
})
随着时间的推移,cursor().eachAsync()
的记录数量急剧减少。从每秒处理 100 条记录到几秒钟内处理大约 1 或 2 条记录,然后完全停止。
这是配置问题吗?如何将查询设置为随着时间的推移在游标中具有一致的记录流?
EDIT1:最终我得到以下错误:
(node:61193) UnhandledPromiseRejectionWarning: MongoError: Cursor not
found, cursor id: 90600322391
at Function.MongoError.create (/home/sigma/SigmaCWCDataAnalysis/node_modules/mongoose/node_modules/mongodb-core/lib/error.js:31:11)
看起来发生的事情是光标超时。
解决方案是将 find() 查询设置为 {timeout:false},如下所示:
mongoose.model(job.data.dataset).find({},{timeout: false}).lean().cursor()
我有一个 node.js 应用程序,它使用 kue 作业通过 mongoose 游标遍历大型集合。
kue 作业配置:
function childJob(cid, name, dataset, type, link, line, market, done) {
var deferred = Q.defer();
mongoose.model(dataset).count({processed:{$ne:1}}, function(err, total){
console.log('total', total);
var job = jobs.create('child', {
type: 'CHILD',
cid: cid,
title: name,
dataset: dataset,
kind: type,
link: link,
line: line,
market: market,
current: 0,
total: total
});
job
.on('complete', function() {
deferred.resolve({
done: true,
job: job.data,
success: true
});
})
.on('failed', function() {
deferred.resolve({
done: true,
job: job.data,
success: false
});
})
job.save();
return deferred.promise;
});
}
每当子作业启动时:
jobs.process('child', 10, function(job, done) {
var count = 0;
mongoose.model(job.data.dataset).find({processed:{$ne:1}}).lean().batchSize(1000).cursor().eachAsync(function(record){
functions.workRecord(record, job.data.link, job.data.line, job.data.market, myStats[job.data.line+job.data.market], function(stats){
count++;
job.data.current = count;
job.update();
job.progress(count, job.data.total, record);
if( count == job.data.total ){
myStats[job.data.line + job.data.market] = stats;
done();
}
});
});
})
随着时间的推移,cursor().eachAsync()
的记录数量急剧减少。从每秒处理 100 条记录到几秒钟内处理大约 1 或 2 条记录,然后完全停止。
这是配置问题吗?如何将查询设置为随着时间的推移在游标中具有一致的记录流?
EDIT1:最终我得到以下错误:
(node:61193) UnhandledPromiseRejectionWarning: MongoError: Cursor not found, cursor id: 90600322391 at Function.MongoError.create (/home/sigma/SigmaCWCDataAnalysis/node_modules/mongoose/node_modules/mongodb-core/lib/error.js:31:11)
看起来发生的事情是光标超时。
解决方案是将 find() 查询设置为 {timeout:false},如下所示:
mongoose.model(job.data.dataset).find({},{timeout: false}).lean().cursor()