How to optimize the computation speed of array.forEach in node.js
I have a piece of code that simply loops over two arrays; for each element of the first array, it finds the related elements in the second array, changes only the first occurrence, and deletes the rest.
/**
 * The aggregation data structure:
 * "_id": {
 *     "geometry": geometry,
 *     "dups": [
 *         "5b3b25b4e54029249c459bfc",  // keep only the first element in allDocs
 *         "5b3b25b4e54029249c459e65",  // delete it from allDocs
 *         "5b3b25b4e54029249c459d7d"   // delete it from allDocs
 *     ],
 *     "dupsProp": [ ],  // array of all properties of duplicatePoints
 *     "count": 3
 * }
 */
var aggregationRes = [/* 46,000 objects */];
var allDocs = [/* 345,000 objects */];

aggregationRes.forEach(function (resElem, counter) {
    console.log(counter + "/" + aggregationRes.length);
    // Delete objects in allDocs based on the dups array, keeping only the first one
    var foundIndex = allDocs.findIndex(x => x._id.toString() == resElem.dups[0]);
    // assign the merged properties
    allDocs[foundIndex].properties = resElem.dupsProp;
    // delete the remaining ids listed in the dups array from allDocs
    resElem.dups.forEach(function (dupElem, index) {
        var tmpFoundIndex = allDocs.findIndex(x => x._id.toString() == resElem.dups[index + 1]);
        if (tmpFoundIndex !== -1) {
            allDocs.splice(tmpFoundIndex, 1);
        }
    });
});
This script runs for almost 4 hours. As you can see, the computation itself is quite simple, but because the allDocs array is so large it takes a very long time. It would be great if someone could give me a hint on how to reduce the computation time.
Thanks in advance.
Building on Bergi's idea, we index the documents by id so that the expensive findIndex lookups are avoided:
var allDocs = [/* 345,000 objects */];
var aggregationRes = [/* 46,000 objects */];

// Build a lookup table: id string -> document, so each lookup is O(1) instead of an O(n) findIndex
var allDocsIndexed = {};
allDocs.forEach(function (doc) {
    allDocsIndexed[doc._id.toString()] = doc;
});

aggregationRes.forEach(function (resElem) {
    // Keep the first duplicate and assign the merged properties to it
    allDocsIndexed[resElem.dups[0]].properties = resElem.dupsProp;
    // Remove the remaining duplicates from the index
    for (var i = 1; i < resElem.dups.length; i++) {
        delete allDocsIndexed[resElem.dups[i]];
    }
});

// Whatever is still in the index survives the deduplication
var allUndeletedDocs = allDocs.filter(doc => allDocsIndexed.hasOwnProperty(doc._id.toString()));
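As a side note on the choice of index structure: a Map avoids accidental collisions with Object.prototype keys and generally copes well with very large, heavily mutated key sets. A minimal sketch of the same indexing idea using a Map (variable names reused from the snippet above):

var allDocsIndexed = new Map();
allDocs.forEach(function (doc) {
    allDocsIndexed.set(doc._id.toString(), doc);
});

aggregationRes.forEach(function (resElem) {
    // First duplicate keeps the merged properties, the rest are dropped from the index
    allDocsIndexed.get(resElem.dups[0]).properties = resElem.dupsProp;
    for (var i = 1; i < resElem.dups.length; i++) {
        allDocsIndexed.delete(resElem.dups[i]);
    }
});

var allUndeletedDocs = allDocs.filter(doc => allDocsIndexed.has(doc._id.toString()));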
Note that this is a working solution for plain JavaScript, but with more details about the setup there might be a better solution that uses MongoDB's own features.
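For example, if the documents already live in a MongoDB collection, the update/delete work could be pushed to the database with a single bulkWrite instead of being done in Node.js memory. The following is only a rough sketch; the connection URL, database and collection names are hypothetical, and it assumes the dups entries are ObjectId strings:

const { MongoClient, ObjectId } = require("mongodb");

async function dedupe(aggregationRes) {
    const client = await MongoClient.connect("mongodb://localhost:27017");
    const docs = client.db("mydb").collection("docs");

    // Two bulk operations per aggregation result: update the survivor, delete the rest
    const ops = [];
    aggregationRes.forEach(resElem => {
        ops.push({
            updateOne: {
                filter: { _id: new ObjectId(resElem.dups[0]) },
                update: { $set: { properties: resElem.dupsProp } }
            }
        });
        ops.push({
            deleteMany: {
                filter: { _id: { $in: resElem.dups.slice(1).map(id => new ObjectId(id)) } }
            }
        });
    });

    await docs.bulkWrite(ops, { ordered: false });
    await client.close();
}

Whether this ends up faster depends on the deployment, but it avoids loading all 345,000 documents into the Node.js process at all.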