从具有大数据集(50k 记录)的对象数组中检索对象的最快方法
Fastest way to retrieve an object from array of objects with large data sets(50k records)
这里 totalObjects 包含大约 40k 条记录,我需要从中找出同样存在于 sapObjs(约 20k 条记录)中的对象;每找到一个这样的对象,就修改它的属性值。整个操作耗时超过 200 秒。有没有人能建议一种解决这个问题并提高性能的方法?我已经尝试过 _.find 的替代方案,例如 filter 和其他数组函数,但 Underscore.js 给出的结果反而比它们更好。不过据说 Underscore 比较慢,所以我想找到其他方法。有谁能帮我解决这个问题吗?
// Merge every record of sapObjs into totalObjects: when a record with the same
// name is already registered in entriesInserted, update the matching entry of
// totalObjects; otherwise register the record and append it.
// NOTE(review): i, sapLength, currRecord and dummyObj are assigned without a
// declaration here; presumably they are declared in the enclosing scope - confirm.
for (i = 0, sapLength = sapObjs.length; i < sapLength; i++) {
    currRecord = entriesInserted[sapObjs[i].name];
    if (currRecord) {
        // _.find walks totalObjects from the start on every hit; this is the
        // step the question reports as slow.
        dummyObj = _.find(totalObjects, {name: sapObjs[i].name});
        dummyObj["sap_desc"] = sapObjs[i].sap_desc;
        dummyObj.source = "";
    }
    else {
        // Previously read `sapOpbs[i]`, an undefined identifier that threw a
        // ReferenceError the first time this branch ran.
        entriesInserted[sapObjs[i].name] = sapObjs[i];
        totalObjects.push(sapObjs[i]);
    }
}
创建一个查找哈希表。下面是我临时拼凑的一个简单页面,用来说明这一点。使用 _.find 时,测试耗时 400 多秒;使用查找哈希表,大约只需 40 毫秒,几乎是瞬间完成。
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<!-- Explicit https: a protocol-relative URL resolves to file:// when this page is opened from disk. -->
<script src="https://code.jquery.com/jquery-2.2.0.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>
<script type="text/javascript">
// Shared fixtures, kept global so that createArrays() and iter_arrays() can be
// run as two separate steps. createArrays() fills both arrays with
// {name, sap_desc} records; iter_arrays() reads them.
var source_sapObjs = [];
var source_totalObjects = [];
/**
 * Downloads an English word list and rebuilds the two global fixture arrays
 * from it: source_sapObjs from the first 20000 words and source_totalObjects
 * from the first 40000 words, so every name in the former also occurs in the
 * latter.
 *
 * The request is asynchronous; "finished creating arrays" is logged once both
 * arrays are ready, and a failed download is reported on console.error.
 */
function createArrays(){
    jQuery.ajax({
        url: 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt',
        dataType: 'text',
        type: 'GET'
    }).done(function(textFile){
        // Accept both LF and CRLF so names never carry a trailing "\r".
        var words = textFile.split(/\r?\n/);
        // Never read past the end of the list (that would yield undefined names).
        var sapCount = Math.min(20000, words.length);
        var totalCount = Math.min(40000, words.length);

        // Empty the arrays in place so a second call rebuilds them instead of
        // appending another batch of duplicates.
        source_sapObjs.length = 0;
        source_totalObjects.length = 0;

        // create an array with 20k and 40k records.
        for (var i = 0; i < sapCount; i++) {
            source_sapObjs.push({name: words[i], sap_desc: 'DESC-' + words[i]});
        }
        for (var j = 0; j < totalCount; j++) {
            source_totalObjects.push({name: words[j], sap_desc: 'none'});
        }
        console.log("source_sapObjs.length:", source_sapObjs.length);
        console.log("source_totalObjects.length:", source_totalObjects.length);
        console.log("finished creating arrays");
    }).fail(function(jqXHR, textStatus, errorThrown){
        console.error("failed to download word list:", textStatus, errorThrown);
    });
}
/**
 * Copies sap_desc from every record of source_sapObjs onto the record with the
 * same name in source_totalObjects and resets that record's `source` to "".
 * Logs the elapsed time in milliseconds, then the resulting sap_desc values.
 *
 * A name -> record index is built once up front, so each lookup is a single
 * property access rather than a _.find scan over the whole array.
 * Records of source_sapObjs whose name has no counterpart are skipped.
 */
function iter_arrays(){
    // Shallow copies: the arrays are new, but the records inside them are the
    // same objects the global arrays hold.
    var sapObjs = source_sapObjs.slice();
    var totalObjects = source_totalObjects.slice();
    var start = new Date().getTime();

    // Prototype-less object: names such as "constructor", "toString" or
    // "__proto__" are then ordinary keys and never resolve to inherited members.
    var totalObjectsHash = Object.create(null);
    for (var k = 0; k < totalObjects.length; k++) {
        totalObjectsHash[totalObjects[k].name] = totalObjects[k];
    }

    for (var i = 0, sapLength = sapObjs.length; i < sapLength; i++) {
        // Replaces: _.find(totalObjects, {name: sapObjs[i].name})
        var dummyObj = totalObjectsHash[sapObjs[i].name];
        if (!dummyObj) {
            continue; // no record with this name; nothing to update
        }
        dummyObj["sap_desc"] = sapObjs[i].sap_desc;
        dummyObj.source = "";
    }

    var end = new Date().getTime();
    var time = end - start;
    console.log('Execution time: ' + time);
    console.log(_.map(totalObjects, 'sap_desc'));
}
</script>
</head>
<body>
</body>
</html>
要运行它,请在本地计算机上创建该页面,用 Chrome 打开,打开开发者工具,然后在控制台中运行第一个函数(createArrays)。完成后,再运行第二个函数(iter_arrays)。
这里 totalObjects 包含大约 40k 条记录,我需要从中找出同样存在于 sapObjs(约 20k 条记录)中的对象;每找到一个这样的对象,就修改它的属性值。整个操作耗时超过 200 秒。有没有人能建议一种解决这个问题并提高性能的方法?我已经尝试过 _.find 的替代方案,例如 filter 和其他数组函数,但 Underscore.js 给出的结果反而比它们更好。不过据说 Underscore 比较慢,所以我想找到其他方法。有谁能帮我解决这个问题吗?
// Merge every record of sapObjs into totalObjects: when a record with the same
// name is already registered in entriesInserted, update the matching entry of
// totalObjects; otherwise register the record and append it.
// NOTE(review): i, sapLength, currRecord and dummyObj are assigned without a
// declaration here; presumably they are declared in the enclosing scope - confirm.
for (i = 0, sapLength = sapObjs.length; i < sapLength; i++) {
    currRecord = entriesInserted[sapObjs[i].name];
    if (currRecord) {
        // _.find walks totalObjects from the start on every hit; this is the
        // step the question reports as slow.
        dummyObj = _.find(totalObjects, {name: sapObjs[i].name});
        dummyObj["sap_desc"] = sapObjs[i].sap_desc;
        dummyObj.source = "";
    }
    else {
        // Previously read `sapOpbs[i]`, an undefined identifier that threw a
        // ReferenceError the first time this branch ran.
        entriesInserted[sapObjs[i].name] = sapObjs[i];
        totalObjects.push(sapObjs[i]);
    }
}
创建一个查找哈希表。下面是我临时拼凑的一个简单页面,用来说明这一点。使用 _.find 时,测试耗时 400 多秒;使用查找哈希表,大约只需 40 毫秒,几乎是瞬间完成。
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<!-- Explicit https: a protocol-relative URL resolves to file:// when this page is opened from disk. -->
<script src="https://code.jquery.com/jquery-2.2.0.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>
<script type="text/javascript">
// Shared fixtures, kept global so that createArrays() and iter_arrays() can be
// run as two separate steps. createArrays() fills both arrays with
// {name, sap_desc} records; iter_arrays() reads them.
var source_sapObjs = [];
var source_totalObjects = [];
/**
 * Downloads an English word list and rebuilds the two global fixture arrays
 * from it: source_sapObjs from the first 20000 words and source_totalObjects
 * from the first 40000 words, so every name in the former also occurs in the
 * latter.
 *
 * The request is asynchronous; "finished creating arrays" is logged once both
 * arrays are ready, and a failed download is reported on console.error.
 */
function createArrays(){
    jQuery.ajax({
        url: 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt',
        dataType: 'text',
        type: 'GET'
    }).done(function(textFile){
        // Accept both LF and CRLF so names never carry a trailing "\r".
        var words = textFile.split(/\r?\n/);
        // Never read past the end of the list (that would yield undefined names).
        var sapCount = Math.min(20000, words.length);
        var totalCount = Math.min(40000, words.length);

        // Empty the arrays in place so a second call rebuilds them instead of
        // appending another batch of duplicates.
        source_sapObjs.length = 0;
        source_totalObjects.length = 0;

        // create an array with 20k and 40k records.
        for (var i = 0; i < sapCount; i++) {
            source_sapObjs.push({name: words[i], sap_desc: 'DESC-' + words[i]});
        }
        for (var j = 0; j < totalCount; j++) {
            source_totalObjects.push({name: words[j], sap_desc: 'none'});
        }
        console.log("source_sapObjs.length:", source_sapObjs.length);
        console.log("source_totalObjects.length:", source_totalObjects.length);
        console.log("finished creating arrays");
    }).fail(function(jqXHR, textStatus, errorThrown){
        console.error("failed to download word list:", textStatus, errorThrown);
    });
}
/**
 * Copies sap_desc from every record of source_sapObjs onto the record with the
 * same name in source_totalObjects and resets that record's `source` to "".
 * Logs the elapsed time in milliseconds, then the resulting sap_desc values.
 *
 * A name -> record index is built once up front, so each lookup is a single
 * property access rather than a _.find scan over the whole array.
 * Records of source_sapObjs whose name has no counterpart are skipped.
 */
function iter_arrays(){
    // Shallow copies: the arrays are new, but the records inside them are the
    // same objects the global arrays hold.
    var sapObjs = source_sapObjs.slice();
    var totalObjects = source_totalObjects.slice();
    var start = new Date().getTime();

    // Prototype-less object: names such as "constructor", "toString" or
    // "__proto__" are then ordinary keys and never resolve to inherited members.
    var totalObjectsHash = Object.create(null);
    for (var k = 0; k < totalObjects.length; k++) {
        totalObjectsHash[totalObjects[k].name] = totalObjects[k];
    }

    for (var i = 0, sapLength = sapObjs.length; i < sapLength; i++) {
        // Replaces: _.find(totalObjects, {name: sapObjs[i].name})
        var dummyObj = totalObjectsHash[sapObjs[i].name];
        if (!dummyObj) {
            continue; // no record with this name; nothing to update
        }
        dummyObj["sap_desc"] = sapObjs[i].sap_desc;
        dummyObj.source = "";
    }

    var end = new Date().getTime();
    var time = end - start;
    console.log('Execution time: ' + time);
    console.log(_.map(totalObjects, 'sap_desc'));
}
</script>
</head>
<body>
</body>
</html>
要运行它,请在本地计算机上创建该页面,用 Chrome 打开,打开开发者工具,然后在控制台中运行第一个函数(createArrays)。完成后,再运行第二个函数(iter_arrays)。