NodeJS循环中的异步双回调
Async double callback in NodeJS loop
我今天要进入我的网络爬虫的下一步!
我已经使用 async 库对 url 数组进行循环。现在我想在这个回调里再嵌套一层循环,并等内层循环全部执行完毕后,再继续外层的下一次迭代。
我不知道如何使用两个回调。
这是我的代码:
/**
 * Request every entry of `url` one at a time and write the collected records
 * to `output.json`.
 *
 * For each page, the links found under `.ombre_menu li` are gathered into
 * `categoriesURL`, and one blank product record is appended to the output per
 * `.ombre_menu li` element.
 *
 * @param {Array} url - Collection of page URLs handed to `request` one by one.
 */
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (err) {
                // Report the failed page and move on so the remaining URLs are still crawled.
                console.error('Request failed for ' + urlSingle, err);
                return cb();
            }
            var $ = cheerio.load(body);
            var categoriesURL = [];
            $('.ombre_menu li').each(function(i, element) {
                $(this).find('.nav_sous-menu_bloc li a').each(function(i, element) {
                    categoriesURL.push('https://blabla' + $(this).attr('href'));
                });
                // I WANT TO LOOP on the categoriesURL array HERE
                var jsObject = {
                    name: "",
                    description: "",
                    price: "",
                    categorie: "",
                    liter: "",
                    kilo: "",
                    pricePer: "",
                    quantity: "",
                    capacity: "",
                    promotion: "",
                    scrapingDate: "",
                    url: ""
                };
                data.push(jsObject);
            });
            cb();
        });
    }, function() {
        // This will run when the loop is done.
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(data, null, 4);
        fs.writeFile('output.json', json, function(err) {
            if (err) {
                console.error('Could not write output.json', err);
                return;
            }
            console.log('File successfully written!');
        });
    });
};
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');
有人知道我该怎么做吗?
谢谢
您可以使用嵌套的 eachSeries。像这样:
/**
 * Request each entry of `url` in series; for every page, collect its category
 * links and loop over them in a nested series before moving on to the next
 * URL. When all URLs are done, `data` is written to `output.json`.
 *
 * Note: nothing in this snippet fills `data`; push records into it from the
 * inner loop.
 *
 * @param {Array} url - Collection of page URLs handed to `request` one by one.
 */
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (err) {
                // Always signal completion: without this call the outer
                // series would stall on the first failed request.
                console.error('Request failed for ' + urlSingle, err);
                return cb();
            }
            var $ = cheerio.load(body);
            var categoriesURL = [];
            $('.ombre_menu li').each(function() {
                $(this).find('.nav_sous-menu_bloc li a').each(function() {
                    categoriesURL.push('https://blablablac' + $(this).attr('href'));
                });
            });
            // The nested series starts only after the DOM walk has finished,
            // so `cb` fires exactly once per URL.
            async.eachSeries(categoriesURL, function(categoryURL, cb2) {
                // Do whatever you want to do here
                cb2();
            }, function(categoryErr) {
                // Forward the inner loop's outcome to the outer loop.
                cb(categoryErr);
            });
        });
    }, function(err) {
        // This will run when the loop is done.
        if (err) {
            console.error('Crawl aborted', err);
            return;
        }
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(data, null, 4);
        fs.writeFile('output.json', json, function(writeErr) {
            if (writeErr) {
                console.error('Could not write output.json', writeErr);
                return;
            }
            console.log('File successfully written!');
        });
    });
};
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');
对您的代码进行了一些更改:
- 使用 .mapSeries 代替 .eachSeries。这样您就可以按照与输入数组相同的顺序拿到迭代器函数的结果。例如,把 [2,3] 传给求平方的函数,得到的一定是 [4,9],绝不会是 [9,4]。
- 将代码分解为函数,以便每个函数执行一项特定任务
- 将 categoriesURL 处理移出循环 1
- 尽早返回(return early)。这样可以提高代码的可读性。例如:
if (err) return callback(err);
/**
 * Run `processUrl` over every entry of `url` in series and write the gathered
 * results to `output.json`.
 *
 * @param {Array} url - Collection of page URLs passed to `processUrl` one by one.
 */
function getWebData(url) {
    // Using .mapSeries in place of .eachSeries so the iterator results are
    // handed to the final callback.
    async.mapSeries(url, processUrl, function(err, results) {
        // This will run when the loop is done.
        if (err) {
            // Do not write a file when an iteration reported a failure.
            console.error('Error', err);
            return;
        }
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(results, null, 4);
        fs.writeFile('output.json', json, function(writeErr) {
            if (writeErr) {
                console.error('Error', writeErr);
                return;
            }
            console.log('File successfully written!');
        });
    });
}
/**
 * Fetch one page, collect its category links, and run `processCategoryUrl`
 * over them in series.
 *
 * @param {string} url - Address handed to `request`.
 * @param {Function} callback - Called as `callback(err)` on failure, or as
 *   `callback(null, results)` with the results of the category pass.
 */
function processUrl(url, callback) {
    // Walk every `.ombre_menu li` and gather the href of each sub-menu
    // anchor, prefixed with 'https://blablablac'.
    function collectCategoryUrls($) {
        var found = [];
        $('.ombre_menu li').each(function() {
            var anchors = $(this).find('.nav_sous-menu_bloc li a');
            anchors.each(function() {
                var href = $(this).attr('href');
                found.push('https://blablablac' + href);
            });
        });
        return found;
    }

    // Relay the outcome of the category pass to the caller.
    function onCategoriesDone(categoryErr, categoryResults) {
        if (categoryErr) {
            return callback(categoryErr);
        }
        return callback(null, categoryResults);
    }

    request(url, function(requestErr, resp, body) {
        // Bail out early on a failed request so the happy path stays flat.
        if (requestErr) {
            return callback(requestErr);
        }
        var categoriesURL = collectCategoryUrls(cheerio.load(body));
        // .mapSeries keeps the results in the same order as categoriesURL.
        async.mapSeries(categoriesURL, processCategoryUrl, onCategoriesDone);
    });
}
/**
 * Placeholder handler for a single category URL: it does no work yet and
 * immediately invokes `callback` with no arguments.
 *
 * @param {string} categoryUrl - Category address to process (currently unused).
 * @param {Function} callback - Completion callback; call it with an error or
 *   with results once real processing is added.
 * @returns {*} Whatever `callback` returns.
 */
function processCategoryUrl(categoryUrl, callback) {
    // Real processing of categoryUrl goes here.
    var outcome = callback();
    return outcome;
}
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');
我今天要进入我的网络爬虫的下一步!
我已经使用 async 库对 url 数组进行循环。现在我想在这个回调里再嵌套一层循环,并等内层循环全部执行完毕后,再继续外层的下一次迭代。
我不知道如何使用两个回调。
这是我的代码:
/**
 * Request every entry of `url` one at a time and write the collected records
 * to `output.json`.
 *
 * For each page, the links found under `.ombre_menu li` are gathered into
 * `categoriesURL`, and one blank product record is appended to the output per
 * `.ombre_menu li` element.
 *
 * @param {Array} url - Collection of page URLs handed to `request` one by one.
 */
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (err) {
                // Report the failed page and move on so the remaining URLs are still crawled.
                console.error('Request failed for ' + urlSingle, err);
                return cb();
            }
            var $ = cheerio.load(body);
            var categoriesURL = [];
            $('.ombre_menu li').each(function(i, element) {
                $(this).find('.nav_sous-menu_bloc li a').each(function(i, element) {
                    categoriesURL.push('https://blabla' + $(this).attr('href'));
                });
                // I WANT TO LOOP on the categoriesURL array HERE
                var jsObject = {
                    name: "",
                    description: "",
                    price: "",
                    categorie: "",
                    liter: "",
                    kilo: "",
                    pricePer: "",
                    quantity: "",
                    capacity: "",
                    promotion: "",
                    scrapingDate: "",
                    url: ""
                };
                data.push(jsObject);
            });
            cb();
        });
    }, function() {
        // This will run when the loop is done.
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(data, null, 4);
        fs.writeFile('output.json', json, function(err) {
            if (err) {
                console.error('Could not write output.json', err);
                return;
            }
            console.log('File successfully written!');
        });
    });
};
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');
有人知道我该怎么做吗?
谢谢
您可以使用嵌套的 eachSeries。像这样:
/**
 * Request each entry of `url` in series; for every page, collect its category
 * links and loop over them in a nested series before moving on to the next
 * URL. When all URLs are done, `data` is written to `output.json`.
 *
 * Note: nothing in this snippet fills `data`; push records into it from the
 * inner loop.
 *
 * @param {Array} url - Collection of page URLs handed to `request` one by one.
 */
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (err) {
                // Always signal completion: without this call the outer
                // series would stall on the first failed request.
                console.error('Request failed for ' + urlSingle, err);
                return cb();
            }
            var $ = cheerio.load(body);
            var categoriesURL = [];
            $('.ombre_menu li').each(function() {
                $(this).find('.nav_sous-menu_bloc li a').each(function() {
                    categoriesURL.push('https://blablablac' + $(this).attr('href'));
                });
            });
            // The nested series starts only after the DOM walk has finished,
            // so `cb` fires exactly once per URL.
            async.eachSeries(categoriesURL, function(categoryURL, cb2) {
                // Do whatever you want to do here
                cb2();
            }, function(categoryErr) {
                // Forward the inner loop's outcome to the outer loop.
                cb(categoryErr);
            });
        });
    }, function(err) {
        // This will run when the loop is done.
        if (err) {
            console.error('Crawl aborted', err);
            return;
        }
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(data, null, 4);
        fs.writeFile('output.json', json, function(writeErr) {
            if (writeErr) {
                console.error('Could not write output.json', writeErr);
                return;
            }
            console.log('File successfully written!');
        });
    });
};
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');
对您的代码进行了一些更改:
- 使用 .mapSeries 代替 .eachSeries。这样您就可以按照与输入数组相同的顺序拿到迭代器函数的结果。例如,把 [2,3] 传给求平方的函数,得到的一定是 [4,9],绝不会是 [9,4]。
- 将代码分解为函数,以便每个函数执行一项特定任务
- 将 categoriesURL 处理移出循环 1
- 尽早返回(return early)。这样可以提高代码的可读性。例如:
if (err) return callback(err);
/**
 * Run `processUrl` over every entry of `url` in series and write the gathered
 * results to `output.json`.
 *
 * @param {Array} url - Collection of page URLs passed to `processUrl` one by one.
 */
function getWebData(url) {
    // Using .mapSeries in place of .eachSeries so the iterator results are
    // handed to the final callback.
    async.mapSeries(url, processUrl, function(err, results) {
        // This will run when the loop is done.
        if (err) {
            // Do not write a file when an iteration reported a failure.
            console.error('Error', err);
            return;
        }
        // Serialize exactly once: stringifying an already-stringified value
        // writes a quoted, escaped string instead of a JSON array.
        var json = JSON.stringify(results, null, 4);
        fs.writeFile('output.json', json, function(writeErr) {
            if (writeErr) {
                console.error('Error', writeErr);
                return;
            }
            console.log('File successfully written!');
        });
    });
}
/**
 * Fetch one page, collect its category links, and run `processCategoryUrl`
 * over them in series.
 *
 * @param {string} url - Address handed to `request`.
 * @param {Function} callback - Called as `callback(err)` on failure, or as
 *   `callback(null, results)` with the results of the category pass.
 */
function processUrl(url, callback) {
    // Walk every `.ombre_menu li` and gather the href of each sub-menu
    // anchor, prefixed with 'https://blablablac'.
    function collectCategoryUrls($) {
        var found = [];
        $('.ombre_menu li').each(function() {
            var anchors = $(this).find('.nav_sous-menu_bloc li a');
            anchors.each(function() {
                var href = $(this).attr('href');
                found.push('https://blablablac' + href);
            });
        });
        return found;
    }

    // Relay the outcome of the category pass to the caller.
    function onCategoriesDone(categoryErr, categoryResults) {
        if (categoryErr) {
            return callback(categoryErr);
        }
        return callback(null, categoryResults);
    }

    request(url, function(requestErr, resp, body) {
        // Bail out early on a failed request so the happy path stays flat.
        if (requestErr) {
            return callback(requestErr);
        }
        var categoriesURL = collectCategoryUrls(cheerio.load(body));
        // .mapSeries keeps the results in the same order as categoriesURL.
        async.mapSeries(categoriesURL, processCategoryUrl, onCategoriesDone);
    });
}
/**
 * Placeholder handler for a single category URL: it does no work yet and
 * immediately invokes `callback` with no arguments.
 *
 * @param {string} categoryUrl - Category address to process (currently unused).
 * @param {Function} callback - Completion callback; call it with an error or
 *   with results once real processing is added.
 * @returns {*} Whatever `callback` returns.
 */
function processCategoryUrl(categoryUrl, callback) {
    // Real processing of categoryUrl goes here.
    var outcome = callback();
    return outcome;
}
// Kick off the crawl. `url` is not defined in this snippet and must be supplied by the surrounding code.
getWebData(url);
// Start listening on port 8080. `app` is not defined in this snippet (presumably an Express app — confirm).
app.listen('8080');