Error: connect ETIMEDOUT when scraping
Error: connect ETIMEDOUT when scraping
我有一个函数:
1. 从 MongoDB 集合 foo 的文档中获取一个包含 3000 个 'id' 属性值的数组。
2. 为每个 id 发起一个 GET 请求,获取该 id 对应的 'resp' 对象,并将其存入另一个数据库。
// Route handler for GET '/': reads the distinct `id` values from collection
// `foo`, requests the external API once per id, and inserts each parsed
// response into collection `test`.
// NOTE(review): nothing in the visible lines ever writes to `res`.
router.get('/', (req, res) => {
var collection = db.get().collection('foo');
var collection2 = db.get().collection('test');
// NOTE(review): `err` is never checked before `idArr` is used.
collection.distinct('id', (err, idArr) => { // count: 3000+
// forEach invokes `request` once per id without waiting for any earlier
// callback to fire.
idArr.forEach(id => {
let url = 'https://externalapi.io/id=' + id
request(url, (error, response, body) => {
if (error) {
console.log(error)
} else {
// NOTE(review): `resp` is not declared anywhere in this snippet; the
// callback's response text parameter is `body`.
resp = JSON.parse(resp);
collection2.insert(resp);
}
});
});
// NOTE(review): the snippet stops here; the closing `});` for the `distinct`
// callback and for `router.get` are not shown.
Node 错误日志:
[0] events.js:163
[0] throw er; // Unhandled 'error' event
[0] ^
[0]
[0] Error: connect ETIMEDOUT [EXT URL REDACTED]
[0] at Object.exports._errnoException (util.js:1050:11)
[0] at exports._exceptionWithHostPort (util.js:1073:20)
[0] at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1093:14)
我使用 simple-rate-limiter 来避免触发速率限制(25cps):
const limit = require("simple-rate-limiter");
const request = limit(require("request")).to(20).per(1000);
但是在发出 300 到 1700 个请求之间的某个时刻,我总会收到此错误,导致命令行中运行的 Node 进程崩溃。
我该如何处理这个错误以防止我的应用程序崩溃?
我尝试了很多错误处理方式,但没有一种能够处理 connect ETIMEDOUT。
正如评论中所讨论的那样,如果您想控制同时进行中的最大请求数,您可以使用 Bluebird 像这样轻松地做到这一点:
const Promise = require('bluebird');
const rp = require('request-promise');
/**
 * GET '/' — for every distinct `id` in collection `foo`, fetch the matching
 * record from the external API and insert the parsed JSON into collection
 * `test`.
 *
 * At most 10 requests are in flight at a time (Bluebird `Promise.map` with
 * `concurrency`), which avoids opening thousands of sockets at once. A
 * failure for one id (network error, invalid JSON, failed insert) is logged
 * and skipped so the remaining ids are still processed.
 *
 * Responds with `{processed, failed}` when every id has been handled, or
 * with status 501 when the id list cannot be read.
 */
router.get('/', (req, res) => {
  const collection = db.get().collection('foo');
  const collection2 = db.get().collection('test');

  collection.distinct('id', (err, idArr) => { // count: 3000+
    if (err) {
      console.log(err);
      res.status(501).send({error: 'could not read ids'});
      return;
    }

    let failed = 0;

    Promise.map(idArr, id => {
      const url = 'https://externalapi.io/id=' + id;
      // rp() rejects on request errors, so no `error` value is passed to
      // .then(); failures are handled in the .catch() below.
      return rp(url).then(body => {
        const resp = JSON.parse(body);
        // Return the insert result so that, if it is a promise, this slot
        // is not released (and the final response is not sent) until the
        // write has finished.
        return collection2.insert(resp);
      }).catch(requestErr => {
        // Not rethrowing keeps the other requests running.
        failed += 1;
        console.log(requestErr);
      });
    // pick some concurrency value here that does not cause errors
    }, {concurrency: 10}).then(() => {
      // all requests are done, send final response
      res.send({processed: idArr.length, failed: failed});
    }).catch(mapErr => {
      // Only reached if something outside the per-id handler throws.
      console.log(mapErr);
      if (!res.headersSent) {
        res.status(500).send({error: 'processing failed'});
      }
    });
  });
});
我有一个函数:
1. 从 MongoDB 集合 foo 的文档中获取一个包含 3000 个 'id' 属性值的数组。
2. 为每个 id 发起一个 GET 请求,获取该 id 对应的 'resp' 对象,并将其存入另一个数据库。
// Route handler for GET '/': reads the distinct `id` values from collection
// `foo`, requests the external API once per id, and inserts each parsed
// response into collection `test`.
// NOTE(review): nothing in the visible lines ever writes to `res`.
router.get('/', (req, res) => {
var collection = db.get().collection('foo');
var collection2 = db.get().collection('test');
// NOTE(review): `err` is never checked before `idArr` is used.
collection.distinct('id', (err, idArr) => { // count: 3000+
// forEach invokes `request` once per id without waiting for any earlier
// callback to fire.
idArr.forEach(id => {
let url = 'https://externalapi.io/id=' + id
request(url, (error, response, body) => {
if (error) {
console.log(error)
} else {
// NOTE(review): `resp` is not declared anywhere in this snippet; the
// callback's response text parameter is `body`.
resp = JSON.parse(resp);
collection2.insert(resp);
}
});
});
// NOTE(review): the snippet stops here; the closing `});` for the `distinct`
// callback and for `router.get` are not shown.
Node 错误日志:
[0] events.js:163
[0] throw er; // Unhandled 'error' event
[0] ^
[0]
[0] Error: connect ETIMEDOUT [EXT URL REDACTED]
[0] at Object.exports._errnoException (util.js:1050:11)
[0] at exports._exceptionWithHostPort (util.js:1073:20)
[0] at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1093:14)
我使用 simple-rate-limiter 来避免触发速率限制(25cps):
const limit = require("simple-rate-limiter");
const request = limit(require("request")).to(20).per(1000);
但是在发出 300 到 1700 个请求之间的某个时刻,我总会收到此错误,导致命令行中运行的 Node 进程崩溃。我该如何处理这个错误以防止我的应用程序崩溃?
我尝试了很多错误处理方式,但没有一种能够处理 connect ETIMEDOUT。
正如评论中所讨论的那样,如果您想控制同时进行中的最大请求数,您可以使用 Bluebird 像这样轻松地做到这一点:
const Promise = require('bluebird');
const rp = require('request-promise');
/**
 * GET '/' — for every distinct `id` in collection `foo`, fetch the matching
 * record from the external API and insert the parsed JSON into collection
 * `test`.
 *
 * At most 10 requests are in flight at a time (Bluebird `Promise.map` with
 * `concurrency`), which avoids opening thousands of sockets at once. A
 * failure for one id (network error, invalid JSON, failed insert) is logged
 * and skipped so the remaining ids are still processed.
 *
 * Responds with `{processed, failed}` when every id has been handled, or
 * with status 501 when the id list cannot be read.
 */
router.get('/', (req, res) => {
  const collection = db.get().collection('foo');
  const collection2 = db.get().collection('test');

  collection.distinct('id', (err, idArr) => { // count: 3000+
    if (err) {
      console.log(err);
      res.status(501).send({error: 'could not read ids'});
      return;
    }

    let failed = 0;

    Promise.map(idArr, id => {
      const url = 'https://externalapi.io/id=' + id;
      // rp() rejects on request errors, so no `error` value is passed to
      // .then(); failures are handled in the .catch() below.
      return rp(url).then(body => {
        const resp = JSON.parse(body);
        // Return the insert result so that, if it is a promise, this slot
        // is not released (and the final response is not sent) until the
        // write has finished.
        return collection2.insert(resp);
      }).catch(requestErr => {
        // Not rethrowing keeps the other requests running.
        failed += 1;
        console.log(requestErr);
      });
    // pick some concurrency value here that does not cause errors
    }, {concurrency: 10}).then(() => {
      // all requests are done, send final response
      res.send({processed: idArr.length, failed: failed});
    }).catch(mapErr => {
      // Only reached if something outside the per-id handler throws.
      console.log(mapErr);
      if (!res.headersSent) {
        res.status(500).send({error: 'processing failed'});
      }
    });
  });
});