nightmarejs 参数未定义
nightmarejs parameter is not defined
我想抓取页面中每个 href 指向的页面的 HTML,然后输出到 CSV。这是我第一次使用 Nightmare,遇到了参数(变量)未定义的问题。
// Question script: collect every link that opens in a new tab from the listing
// page, write the URLs to 8.csv, then try to open the first link.
// This version fails with "divs is not defined" (see the second .then below).
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
// Never assigned in this (Node.js) scope.
// NOTE(review): the Nightmare docs say the .evaluate() callback runs inside the
// page, so the `result` built there is presumably a different variable — confirm.
var result ;
nightmare
.goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
.wait(1000)
.evaluate(function () {
// Select every anchor that has target="_blank".
var divs = document.querySelectorAll('a[target="_blank"]'),i;
// Concatenate the hrefs, one per line.
for (i = 0,result = ""; i < divs.length; ++i) {
result += divs[i].href.toString()+"\n";
}
// Returns the NodeList itself, not the href strings gathered above.
return divs;
})
// NOTE(review): .end() is queued here, yet `nightmare` is used again in the
// second .then() below — confirm the instance is still usable after ending.
.end()
.then(function (divs) {
// `divs` exists only as the parameter of this callback.
console.log(divs)
///////////////////////////////////////////////////
// Writes the Node-side `result`, which is still undefined at this point.
fs.writeFile('8.csv', result, function (err) {
if (err) throw err;
console.log('It\'s saved!');
});
////////////////////////////////////////////////////
})
.then(function() {
// Return the Nightmare chain so the outer promise waits for it.
return nightmare
// Navigate to the first collected link.
// `divs` is not in scope in this callback (it was a parameter of the previous
// one), so evaluating this argument throws "divs is not defined".
.goto(divs[0].href)
// Look up the div whose class attribute is exactly "outputDate".
.evaluate(function() {
// NOTE(review): returns a DOM element; presumably it will not survive the
// transfer back to Node as a usable object — confirm.
return document.querySelector('div[class="outputDate"]');
})
})
.catch(function (error) {
console.error('Search failed:', error);
});
第二个 goto() 处报错:divs 未定义(divs is not defined)。非常感谢。
错误消息已经说明了一切:`divs` 未定义。这意味着在第二个 `then` 回调的作用域内不存在 `divs` 变量。
您可以把那里的两个 `then` 回调合并(merge)为一个,写成类似下面这样:
// Merged version: the file write and the follow-up navigation live in the same
// .then() callback, so `divs` is in scope for both.
// Known remaining problem (discussed right after this snippet): .evaluate()
// returns the NodeList itself, which reaches Node as a list of empty objects,
// so `divs[0].href` is undefined here.
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
// Never assigned in this (Node.js) scope; the `result` built inside
// .evaluate() below lives in the page.
var result ;
nightmare
  .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
  .wait(1000)
  .evaluate(function () {
    // Select every anchor that has target="_blank".
    var divs = document.querySelectorAll('a[target="_blank"]'),i;
    for (i = 0,result = ""; i < divs.length; ++i) {
      result += divs[i].href.toString()+"\n";
    }
    return divs;
  })
  .then(function (divs) {
    console.log(divs)
    ///////////////////////////////////////////////////
    fs.writeFile('8.csv', result, function (err) {
      if (err) throw err;
      console.log('It\'s saved!');
    });
    ////////////////////////////////////////////////////
    // Log BEFORE starting the chain. Placed between `nightmare` and `.goto()`,
    // automatic semicolon insertion ends the `nightmare` statement and
    // `.goto()` is then called on the undefined returned by console.log().
    console.log('this is the link I want to navigate to', divs[0].href)
    // Return the chain so its rejection reaches the outer .catch().
    return nightmare
      // Navigate to the first collected link.
      .goto(divs[0].href)
      // Look up the div whose class attribute is exactly "outputDate".
      .evaluate(function() {
        return document.querySelector('div[class="outputDate"]');
      })
      .then(function(div) {
        console.log('I got the "div", now Ill do something with it', div)
      })
  })
  .catch(function (error) {
    console.error('Search failed:', error);
  });
// NOTE(review): this runs synchronously, before the chains above have finished;
// confirm against the Nightmare docs that ending here does not close Electron
// before the second navigation.
nightmare.end()
但是这段代码同样会失败,因为返回的 `divs` 列表里全是空对象(页面中的 DOM 节点无法原样传回 Node)。我修改了部分代码,使其可以正常运行:
// Working version: collect the href of every link that opens in a new tab,
// save the list to 8.csv, then open the first link and read the markup of its
// "outputDate" div.
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');

nightmare
  .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
  .wait(1000)
  .evaluate(function () {
    // Runs inside the page. DOM nodes come back to Node as empty objects, so
    // return plain href strings instead of the NodeList.
    var anchors = document.querySelectorAll('a[target="_blank"]');
    var links = [];
    for (var i = 0; i < anchors.length; ++i) {
      links.push(anchors[i].href);
    }
    return links;
  })
  .then(function (links) {
    console.log(links);

    // One URL per line. The write runs in parallel with the navigation below.
    fs.writeFile('8.csv', links.join('\n'), function (err) {
      if (err) throw err;
      console.log('It\'s saved!');
    });

    if (links.length === 0) {
      // Nothing to visit: still shut Electron down so the process can exit.
      console.log('No links found on the listing page');
      return nightmare.end();
    }

    console.log('this is the link I want to navigate to', links[0]);

    // Return the chain so a failure in it reaches the outer .catch() instead
    // of becoming an unhandled rejection.
    return nightmare
      .goto(links[0])
      .wait(5000)
      .evaluate(function () {
        // Return the element's markup as a string (or null when it is absent);
        // the element itself would arrive in Node as an empty object.
        var outputDate = document.querySelector('div[class="outputDate"]');
        return outputDate ? outputDate.outerHTML : null;
      })
      // Queue the shutdown before .then() so Electron is closed once the
      // result has been read.
      .end()
      .then(function (div) {
        console.log('I got the "div", now Ill do something with it', div);
      });
  })
  .catch(function (error) {
    console.error('Search failed:', error);
  });
现在,您还需要遍历链接列表,并依次导航到每个链接。这在 Nightmare 中有点棘手,请参考这份资料和这个回答,相信它们会对您有所帮助。
我想抓取页面中每个 href 指向的页面的 HTML,然后输出到 CSV。这是我第一次使用 Nightmare,遇到了参数(变量)未定义的问题。
// Question script: collect every link that opens in a new tab from the listing
// page, write the URLs to 8.csv, then try to open the first link.
// This version fails with "divs is not defined" (see the second .then below).
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
// Never assigned in this (Node.js) scope.
// NOTE(review): the Nightmare docs say the .evaluate() callback runs inside the
// page, so the `result` built there is presumably a different variable — confirm.
var result ;
nightmare
.goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
.wait(1000)
.evaluate(function () {
// Select every anchor that has target="_blank".
var divs = document.querySelectorAll('a[target="_blank"]'),i;
// Concatenate the hrefs, one per line.
for (i = 0,result = ""; i < divs.length; ++i) {
result += divs[i].href.toString()+"\n";
}
// Returns the NodeList itself, not the href strings gathered above.
return divs;
})
// NOTE(review): .end() is queued here, yet `nightmare` is used again in the
// second .then() below — confirm the instance is still usable after ending.
.end()
.then(function (divs) {
// `divs` exists only as the parameter of this callback.
console.log(divs)
///////////////////////////////////////////////////
// Writes the Node-side `result`, which is still undefined at this point.
fs.writeFile('8.csv', result, function (err) {
if (err) throw err;
console.log('It\'s saved!');
});
////////////////////////////////////////////////////
})
.then(function() {
// Return the Nightmare chain so the outer promise waits for it.
return nightmare
// Navigate to the first collected link.
// `divs` is not in scope in this callback (it was a parameter of the previous
// one), so evaluating this argument throws "divs is not defined".
.goto(divs[0].href)
// Look up the div whose class attribute is exactly "outputDate".
.evaluate(function() {
// NOTE(review): returns a DOM element; presumably it will not survive the
// transfer back to Node as a usable object — confirm.
return document.querySelector('div[class="outputDate"]');
})
})
.catch(function (error) {
console.error('Search failed:', error);
});
第二个 goto() 处报错:divs 未定义(divs is not defined)。非常感谢。
错误消息已经说明了一切:`divs` 未定义。这意味着在第二个 `then` 回调的作用域内不存在 `divs` 变量。
您可以把那里的两个 `then` 回调合并(merge)为一个,写成类似下面这样:
// Merged version: the file write and the follow-up navigation live in the same
// .then() callback, so `divs` is in scope for both.
// Known remaining problem (discussed right after this snippet): .evaluate()
// returns the NodeList itself, which reaches Node as a list of empty objects,
// so `divs[0].href` is undefined here.
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');
// Never assigned in this (Node.js) scope; the `result` built inside
// .evaluate() below lives in the page.
var result ;
nightmare
  .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
  .wait(1000)
  .evaluate(function () {
    // Select every anchor that has target="_blank".
    var divs = document.querySelectorAll('a[target="_blank"]'),i;
    for (i = 0,result = ""; i < divs.length; ++i) {
      result += divs[i].href.toString()+"\n";
    }
    return divs;
  })
  .then(function (divs) {
    console.log(divs)
    ///////////////////////////////////////////////////
    fs.writeFile('8.csv', result, function (err) {
      if (err) throw err;
      console.log('It\'s saved!');
    });
    ////////////////////////////////////////////////////
    // Log BEFORE starting the chain. Placed between `nightmare` and `.goto()`,
    // automatic semicolon insertion ends the `nightmare` statement and
    // `.goto()` is then called on the undefined returned by console.log().
    console.log('this is the link I want to navigate to', divs[0].href)
    // Return the chain so its rejection reaches the outer .catch().
    return nightmare
      // Navigate to the first collected link.
      .goto(divs[0].href)
      // Look up the div whose class attribute is exactly "outputDate".
      .evaluate(function() {
        return document.querySelector('div[class="outputDate"]');
      })
      .then(function(div) {
        console.log('I got the "div", now Ill do something with it', div)
      })
  })
  .catch(function (error) {
    console.error('Search failed:', error);
  });
// NOTE(review): this runs synchronously, before the chains above have finished;
// confirm against the Nightmare docs that ending here does not close Electron
// before the second navigation.
nightmare.end()
但是这段代码同样会失败,因为返回的 `divs` 列表里全是空对象(页面中的 DOM 节点无法原样传回 Node)。我修改了部分代码,使其可以正常运行:
// Working version: collect the href of every link that opens in a new tab,
// save the list to 8.csv, then open the first link and read the markup of its
// "outputDate" div.
var Nightmare = require('nightmare');
var nightmare = Nightmare({ show: true });
var fs = require('fs');

nightmare
  .goto('http://football-system.jp/fss/pub_kaijyolist.php?lid=h0xYuxqKQ+M=')
  .wait(1000)
  .evaluate(function () {
    // Runs inside the page. DOM nodes come back to Node as empty objects, so
    // return plain href strings instead of the NodeList.
    var anchors = document.querySelectorAll('a[target="_blank"]');
    var links = [];
    for (var i = 0; i < anchors.length; ++i) {
      links.push(anchors[i].href);
    }
    return links;
  })
  .then(function (links) {
    console.log(links);

    // One URL per line. The write runs in parallel with the navigation below.
    fs.writeFile('8.csv', links.join('\n'), function (err) {
      if (err) throw err;
      console.log('It\'s saved!');
    });

    if (links.length === 0) {
      // Nothing to visit: still shut Electron down so the process can exit.
      console.log('No links found on the listing page');
      return nightmare.end();
    }

    console.log('this is the link I want to navigate to', links[0]);

    // Return the chain so a failure in it reaches the outer .catch() instead
    // of becoming an unhandled rejection.
    return nightmare
      .goto(links[0])
      .wait(5000)
      .evaluate(function () {
        // Return the element's markup as a string (or null when it is absent);
        // the element itself would arrive in Node as an empty object.
        var outputDate = document.querySelector('div[class="outputDate"]');
        return outputDate ? outputDate.outerHTML : null;
      })
      // Queue the shutdown before .then() so Electron is closed once the
      // result has been read.
      .end()
      .then(function (div) {
        console.log('I got the "div", now Ill do something with it', div);
      });
  })
  .catch(function (error) {
    console.error('Search failed:', error);
  });
现在,您还需要遍历链接列表,并依次导航到每个链接。这在 Nightmare 中有点棘手,请参考这份资料和这个回答,相信它们会对您有所帮助。