CasperJS-NodeList.lengthreturn0
CasperJS - NodeList.length return 0
我尝试使用 CasperJS 从一些网页中提取数据,我尝试在 getDetails() 中添加 this.wait(5000),但我不知道为什么 direktoriNodeList.length 总是 return 0
PhantomJS:2.0.0
CasperJS:1.1.0-beta3
//casperjs --proxy=127.0.0.1:9050 --proxy-type=socks5 axa-mandiri.casper.js
var casper = require("casper").create({
verbose: true,
logLevel: "info",
pageSettings: {
loadImages: false, //The script is much faster when this field is set to false
loadPlugins: false,
userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
}
});
var utils = require('utils');
var currentPage = 1;
var hospitals = [];
var url = 'https://www.axa-mandiri.co.id/direktori/rumah-sakit/';//Type your url
casper.start(url);//Start CasperJS
casper.waitForSelector('#main-direktori', processPage, stopScript);//Wait until content loads and then process the page
casper.run(function() {
utils.dump(hospitals);
this.exit();
});
function getDetails(){
/*
In this function you can put anything you want in order to extract your data from the website.
NOTE: This function is executed in page context, and will should be called as parameter to Casper's evaluate function.
*/
.
console.log("getDetails " + currentPage);
var details = [];
var direktoriNodeList = document.querySelectorAll("ul#main-direktori li.direktori-list");
console.log("direktoriNodeList.length " + direktoriNodeList.length);
utils.dump(direktoriNodeList);
for (var i = 0; i < direktoriNodeList.length; i++) {
console.log("querySelectorAll " + i);
var detail = {
name : direktoriNodeList[i].querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : direktoriNodeList[i].querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : direktoriNodeList[i].querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
}
/*
[].forEach.call(document.querySelectorAll("ul#main-direktori li.direktori-list"), function(elem) {
console.log("querySelectorAll");
var detail = {
name : elem.querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : elem.querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : elem.querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
});
*/
return JSON.stringify(details);
}
function stopScript() {
utils.dump(hospitals);
console.log("Exiting...");
this.exit();
};
function processPage() {
//your function which will do data scraping from the page. If you need to extract data from tables, from divs write your logic in this function
hospitals = hospitals.concat(this.evaluate(getDetails()));
//If there is no nextButton on the page, then exit a script because we hit the last page
if (this.exists("a.nextpostslink") == false) {
stopScript();
}
//Click on the next button
this.thenClick("a.nextpostslink").then(function() {
currentPage++;
this.waitForSelector("#main-direktori", processPage, stopScript);
});
};
casper.evaluate(fn, ...)
需要一个函数,而不是一个数组。变化
hospitals = hospitals.concat(this.evaluate(getDetails()));
到
hospitals = hospitals.concat(this.evaluate(getDetails));
这里的问题是您在外部上下文中执行函数,而不是将其传递到页面上下文中。不要忘记注册 "remote.message" 事件以查看来自页面上下文的 console.log()
调用:
casper.on("remote.message", function(msg){
this.echo("remote> " + msg);
});
我尝试使用 CasperJS 从一些网页中提取数据,我尝试在 getDetails() 中添加 this.wait(5000),但我不知道为什么 direktoriNodeList.length 总是 return 0
PhantomJS:2.0.0
CasperJS:1.1.0-beta3
//casperjs --proxy=127.0.0.1:9050 --proxy-type=socks5 axa-mandiri.casper.js
var casper = require("casper").create({
verbose: true,
logLevel: "info",
pageSettings: {
loadImages: false, //The script is much faster when this field is set to false
loadPlugins: false,
userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
}
});
var utils = require('utils');
var currentPage = 1;
var hospitals = [];
var url = 'https://www.axa-mandiri.co.id/direktori/rumah-sakit/';//Type your url
casper.start(url);//Start CasperJS
casper.waitForSelector('#main-direktori', processPage, stopScript);//Wait until content loads and then process the page
casper.run(function() {
utils.dump(hospitals);
this.exit();
});
function getDetails(){
/*
In this function you can put anything you want in order to extract your data from the website.
NOTE: This function is executed in page context, and will should be called as parameter to Casper's evaluate function.
*/
.
console.log("getDetails " + currentPage);
var details = [];
var direktoriNodeList = document.querySelectorAll("ul#main-direktori li.direktori-list");
console.log("direktoriNodeList.length " + direktoriNodeList.length);
utils.dump(direktoriNodeList);
for (var i = 0; i < direktoriNodeList.length; i++) {
console.log("querySelectorAll " + i);
var detail = {
name : direktoriNodeList[i].querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : direktoriNodeList[i].querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : direktoriNodeList[i].querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
}
/*
[].forEach.call(document.querySelectorAll("ul#main-direktori li.direktori-list"), function(elem) {
console.log("querySelectorAll");
var detail = {
name : elem.querySelector("div.details strong").textContent.replace(/\n/g, ''),
phone : elem.querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
map : elem.querySelector("div.map-details a.get-direction").getAttribute("href")
};
details.push(detail);
});
*/
return JSON.stringify(details);
}
function stopScript() {
utils.dump(hospitals);
console.log("Exiting...");
this.exit();
};
function processPage() {
//your function which will do data scraping from the page. If you need to extract data from tables, from divs write your logic in this function
hospitals = hospitals.concat(this.evaluate(getDetails()));
//If there is no nextButton on the page, then exit a script because we hit the last page
if (this.exists("a.nextpostslink") == false) {
stopScript();
}
//Click on the next button
this.thenClick("a.nextpostslink").then(function() {
currentPage++;
this.waitForSelector("#main-direktori", processPage, stopScript);
});
};
casper.evaluate(fn, ...)
需要一个函数,而不是一个数组。变化
hospitals = hospitals.concat(this.evaluate(getDetails()));
到
hospitals = hospitals.concat(this.evaluate(getDetails));
这里的问题是您在外部上下文中执行函数,而不是将其传递到页面上下文中。不要忘记注册 "remote.message" 事件以查看来自页面上下文的 console.log()
调用:
casper.on("remote.message", function(msg){
this.echo("remote> " + msg);
});