CasperJS-NodeList.lengthreturn0

CasperJS - NodeList.length return 0

我尝试使用 CasperJS 从一些网页中提取数据,我尝试在 getDetails() 中添加 this.wait(5000),但我不知道为什么 direktoriNodeList.length 总是 return 0

PhantomJS:2.0.0
CasperJS:1.1.0-beta3

//casperjs --proxy=127.0.0.1:9050 --proxy-type=socks5 axa-mandiri.casper.js

var casper = require("casper").create({
    verbose: true,
    logLevel: "info",
    pageSettings: {
        loadImages: false, //The script is much faster when this field is set to false
        loadPlugins: false,
        userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
    }
});
var utils = require('utils');
var currentPage = 1;
var hospitals = [];

var url = 'https://www.axa-mandiri.co.id/direktori/rumah-sakit/';//Type your url
casper.start(url);//Start CasperJS
casper.waitForSelector('#main-direktori', processPage, stopScript);//Wait until content loads and then process the page

casper.run(function() {
    utils.dump(hospitals);
    this.exit();
});

function getDetails(){
    /*
    In this function you can put anything you want in order to extract your data from the website.
    NOTE: This function is executed in page context, and will should be called as parameter to Casper's evaluate function.
    */
.
    console.log("getDetails " + currentPage);
    var details = [];

    var direktoriNodeList = document.querySelectorAll("ul#main-direktori li.direktori-list");
    console.log("direktoriNodeList.length " + direktoriNodeList.length);
    utils.dump(direktoriNodeList);
    for (var i = 0; i < direktoriNodeList.length; i++) {
        console.log("querySelectorAll " + i);
        var detail = {
            name    : direktoriNodeList[i].querySelector("div.details strong").textContent.replace(/\n/g, ''),
            phone   : direktoriNodeList[i].querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
            map     : direktoriNodeList[i].querySelector("div.map-details a.get-direction").getAttribute("href")
        };

        details.push(detail);
    }

    /*
    [].forEach.call(document.querySelectorAll("ul#main-direktori li.direktori-list"), function(elem) {
        console.log("querySelectorAll");
        var detail = {
            name    : elem.querySelector("div.details strong").textContent.replace(/\n/g, ''),
            phone   : elem.querySelector("div.details span:nth-child(1)").textContent.replace(/\n/g, ''),
            map     : elem.querySelector("div.map-details a.get-direction").getAttribute("href")
        };
        details.push(detail);
    });
    */
    return JSON.stringify(details);
}

function stopScript() {
    utils.dump(hospitals);
    console.log("Exiting...");
    this.exit();
};

function processPage() {
    //your function which will do data scraping from the page. If you need to extract data from tables, from divs write your logic in this function
    hospitals = hospitals.concat(this.evaluate(getDetails()));

    //If there is no nextButton on the page, then exit a script because we hit the last page
    if (this.exists("a.nextpostslink") == false) {
        stopScript();
    }

    //Click on the next button
    this.thenClick("a.nextpostslink").then(function() {
        currentPage++;
        this.waitForSelector("#main-direktori", processPage, stopScript);
    });
};

casper.evaluate(fn, ...) 需要一个函数,而不是一个数组。变化

hospitals = hospitals.concat(this.evaluate(getDetails()));

hospitals = hospitals.concat(this.evaluate(getDetails));

这里的问题是您在外部上下文中执行函数,而不是将其传递到页面上下文中。不要忘记注册 "remote.message" 事件以查看来自页面上下文的 console.log() 调用:

casper.on("remote.message", function(msg){
    this.echo("remote> " + msg);
});