CasperJS 的 evaluate() 在 each() 块内不执行

CasperJS evaluate() is not executing from within each() block

我正在抓取一个页面并收集其中的链接,然后想继续抓取这些链接以补全我的数据集。下面是相关代码:

crawl.js:

// Create the Casper instance that drives the whole crawl.
var casper = require("casper").create({
    // Timeout applied to wait* steps (milliseconds, per the CasperJS options docs).
    waitTimeout: 3000,
    pageSettings: {
        // Identify the crawler to the site as a desktop Firefox browser.
        userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:23.0) Gecko/20130404 Firefox/23.0"
    },
    // Inject jQuery into the remote pages so the evaluate() callbacks in this file can use $.
    clientScripts: ["includes/jquery.min.js"],
    verbose: true
});

// Shared store from followers.js; collected links are accumulated on followers.followers.
var followers = require('./followers');
// Number of the listing page being processed; used in log output and screenshot file names.
var currentPage = 1;
// XPath selector helper, used in this file to locate the "Next ›" link by its text.
var x = require('casper').selectXPath;

/**
 * Count the own enumerable properties of an object.
 *
 * The check goes through Object.prototype.hasOwnProperty.call so the count
 * stays correct for prototype-less objects and for objects that carry an own
 * key named "hasOwnProperty" (the keys counted in this script come from
 * scraped page text, so any name is possible).
 *
 * @param {Object} obj - Object to measure; null/undefined are treated as empty.
 * @returns {number} Number of own enumerable properties.
 */
Object.size = function(obj) {
    var size = 0, key;
    var hasOwn = Object.prototype.hasOwnProperty;
    for (key in obj) {
        if (hasOwn.call(obj, key)) {
            size++;
        }
    }
    return size;
};

/**
 * Step callback (invoked with `this` bound to the casper instance).
 *
 * Screenshots the current listing page, merges the follower links found on it
 * into followers.followers, then clicks "Next ›" and re-runs itself once the
 * URL has changed. Stops at page 5 or when no "Next ›" link exists, handing
 * over to processFollowers and terminate.
 */
var collectFollowers = function() {
    var url, key;
    this.echo("capturing page " + currentPage);
    this.capture("wowhead-p" + currentPage + ".png");

    // don't go too far down the rabbit hole
    if (currentPage >= 5 || !this.exists(x('//*[text()="Next ›"]'))) {
        // NOTE(review): the links on this final page are never collected, and
        // terminate runs right after processFollowers (which presumably only
        // queues its steps) — confirm both are intended.
        processFollowers.call(casper);
        return terminate.call(casper);
    }

    currentPage++;
    this.echo("requesting next page: " + currentPage);
    // Remember where we are so the wait below can detect the navigation.
    url = this.getCurrentUrl();

    // Runs inside the page: build a { linkText: href } map. Only the returned
    // plain object is handed back to this script.
    var links = this.evaluate(function() {
        var obj = {};
        // .each rather than .map: the callback is used only for its side effect.
        $('.listview-cleartext').each(function() {
            obj[$(this).text()] = $(this).attr('href');
        });
        return obj;
    });

    // `key` is declared locally above so this loop does not create an implicit global.
    for (key in links) {
        followers.followers[key] = links[key];
    }

    this.echo("Page links: " + Object.size(followers.followers));
    //this.emit('update.followers', links);
    this.thenClick(x('//*[text()="Next ›"]')).then(function() {
        // Continue with the next page once the URL differs; on timeout fall
        // back to processing what has been collected so far.
        this.waitFor(function() {
            return url !== this.getCurrentUrl();
        }, collectFollowers, processFollowers);
    });
};

// Step callback (called with `this` bound to casper): logs how many follower
// links were collected, then for each key opens 'http://wowhead.com' plus the
// stored href and runs an evaluate() callback against that page.
var processFollowers = function() {
    this.echo("Total followers:" + Object.size(followers.followers));
    this.each(Object.keys(followers.followers), function(casper, key) {
        this.thenOpen('http://wowhead.com' + followers.followers[key]).then(function() {
            this.echo("On http://wowhead.com" + followers.followers[key]);
            // NOTE(review): evaluate() executes its callback inside the loaded page,
            // so `this`, `followers` and `key` below are resolved in the page rather
            // than in this script — confirm against the CasperJS evaluate() docs.
            this.evaluate(function() {
                this.echo("Inside the evaluate statement.");
                // Only pages that contain a link to the #quests tab are handled.
                if ($('a[href=#quests]').length) {
                    this.echo("Has quest!");
                    $('a[href=#quests]').click();
                    // Read the href and the text of the link(s) listed inside the quests tab.
                    var questURL = $('#tab-quests').show().find('.listview-cleartext').attr('href');
                    var questName = $('#tab-quests').show().find('.listview-cleartext').text();
                    this.echo("Quest URL: " + questURL);
                    // Replace the stored href with a record holding the follower name and quest info.
                    followers.followers[key] = {"name": key, "quest": {"url": questURL, "name": questName}};
                } else {
                    this.echo("Does not have quest!");
                }    
            });
        });
    });
}

/**
 * Final step callback: print "Done." and then exit.
 *
 * exit() is called on whatever echo() returns, so the receiver must provide a
 * chainable echo().
 */
var terminate = function() {
    var afterEcho = this.echo("Done.");
    afterEcho.exit();
};

// Load the first followers listing page.
casper.start("http://wowhead.com/followers=2");
// Wait for a "Next ›" link: collectFollowers runs once it appears, processFollowers if the wait times out.
casper.waitForSelector(x('//*[text()="Next ›"]'), collectFollowers, processFollowers);
// Start executing the queued steps.
casper.run();

followers.js:

// Re-bind require through CasperJS's patchRequire so this module can load CasperJS modules such as 'utils'.
var require = patchRequire(require);
// NOTE(review): utils is not used in the lines shown here — confirm whether it is still needed.
var utils = require('utils');
// Shared mutable store; the crawl script fills this object in place through exports.followers.
var followers = {};
exports.followers = followers;

followers.js 用于保存一个全局共享的对象,我在抓取页面的过程中不断构建和更新它。我浏览了 3 页数据,成功收集了链接,然后开始处理它们。目前看来,CasperJS 似乎成功打开了每个页面,但传给 evaluate() 的函数从未被执行。

我曾借助一些异步逻辑让这个功能在 PhantomJS 中正常工作,后来改用 CasperJS,因为看起来它会在幕后替我处理这些异步细节。我已经尝试了 thenOpen()、then() 和 open() 的各种组合,包括不接 then() 的 thenOpen() 等。我到底哪里弄错了?

casper.evaluate() 与 PhantomJS 中对应的 page.evaluate() 一样,在沙箱化的页面上下文中执行。它无权访问在外部定义的变量。

evaluate() 里面的 this 指的是 window 而不是 casper,而我怀疑并不存在 window.echo() 这样的函数。如果你想接收来自页面上下文的控制台消息,需要监听 remote.message 事件:

// Relay console messages coming from the page (e.g. console.log calls made inside
// evaluate()) to Casper's own output, prefixed with "remote: ".
casper.on("remote.message", function(msg){
    this.echo("remote: " + msg);
});

您必须显式地把结果从页面上下文中返回出来,然后在外部(casper 上下文中)把它写入 followers:

// Extract the quest data inside the page and hand it back as a plain value:
// page-side code communicates with this script only through its return value
// and console.log (the latter surfaced via the "remote.message" event).
var result = this.evaluate(function() {
    console.log("Inside the evaluate statement.");
    // The attribute value is quoted because newer jQuery versions reject an
    // unquoted "#quests" in an attribute selector.
    if ($('a[href="#quests"]').length) {
        console.log("Has quest!");
        $('a[href="#quests"]').click();
        var questURL = $('#tab-quests').show().find('.listview-cleartext').attr('href');
        var questName = $('#tab-quests').show().find('.listview-cleartext').text();
        console.log("Quest URL: " + questURL);
        return {"url": questURL, "name": questName};
    } else {
        console.log("Does not have quest!");
        // null tells the caller that this follower has no quest.
        return null;
    }
});
// Back in the Casper context, where `followers` and `key` are accessible.
if (result) {
    followers.followers[key] = {name: key, quest: result};
}