CasperJS,试图抓取 table

CasperJS, trying to scrape a table

这段脚本本应提取 table 的所有行，但它不起作用：没有任何输出。

// Create the Casper instance: debug-level verbose logging, and a desktop
// Chrome user-agent string sent with every page request.
var casper = require("casper").create({
    logLevel: 'debug',
    verbose: true,
    pageSettings: {
        userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
    }
});


var url = 'http://cnt.rm.ingv.it/';
casper.start(url);

// Register the wait from inside a step rather than calling it right here:
// processPage and stopScript are defined further down in the script, so their
// names are looked up only when the step actually executes (after casper.run()
// has started the queue) instead of at this point of the script.
casper.then(function() {
    this.waitForSelector('#dataTablesEvents', processPage, stopScript);
});

casper.run();


/**
 * Prints a notice and asks CasperJS to exit.
 *
 * Written as a function declaration so it is hoisted: the name is already
 * bound to the function wherever in the script it is passed as a callback.
 */
function stopScript() {
    casper.echo("STOPPING SCRIPT").exit();
}

/**
 * Scrapes the current page, prints the result, then moves on to the next
 * page when a "next" link exists; otherwise stops the script.
 *
 * Meant to be used as a CasperJS step/wait callback, so `this` is the casper
 * instance. Written as a function declaration so it is hoisted and can be
 * referenced by name anywhere in the script, including from itself.
 */
function processPage() {
    // Keep the scraped rows in a local (an undeclared assignment would create
    // a global) and print them so the run produces visible output.
    var pageData = this.evaluate(getPageData);
    this.echo(JSON.stringify(pageData));

    // No "next" link: this was the last page. Return immediately, because
    // exiting is not instantaneous and the click below would otherwise still
    // be queued for a link that does not exist.
    if (!this.exists('a[rel="next"]')) {
        stopScript();
        return;
    }

    // Follow the link, then wait for the table again and process that page.
    this.thenClick('a[rel="next"]').then(function() {
        this.waitForSelector("#dataTablesEvents", processPage, stopScript);
    });
}

/**
 * Collects the text of every table body row on the current page.
 *
 * This function is handed to evaluate(), so it executes inside the web page:
 * it can only use the page's own DOM (there is no `casper` object there), and
 * whatever it returns must be plain serializable data. DOM nodes or a
 * NodeList cannot be passed back, hence the conversion to arrays of strings.
 *
 * @returns {string[][]} One entry per `table tbody tr` row, each holding the
 *     trimmed text of that row's cells. Empty array when no row matches.
 */
function getPageData() {
    var rows = document.querySelectorAll("table tbody tr");

    // NodeList / HTMLCollection are array-likes, so borrow Array's map.
    return Array.prototype.map.call(rows, function(row) {
        return Array.prototype.map.call(row.cells, function(cell) {
            return cell.textContent.trim();
        });
    });
}

我尝试调试,这是结果:

[debug] [phantom] opening url: http://cnt.rm.ingv.it/, HTTP GET
[debug] [phantom] Navigation requested: url=http://cnt.rm.ingv.it/, 
type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://cnt.rm.ingv.it/"
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] start page is loaded
[info] [phantom] Step _step 3/3 http://cnt.rm.ingv.it/ (HTTP 200)
[info] [phantom] Step _step 3/3: done in 945ms.
[info] [phantom] waitFor() finished in 40ms.
[info] [phantom] Done 3 steps in 1003ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other, 
willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "about:blank"

我不太明白这个结果……好像 waitForSelector 的回调根本没有被执行。有人能帮忙看看吗？

下面是一种可行的写法：

// Plain Casper instance with default options.
var casper = require('casper').create();
var url = 'http://cnt.rm.ingv.it/';
var length; // row count of the table; assigned by a later step

// Open the page, then queue a step that waits until the events table exists.
casper.start(url).then(function() {
    this.waitForSelector('table#dataTablesEvents');
});

/**
 * Reads the trimmed text of one child node of one table row on the loaded page.
 *
 * @param {number} row  Zero-based index into the `table tbody tr` matches.
 * @param {number} cell Zero-based index into that row's `childNodes`. Note that
 *     `childNodes` counts every child node, not only the cell elements.
 * @returns {?string} The trimmed `innerText` of the node, or null when the row
 *     or node does not exist or the node carries no `innerText`.
 */
function getCellContent(row, cell) {
    // Keep the result in a local; an undeclared assignment would create a global.
    var cellText = casper.evaluate(function(row, cell) {
        var tableRow = document.querySelectorAll('table tbody tr')[row];
        var node = tableRow ? tableRow.childNodes[cell] : null;

        // Guard against out-of-range indexes and nodes without innerText,
        // instead of throwing inside the page.
        if (!node || typeof node.innerText !== 'string') {
            return null;
        }
        return node.innerText.trim();
    }, row, cell);
    return cellText;
}

// Count the table rows and store the count in `length` for later steps.
casper.then(function() {
    // Return only the number from the page: evaluate() results have to be
    // plain serializable values, and DOM nodes / a NodeList are not.
    var rowCount = this.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });

    // Fall back to 0 when evaluate() yields nothing usable.
    length = rowCount || 0;
    this.echo("table length: " + length);
});

// Print the selected fields of every row. Each entry pairs the label to print
// with the childNodes index handed to getCellContent().
casper.then(function() {
    var fields = [
        ["Data: ", 1],
        ["Magnitudo: ", 3],
        ["Zona: ", 5],
        ["Profondità: ", 7],
        ["Latitudine: ", 9],
        ["Longitudine: ", 11]
    ];

    for (var rowIndex = 0; rowIndex < length; rowIndex++) {
        for (var f = 0; f < fields.length; f++) {
            this.echo(fields[f][0] + getCellContent(rowIndex, fields[f][1]));
        }
    }
});

// Start executing the queued steps.
casper.run();