CasperJS,试图抓取 table
CasperJS, trying to scrape a table
此函数应该提取所有 table 行,但它不起作用。它没有输出。
var casper = require("casper").create({
pageSettings: {
userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
},
verbose: true,
logLevel: 'debug'
});
var url = 'http://cnt.rm.ingv.it/';
casper.start(url);//
casper.waitForSelector('#dataTablesEvents', processPage, stopScript);
casper.run();
var stopScript = function() {
casper.echo("STOPPING SCRIPT").exit();
};
var processPage = function() {
pageData = this.evaluate(getPageData);
if (this.exists('a[rel="next"]') == false) {
stopScript();
}
this.thenClick('a[rel="next"]').then(function() {
this.waitForSelector("#dataTablesEvents", processPage, stopScript);
});
};
function getPageData(){
var rows = casper.evaluate(function(){
return document.querySelectorAll("table tbody tr");
});
return rows;
}
我尝试调试,这是结果:
[debug] [phantom] opening url: http://cnt.rm.ingv.it/, HTTP GET
[debug] [phantom] Navigation requested: url=http://cnt.rm.ingv.it/,
type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://cnt.rm.ingv.it/"
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] start page is loaded
[info] [phantom] Step _step 3/3 http://cnt.rm.ingv.it/ (HTTP 200)
[info] [phantom] Step _step 3/3: done in 945ms.
[info] [phantom] waitFor() finished in 40ms.
[info] [phantom] Done 3 steps in 1003ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other,
willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "about:blank"
我不太明白这个..好像 WaitForSelector 没有启动..有什么帮助吗?
这里有一个适合你的方法:
var casper = require('casper').create();
var url = 'http://cnt.rm.ingv.it/';
var length;
casper.start(url);
casper.then(function() {
this.waitForSelector('table#dataTablesEvents');
});
function getCellContent(row, cell) {
cellText = casper.evaluate(function(row, cell) {
return document.querySelectorAll('table tbody tr')[row].childNodes[cell].innerText.trim();
}, row, cell);
return cellText;
}
casper.then(function() {
var rows = casper.evaluate(function() {
return document.querySelectorAll('table tbody tr');
});
length = rows.length;
this.echo("table length: " + length);
});
// This part can be done nicer, but it's the way it should work ...
casper.then(function() {
for (var i = 0; i < length; i++) {
this.echo("Data: " + getCellContent(i, 1));
this.echo("Magnitudo: " + getCellContent(i, 3));
this.echo("Zona: " + getCellContent(i, 5));
this.echo("Profondità: " + getCellContent(i, 7));
this.echo("Latitudine: " + getCellContent(i, 9));
this.echo("Longitudine: " + getCellContent(i, 11));
}
});
casper.run();
此函数应该提取所有 table 行,但它不起作用。它没有输出。
var casper = require("casper").create({
pageSettings: {
userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
},
verbose: true,
logLevel: 'debug'
});
var url = 'http://cnt.rm.ingv.it/';
casper.start(url);//
casper.waitForSelector('#dataTablesEvents', processPage, stopScript);
casper.run();
var stopScript = function() {
casper.echo("STOPPING SCRIPT").exit();
};
var processPage = function() {
pageData = this.evaluate(getPageData);
if (this.exists('a[rel="next"]') == false) {
stopScript();
}
this.thenClick('a[rel="next"]').then(function() {
this.waitForSelector("#dataTablesEvents", processPage, stopScript);
});
};
function getPageData(){
var rows = casper.evaluate(function(){
return document.querySelectorAll("table tbody tr");
});
return rows;
}
我尝试调试,这是结果:
[debug] [phantom] opening url: http://cnt.rm.ingv.it/, HTTP GET
[debug] [phantom] Navigation requested: url=http://cnt.rm.ingv.it/,
type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://cnt.rm.ingv.it/"
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] start page is loaded
[info] [phantom] Step _step 3/3 http://cnt.rm.ingv.it/ (HTTP 200)
[info] [phantom] Step _step 3/3: done in 945ms.
[info] [phantom] waitFor() finished in 40ms.
[info] [phantom] Done 3 steps in 1003ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other,
willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "about:blank"
我不太明白这个..好像 WaitForSelector 没有启动..有什么帮助吗?
这里有一个适合你的方法:
var casper = require('casper').create();
var url = 'http://cnt.rm.ingv.it/';
var length;
casper.start(url);
casper.then(function() {
this.waitForSelector('table#dataTablesEvents');
});
function getCellContent(row, cell) {
cellText = casper.evaluate(function(row, cell) {
return document.querySelectorAll('table tbody tr')[row].childNodes[cell].innerText.trim();
}, row, cell);
return cellText;
}
casper.then(function() {
var rows = casper.evaluate(function() {
return document.querySelectorAll('table tbody tr');
});
length = rows.length;
this.echo("table length: " + length);
});
// This part can be done nicer, but it's the way it should work ...
casper.then(function() {
for (var i = 0; i < length; i++) {
this.echo("Data: " + getCellContent(i, 1));
this.echo("Magnitudo: " + getCellContent(i, 3));
this.echo("Zona: " + getCellContent(i, 5));
this.echo("Profondità: " + getCellContent(i, 7));
this.echo("Latitudine: " + getCellContent(i, 9));
this.echo("Longitudine: " + getCellContent(i, 11));
}
});
casper.run();