如何使用 casperjs 从提供的网站上抓取 table?
How do I scrape a table from the provided website using CasperJS?
最终目标是从所提供的经纪商网站上抓取以表格(table)形式呈现的股票数据,并将其保存到某个文本文件中。以下是我到目前为止参考一些教程拼凑出来的代码:
// Create the CasperJS instance used by the rest of this script.
var casper = require("casper").create();
// Page that holds the historical quotes table; the query string carries
// the active_id, tz_offset and date parameters.
var url = 'https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
// Handler passed to waitForSelector as its third argument: prints a short
// notice and then stops the script. It is invoked with the casper instance
// as `this`.
var terminate = function onWaitFailed() {
    var self = this;
    self.echo("Exiting ...");
    self.exit();
};
// Success callback for waitForSelector: queries '#mCSB_3_container > table'
// and echoes the result, its length, and the first
// '.quotes-table-result__date' match of every entry.
// NOTE(review): this callback runs in the CasperJS script context, so
// `document` here is presumably NOT the loaded page's DOM; page DOM access
// normally has to happen inside casper.evaluate(). That would explain the
// 'undefined' output the author reports -- confirm.
var processPage = function() {
// NOTE(review): the selector matches <table> elements, not table rows, so
// `rows` holds tables despite its name.
var rows = document.querySelectorAll('#mCSB_3_container > table'); //get table from broker site (copy/paste via copy selector in chrome tools)
//var nodes = document.getElementsByClassName('mCSB_container');
this.echo(rows);
this.echo(rows.length);
for (var i = 0; i < rows.length; i++)
{
// querySelector returns only the first matching descendant (or null).
var cell = rows[i].querySelector('.quotes-table-result__date');
this.echo(cell); //print each cell
}
};
// Open the page, then wait for the '#mCSB_3_container' element:
// processPage is the success callback, terminate the timeout callback.
casper.start(url);
casper.waitForSelector('#mCSB_3_container', processPage, terminate);
// Start executing the steps queued above.
casper.run();
此代码应检索股票价格 table 并打印出每个单元格。但是,我得到的只是 'undefined',这可能意味着 querySelector 调用没有返回任何对象。另外,请假设我完全不了解网络编程(HTML、CSS)。
首先,问题是waitFor没有设置好,必须等待rows/cells。
在这个页面上取到的节点有点奇怪;如果有人能给出更抽象的方案,比我的解法更好地处理子节点,我会非常感兴趣:
// Script setup: one CasperJS instance, the target URL, and a shared slot
// for the number of table rows (assigned by a later step).
var casper = require('casper').create();
var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
var length;

casper.start(url);

// Hold the step queue until at least one row of the quotes table exists,
// not merely the container around it.
casper.then(function waitForRows() {
    var rowSelector = '#mCSB_3_container table tbody tr';
    casper.waitForSelector(rowSelector);
});
/**
 * Reads the trimmed text of one table cell from the loaded page.
 *
 * @param {number} row  Zero-based index into the rows matched by
 *                      'table tbody tr'.
 * @param {number} cell Zero-based index of the cell element in that row.
 * @returns {?string} The cell text, or null when the row or the cell does
 *                    not exist.
 */
function getCellContent(row, cell) {
    // The callback is executed inside the page, so it cannot see variables
    // of this script; row and cell are handed over as evaluate() arguments.
    return casper.evaluate(function(rowIndex, cellIndex) {
        var tr = document.querySelectorAll('table tbody tr')[rowIndex];
        // `children` lists element nodes only, so text nodes between the
        // cells cannot shift the index the way `childNodes` would.
        var td = tr ? tr.children[cellIndex] : null;
        return td ? td.innerText.trim() : null;
    }, row, cell);
}
// Step: count the table rows in the page and store the count in the shared
// `length` variable.
casper.then(function() {
    // evaluate() can only hand back JSON-serializable values, so the count
    // is taken inside the page instead of returning the NodeList itself.
    var rowCount = casper.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    // Fall back to 0 if evaluate() produced no usable number, so that code
    // looping up to `length` simply does nothing.
    length = rowCount || 0;
    this.echo("table length: " + length);
});
// Step: print every row as four labelled lines (date, bid, ask, quotes).
// Each value is fetched with its own getCellContent() call; the column
// index doubles as the index into the label table.
casper.then(function printRows() {
    var labels = ["Date: ", "Bid: ", "Ask: ", "Quotes: "];
    var rowIndex;
    var columnIndex;
    for (rowIndex = 0; rowIndex < length; rowIndex += 1) {
        for (columnIndex = 0; columnIndex < labels.length; columnIndex += 1) {
            this.echo(labels[columnIndex] + getCellContent(rowIndex, columnIndex));
        }
    }
});

// Start executing the queued steps.
casper.run();
最终目标是从所提供的经纪商网站上抓取以表格(table)形式呈现的股票数据,并将其保存到某个文本文件中。以下是我到目前为止参考一些教程拼凑出来的代码:
// Create the CasperJS instance used by the rest of this script.
var casper = require("casper").create();
// Page that holds the historical quotes table; the query string carries
// the active_id, tz_offset and date parameters.
var url = 'https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
// Handler passed to waitForSelector as its third argument: prints a short
// notice and then stops the script. It is invoked with the casper instance
// as `this`.
var terminate = function onWaitFailed() {
    var self = this;
    self.echo("Exiting ...");
    self.exit();
};
// Success callback for waitForSelector: queries '#mCSB_3_container > table'
// and echoes the result, its length, and the first
// '.quotes-table-result__date' match of every entry.
// NOTE(review): this callback runs in the CasperJS script context, so
// `document` here is presumably NOT the loaded page's DOM; page DOM access
// normally has to happen inside casper.evaluate(). That would explain the
// 'undefined' output the author reports -- confirm.
var processPage = function() {
// NOTE(review): the selector matches <table> elements, not table rows, so
// `rows` holds tables despite its name.
var rows = document.querySelectorAll('#mCSB_3_container > table'); //get table from broker site (copy/paste via copy selector in chrome tools)
//var nodes = document.getElementsByClassName('mCSB_container');
this.echo(rows);
this.echo(rows.length);
for (var i = 0; i < rows.length; i++)
{
// querySelector returns only the first matching descendant (or null).
var cell = rows[i].querySelector('.quotes-table-result__date');
this.echo(cell); //print each cell
}
};
// Open the page, then wait for the '#mCSB_3_container' element:
// processPage is the success callback, terminate the timeout callback.
casper.start(url);
casper.waitForSelector('#mCSB_3_container', processPage, terminate);
// Start executing the steps queued above.
casper.run();
此代码应检索股票价格 table 并打印出每个单元格。但是,我得到的只是 'undefined',这可能意味着 querySelector 调用没有返回任何对象。另外,请假设我完全不了解网络编程(HTML、CSS)。
首先,问题是waitFor没有设置好,必须等待rows/cells。
在这个页面上取到的节点有点奇怪;如果有人能给出更抽象的方案,比我的解法更好地处理子节点,我会非常感兴趣:
// Script setup: one CasperJS instance, the target URL, and a shared slot
// for the number of table rows (assigned by a later step).
var casper = require('casper').create();
var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
var length;

casper.start(url);

// Hold the step queue until at least one row of the quotes table exists,
// not merely the container around it.
casper.then(function waitForRows() {
    var rowSelector = '#mCSB_3_container table tbody tr';
    casper.waitForSelector(rowSelector);
});
/**
 * Reads the trimmed text of one table cell from the loaded page.
 *
 * @param {number} row  Zero-based index into the rows matched by
 *                      'table tbody tr'.
 * @param {number} cell Zero-based index of the cell element in that row.
 * @returns {?string} The cell text, or null when the row or the cell does
 *                    not exist.
 */
function getCellContent(row, cell) {
    // The callback is executed inside the page, so it cannot see variables
    // of this script; row and cell are handed over as evaluate() arguments.
    return casper.evaluate(function(rowIndex, cellIndex) {
        var tr = document.querySelectorAll('table tbody tr')[rowIndex];
        // `children` lists element nodes only, so text nodes between the
        // cells cannot shift the index the way `childNodes` would.
        var td = tr ? tr.children[cellIndex] : null;
        return td ? td.innerText.trim() : null;
    }, row, cell);
}
// Step: count the table rows in the page and store the count in the shared
// `length` variable.
casper.then(function() {
    // evaluate() can only hand back JSON-serializable values, so the count
    // is taken inside the page instead of returning the NodeList itself.
    var rowCount = casper.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    // Fall back to 0 if evaluate() produced no usable number, so that code
    // looping up to `length` simply does nothing.
    length = rowCount || 0;
    this.echo("table length: " + length);
});
// Step: print every row as four labelled lines (date, bid, ask, quotes).
// Each value is fetched with its own getCellContent() call; the column
// index doubles as the index into the label table.
casper.then(function printRows() {
    var labels = ["Date: ", "Bid: ", "Ask: ", "Quotes: "];
    var rowIndex;
    var columnIndex;
    for (rowIndex = 0; rowIndex < length; rowIndex += 1) {
        for (columnIndex = 0; columnIndex < labels.length; columnIndex += 1) {
            this.echo(labels[columnIndex] + getCellContent(rowIndex, columnIndex));
        }
    }
});

// Start executing the queued steps.
casper.run();