CasperJS - 遍历 url 的数组

CasperJS - loop through an array of url

我正在尝试使用 CasperJS 遍历一个 URL 数组,并对其中的每个 URL 执行几个步骤。

// URLs of the pages to visit, in order.
var links = [
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929',
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876'
];

但是当我使用 evaluate 函数时,返回的变量要么为 null,要么只对第一个 URL 有效:

    // For every entry of `links`, queue a step that opens the URL and logs
    // the length of whatever the page-side class-name query hands back.
    casper.start().each(links, function (instance, url) {
        instance.thenOpen(url, function () {
            var matches = this.evaluate(function () {
                return document.getElementsByClassName("_50u0 _60p- _14hj");
            });

            console.log("This page contains :", matches.length, " unanswered comments");
        });
    });

感谢您的回答。

我试过你的代码,它对我来说工作得很好,这里有一个扩展版本供你尝试:

// Casper instance with debug-level, verbose logging and a fixed viewport.
var casper = require('casper').create({
  logLevel: 'debug',
  verbose: true,
  viewportSize: { width: 1200, height: 1080 }
});

// Pages to inspect, visited in order.
var links = [
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929',
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876'
];

casper
  .start()
  .each(links, function (self, link) {
    self.thenOpen(link, function () {
      // The evaluate() callback runs inside the page. Only serializable
      // values can cross back to the Casper side, so the matching elements
      // are counted in the page and just the number is returned, rather
      // than the live element collection itself.
      var count = this.evaluate(function () {
        return document.getElementsByClassName("_50u0 _60p- _14hj").length;
      });

      console.log("This page contains :", count, " unanswered comments");
    });
  })
  .run();

这是我的输出:

[info] [phantom] Starting...
[info] [phantom] Running suite: 4 steps
[debug] [phantom] opening url: https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929, HTTP GET
[debug] [phantom] Navigation requested: url=https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929"
[debug] [phantom] Navigation requested: url=about:blank, type=Other, willNavigate=true, isMainFrame=false
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 2/4 https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929 (HTTP 200)
This page contains : 0  unanswered comments
[info] [phantom] Step anonymous 2/4: done in 2670ms.
[debug] [phantom] opening url: https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876, HTTP GET
[debug] [phantom] Navigation requested: url=https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 4/4 https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876 (HTTP 200)
This page contains : 0  unanswered comments
[info] [phantom] Step anonymous 4/4: done in 3524ms.
[info] [phantom] Done 4 steps in 3547ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "about:blank"

这显然会为数组中的两个链接运行代码。

您可以使用 each() 遍历每个页面,并使用 getElementsInfo() 检索与所提供选择器匹配的所有元素的信息。

这将允许您避免在 CasperJS 环境和远程 DOM 环境之间切换。您可以从文档中了解更多关于不同环境的信息。

简单的解决方案:

// Pages to inspect, visited in order.
var links = [
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1921693171204929',
  'https://www.facebook.com/delivagri/inbox/?selected_item_id=1879523705421876'
];

var casper = require('casper').create();

// CSS selector for elements that carry all three class names.
var selector = '._50u0._60p-._14hj';

// The suite has to be started before any step can be queued with
// thenOpen()/then(); no initial URL is needed here.
casper.start();

casper.each(links, function (self, link) {
  self.thenOpen(link);

  self.then(function () {
    // Check for a match first so that a page without any matching element
    // is reported as 0 instead of failing inside getElementsInfo().
    var count = this.exists(selector) ? this.getElementsInfo(selector).length : 0;

    this.echo('This page contains: ' + count + ' unanswered comments');
  });
});

casper.run();

注意:此方法返回的不是 NodeList,而只是由匹配元素的对象表示组成的普通数组;这是因为 Casper 环境与页面 DOM 环境并不相同,DOM 对象需要先经过序列化。