带有噩梦 JS 的延迟加载滚动

Lazy load scroll with nightmare JS

我是 NightmareJS 的新手,写了一个脚本来抓取网站。 这就像那样工作。我登录到我的个人资料,等待网站加载,然后转到我喜欢的个人资料,然后我想向下滚动到网站末尾。目前我使用这种丑陋的解决方法,想知道是否有办法向下滚动到页面底部以获取所有结果,然后转到下一步。

var Nightmare = require('nightmare');
var vo = require('vo');
vo(run)(function(err, result) {
  if (err) throw err;
});
function *run() {
  var nightmare = Nightmare({ show: true,
                             webPreferences: { partition: 'your-custom-partition'}});
    yield nightmare
    .goto('https://facebook.com/login')
    .type('input[id="email"]', "user")
    .type("input[id='pass']", "pass")
    .click('#loginbutton')
    .wait('._8u._42ef')
    .goto('https://www.facebook.com/myprofile/likes')
    .wait(1000)
    yield nightmare
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
        .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
        .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
        .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
    .evaluate(function() {
        window.document.body.scrollTop = document.body.scrollHeight;
    })
    .wait(3000)
  var title = yield nightmare
    .evaluate(function() {
              var jsonObject = new Array('');
              var links = document.getElementsByClassName("_5rz _5k3a _5rz3 _1v6c");
              var numProfiles = links.length;
                for(var i = 0; i< numProfiles; i++){
                var elem;
                elem = links[i].querySelector(".fsl.fwb.fcb a").href;
                console.log(elem);
                jsonObject.push(elem);
              }
              if(numProfiles > 0) {
              //then delete that element, so we don't overlaod the page
              for(var j = 0; j < numProfiles; j++){
                links[0].parentNode.removeChild(links[0]);
                }
                window.document.body.scrollTop = document.body.scrollHeight;
              }
              return jsonObject;
    });
  console.log(title);
  yield nightmare.end();
}

我认为您所追求的与 , which is related to segmentio/nightmare#625 类似。

为了完整起见,下面包含了参考答案中提供的解决方案的副本。


这是非常回答您问题的天真方法:

var Nightmare = require('nightmare');
var vo = require('vo');
var nightmare = Nightmare({
  show: true
});

var run = function * () {
  yield nightmare.goto('http://someInfiniteScrollPage.tld');

  var previousHeight, currentHeight=0;
  while(previousHeight !== currentHeight) {
    previousHeight = currentHeight;
    var currentHeight = yield nightmare.evaluate(function() {
      return document.body.scrollHeight;
    });
    yield nightmare.scrollTo(currentHeight, 0)
      .wait(3000);
  }
  yield nightmare.end();
};

vo(run)(function(err) {
  console.dir(err);
  console.log('done');
});

这种方法存在问题:当您访问 actually is an infinite scroll 的页面时,上述内容将永远不会结束。此外,可以将 .wait() 调用替换为等待滚动元素计数更改,以可能减少延迟并提高稳健性。尽管如此,这应该足以让您入门。