使用 NightmareJS 实现延迟加载滚动
Lazy load scroll with nightmare JS
我是 NightmareJS 的新手,写了一个脚本来抓取网站。
它的工作流程是这样的:我登录到我的个人资料,等待网站加载,然后转到我点赞过的个人资料页面,接着想向下滚动到页面末尾。目前我使用下面这种丑陋的变通方法,想知道是否有办法一直向下滚动到页面底部以获取所有结果,然后再进入下一步。
var Nightmare = require('nightmare');
var vo = require('vo');
// Drive the `run` generator through vo and re-throw any error it reports.
vo(run)(function onDone(err, result) {
  if (!err) {
    return;
  }
  throw err;
});
/**
 * Logs in to Facebook, opens the profile's "likes" page, scrolls down a fixed
 * number of times so more entries can load, then collects the profile links
 * found on the page and prints them.
 *
 * This is a generator intended to be driven by `vo`: every `yield` hands a
 * queued Nightmare action chain to the runner and resumes with its result.
 * The browser is always shut down, even when one of the steps fails.
 */
function *run() {
  // Same number of scroll rounds and the same pause the unrolled chain used.
  const SCROLL_ROUNDS = 16;
  const SCROLL_PAUSE_MS = 3000;

  // Executed inside the page: jump to the current bottom of the document.
  function scrollToBottom() {
    window.document.body.scrollTop = document.body.scrollHeight;
  }

  // Executed inside the page: returns the href of every listed profile and
  // removes the processed entries from the DOM.
  function collectProfileLinks() {
    // Start from an empty list so the result holds profile URLs only.
    const profileUrls = [];
    // Live collection: it shrinks as matching nodes are removed below,
    // which is why the removal loop always targets index 0.
    const cards = document.getElementsByClassName("_5rz _5k3a _5rz3 _1v6c");
    const cardCount = cards.length;
    for (let i = 0; i < cardCount; i++) {
      const url = cards[i].querySelector(".fsl.fwb.fcb a").href;
      console.log(url);
      profileUrls.push(url);
    }
    if (cardCount > 0) {
      // Drop the processed entries so the page does not get overloaded.
      for (let j = 0; j < cardCount; j++) {
        cards[0].parentNode.removeChild(cards[0]);
      }
      window.document.body.scrollTop = document.body.scrollHeight;
    }
    return profileUrls;
  }

  const nightmare = Nightmare({
    show: true,
    webPreferences: { partition: 'your-custom-partition' }
  });

  try {
    yield nightmare
      .goto('https://facebook.com/login')
      .type('input[id="email"]', "user")
      .type("input[id='pass']", "pass")
      .click('#loginbutton')
      .wait('._8u._42ef')
      .goto('https://www.facebook.com/myprofile/likes')
      .wait(1000);

    // Queue the scroll/pause rounds on one chain, then wait for all of them.
    let scrolling = nightmare;
    for (let round = 0; round < SCROLL_ROUNDS; round++) {
      scrolling = scrolling.evaluate(scrollToBottom).wait(SCROLL_PAUSE_MS);
    }
    yield scrolling;

    const title = yield nightmare.evaluate(collectProfileLinks);
    console.log(title);
  } finally {
    // Close the browser on success and on failure alike.
    yield nightmare.end();
  }
}
我认为您想要的与这个回答类似,该回答与 segmentio/nightmare#625 相关。
为了完整起见,下面包含了参考答案中提供的解决方案的副本。
下面是一种能回答您问题的非常朴素的方法:
var Nightmare = require('nightmare');
var vo = require('vo');
// Shared Nightmare instance used by `run`; `show: true` asks Nightmare to display the browser window.
var nightmare = Nightmare({ show: true });
/**
 * Opens the page, then keeps scrolling to the current document height until
 * two consecutive measurements agree, and finally ends the session.
 *
 * Generator intended to be driven by `vo`; each `yield` hands a Nightmare
 * action chain to the runner and resumes with its result.
 */
var run = function* () {
  yield nightmare.goto('http://someInfiniteScrollPage.tld');

  let lastSeenHeight;
  let pageHeight = 0;
  do {
    lastSeenHeight = pageHeight;
    // Measure the document height inside the page.
    pageHeight = yield nightmare.evaluate(function () {
      return document.body.scrollHeight;
    });
    // Scroll down to that height and give new content time to load.
    const scrolled = nightmare.scrollTo(pageHeight, 0);
    yield scrolled.wait(3000);
  } while (lastSeenHeight !== pageHeight);

  yield nightmare.end();
};
// Drive the `run` generator with vo; print the error argument, then announce completion.
vo(run)((err) => {
  console.dir(err);
  console.log('done');
});
这种方法存在问题:当您访问的页面确实是无限滚动页面时,上述代码将永远不会结束。此外,可以将 .wait() 调用替换为等待滚动元素的数量发生变化,这样可能减少延迟并提高稳健性。尽管如此,这应该足以让您入门。
我是 NightmareJS 的新手,写了一个脚本来抓取网站。它的工作流程是这样的:我登录到我的个人资料,等待网站加载,然后转到我点赞过的个人资料页面,接着想向下滚动到页面末尾。目前我使用下面这种丑陋的变通方法,想知道是否有办法一直向下滚动到页面底部以获取所有结果,然后再进入下一步。
var Nightmare = require('nightmare');
var vo = require('vo');
// Drive the `run` generator through vo and re-throw any error it reports.
vo(run)(function onDone(err, result) {
  if (!err) {
    return;
  }
  throw err;
});
/**
 * Logs in to Facebook, opens the profile's "likes" page, scrolls down a fixed
 * number of times so more entries can load, then collects the profile links
 * found on the page and prints them.
 *
 * This is a generator intended to be driven by `vo`: every `yield` hands a
 * queued Nightmare action chain to the runner and resumes with its result.
 * The browser is always shut down, even when one of the steps fails.
 */
function *run() {
  // Same number of scroll rounds and the same pause the unrolled chain used.
  const SCROLL_ROUNDS = 16;
  const SCROLL_PAUSE_MS = 3000;

  // Executed inside the page: jump to the current bottom of the document.
  function scrollToBottom() {
    window.document.body.scrollTop = document.body.scrollHeight;
  }

  // Executed inside the page: returns the href of every listed profile and
  // removes the processed entries from the DOM.
  function collectProfileLinks() {
    // Start from an empty list so the result holds profile URLs only.
    const profileUrls = [];
    // Live collection: it shrinks as matching nodes are removed below,
    // which is why the removal loop always targets index 0.
    const cards = document.getElementsByClassName("_5rz _5k3a _5rz3 _1v6c");
    const cardCount = cards.length;
    for (let i = 0; i < cardCount; i++) {
      const url = cards[i].querySelector(".fsl.fwb.fcb a").href;
      console.log(url);
      profileUrls.push(url);
    }
    if (cardCount > 0) {
      // Drop the processed entries so the page does not get overloaded.
      for (let j = 0; j < cardCount; j++) {
        cards[0].parentNode.removeChild(cards[0]);
      }
      window.document.body.scrollTop = document.body.scrollHeight;
    }
    return profileUrls;
  }

  const nightmare = Nightmare({
    show: true,
    webPreferences: { partition: 'your-custom-partition' }
  });

  try {
    yield nightmare
      .goto('https://facebook.com/login')
      .type('input[id="email"]', "user")
      .type("input[id='pass']", "pass")
      .click('#loginbutton')
      .wait('._8u._42ef')
      .goto('https://www.facebook.com/myprofile/likes')
      .wait(1000);

    // Queue the scroll/pause rounds on one chain, then wait for all of them.
    let scrolling = nightmare;
    for (let round = 0; round < SCROLL_ROUNDS; round++) {
      scrolling = scrolling.evaluate(scrollToBottom).wait(SCROLL_PAUSE_MS);
    }
    yield scrolling;

    const title = yield nightmare.evaluate(collectProfileLinks);
    console.log(title);
  } finally {
    // Close the browser on success and on failure alike.
    yield nightmare.end();
  }
}
我认为您想要的与这个回答类似,该回答与 segmentio/nightmare#625 相关。
为了完整起见,下面包含了参考答案中提供的解决方案的副本。
下面是一种能回答您问题的非常朴素的方法:
var Nightmare = require('nightmare');
var vo = require('vo');
// Shared Nightmare instance used by `run`; `show: true` asks Nightmare to display the browser window.
var nightmare = Nightmare({ show: true });
/**
 * Opens the page, then keeps scrolling to the current document height until
 * two consecutive measurements agree, and finally ends the session.
 *
 * Generator intended to be driven by `vo`; each `yield` hands a Nightmare
 * action chain to the runner and resumes with its result.
 */
var run = function* () {
  yield nightmare.goto('http://someInfiniteScrollPage.tld');

  let lastSeenHeight;
  let pageHeight = 0;
  do {
    lastSeenHeight = pageHeight;
    // Measure the document height inside the page.
    pageHeight = yield nightmare.evaluate(function () {
      return document.body.scrollHeight;
    });
    // Scroll down to that height and give new content time to load.
    const scrolled = nightmare.scrollTo(pageHeight, 0);
    yield scrolled.wait(3000);
  } while (lastSeenHeight !== pageHeight);

  yield nightmare.end();
};
// Drive the `run` generator with vo; print the error argument, then announce completion.
vo(run)((err) => {
  console.dir(err);
  console.log('done');
});
这种方法存在问题:当您访问的页面确实是无限滚动页面时,上述代码将永远不会结束。此外,可以将 .wait() 调用替换为等待滚动元素的数量发生变化,这样可能减少延迟并提高稳健性。尽管如此,这应该足以让您入门。