使用 CasperJS 接受 cookie 策略
Accepting cookie policy with CasperJS
我一直在尝试使用 CasperJS 登录 Marktplaats.nl(荷兰一个类似 Craigslist 的网站)。但是,我卡在了接受 cookie 政策这一步。
到目前为止,这是我的脚本:
// Reproduction script: open the Marktplaats home page, try to accept the
// cookie wall, then request the home page again and print its title.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    pageSettings: {
        loadImages: false,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    }
});
var x = require('casper').selectXPath;
// XPath locator for the cookie wall's "Cookies accepteren" (agree) button.
var agreeButton = x("//input[contains(@value, 'Cookies accepteren')]");

casper.start('https://www.marktplaats.nl', function() {
    // POINT1
    // Print the title of the page we landed on (the author reports
    // "Marktplaats Cookiewall") and log whether the agree button exists.
    this.echo(this.getTitle());
    this.echo(this.exists(agreeButton) ? "Agree button found" : "Agree button not found");
});

casper.then(function() {
    // Nothing to click unless we are on the cookie wall page.
    if (this.getTitle().indexOf("Cookiewall") === -1) {
        return;
    }
    // POINT2
    // We are on the cookie wall page: click on agree.
    this.echo("Clicking on agree");
    this.click(agreeButton);
});

casper.thenOpen('https://www.marktplaats.nl', function() {
    // POINT3
    // Home page requested again; print the title it resolved to.
    this.echo('Second Page: ' + this.getTitle());
});

casper.run();
我首先尝试导航到主页(在代码中标记为 POINT1),但被重定向到要求我接受 cookie 政策的 Cookiewall 页面。(屏幕截图是在普通浏览器中截取的,并非来自 CasperJS。)
在 POINT2,我的脚本点击了 "Cookies accepteren" - 这被记录为:
[debug] [phantom] Mouse event 'mousedown' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
[debug] [phantom] Mouse event 'mouseup' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
[debug] [phantom] Mouse event 'click' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
我是 CasperJS 的新手,但这段日志在我看来没有问题。
最后,在 POINT3,我重新加载主页,但再次被重定向到 Cookiewall 页面——CasperJS 输出了 Cookiewall 的标题,日志中也记录了这次重定向。
评论后更新:我根据 Artjom 的评论注册了 resource.error、page.error、remote.message 和 casper.page.onResourceTimeout。出现了 2 个 ResourceErrors。我相应地编辑了这个日志:
[info] [phantom] Step anonymous 3/5: done in 2018ms.
[debug] [phantom] opening url: https://www.marktplaats.nl/, HTTP GET
ResourceError: {
"errorCode": 5,
"errorString": "Operation canceled",
"id": 7,
"status": null,
"statusText": null,
"url": "http://s3.amazonaws.com/ki.js/56612/b7M.js"
}
[debug] [phantom] Navigation requested: url=https://www.marktplaats.nl/, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] Navigation requested: url=http://www.marktplaats.nl/, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] Navigation requested: url=http://www.marktplaats.nl/cookiewall/?target=http%3A%2F%2Fwww.marktplaats.nl%2F, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://www.marktplaats.nl/cookiewall/?target=http%3A%2F%2Fwww.marktplaats.nl%2F"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 5/5 http://www.marktplaats.nl/cookiewall/?target=http://www.marktplaats.nl/ (HTTP 200)
Second Page: ? Marktplaats - Cookiewall
[info] [phantom] Step anonymous 5/5: done in 2224ms.
[info] [phantom] Done 5 steps in 2243ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other, willNavigate=true, isMainFrame=true
ResourceError: {
"errorCode": 5,
"errorString": "Operation canceled",
"id": 11,
"status": null,
"statusText": null,
"url": "http://s3.amazonaws.com/ki.js/56612/b7M.js"
}
[debug] [phantom] url changed to "about:blank"
我似乎无法访问主页。
你需要做两件事:
- 加载图片,因为未加载图片时按钮似乎没有尺寸。
- 点击之后需要等待一下,因为 CasperJS 似乎没有察觉到点击触发了页面加载。
完整脚本:
// CasperJS script: open Marktplaats, accept the cookie wall, and print the
// page title before and after accepting.
var casper = require('casper').create({
    //verbose: true,
    //logLevel: 'debug',
    pageSettings: {
        // Images must be loaded: without them the accept button appears to
        // have no dimensions, so the click has nothing to hit.
        loadImages: true,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    }
});
var x = require('casper').selectXPath;
// XPath locator for the cookie wall's "Cookies accepteren" (accept) button.
var acceptBtn = x("//input[contains(@value, 'Cookies accepteren')]");

casper.start('http://www.marktplaats.nl', function() {
    // Title of the page we landed on (expected to be the cookie wall).
    this.echo(this.getTitle());
})
.waitForSelector(acceptBtn)
.thenClick(acceptBtn)
// CasperJS does not seem to notice the page load triggered by the click, so
// an explicit wait is needed. Poll until a page with a non-empty title other
// than the cookie wall is showing, instead of sleeping a fixed 100 ms, which
// races against the navigation on a slow connection.
// NOTE(review): if the cookie wall never goes away, this wait ends through
// CasperJS's waitTimeout / onWaitTimeout handling instead of continuing.
.waitFor(function leftCookieWall() {
    var title = this.getTitle();
    return title !== '' && title.indexOf('Cookiewall') === -1;
})
.then(function() {
    // Title of the page reached after accepting the cookies.
    this.echo(this.getTitle());
})
.run();
我一直在尝试使用 CasperJS 登录 Marktplaats.nl(荷兰一个类似 Craigslist 的网站)。但是,我卡在了接受 cookie 政策这一步。
到目前为止,这是我的脚本:
// Reproduction script: open the Marktplaats home page, try to accept the
// cookie wall, then request the home page again and print its title.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    pageSettings: {
        loadImages: false,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    }
});
var x = require('casper').selectXPath;
// XPath locator for the cookie wall's "Cookies accepteren" (agree) button.
var agreeButton = x("//input[contains(@value, 'Cookies accepteren')]");

casper.start('https://www.marktplaats.nl', function() {
    // POINT1
    // Print the title of the page we landed on (the author reports
    // "Marktplaats Cookiewall") and log whether the agree button exists.
    this.echo(this.getTitle());
    this.echo(this.exists(agreeButton) ? "Agree button found" : "Agree button not found");
});

casper.then(function() {
    // Nothing to click unless we are on the cookie wall page.
    if (this.getTitle().indexOf("Cookiewall") === -1) {
        return;
    }
    // POINT2
    // We are on the cookie wall page: click on agree.
    this.echo("Clicking on agree");
    this.click(agreeButton);
});

casper.thenOpen('https://www.marktplaats.nl', function() {
    // POINT3
    // Home page requested again; print the title it resolved to.
    this.echo('Second Page: ' + this.getTitle());
});

casper.run();
我首先尝试导航到主页(在代码中标记为 POINT1),但被重定向到要求我接受 cookie 政策的 Cookiewall 页面。(屏幕截图是在普通浏览器中截取的,并非来自 CasperJS。)
在 POINT2,我的脚本点击了 "Cookies accepteren" - 这被记录为:
[debug] [phantom] Mouse event 'mousedown' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
[debug] [phantom] Mouse event 'mouseup' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
[debug] [phantom] Mouse event 'click' on selector: xpath selector: //input[contains(@value, 'Cookies accepteren')]
我是 CasperJS 的新手,但这段日志在我看来没有问题。
最后,在 POINT3,我重新加载主页,但再次被重定向到 Cookiewall 页面——CasperJS 输出了 Cookiewall 的标题,日志中也记录了这次重定向。
评论后更新:我根据 Artjom 的评论为 resource.error、page.error、remote.message 和 casper.page.onResourceTimeout 注册了处理函数。出现了 2 个 ResourceError。我相应地更新了下面的日志:
[info] [phantom] Step anonymous 3/5: done in 2018ms.
[debug] [phantom] opening url: https://www.marktplaats.nl/, HTTP GET
ResourceError: {
"errorCode": 5,
"errorString": "Operation canceled",
"id": 7,
"status": null,
"statusText": null,
"url": "http://s3.amazonaws.com/ki.js/56612/b7M.js"
}
[debug] [phantom] Navigation requested: url=https://www.marktplaats.nl/, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] Navigation requested: url=http://www.marktplaats.nl/, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] Navigation requested: url=http://www.marktplaats.nl/cookiewall/?target=http%3A%2F%2Fwww.marktplaats.nl%2F, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://www.marktplaats.nl/cookiewall/?target=http%3A%2F%2Fwww.marktplaats.nl%2F"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 5/5 http://www.marktplaats.nl/cookiewall/?target=http://www.marktplaats.nl/ (HTTP 200)
Second Page: ? Marktplaats - Cookiewall
[info] [phantom] Step anonymous 5/5: done in 2224ms.
[info] [phantom] Done 5 steps in 2243ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other, willNavigate=true, isMainFrame=true
ResourceError: {
"errorCode": 5,
"errorString": "Operation canceled",
"id": 11,
"status": null,
"statusText": null,
"url": "http://s3.amazonaws.com/ki.js/56612/b7M.js"
}
[debug] [phantom] url changed to "about:blank"
我似乎无法访问主页。
你需要做两件事:
- 加载图片,因为未加载图片时按钮似乎没有尺寸。
- 点击之后需要等待一下,因为 CasperJS 似乎没有察觉到点击触发了页面加载。
完整脚本:
// CasperJS script: open Marktplaats, accept the cookie wall, and print the
// page title before and after accepting.
var casper = require('casper').create({
    //verbose: true,
    //logLevel: 'debug',
    pageSettings: {
        // Images must be loaded: without them the accept button appears to
        // have no dimensions, so the click has nothing to hit.
        loadImages: true,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    }
});
var x = require('casper').selectXPath;
// XPath locator for the cookie wall's "Cookies accepteren" (accept) button.
var acceptBtn = x("//input[contains(@value, 'Cookies accepteren')]");

casper.start('http://www.marktplaats.nl', function() {
    // Title of the page we landed on (expected to be the cookie wall).
    this.echo(this.getTitle());
})
.waitForSelector(acceptBtn)
.thenClick(acceptBtn)
// CasperJS does not seem to notice the page load triggered by the click, so
// an explicit wait is needed. Poll until a page with a non-empty title other
// than the cookie wall is showing, instead of sleeping a fixed 100 ms, which
// races against the navigation on a slow connection.
// NOTE(review): if the cookie wall never goes away, this wait ends through
// CasperJS's waitTimeout / onWaitTimeout handling instead of continuing.
.waitFor(function leftCookieWall() {
    var title = this.getTitle();
    return title !== '' && title.indexOf('Cookiewall') === -1;
})
.then(function() {
    // Title of the page reached after accepting the cookies.
    this.echo(this.getTitle());
})
.run();