CasperJS 不适用于亚马逊 iframe
CasperJS doesn't work with Amazon iframe
我正在做一个小项目,需要获取一些亚马逊图书的预览内容(可以是 PNG 图像或 HTML 内容)。
例如这本书:https://www.amazon.com/gp/product/B00JNYEXCK/.
当点击"Look inside"徽章(id="sitbLogoImg"的img标签)时,会出现一个新的框架,显示本书的预览内容。它有2个版本,打印预览(这是PNG图像,我可以得到这些)和kindle预览(这是iframe文档)。
我被 kindle 预览的 iframe 困住了,它基本上是这样的:
<div id="scrollElm-0" class="pageHtml">
<div id="sitbReaderKindleSample">
<iframe id="sitbReaderFrame">
<html>
<head></head>
<body>
<p>.......</p>
<div>......</div>
....
</body>
</html>
</iframe>
</div>
</div>
这是我的 CasperJS 脚本:
// CasperJS script: opens an Amazon product page, clicks the "Look inside"
// badge, captures screenshots, and tries to dump the preview iframe to
// lis_content.html. Cookies are loaded from and saved back to cookies.txt.
var fs = require('fs');
var casper = require('casper').create({
    pageSettings: {
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
    }
});
casper.options.viewportSize = {
    width: 1366,
    height: 768
};
casper.options.waitTimeout = 10000;

// Reuse cookies saved by a previous run. The cookie file is only created at
// the end of a run (see the last step), so check that it exists before
// reading it.
var cookieFilename = "cookies.txt";
var data = fs.exists(cookieFilename) ? fs.read(cookieFilename) : '';
if (data) {
    phantom.cookies = JSON.parse(data);
}

casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
    this.echo(this.status(true));
    this.captureSelector('before.png', 'html');
});

// Wait until the "Look inside" badge is present.
casper.waitForSelector('img#sitbLogoImg', function() {
});

casper.then(function() {
    this.click('img#sitbLogoImg');
});

// Wait for the preview lightbox header to appear.
casper.waitForSelector('div#sitbLBHeader', function() {
});

var lis_content = '';

// Give the preview a few seconds before taking the "after" screenshot.
casper.wait(3000, function() {
    this.captureSelector('after.png', 'html');
});

// NOTE(review): the frame index 1 is hard-coded. The target iframe has
// id="sitbReaderFrame" but no name, so confirm this index actually selects it.
casper.withFrame(1, function() {
    lis_content = this.getHTML();
    this.captureSelector('lis_content.png', 'html');
});

// Write the sitbReaderFrame content to file ('w' = write mode).
casper.then(function() {
    fs.write('lis_content.html', lis_content, 'w');
});

// Persist the cookies for the next run.
casper.wait(1000, function() {
    fs.write(cookieFilename, JSON.stringify(phantom.cookies), 'w');
});

casper.run();
问题是该 iframe 只有 id="sitbReaderFrame" 而没有 name,我试过用 casper.withFrame 配合 0 到 4 的框架索引,但在 CasperJS 看来这个 iframe 似乎并不存在。
我很想听听你的建议,因为我真的被困在这里了。非常感谢,抱歉我的英语不好。
CasperJS
脚本:
/**
 * Handler for the 'page.initialized' event: sets the viewport size and
 * overrides screen, window-size and navigator values inside the page.
 *
 * The dimensions are numbers (not strings) so that the values exposed to
 * page scripts have the numeric type of the real properties.
 *
 * @param {Object} page - page object providing `viewportSize` and `evaluate`.
 */
function on_init (page){
    var width = 1600, height = 900;
    page.viewportSize = {width: width, height: height};
    page.evaluate(function (width, height){
        // Bare assignments are deliberate: they replace the page's globals.
        screen = {width: width, height: height, availWidth: width, availHeight: height};
        innerWidth = width; innerHeight = height; outerWidth = width; outerHeight = height;
        window.navigator = {
            plugins: {length: 2, 'Shockwave Flash': {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}},
            mimeTypes: {length: 2, "application/x-shockwave-flash": {description: "Shockwave Flash", suffixes: "swf", type: "application/x-shockwave-flash", enabledPlugin: {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}}},
            appCodeName: "Mozilla",
            appName: "Netscape",
            appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            cookieEnabled: 1,
            languages: "en-US,en",
            language: "en",
            onLine: 1,
            doNotTrack: null,
            platform: "Linux x86_64",
            product: "Gecko",
            vendor: "Google Inc.",
            vendorSub: "",
            productSub: 20030107,
            userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            geolocation: {getCurrentPosition: function getCurrentPosition(){}, watchPosition: function watchPosition(){}, clearWatch: function clearWatch(){}},
            javaEnabled: function javaEnabled(){return 0}
        };
    }, width, height);
}
// Create the Casper instance with debug logging and a 5 s wait timeout.
// The user agent is set under pageSettings, which is where CasperJS reads
// it from; a top-level `userAgent` option is not applied to the page.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
}), fs = require('fs');
// Wire up diagnostics, open the product page, click the "Look inside" badge,
// locate the sitbReaderFrame iframe and write its HTML to lis_content.html.
casper
    .on("error", function(msg){ this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function(msg, trace){ this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function(msg){ this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function(){
        // Wait for the badge before clicking it: clicking a selector that is
        // not in the page raises "Cannot dispatch mousedown event on
        // nonexistent selector".
        this.waitForSelector('#ebooksSitbLogoImg', function(){
            this.click('#ebooksSitbLogoImg');
            this
                .capture('lis.png')
                .wait(3000, function(){
                    // NOTE(review): the index passed to withFrame is the
                    // iframe's position among all iframes plus one, as in
                    // the original; confirm this matches the frame
                    // numbering withFrame expects.
                    var index = this.evaluate(function(){
                        var frames = document.querySelectorAll('iframe'), i;
                        for (i = 0; i < frames.length; i++) {
                            if (frames[i].id === "sitbReaderFrame") { return i + 1; }
                        }
                        return null;
                    });
                    this
                        .echo("The index is: " + index, "INFO")
                        .capture('lis_content.png');
                    // Without a usable index there is no frame to switch to.
                    if (typeof index !== 'number') {
                        this.echo("iframe#sitbReaderFrame was not found; lis_content.html not written", "ERROR");
                        return;
                    }
                    this.withFrame(index, function(){
                        fs.write('lis_content.html', this.getHTML(), 'w');
                    });
                });
        }, function(){
            // Timed out waiting for the badge: stop with a clear message.
            this.die("Selector #ebooksSitbLogoImg did not appear; the request may have been blocked", 1);
        });
    })
    .run();
You need to use the --cookies-file option to avoid being blocked.
./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout
(cookies_1.txt: https://drive.google.com/open?id=0B_tqnSHhFPBnSjZMM3NvUWttQjQ)
如果输出如下错误:
error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg
说明无论如何都无法避免被屏蔽。
In that case, try again after reconnecting to the internet and getting a new IP address.
我正在做一个小项目,需要获取一些亚马逊图书的预览内容(可以是 PNG 图像或 HTML 内容)。
例如这本书:https://www.amazon.com/gp/product/B00JNYEXCK/.
当点击"Look inside"徽章(id="sitbLogoImg"的img标签)时,会出现一个新的框架,显示本书的预览内容。它有2个版本,打印预览(这是PNG图像,我可以得到这些)和kindle预览(这是iframe文档)。
我被 kindle 预览的 iframe 困住了,它基本上是这样的:
<div id="scrollElm-0" class="pageHtml">
<div id="sitbReaderKindleSample">
<iframe id="sitbReaderFrame">
<html>
<head></head>
<body>
<p>.......</p>
<div>......</div>
....
</body>
</html>
</iframe>
</div>
</div>
这是我的 CasperJS 脚本:
// CasperJS script: opens an Amazon product page, clicks the "Look inside"
// badge, captures screenshots, and tries to dump the preview iframe to
// lis_content.html. Cookies are loaded from and saved back to cookies.txt.
var fs = require('fs');
var casper = require('casper').create({
    pageSettings: {
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
    }
});
casper.options.viewportSize = {
    width: 1366,
    height: 768
};
casper.options.waitTimeout = 10000;

// Reuse cookies saved by a previous run. The cookie file is only created at
// the end of a run (see the last step), so check that it exists before
// reading it.
var cookieFilename = "cookies.txt";
var data = fs.exists(cookieFilename) ? fs.read(cookieFilename) : '';
if (data) {
    phantom.cookies = JSON.parse(data);
}

casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
    this.echo(this.status(true));
    this.captureSelector('before.png', 'html');
});

// Wait until the "Look inside" badge is present.
casper.waitForSelector('img#sitbLogoImg', function() {
});

casper.then(function() {
    this.click('img#sitbLogoImg');
});

// Wait for the preview lightbox header to appear.
casper.waitForSelector('div#sitbLBHeader', function() {
});

var lis_content = '';

// Give the preview a few seconds before taking the "after" screenshot.
casper.wait(3000, function() {
    this.captureSelector('after.png', 'html');
});

// NOTE(review): the frame index 1 is hard-coded. The target iframe has
// id="sitbReaderFrame" but no name, so confirm this index actually selects it.
casper.withFrame(1, function() {
    lis_content = this.getHTML();
    this.captureSelector('lis_content.png', 'html');
});

// Write the sitbReaderFrame content to file ('w' = write mode).
casper.then(function() {
    fs.write('lis_content.html', lis_content, 'w');
});

// Persist the cookies for the next run.
casper.wait(1000, function() {
    fs.write(cookieFilename, JSON.stringify(phantom.cookies), 'w');
});

casper.run();
问题是该 iframe 只有 id="sitbReaderFrame" 而没有 name,我试过用 casper.withFrame 配合 0 到 4 的框架索引,但在 CasperJS 看来这个 iframe 似乎并不存在。
我很想听听你的建议,因为我真的被困在这里了。非常感谢,抱歉我的英语不好。
CasperJS
脚本:
/**
 * Handler for the 'page.initialized' event: sets the viewport size and
 * overrides screen, window-size and navigator values inside the page.
 *
 * The dimensions are numbers (not strings) so that the values exposed to
 * page scripts have the numeric type of the real properties.
 *
 * @param {Object} page - page object providing `viewportSize` and `evaluate`.
 */
function on_init (page){
    var width = 1600, height = 900;
    page.viewportSize = {width: width, height: height};
    page.evaluate(function (width, height){
        // Bare assignments are deliberate: they replace the page's globals.
        screen = {width: width, height: height, availWidth: width, availHeight: height};
        innerWidth = width; innerHeight = height; outerWidth = width; outerHeight = height;
        window.navigator = {
            plugins: {length: 2, 'Shockwave Flash': {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}},
            mimeTypes: {length: 2, "application/x-shockwave-flash": {description: "Shockwave Flash", suffixes: "swf", type: "application/x-shockwave-flash", enabledPlugin: {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}}},
            appCodeName: "Mozilla",
            appName: "Netscape",
            appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            cookieEnabled: 1,
            languages: "en-US,en",
            language: "en",
            onLine: 1,
            doNotTrack: null,
            platform: "Linux x86_64",
            product: "Gecko",
            vendor: "Google Inc.",
            vendorSub: "",
            productSub: 20030107,
            userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            geolocation: {getCurrentPosition: function getCurrentPosition(){}, watchPosition: function watchPosition(){}, clearWatch: function clearWatch(){}},
            javaEnabled: function javaEnabled(){return 0}
        };
    }, width, height);
}
// Create the Casper instance with debug logging and a 5 s wait timeout.
// The user agent is set under pageSettings, which is where CasperJS reads
// it from; a top-level `userAgent` option is not applied to the page.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
}), fs = require('fs');
// Wire up diagnostics, open the product page, click the "Look inside" badge,
// locate the sitbReaderFrame iframe and write its HTML to lis_content.html.
casper
    .on("error", function(msg){ this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function(msg, trace){ this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function(msg){ this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function(){
        // Wait for the badge before clicking it: clicking a selector that is
        // not in the page raises "Cannot dispatch mousedown event on
        // nonexistent selector".
        this.waitForSelector('#ebooksSitbLogoImg', function(){
            this.click('#ebooksSitbLogoImg');
            this
                .capture('lis.png')
                .wait(3000, function(){
                    // NOTE(review): the index passed to withFrame is the
                    // iframe's position among all iframes plus one, as in
                    // the original; confirm this matches the frame
                    // numbering withFrame expects.
                    var index = this.evaluate(function(){
                        var frames = document.querySelectorAll('iframe'), i;
                        for (i = 0; i < frames.length; i++) {
                            if (frames[i].id === "sitbReaderFrame") { return i + 1; }
                        }
                        return null;
                    });
                    this
                        .echo("The index is: " + index, "INFO")
                        .capture('lis_content.png');
                    // Without a usable index there is no frame to switch to.
                    if (typeof index !== 'number') {
                        this.echo("iframe#sitbReaderFrame was not found; lis_content.html not written", "ERROR");
                        return;
                    }
                    this.withFrame(index, function(){
                        fs.write('lis_content.html', this.getHTML(), 'w');
                    });
                });
        }, function(){
            // Timed out waiting for the badge: stop with a clear message.
            this.die("Selector #ebooksSitbLogoImg did not appear; the request may have been blocked", 1);
        });
    })
    .run();
You need to use the --cookies-file option to avoid being blocked.
./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout
(cookies_1.txt: https://drive.google.com/open?id=0B_tqnSHhFPBnSjZMM3NvUWttQjQ)
如果输出如下错误:
error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg
说明无论如何都无法避免被屏蔽。
In that case, try again after reconnecting to the internet and getting a new IP address.