CasperJS 无法处理亚马逊的 iframe

CasperJS doesn't work with Amazon iframe

我正在做一个小项目,需要获取一些亚马逊图书的预览内容(可以是 PNG 图像,也可以是 HTML 内容)。

例如这本书:https://www.amazon.com/gp/product/B00JNYEXCK/.

当点击"Look inside"徽章(id="sitbLogoImg"的img标签)时,会出现一个新的框架,显示本书的预览内容。它有2个版本,打印预览(这是PNG图像,我可以得到这些)和kindle预览(这是iframe文档)。

我被 kindle 预览的 iframe 困住了,它基本上是这样的:

<!-- Simplified sketch of the Kindle preview markup: the iframe #sitbReaderFrame
     sits inside #sitbReaderKindleSample; the "...." lines are placeholders. -->
<div id="scrollElm-0" class="pageHtml">
  <div id="sitbReaderKindleSample">
    <iframe id="sitbReaderFrame">
      <html>
      <head></head>
      <body>
        <p>.......</p>
        <div>......</div>
        ....
      </body>
      </html>
    </iframe>
  </div>
</div>

这是我的 CasperJS 脚本:

// CasperJS script (ES5 syntax, as used throughout this script): opens the
// product page, clicks the "Look inside" badge, then tries to dump the
// Kindle preview iframe to lis_content.html and lis_content.png.
var fs = require('fs');
var casper = require('casper').create({
  pageSettings: {
    loadPlugins: false,
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
  }
});
casper.options.viewportSize = {
  width: 1366,
  height: 768
};
casper.options.waitTimeout = 10000;

// Restore cookies saved by a previous run, if any.
// fs.read throws when the file is missing, so check for it first; a corrupt
// file is reported and ignored instead of aborting the whole run.
var cookieFilename = "cookies.txt";
if (fs.exists(cookieFilename)) {
  var data = fs.read(cookieFilename);
  if (data) {
    try {
      phantom.cookies = JSON.parse(data);
    } catch (e) {
      casper.echo('Ignoring unreadable cookie file ' + cookieFilename + ': ' + e.message, 'WARNING');
    }
  }
}

casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
  this.echo(this.status(true));
  this.captureSelector('before.png', 'html');
});

// Wait for the "Look inside" badge, then click it to open the preview.
casper.waitForSelector('img#sitbLogoImg');
casper.then(function() {
  this.click('img#sitbLogoImg');
});

// The lightbox header appearing means the preview overlay has been created.
casper.waitForSelector('div#sitbLBHeader');

// Filled in by the withFrame step below and written to disk afterwards.
var lis_content = '';

// Give the preview a few seconds to load before taking the screenshot.
casper.wait(3000, function() {
  this.captureSelector('after.png', 'html');
});

// NOTE(review): the frame index is hard-coded to 1; the iframe has an id but
// no name, so it cannot be addressed by name here — confirm the index
// against the frames actually present on the page.
casper.withFrame(1, function() {
  lis_content = this.getHTML();
  this.captureSelector('lis_content.png', 'html');
});

// Write the sitbReaderFrame content to file.
// The third argument of fs.write is an open mode string ('w' = write),
// not a numeric permission.
casper.then(function() {
  var lis_content_filename = 'lis_content.html';
  fs.write(lis_content_filename, lis_content, 'w');
});

// Persist the cookies for the next run.
casper.wait(1000, function() {
  var cookies = JSON.stringify(phantom.cookies);
  fs.write(cookieFilename, cookies, 'w');
});
casper.run();

问题是这个 iframe 只有 id="sitbReaderFrame" 而没有 name。我尝试过 casper.withFrame,并把框架索引从 0 试到 4,但在 CasperJS 看来这个 iframe 似乎并不存在。

我很想听听你的建议,因为我真的被困在这里了。非常感谢,抱歉我的英语不好。

CasperJS 脚本:

/**
 * 'page.initialized' handler: sets the viewport and overrides the page's
 * screen, window-size and navigator properties with values describing a
 * desktop Chrome 56 on Linux.
 *
 * The spoofed values use the types a real browser reports (numbers for
 * dimensions, booleans for flags, an array for `languages`, a string for
 * `productSub`), so that type checks on them do not stand out.
 *
 * @param {Object} page - page object; must provide `viewportSize` and
 *   `evaluate(fn, ...args)`.
 */
function on_init(page) {
    var width = 1600, height = 900;

    page.viewportSize = { width: width, height: height };

    // The callback is handed to page.evaluate together with its arguments,
    // so it must be self-contained: everything it needs is passed in.
    page.evaluate(function (width, height) {
        // Shared by navigator.plugins and navigator.mimeTypes below.
        var flashPlugin = {
            name: 'Shockwave Flash',
            filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so',
            description: 'Shockwave Flash 11.2 r202',
            version: '11.2.202.440'
        };

        window.screen = { width: width, height: height, availWidth: width, availHeight: height };
        window.innerWidth = width;
        window.innerHeight = height;
        window.outerWidth = width;
        window.outerHeight = height;

        window.navigator = {
            plugins: { length: 2, 'Shockwave Flash': flashPlugin },
            mimeTypes: {
                length: 2,
                "application/x-shockwave-flash": {
                    description: "Shockwave Flash",
                    suffixes: "swf",
                    type: "application/x-shockwave-flash",
                    enabledPlugin: flashPlugin
                }
            },
            appCodeName: "Mozilla",
            appName: "Netscape",
            appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            cookieEnabled: true,
            languages: ["en-US", "en"],
            language: "en",
            onLine: true,
            doNotTrack: null,
            platform: "Linux x86_64",
            product: "Gecko",
            vendor: "Google Inc.",
            vendorSub: "",
            productSub: "20030107",
            userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            geolocation: {
                getCurrentPosition: function getCurrentPosition() {},
                watchPosition: function watchPosition() {},
                clearWatch: function clearWatch() {}
            },
            javaEnabled: function javaEnabled() { return false; }
        };
    }, width, height);
}

// CasperJS script: opens the product page, clicks the e-book "Look inside"
// badge, looks up the #sitbReaderFrame iframe among the page's iframes and
// writes that frame's HTML to lis_content.html.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    // CasperJS reads the user agent from pageSettings; a top-level
    // `userAgent` option is not a documented Casper option.
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
});
var fs = require('fs');

casper
    .on("error", function (msg) { this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function (msg, trace) { this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function (msg) { this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function () {
        // If the badge is missing, click() raises a CasperError, which the
        // "error" handler above prints with the "error: " prefix.
        this.click('#ebooksSitbLogoImg');
        this
            .capture('lis.png')
            .wait(3000, function () {
                // Runs in the page: DOM position (plus one) of the last iframe
                // whose id is "sitbReaderFrame", or null when there is none.
                // NOTE(review): the "+ 1" offset is kept from the original
                // script — confirm it matches the frame numbering that
                // withFrame expects on this page.
                var index = this.evaluate(function () {
                    var frames = document.querySelectorAll('iframe');
                    var found = null;
                    for (var i = 0; i < frames.length; i++) {
                        if (frames[i].id === "sitbReaderFrame") {
                            found = i + 1;
                        }
                    }
                    return found;
                });

                // Without this guard a missing iframe would lead to
                // withFrame() being called with no usable index.
                if (index === null || index === undefined) {
                    this.echo("iframe #sitbReaderFrame not found; lis_content.html was not written", "ERROR");
                    return;
                }

                this
                    .echo("The index is: " + index, "INFO")
                    .capture('lis_content.png')
                    .withFrame(index, function () {
                        // Third argument is an open mode string ('w' = write),
                        // not a numeric permission.
                        fs.write('lis_content.html', this.getHTML(), 'w');
                    });
            });
    })
    .run();
You need to use the --cookies-file option, to avoid blocking.

./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout

(cookies_1.txt: https://drive.google.com/open?id=0B_tqnSHhFPBnSjZMM3NvUWttQjQ)

如果输出了下面这条错误:

error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg

说明请求仍然被屏蔽了。

In that case
Try again after reconnecting to the internet and getting new IP address.