从 HTML 元素中抓取链接 - casperjs
Scraping links from html elements - casperjs
我目前正在尝试借助 casperjs 从这个网站中抓取链接和缩略图。我能够轻松地找出 HTML 结构(如下所示)。我正在尝试从所有 a 标签的 href 属性中提取链接。我运行了我的脚本,但在访问 video_links 时收到了错误。我怎样才能抓取所有链接和缩略图并输出到一个数组中?
错误
TypeError: 'undefined' is not an object (evaluating 'video_links.length')
脚本
// Casper instance, created with an empty options object.
var casper = require('casper').create({});
// Holds the value returned by evaluating getLinks in a later step.
var video_links;
// Declared alongside video_links; not assigned anywhere in this snippet.
var video_thumbnails;
//Functions
/**
 * Collects the `href` attribute of every anchor nested under an element
 * carrying the `cne-episode-block` class.
 *
 * Reads the global `document`, so it must execute where a DOM is available
 * (the script hands it to `evaluate`). Kept in ES5 syntax for that reason.
 *
 * @returns {Array} One entry per matched anchor, in query order; each entry is
 *   whatever `getAttribute('href')` returns for that anchor.
 */
function getLinks() {
  var anchors = document.querySelectorAll('.cne-episode-block a');
  var hrefs = [];
  for (var i = 0; i < anchors.length; i++) {
    hrefs.push(anchors[i].getAttribute('href'));
  }
  return hrefs;
}
// Open the page whose links are scraped.
casper.start('http://video.wired.com/');

// Queued step: run getLinks inside the page and keep what it returns.
casper.then(function() {
  // Fall back to an empty list so the summary below never reads `.length`
  // of a null/undefined result.
  video_links = this.evaluate(getLinks) || [];
});

// run() takes a completion callback. The previous code passed the *result* of
// this.echo(...), which was evaluated immediately, before any step had run and
// while video_links was still undefined, causing the TypeError.
casper.run(function() {
  this.echo(video_links.length + ' links found.');
  // With a custom completion callback, exiting must be requested explicitly.
  this.exit();
});
HTML
<div class="cne-thumb-grid-container cne-context-container">
<div class="cne-thumb cne-episode-block " data-videoid="551dc13461646d11aa020000">
<div class="cne-thumb-image cne-rollover" data-powertiptarget="551dc13461646d11aa020000">
<a class="cne-thumbnail cne-zoom-effect js-ajax-video-load" href="/watch/angry-nerd-will-netflix-s-daredevil-fly-or-flop" data-video-series="Angry Nerd" data-video-series-id="518d55c268f9dac897000003" data-video-id="551dc13461646d11aa020000" data-video-categories='["Movies \u0026 TV"]'>
<img class="cne-video-thumb" src="http://dwgyu36up6iuz.cloudfront.net/heru80fdn/image/upload/c_fill,d_placeholder_thescene.jpg,fl_progressive,g_face,h_151,q_80,w_270/v1428076783/wired_angry-nerd-will-netflix-s-daredevil-fly-or-flop.jpg" alt="Will Netflix’s Daredevil Fly or Flop?">
<div class="cne-thumbnail-play">Play</div>
</a>
</div>
<div class="cne-thumb-title the-thumb-title">
<a class="js-ajax-video-load" href="/watch/angry-nerd-will-netflix-s-daredevil-fly-or-flop" data-video-id="551dc13461646d11aa020000">Will Netflix’s Daredevil Fly or Flop?</a>
<div class="cne-thumb-subtitle">
<a href="/series/angry-nerd">Angry Nerd</a>
</div>
</div>
<div id="551dc13461646d11aa020000" class="cne-thumb-rollover">
<div class="cne-thumb-rollover-box">
<span class="cne-rollover-category"> Movies & TV </span>
<span class="cne-rollover-name"> Will Netflix’s Daredevil Fly or Flop? </span>
<span class="cne-rollover-description"> If Netflix’s new Daredevil series is anything like Ben Affleck’s Daredevil film, we’re all in trouble. Angry Nerd explains what the latest incarnation needs to get right to make sure the man without fear doesn’t turn into a total flop. </span>
</div>
</div>
</div>
</div>
如果这些选择器位于同一级别,则您只需要其中一个。因此,只需在您的 querySelectorAll 中使用 cne-thumb 或 cne-episode-block 其中之一,而不是两者都使用。
我目前正在尝试借助 casperjs 从这个网站中抓取链接和缩略图。我能够轻松地找出 HTML 结构(如下所示)。我正在尝试从所有 a 标签的 href 属性中提取链接。我运行了我的脚本,但在访问 video_links 时收到了错误。我怎样才能抓取所有链接和缩略图并输出到一个数组中?
错误
TypeError: 'undefined' is not an object (evaluating 'video_links.length')
脚本
// Casper instance, created with an empty options object.
var casper = require('casper').create({});
// Holds the value returned by evaluating getLinks in a later step.
var video_links;
// Declared alongside video_links; not assigned anywhere in this snippet.
var video_thumbnails;
//Functions
/**
 * Collects the `href` attribute of every anchor nested under an element
 * carrying the `cne-episode-block` class.
 *
 * Reads the global `document`, so it must execute where a DOM is available
 * (the script hands it to `evaluate`). Kept in ES5 syntax for that reason.
 *
 * @returns {Array} One entry per matched anchor, in query order; each entry is
 *   whatever `getAttribute('href')` returns for that anchor.
 */
function getLinks() {
  var anchors = document.querySelectorAll('.cne-episode-block a');
  var hrefs = [];
  for (var i = 0; i < anchors.length; i++) {
    hrefs.push(anchors[i].getAttribute('href'));
  }
  return hrefs;
}
// Open the page whose links are scraped.
casper.start('http://video.wired.com/');

// Queued step: run getLinks inside the page and keep what it returns.
casper.then(function() {
  // Fall back to an empty list so the summary below never reads `.length`
  // of a null/undefined result.
  video_links = this.evaluate(getLinks) || [];
});

// run() takes a completion callback. The previous code passed the *result* of
// this.echo(...), which was evaluated immediately, before any step had run and
// while video_links was still undefined, causing the TypeError.
casper.run(function() {
  this.echo(video_links.length + ' links found.');
  // With a custom completion callback, exiting must be requested explicitly.
  this.exit();
});
HTML
<div class="cne-thumb-grid-container cne-context-container">
<div class="cne-thumb cne-episode-block " data-videoid="551dc13461646d11aa020000">
<div class="cne-thumb-image cne-rollover" data-powertiptarget="551dc13461646d11aa020000">
<a class="cne-thumbnail cne-zoom-effect js-ajax-video-load" href="/watch/angry-nerd-will-netflix-s-daredevil-fly-or-flop" data-video-series="Angry Nerd" data-video-series-id="518d55c268f9dac897000003" data-video-id="551dc13461646d11aa020000" data-video-categories='["Movies \u0026 TV"]'>
<img class="cne-video-thumb" src="http://dwgyu36up6iuz.cloudfront.net/heru80fdn/image/upload/c_fill,d_placeholder_thescene.jpg,fl_progressive,g_face,h_151,q_80,w_270/v1428076783/wired_angry-nerd-will-netflix-s-daredevil-fly-or-flop.jpg" alt="Will Netflix’s Daredevil Fly or Flop?">
<div class="cne-thumbnail-play">Play</div>
</a>
</div>
<div class="cne-thumb-title the-thumb-title">
<a class="js-ajax-video-load" href="/watch/angry-nerd-will-netflix-s-daredevil-fly-or-flop" data-video-id="551dc13461646d11aa020000">Will Netflix’s Daredevil Fly or Flop?</a>
<div class="cne-thumb-subtitle">
<a href="/series/angry-nerd">Angry Nerd</a>
</div>
</div>
<div id="551dc13461646d11aa020000" class="cne-thumb-rollover">
<div class="cne-thumb-rollover-box">
<span class="cne-rollover-category"> Movies & TV </span>
<span class="cne-rollover-name"> Will Netflix’s Daredevil Fly or Flop? </span>
<span class="cne-rollover-description"> If Netflix’s new Daredevil series is anything like Ben Affleck’s Daredevil film, we’re all in trouble. Angry Nerd explains what the latest incarnation needs to get right to make sure the man without fear doesn’t turn into a total flop. </span>
</div>
</div>
</div>
</div>
如果这些选择器位于同一级别,则您只需要其中一个。因此,只需在您的 querySelectorAll 中使用 cne-thumb 或 cne-episode-block 其中之一,而不是两者都使用。