我无法使用 Node.js 从网站获取 url
I cannot get URLs from a website using Node.js
我希望使用下面的代码从这个网站抓取 url:
// Asker's original script: requests the sport topic page, tries to collect
// href values into `urls`, and prints the array.
var request = require("request");
// NOTE(review): `cheerio` and `urls` are assigned without var/let/const,
// so they become implicit globals.
cheerio = require("cheerio");
urls = [];
request("http://news.sabay.com.kh/topics/sport", function(err, resp, body){
// Only proceeds when there is no error and the status is 200; any other
// outcome produces no output at all.
if(!err && resp.statusCode ==200){
var $ = cheerio.load(body);
// NOTE(review): two separate string arguments are passed here rather than one
// combined selector; cheerio presumably treats the second as a search
// context — confirm against the cheerio docs.
$(".article","h4.title").each(function(){
// NOTE(review): calls .attr() directly on `this`; verify whether `this` is a
// cheerio wrapper or a raw element inside .each().
var url = this.attr("href");
urls.push(url);
});
console.log(urls);
}
});
但是我无法得到结果。当我运行时,得到的输出是:
$ node server.js
[]
首先,使用合适的 CSS 选择器:
.article h4.title > a
然后,使用正确的字段:
var url = this.attribs.href
给出:
// Scrapes the article links from the sport topic page and prints them.
// All names are declared with `var` so nothing leaks as an implicit global.
var request = require("request");
var cheerio = require("cheerio");
var urls = [];

request("http://news.sabay.com.kh/topics/sport", function (err, resp, body) {
  // Report failures instead of silently printing nothing.
  if (err) {
    console.error(err);
    return;
  }
  if (resp.statusCode !== 200) {
    console.error("Unexpected status code: " + resp.statusCode);
    return;
  }

  var $ = cheerio.load(body);
  // A single combined selector: anchors that are direct children of
  // h4.title inside .article.
  // A regular function (not an arrow) is used so `this` is the matched element.
  $(".article h4.title > a").each(function () {
    var url = this.attribs.href;
    urls.push(url);
  });
  console.log(urls);
});
并输出:
[ 'http://news.sabay.com.kh/article/546826',
'http://news.sabay.com.kh/article/546763',
'http://news.sabay.com.kh/article/546520',
'http://news.sabay.com.kh/article/546568',
'http://news.sabay.com.kh/article/546460',
'http://news.sabay.com.kh/article/546448',
'http://news.sabay.com.kh/article/545674',
'http://news.sabay.com.kh/article/546235',
'http://news.sabay.com.kh/article/545698',
'http://news.sabay.com.kh/article/546091' ]
我希望使用下面的代码从这个网站抓取 url:
// Asker's original script: requests the sport topic page, tries to collect
// href values into `urls`, and prints the array.
var request = require("request");
// NOTE(review): `cheerio` and `urls` are assigned without var/let/const,
// so they become implicit globals.
cheerio = require("cheerio");
urls = [];
request("http://news.sabay.com.kh/topics/sport", function(err, resp, body){
// Only proceeds when there is no error and the status is 200; any other
// outcome produces no output at all.
if(!err && resp.statusCode ==200){
var $ = cheerio.load(body);
// NOTE(review): two separate string arguments are passed here rather than one
// combined selector; cheerio presumably treats the second as a search
// context — confirm against the cheerio docs.
$(".article","h4.title").each(function(){
// NOTE(review): calls .attr() directly on `this`; verify whether `this` is a
// cheerio wrapper or a raw element inside .each().
var url = this.attr("href");
urls.push(url);
});
console.log(urls);
}
});
但是我无法得到结果。当我运行时,得到的输出是:
$ node server.js
[]
首先,使用合适的 CSS 选择器:
.article h4.title > a
然后,使用正确的字段:
var url = this.attribs.href
给出:
// Scrapes the article links from the sport topic page and prints them.
// All names are declared with `var` so nothing leaks as an implicit global.
var request = require("request");
var cheerio = require("cheerio");
var urls = [];

request("http://news.sabay.com.kh/topics/sport", function (err, resp, body) {
  // Report failures instead of silently printing nothing.
  if (err) {
    console.error(err);
    return;
  }
  if (resp.statusCode !== 200) {
    console.error("Unexpected status code: " + resp.statusCode);
    return;
  }

  var $ = cheerio.load(body);
  // A single combined selector: anchors that are direct children of
  // h4.title inside .article.
  // A regular function (not an arrow) is used so `this` is the matched element.
  $(".article h4.title > a").each(function () {
    var url = this.attribs.href;
    urls.push(url);
  });
  console.log(urls);
});
并输出:
[ 'http://news.sabay.com.kh/article/546826',
'http://news.sabay.com.kh/article/546763',
'http://news.sabay.com.kh/article/546520',
'http://news.sabay.com.kh/article/546568',
'http://news.sabay.com.kh/article/546460',
'http://news.sabay.com.kh/article/546448',
'http://news.sabay.com.kh/article/545674',
'http://news.sabay.com.kh/article/546235',
'http://news.sabay.com.kh/article/545698',
'http://news.sabay.com.kh/article/546091' ]