如何用 Node.js 抓取 <head>?
Scrape <head> with Node.JS?
我想用 Node.js 抓取网页的 <head> 部分,但不知道该怎么做。借助 cheerio,我可以像下面这样访问 <body> 中的全部内容:
// Fetch `webUrl` and collect every same-site link of the page into `urls`.
// A link is kept only when its href is root-relative ("/path"); each kept
// href is prefixed with `websiteUrl` and the final list is logged.
request(webUrl, function(err, resp, body){
  if(err || resp.statusCode !== 200) {
    // Report the failure instead of silently doing nothing.
    console.error('Could not fetch ' + webUrl + ':', err || ('HTTP status ' + resp.statusCode));
    return;
  }
  var $ = cheerio.load(body);
  // Visit every <a> element of the document.
  $('a').each(function(){
    var url = $(this).attr('href');
    // Keep root-relative links only. An <a> without href yields undefined,
    // and "//host/path" (protocol-relative) points to another host even
    // though it starts with '/', so both are skipped.
    if(typeof url === 'string' && url.charAt(0) === '/' && url.charAt(1) !== '/') {
      // Prefix the site root to obtain the full URL.
      // NOTE(review): assumes `websiteUrl` has no trailing slash - confirm.
      urls.push(websiteUrl + url);
    }
  });
  console.log(urls);
});
但是用这种方法无法获取 <head> 中的内容。我试过下面的代码,但它只返回了 <body> 中的脚本,而没有返回 <head> 中的脚本:
// Download `webUrl`, log the `src` of every <script> element, and store the
// defined ones in `wowo`, which is logged once the scan is finished.
request(webUrl, function(error, response, html){
  // Nothing to do when the request failed or the status is not 200.
  if(error || response.statusCode != 200) {
    return;
  }
  var $ = cheerio.load(html);
  $('script').each(function(index, element){
    // Read the `src` attribute of the current <script> element; it is
    // undefined when the tag has no such attribute.
    var src = $(element).attr('src');
    console.log(src);
    // `!= null` rejects exactly null and undefined.
    if(src != null) {
      wowo.push(src);
    }
  });
  console.log(wowo);
});
有人可以帮我解决这个问题吗? :'(
我想用 Node.js 抓取网页的 <head> 部分,但不知道该怎么做。借助 cheerio,我可以像下面这样访问 <body> 中的全部内容:
// Fetch `webUrl` and collect every same-site link of the page into `urls`.
// A link is kept only when its href is root-relative ("/path"); each kept
// href is prefixed with `websiteUrl` and the final list is logged.
request(webUrl, function(err, resp, body){
  if(err || resp.statusCode !== 200) {
    // Report the failure instead of silently doing nothing.
    console.error('Could not fetch ' + webUrl + ':', err || ('HTTP status ' + resp.statusCode));
    return;
  }
  var $ = cheerio.load(body);
  // Visit every <a> element of the document.
  $('a').each(function(){
    var url = $(this).attr('href');
    // Keep root-relative links only. An <a> without href yields undefined,
    // and "//host/path" (protocol-relative) points to another host even
    // though it starts with '/', so both are skipped.
    if(typeof url === 'string' && url.charAt(0) === '/' && url.charAt(1) !== '/') {
      // Prefix the site root to obtain the full URL.
      // NOTE(review): assumes `websiteUrl` has no trailing slash - confirm.
      urls.push(websiteUrl + url);
    }
  });
  console.log(urls);
});
但是用这种方法无法获取 <head> 中的内容。我试过下面的代码,但它只返回了 <body> 中的脚本,而没有返回 <head> 中的脚本:
// Download `webUrl`, log the `src` of every <script> element, and store the
// defined ones in `wowo`, which is logged once the scan is finished.
request(webUrl, function(error, response, html){
  // Nothing to do when the request failed or the status is not 200.
  if(error || response.statusCode != 200) {
    return;
  }
  var $ = cheerio.load(html);
  $('script').each(function(index, element){
    // Read the `src` attribute of the current <script> element; it is
    // undefined when the tag has no such attribute.
    var src = $(element).attr('src');
    console.log(src);
    // `!= null` rejects exactly null and undefined.
    if(src != null) {
      wowo.push(src);
    }
  });
  console.log(wowo);
});
有人可以帮我解决这个问题吗? :'(