如何使用 cheerio 仅从 html 链接中获取文本
How to fetch only text from html links using cheerio
你好,我有一个网页有这样的HTML
<div class="css-content">
<div class="css-2aj">
<img src="" >
<div data-bn-type="text" id="/48" class="">Latest News</div>
</div>
<div class="css-6f9">
<div class="css-content">
<a data-bn-type="link" href="/en/blog/news/523hshhshhshhs3331adc0" class="css-1ej">US could be on cusp of new Covid surge</a>
<a data-bn-type="link" href="/en/blog/news/423hshhshhshhs3331adc0" class="css-1ej">Stop sharing your vaccine cards on social media</>
<a data-bn-type="link" href="/en/blog/news/2222hshhshhshhs3331adc0" class="css-1ej">Italians can be fined up to ,000 for selling the world's 'most dangerous' cheese</a>
<a data-bn-type="link" href="/en/blog/news/2223hshhshhshhs3331adc0" class="css-1ej">The Masked Singer' reveals the identity of The Phoenix<a/>
</div>
</div>
</div>
我想要这样的结果
美国可能正处于新的 Covid 浪潮的风口浪尖
意大利人可能因销售世界上“最危险”的奶酪而被罚款
《蒙面唱将》揭晓凤凰身份
这是我试过的
// Collect the href attribute of every anchor matched by `div > a` inside the
// element(s) selected by `div[class="css-6f9"]`, then print the collected array.
// Note: this records each link's href value, not its visible text.
const list = [];
const anchors = $('div[class="css-6f9"]').find('div > a');
anchors.each((_position, anchor) => {
  list.push($(anchor).attr('href'));
});
console.log(list);
结果为空数组
我是新手,不知道如何获取 <a></a> 标签中的文本
请帮助
试试这个。
不需要把 cheerio 本身当作 $ 使用;
先用 cheerio.load(html) 加载 HTML,再把返回值赋给 $:
// Sample markup to parse, kept verbatim (including its malformed `</>` and `<a/>` tags).
const html = `<div class="css-content">
<div class="css-2aj">
<img src="" >
<div data-bn-type="text" id="/48" class="">Latest News</div>
</div>
<div class="css-6f9">
<div class="css-content">
<a data-bn-type="link" href="/en/blog/news/523hshhshhshhs3331adc0" class="css-1ej">US could be on cusp of new Covid surge</a>
<a data-bn-type="link" href="/en/blog/news/423hshhshhshhs3331adc0" class="css-1ej">Stop sharing your vaccine cards on social media</>
<a data-bn-type="link" href="/en/blog/news/2222hshhshhshhs3331adc0" class="css-1ej">Italians can be fined up to ,000 for selling the world's 'most dangerous' cheese</a>
<a data-bn-type="link" href="/en/blog/news/2223hshhshhshhs3331adc0" class="css-1ej">The Masked Singer' reveals the identity of The Phoenix<a/>
</div>
</div>
</div>`;
const cheerio = require('cheerio');
const $ = cheerio.load(html);
// Trimmed text of every <a> that is a direct child of a .css-content element.
const list = $('.css-content > a')
  .map((_index, anchor) => $(anchor).text().trim())
  .toArray();
// Print only the non-empty entries.
// NOTE(review): the malformed tags in the sample may yield anchors with no text,
// which is presumably why blanks are filtered out — confirm against the parser output.
const nonEmptyTexts = list.filter((text) => text.length > 0);
console.log(nonEmptyTexts);
你好,我有一个网页有这样的HTML
<div class="css-content">
<div class="css-2aj">
<img src="" >
<div data-bn-type="text" id="/48" class="">Latest News</div>
</div>
<div class="css-6f9">
<div class="css-content">
<a data-bn-type="link" href="/en/blog/news/523hshhshhshhs3331adc0" class="css-1ej">US could be on cusp of new Covid surge</a>
<a data-bn-type="link" href="/en/blog/news/423hshhshhshhs3331adc0" class="css-1ej">Stop sharing your vaccine cards on social media</>
<a data-bn-type="link" href="/en/blog/news/2222hshhshhshhs3331adc0" class="css-1ej">Italians can be fined up to ,000 for selling the world's 'most dangerous' cheese</a>
<a data-bn-type="link" href="/en/blog/news/2223hshhshhshhs3331adc0" class="css-1ej">The Masked Singer' reveals the identity of The Phoenix<a/>
</div>
</div>
</div>
我想要这样的结果
美国可能正处于新的 Covid 浪潮的风口浪尖
意大利人可能因销售世界上“最危险”的奶酪而被罚款
《蒙面唱将》揭晓凤凰身份
这是我试过的
// Collect the href attribute of every anchor matched by `div > a` inside the
// element(s) selected by `div[class="css-6f9"]`, then print the collected array.
// Note: this records each link's href value, not its visible text.
const list = [];
const anchors = $('div[class="css-6f9"]').find('div > a');
anchors.each((_position, anchor) => {
  list.push($(anchor).attr('href'));
});
console.log(list);
结果为空数组
我是新手,不知道如何获取 <a></a> 标签中的文本
请帮助
试试这个。
不需要把 cheerio 本身当作 $ 使用;
先用 cheerio.load(html) 加载 HTML,再把返回值赋给 $:
// Sample markup to parse, kept verbatim (including its malformed `</>` and `<a/>` tags).
const html = `<div class="css-content">
<div class="css-2aj">
<img src="" >
<div data-bn-type="text" id="/48" class="">Latest News</div>
</div>
<div class="css-6f9">
<div class="css-content">
<a data-bn-type="link" href="/en/blog/news/523hshhshhshhs3331adc0" class="css-1ej">US could be on cusp of new Covid surge</a>
<a data-bn-type="link" href="/en/blog/news/423hshhshhshhs3331adc0" class="css-1ej">Stop sharing your vaccine cards on social media</>
<a data-bn-type="link" href="/en/blog/news/2222hshhshhshhs3331adc0" class="css-1ej">Italians can be fined up to ,000 for selling the world's 'most dangerous' cheese</a>
<a data-bn-type="link" href="/en/blog/news/2223hshhshhshhs3331adc0" class="css-1ej">The Masked Singer' reveals the identity of The Phoenix<a/>
</div>
</div>
</div>`;
const cheerio = require('cheerio');
const $ = cheerio.load(html);
// Trimmed text of every <a> that is a direct child of a .css-content element.
const list = $('.css-content > a')
  .map((_index, anchor) => $(anchor).text().trim())
  .toArray();
// Print only the non-empty entries.
// NOTE(review): the malformed tags in the sample may yield anchors with no text,
// which is presumably why blanks are filtered out — confirm against the parser output.
const nonEmptyTexts = list.filter((text) => text.length > 0);
console.log(nonEmptyTexts);