Web-scraping - 如何使用 Puppeteer JS 在存在可用链接时进行导航
Web-scraping - How to navigate whenever there is an available link with Puppeteer JS
我想抓取 https://data.anbima.com.br/debentures/AGRU12/agenda 页面中主表格 tbody 内的所有数据。然而,由于该表格使用了分页,我没能轻松做到这一点。我写出了下面这段无法运行的代码:尽管我已经在 while 循环之前定义了 list,仍然收到错误 ReferenceError: list is not defined。
// Question's original script: opens the ANBIMA agenda page with Puppeteer,
// tries to collect the table rows of every paginated page, and writes the
// result to eventDates.json.
// NOTE: as posted, this script fails with `ReferenceError: list is not defined`;
// the comments below mark where and why.
const puppeteer = require('puppeteer');
const fs = require('fs');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(`https://data.anbima.com.br/debentures/AGRU12/agenda`);
// Wait until an element with class `normal-text` exists before reading the DOM.
await page.waitForSelector('.normal-text');
// Declared in the Node.js script, before the loop.
var list = [];
while (true) {
// Node-side variable; no Node-side code ever assigns it.
let nextButton;
// The callback passed to page.evaluate is executed inside the browser page,
// not in this Node.js scope, so `list` and `nextButton` above are not
// reachable from within it.
await page.evaluate(async () => {
const nodeList = document.querySelectorAll(
'.anbima-ui-table > tbody > tr'
);
let nodeArray = [...nodeList];
// This assignment happens in the page, not on the Node-side `nextButton`.
nextButton = document.querySelector('.anbima-ui-pagination__next-button');
// For each row, take the first child element of every cell and strip the
// `<label class="flag__children">` wrapper from its innerHTML.
let listA = nodeArray
.map((tbody) => [...tbody.children].map((td) => [...td.children]))
.map((tr) =>
tr.map((span) =>
span[0].innerHTML
.replace('<label class="flag__children">', '')
.replace('</label>', '')
)
);
// Source of the reported ReferenceError: `list` does not exist in the page.
list.push(listA);
});
// The Node-side `nextButton` is still undefined here, so even without the
// error above the loop would stop after the first page.
if (!nextButton) {
break;
} else {
await page.goto(nextButton.href);
}
}
// Only the first pushed element (`list[0]`) is serialized. The write is
// callback-based and not awaited; browser.close() below does not wait for it.
fs.writeFile('eventDates.json', JSON.stringify(list[0], null, 2), (err) => {
if (err) throw new Error('Something went wrong');
console.log('well done you got the dates');
});
await browser.close();
})();
`list` 在回调函数中未定义,因为传给 page.evaluate 的回调是在浏览器页面中执行的,无法访问 Node 脚本里的变量。您需要在 page.evaluate 的回调中 return 该数组,然后在外部把返回的数组 push 到 list 中。
// Rows scraped from every visited page, one inner array of cell strings per row.
const list = [];
while (true) {
  // The callback runs inside the browser page, so it cannot see Node-side
  // variables. Everything needed afterwards is handed back as plain,
  // serializable data: the row contents and the URL of the next page.
  const { rows, nextHref } = await page.evaluate(() => {
    const rowNodes = document.querySelectorAll('.anbima-ui-table > tbody > tr');
    const nextButton = document.querySelector(
      '.anbima-ui-pagination__next-button'
    );
    return {
      rows: [...rowNodes].map((tr) =>
        [...tr.children].map((td) => {
          // Use the first child element of the cell; an empty cell yields ''
          // instead of throwing on `undefined.innerHTML`.
          const firstChild = td.children[0];
          if (!firstChild) {
            return '';
          }
          return firstChild.innerHTML
            .replace('<label class="flag__children">', '')
            .replace('</label>', '');
        })
      ),
      // NOTE(review): assumes the next button is a link exposing `href` —
      // TODO confirm. If it is a plain <button>, this is null and the loop
      // stops after the first page; clicking it would be needed instead.
      nextHref: nextButton && nextButton.href ? nextButton.href : null,
    };
  });
  list.push(...rows);
  // Stop when there is no next page, or when "next" points at the current
  // page (which would otherwise loop forever).
  if (!nextHref || nextHref === page.url()) {
    break;
  }
  await page.goto(nextHref);
  // Same readiness check the script uses after the initial navigation.
  await page.waitForSelector('.normal-text');
}
编辑:更正了示例中的最后一行。
我想抓取 https://data.anbima.com.br/debentures/AGRU12/agenda 页面中主表格 tbody 内的所有数据。然而,由于该表格使用了分页,我没能轻松做到这一点。我写出了下面这段无法运行的代码:尽管我已经在 while 循环之前定义了 list,仍然收到错误 ReferenceError: list is not defined。
// Question's original script: opens the ANBIMA agenda page with Puppeteer,
// tries to collect the table rows of every paginated page, and writes the
// result to eventDates.json.
// NOTE: as posted, this script fails with `ReferenceError: list is not defined`;
// the comments below mark where and why.
const puppeteer = require('puppeteer');
const fs = require('fs');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto(`https://data.anbima.com.br/debentures/AGRU12/agenda`);
// Wait until an element with class `normal-text` exists before reading the DOM.
await page.waitForSelector('.normal-text');
// Declared in the Node.js script, before the loop.
var list = [];
while (true) {
// Node-side variable; no Node-side code ever assigns it.
let nextButton;
// The callback passed to page.evaluate is executed inside the browser page,
// not in this Node.js scope, so `list` and `nextButton` above are not
// reachable from within it.
await page.evaluate(async () => {
const nodeList = document.querySelectorAll(
'.anbima-ui-table > tbody > tr'
);
let nodeArray = [...nodeList];
// This assignment happens in the page, not on the Node-side `nextButton`.
nextButton = document.querySelector('.anbima-ui-pagination__next-button');
// For each row, take the first child element of every cell and strip the
// `<label class="flag__children">` wrapper from its innerHTML.
let listA = nodeArray
.map((tbody) => [...tbody.children].map((td) => [...td.children]))
.map((tr) =>
tr.map((span) =>
span[0].innerHTML
.replace('<label class="flag__children">', '')
.replace('</label>', '')
)
);
// Source of the reported ReferenceError: `list` does not exist in the page.
list.push(listA);
});
// The Node-side `nextButton` is still undefined here, so even without the
// error above the loop would stop after the first page.
if (!nextButton) {
break;
} else {
await page.goto(nextButton.href);
}
}
// Only the first pushed element (`list[0]`) is serialized. The write is
// callback-based and not awaited; browser.close() below does not wait for it.
fs.writeFile('eventDates.json', JSON.stringify(list[0], null, 2), (err) => {
if (err) throw new Error('Something went wrong');
console.log('well done you got the dates');
});
await browser.close();
})();
`list` 在回调函数中未定义,因为传给 page.evaluate 的回调是在浏览器页面中执行的,无法访问 Node 脚本里的变量。您需要在 page.evaluate 的回调中 return 该数组,然后在外部把返回的数组 push 到 list 中。
// Rows scraped from every visited page, one inner array of cell strings per row.
const list = [];
while (true) {
  // The callback runs inside the browser page, so it cannot see Node-side
  // variables. Everything needed afterwards is handed back as plain,
  // serializable data: the row contents and the URL of the next page.
  const { rows, nextHref } = await page.evaluate(() => {
    const rowNodes = document.querySelectorAll('.anbima-ui-table > tbody > tr');
    const nextButton = document.querySelector(
      '.anbima-ui-pagination__next-button'
    );
    return {
      rows: [...rowNodes].map((tr) =>
        [...tr.children].map((td) => {
          // Use the first child element of the cell; an empty cell yields ''
          // instead of throwing on `undefined.innerHTML`.
          const firstChild = td.children[0];
          if (!firstChild) {
            return '';
          }
          return firstChild.innerHTML
            .replace('<label class="flag__children">', '')
            .replace('</label>', '');
        })
      ),
      // NOTE(review): assumes the next button is a link exposing `href` —
      // TODO confirm. If it is a plain <button>, this is null and the loop
      // stops after the first page; clicking it would be needed instead.
      nextHref: nextButton && nextButton.href ? nextButton.href : null,
    };
  });
  list.push(...rows);
  // Stop when there is no next page, or when "next" points at the current
  // page (which would otherwise loop forever).
  if (!nextHref || nextHref === page.url()) {
    break;
  }
  await page.goto(nextHref);
  // Same readiness check the script uses after the initial navigation.
  await page.waitForSelector('.normal-text');
}
编辑:更正了示例中的最后一行。