如何从单个数组中的多个页面获取链接
How to get links from multiple pages in a single array
我有一个工作代码可以成功地从至少有 20% 折扣的多个页面获取所有产品链接。唯一的问题是它 returns 分别链接到每个页面的数组中。但是,我希望它将所有页面的 return 链接放在一个数组中,然后将它们传输到另一个函数。我尝试创建一个字符串 var all_links = [] 并将每个页面的所有链接推送到其中,然后 return 它们就像 return all_links,正如我从更简单的例子。但是,我在这种情况下没有成功,因为我没有编码经验。三周前我开始学习基础知识。如果您能帮助我完成整个代码,我将不胜感激,因为我没有必要的先验知识。
const puppeteer = require('puppeteer')
const minDiscount = 20;
async function getLinks() {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
const url = 'https://www.mytoys.de/spielzeug-spiele/holz/';
await page.goto(url);
// getting all the products, this will return an array of ElementHandle
while(await page.$(".pager__link--next")){
await page.waitForSelector(".pager__link--next")
await page.waitForTimeout(1000);
await page.click('.pager__link--next')
await page.waitForTimeout(1500);
const products = await page.$$('.prod-grid.js-prod-grid .prod-grid__item.js-prod-grid_item');
const proms = await Promise.allSettled(
products.map(async (prod) => {
// searching for a discount on each product
const disc = await prod.$$eval(
'.prod-grid.js-prod-grid .prod-flag.prod-flag-sale',
(discount) =>
discount.map((discItem) =>
discItem.innerText.replace(/[^0-9.]/g, '').replace(/\D+/g,'0')
)
);
// if it has a discount
if (disc.length > 0) {
// we parse the discount to Integer type to compare it to minDiscount
const discountInt = parseInt(disc[0], 10);
if (discountInt >= minDiscount) {
// we get the link of the product
const link = await prod.$$eval('.prod-grid.js-prod-grid .prod-tile__link.js-prodlink', (allAs) => allAs.map((a) => a.href));
if (link.length > 0) {
// push an object containing the discount and the link of the product
return link[0];
}
}
}
return null;
})
);
const bulkArray = proms.map((item) => {
if (item.status === 'fulfilled') return item.value;
});
const endArray = bulkArray.filter(item => item !== null);
console.log(endArray);
}
}
getLinks();
我目前获得的结果示例
[
'https://www.mytoys.de/erzi-kinderwurst-sortiment-spiellebensmittel-6749036.html',
'https://www.mytoys.de/chr-tanner-spiellebensmittel-wurststaender-1031946.html',
'https://www.mytoys.de/hape-xylophon-und-hammerspiel-2503719.html',
'https://www.mytoys.de/erzi-kinderparty-spiellebensmittel-6749035.html',
]
[
'https://www.mytoys.de/brio-holzeisenbahnset-landleben-5501952.html',
'https://www.mytoys.de/brio-brio-33277-bahn-ir-reisezug-set-4592516.html',
'https://www.mytoys.de/brio-parkhaus-strassen-schienen-3175226.html',
'https://www.mytoys.de/mytoys-steckwuerfel-12-tlg-11389814.html',
'https://www.mytoys.de/brio-schienen-und-weichensortiment-1758325.html',
]
[
'https://www.mytoys.de/hape-grosser-baukran-4141517.html',
'https://www.mytoys.de/noris-mein-buntes-tuermchenspiel-3421170.html',
'https://www.mytoys.de/goki-ziehtier-schaf-suse-2488933.html',
'https://www.mytoys.de/eichhorn-colorsoundzug-mit-licht-1521635.html',
]
您希望获得的结果示例
[
'https://www.mytoys.de/erzi-kinderwurst-sortiment-spiellebensmittel-6749036.html',
'https://www.mytoys.de/chr-tanner-spiellebensmittel-wurststaender-1031946.html',
'https://www.mytoys.de/hape-xylophon-und-hammerspiel-2503719.html',
'https://www.mytoys.de/erzi-kinderparty-spiellebensmittel-6749035.html',
'https://www.mytoys.de/brio-holzeisenbahnset-landleben-5501952.html',
'https://www.mytoys.de/brio-brio-33277-bahn-ir-reisezug-set-4592516.html',
'https://www.mytoys.de/brio-parkhaus-strassen-schienen-3175226.html',
'https://www.mytoys.de/mytoys-steckwuerfel-12-tlg-11389814.html',
'https://www.mytoys.de/brio-schienen-und-weichensortiment-1758325.html',
'https://www.mytoys.de/hape-grosser-baukran-4141517.html',
'https://www.mytoys.de/noris-mein-buntes-tuermchenspiel-3421170.html',
'https://www.mytoys.de/goki-ziehtier-schaf-suse-2488933.html',
'https://www.mytoys.de/eichhorn-colorsoundzug-mit-licht-1521635.html',
]
- 在循环之前为链接收集声明新变量:
const allLinks = []; // <--
while(await page.$(".pager__link--next")){ ... }
- 将所有链接推入其中:
...
const endArray = bulkArray.filter(item => item !== null);
console.log(endArray);
allLinks.push(endArray); // <--
- Return / 循环执行后记录结果:
async function getLinks() {
...
return allLinks.flat(); // <--
}
console.log(await getLinks()) // result array
我有一个工作代码可以成功地从至少有 20% 折扣的多个页面获取所有产品链接。唯一的问题是它 returns 分别链接到每个页面的数组中。但是,我希望它将所有页面的 return 链接放在一个数组中,然后将它们传输到另一个函数。我尝试创建一个字符串 var all_links = [] 并将每个页面的所有链接推送到其中,然后 return 它们就像 return all_links,正如我从更简单的例子。但是,我在这种情况下没有成功,因为我没有编码经验。三周前我开始学习基础知识。如果您能帮助我完成整个代码,我将不胜感激,因为我没有必要的先验知识。
const puppeteer = require('puppeteer')
const minDiscount = 20;
async function getLinks() {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
const url = 'https://www.mytoys.de/spielzeug-spiele/holz/';
await page.goto(url);
// getting all the products, this will return an array of ElementHandle
while(await page.$(".pager__link--next")){
await page.waitForSelector(".pager__link--next")
await page.waitForTimeout(1000);
await page.click('.pager__link--next')
await page.waitForTimeout(1500);
const products = await page.$$('.prod-grid.js-prod-grid .prod-grid__item.js-prod-grid_item');
const proms = await Promise.allSettled(
products.map(async (prod) => {
// searching for a discount on each product
const disc = await prod.$$eval(
'.prod-grid.js-prod-grid .prod-flag.prod-flag-sale',
(discount) =>
discount.map((discItem) =>
discItem.innerText.replace(/[^0-9.]/g, '').replace(/\D+/g,'0')
)
);
// if it has a discount
if (disc.length > 0) {
// we parse the discount to Integer type to compare it to minDiscount
const discountInt = parseInt(disc[0], 10);
if (discountInt >= minDiscount) {
// we get the link of the product
const link = await prod.$$eval('.prod-grid.js-prod-grid .prod-tile__link.js-prodlink', (allAs) => allAs.map((a) => a.href));
if (link.length > 0) {
// push an object containing the discount and the link of the product
return link[0];
}
}
}
return null;
})
);
const bulkArray = proms.map((item) => {
if (item.status === 'fulfilled') return item.value;
});
const endArray = bulkArray.filter(item => item !== null);
console.log(endArray);
}
}
getLinks();
我目前获得的结果示例
[
'https://www.mytoys.de/erzi-kinderwurst-sortiment-spiellebensmittel-6749036.html',
'https://www.mytoys.de/chr-tanner-spiellebensmittel-wurststaender-1031946.html',
'https://www.mytoys.de/hape-xylophon-und-hammerspiel-2503719.html',
'https://www.mytoys.de/erzi-kinderparty-spiellebensmittel-6749035.html',
]
[
'https://www.mytoys.de/brio-holzeisenbahnset-landleben-5501952.html',
'https://www.mytoys.de/brio-brio-33277-bahn-ir-reisezug-set-4592516.html',
'https://www.mytoys.de/brio-parkhaus-strassen-schienen-3175226.html',
'https://www.mytoys.de/mytoys-steckwuerfel-12-tlg-11389814.html',
'https://www.mytoys.de/brio-schienen-und-weichensortiment-1758325.html',
]
[
'https://www.mytoys.de/hape-grosser-baukran-4141517.html',
'https://www.mytoys.de/noris-mein-buntes-tuermchenspiel-3421170.html',
'https://www.mytoys.de/goki-ziehtier-schaf-suse-2488933.html',
'https://www.mytoys.de/eichhorn-colorsoundzug-mit-licht-1521635.html',
]
您希望获得的结果示例
[
'https://www.mytoys.de/erzi-kinderwurst-sortiment-spiellebensmittel-6749036.html',
'https://www.mytoys.de/chr-tanner-spiellebensmittel-wurststaender-1031946.html',
'https://www.mytoys.de/hape-xylophon-und-hammerspiel-2503719.html',
'https://www.mytoys.de/erzi-kinderparty-spiellebensmittel-6749035.html',
'https://www.mytoys.de/brio-holzeisenbahnset-landleben-5501952.html',
'https://www.mytoys.de/brio-brio-33277-bahn-ir-reisezug-set-4592516.html',
'https://www.mytoys.de/brio-parkhaus-strassen-schienen-3175226.html',
'https://www.mytoys.de/mytoys-steckwuerfel-12-tlg-11389814.html',
'https://www.mytoys.de/brio-schienen-und-weichensortiment-1758325.html',
'https://www.mytoys.de/hape-grosser-baukran-4141517.html',
'https://www.mytoys.de/noris-mein-buntes-tuermchenspiel-3421170.html',
'https://www.mytoys.de/goki-ziehtier-schaf-suse-2488933.html',
'https://www.mytoys.de/eichhorn-colorsoundzug-mit-licht-1521635.html',
]
- 在循环之前为链接收集声明新变量:
const allLinks = []; // <--
while(await page.$(".pager__link--next")){ ... }
- 将所有链接推入其中:
...
const endArray = bulkArray.filter(item => item !== null);
console.log(endArray);
allLinks.push(endArray); // <--
- Return / 循环执行后记录结果:
async function getLinks() {
...
return allLinks.flat(); // <--
}
console.log(await getLinks()) // result array