通过 puppeteer 库进行多选择器搜索
Multiple selector search via puppeteer library
我使用 puppeteer 来获取有关商店的数据。我使用 p.shop-page-content__text_large, span.shop-list-item__address
选择器进行搜索,但我 运行 遇到这样一个问题,即页面上只能出现其中一个。我试图通过以下方式解决问题,但它不起作用。告诉我如何解决这个问题?
const puppeteer = require('puppeteer');
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
async function getData(page, selector) {
return await page.$$eval(selector, info => info.map((data) => {
let str = data.textContent.trim(),
from = str.search(','),
to = str.length;
return {
'COUNTRY': 'unknow',
'STREET' : str.substring(from, to)
}
}));
}
const result = [];
for (let val of cities) {
console.log(val.LINK, val.CITY);
const page = await browser.newPage();
await page.goto('https://www.example-site.ru' + val.LINK);
data = await page.waitForFunction('.shop-page-content').then(async() => {
console.log('ok');
return await getData(page, 'p.shop-page-content__text_large');
}).catch(async (e) => {
console.log('fail');
await page.waitForSelector('.shops-info__section');
return await getData(page, 'span.shop-list-item__address');
// result.push(data);
});
result.push(data);
await browser.close();
}
console.log(result);
结果是这样的:
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
const page = await browser.newPage();
const result = [];
for (let val of cities) {
await page.goto('https://www.example-site.ru' + val.LINK);
const list = await page.evaluate(() => {
const data = [];
const elements = document.querySelectorAll('p.shop-page-content__text_large').length
? document.querySelectorAll('p.shop-page-content__text_large')
: document.querySelectorAll('span.shop-list-item__address');
for (const element of elements) {
data.push(element.innerText);
}
return data;
});
result.push({
link: val.LINK,
city: val.CITY,
list
})
}
await browser.close();
我使用 puppeteer 来获取有关商店的数据。我使用 p.shop-page-content__text_large, span.shop-list-item__address
选择器进行搜索,但我 运行 遇到这样一个问题,即页面上只能出现其中一个。我试图通过以下方式解决问题,但它不起作用。告诉我如何解决这个问题?
const puppeteer = require('puppeteer');
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
async function getData(page, selector) {
return await page.$$eval(selector, info => info.map((data) => {
let str = data.textContent.trim(),
from = str.search(','),
to = str.length;
return {
'COUNTRY': 'unknow',
'STREET' : str.substring(from, to)
}
}));
}
const result = [];
for (let val of cities) {
console.log(val.LINK, val.CITY);
const page = await browser.newPage();
await page.goto('https://www.example-site.ru' + val.LINK);
data = await page.waitForFunction('.shop-page-content').then(async() => {
console.log('ok');
return await getData(page, 'p.shop-page-content__text_large');
}).catch(async (e) => {
console.log('fail');
await page.waitForSelector('.shops-info__section');
return await getData(page, 'span.shop-list-item__address');
// result.push(data);
});
result.push(data);
await browser.close();
}
console.log(result);
结果是这样的:
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
const page = await browser.newPage();
const result = [];
for (let val of cities) {
await page.goto('https://www.example-site.ru' + val.LINK);
const list = await page.evaluate(() => {
const data = [];
const elements = document.querySelectorAll('p.shop-page-content__text_large').length
? document.querySelectorAll('p.shop-page-content__text_large')
: document.querySelectorAll('span.shop-list-item__address');
for (const element of elements) {
data.push(element.innerText);
}
return data;
});
result.push({
link: val.LINK,
city: val.CITY,
list
})
}
await browser.close();