在 puppeteer 中滚动页面时,动态 API 没有被调用

Dynamic API is not being called on scroll in Puppeteer

我正在尝试截取一个页面的屏幕截图,该页面中的图像会在滚动时动态加载。使用 puppeteer 滚动页面时,加载图像的 API 没有被调用,因此页面上的图像都没有加载出来。我正在使用下面的代码,请帮忙看看问题出在哪里。

const puppeteer = require('puppeteer');

    /**
     * Opens the new-arrivals page in a non-headless browser, scrolls through it
     * with autoScroll, and writes a full-page screenshot to yoursite.png.
     */
    async function run() {
        const targetUrl = 'https://www.chapters.indigo.ca/en-ca/new-arrivals/?link-usage=Header%3A%20New&mc=NewArrivals&lu=Main/';
        const viewport = { width: 1200, height: 800 };

        const browser = await puppeteer.launch({ headless: false });
        const page = await browser.newPage();

        // Navigation waits only for the "load" event; the viewport is applied afterwards.
        await page.goto(targetUrl, { waitUntil: 'load' });
        await page.setViewport(viewport);

        await autoScroll(page);

        await page.screenshot({ path: 'yoursite.png', fullPage: true });
        await browser.close();
    }

    run();
    
    /**
     * Scrolls the page down in 100px steps, one step every 100ms, until the
     * distance scrolled reaches the scroll height of the document body.
     *
     * @param {object} page - object exposing `evaluate`; the Puppeteer page here.
     * @returns {Promise<void>} settles once the in-page scrolling has stopped.
     */
    async function autoScroll(page) {
        // Runs inside the page, so it may only touch browser globals.
        const scrollToBottom = () => new Promise((done) => {
            const step = 100;
            let scrolled = 0;
            const ticker = setInterval(() => {
                // The height is re-read on every tick, so growth of the page is taken into account.
                const pageHeight = document.body.scrollHeight;
                window.scrollBy(0, step);
                scrolled += step;

                if (scrolled < pageHeight) {
                    return;
                }
                clearInterval(ticker);
                done();
            }, 100);
        });

        await page.evaluate(scrollToBottom);
    }

首先,打开页面时需要等待到 networkidle0(或至少 networkidle2),因为首屏中的所有图像都需要先加载完成。

// Wait for the network to go idle so the first-screen images are loaded before continuing.
const url = 'https://www.chapters.indigo.ca/en-ca/new-arrivals/?link-usage=Header%3A%20New&mc=NewArrivals&lu=Main/'
const navigationOptions = { waitUntil: 'networkidle0' }
await page.goto(url, navigationOptions)

添加一段关闭弹出窗口的代码
(需要放在 try-catch 子句内,因为弹出窗口是随机或偶尔才出现的)

// Best effort: the popup only appears some of the time, so a failed click is ignored.
const closePopupButton = 'section#browseCart-genericPopup-wrapper button[aria-label="Close Popup"]'
try {
    await page.click(closePopupButton)
} catch (error) {
    // Ignored on purpose — there may simply be no popup to close.
}

下面的这段代码需要在页面的 DOM 环境中运行(即放在 page.evaluate 内)。
先把所有产品图片列到一个数组中,以便逐一检查:

// Collect every product thumbnail into a real array so array methods can be used on it.
const productImageSelector = 'a[aria-label="Product image"] > img'
const productImages = Array.from(
    document.querySelectorAll(productImageSelector)
)

由于该网站对产品图片使用延迟加载,您需要等到所有图片的 src 属性不再以 data 开头、其值与 data-original 属性值相同,并且所有图片都已加载完成。


// The three per-image conditions used to decide whether an image has finished loading.
const completeLoading = image.complete
const currentSrc = image.getAttribute('src')
// true while src still starts with "data"
const imageNotLoading = currentSrc.startsWith('data')
// true once src has been swapped to the value of data-original
const imageIsOriginal = currentSrc === image.getAttribute('data-original')

要测试脚本,您可以使用如下所示的预定义网络仿真

// Network profiles for trying out the load-and-wait script on slow connections.
const { networkConditions } = puppeteer
const networkEmulation = {
    slow3G: networkConditions['Slow 3G'],
    fast3G: networkConditions['Fast 3G'],
    // hand-written profile, not one of Puppeteer's predefined ones
    dialUp: { download: 15000, upload: 15000, latency: 1000 }
}
    // To test the script, emulate one of the profiles above;
    // swap dialUp for another speed such as slow3G or fast3G.
await page.emulateNetworkConditions(networkEmulation.dialUp)




这是完整的脚本。
如果您认为此解决方案有用且正确,请不要忘记将它选为正确答案。

const puppeteer = require('puppeteer')
const url = 'https://www.chapters.indigo.ca/en-ca/new-arrivals/?link-usage=Header%3A%20New&mc=NewArrivals&lu=Main/'

// Lazily loaded product thumbnails that must finish loading before the screenshot.
const PRODUCT_IMAGE_SELECTOR = 'a[aria-label="Product image"] > img'
// How often the page is re-checked for images that are still loading.
const IMAGE_POLL_INTERVAL_MS = 500
// Upper bound for the image wait so the script cannot poll forever;
// raise it when testing under an emulated slow network.
const IMAGE_WAIT_LIMIT_MS = 60000

/**
 * Resolves after the given number of milliseconds.
 * Used instead of page.waitForTimeout, which newer Puppeteer versions no longer provide.
 *
 * @param {number} ms - delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms))

/**
 * Counts the product images that have not finished lazy loading yet.
 * An image counts as pending when it is not complete, when its src still
 * starts with "data", or when its src differs from its data-original attribute.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<number>} number of product images still pending.
 */
async function countPendingProductImages(page) {
    return page.evaluate((selector) => {
        const productImages = Array.from(document.querySelectorAll(selector))
        return productImages.filter(image => {
            // An <img> without a src attribute yields null; treat it as not loaded
            // instead of throwing on null.startsWith.
            const src = image.getAttribute('src') || ''
            const completeLoading = image.complete
            const imageNotLoading = src.startsWith('data')
            const imageIsOriginal = src === image.getAttribute('data-original')
            return !completeLoading || !imageIsOriginal || imageNotLoading
        }).length
    }, PRODUCT_IMAGE_SELECTOR)
}

;(async () => {
    const browser = await puppeteer.launch({
        headless: true
    })

    // try/finally guarantees the browser process is closed even when a step fails.
    try {
        const page = await browser.newPage()

        //  const networkEmulation = {
        //      slow3G: puppeteer.networkConditions['Slow 3G'],
        //      fast3G: puppeteer.networkConditions['Fast 3G'],
        //      dialUp: { download: 15000, upload: 15000, latency: 1000 }
        //  }
        // To test this load-and-wait script, uncomment the block above and the line below,
        // and pick the preferred speed such as slow3G or fast3G.
        //  await page.emulateNetworkConditions(networkEmulation.dialUp)

        await page.setViewport({
            width: 1200,
            height: 800
        })

        // timeout: 0 switches the navigation timeout off.
        await page.goto(url, { waitUntil: 'networkidle0', timeout: 0 })

        try {
            await page.click('section#browseCart-genericPopup-wrapper button[aria-label="Close Popup"]')
        } catch (error) {
            // Best effort: the popup is not always shown, so a failed click is ignored.
        }

        await autoScroll(page)

        // Poll until every product image has loaded, but give up after the wait limit.
        const deadline = Date.now() + IMAGE_WAIT_LIMIT_MS
        while ((await countPendingProductImages(page)) !== 0) {
            if (Date.now() >= deadline) {
                console.warn(`Some product images were still loading after ${IMAGE_WAIT_LIMIT_MS}ms; taking the screenshot anyway.`)
                break
            }
            await sleep(IMAGE_POLL_INTERVAL_MS)
        }

        await page.screenshot({
            path: 'yoursite.png',
            fullPage: true
        })
    } finally {
        await browser.close()
    }
})().catch(error => {
    // Report the failure instead of leaving an unhandled promise rejection.
    console.error(error)
    process.exitCode = 1
})

/**
 * Scrolls the page down step by step until the distance scrolled reaches the
 * scroll height of the document body, giving scroll-triggered content a
 * chance to start loading.
 *
 * @param {object} page - Puppeteer page (any object exposing `evaluate`).
 * @param {object} [options]
 * @param {number} [options.distance=100] - pixels scrolled on each tick; must be positive.
 * @param {number} [options.delay=100] - milliseconds between ticks.
 * @returns {Promise<void>} settles once the in-page scrolling has stopped.
 * @throws {RangeError} when `distance` is not a positive number, since the
 *     scroll loop would then never reach the bottom.
 */
async function autoScroll(page, { distance = 100, delay = 100 } = {}) {
    if (!(distance > 0)) {
        throw new RangeError(`distance must be a positive number, got ${distance}`)
    }

    // The callback runs inside the page, so the settings are passed in as
    // evaluate arguments rather than captured from this scope.
    await page.evaluate((step, interval) => new Promise((resolve) => {
        let totalHeight = 0
        const timer = setInterval(() => {
            // Re-read on every tick so growth of the page is taken into account.
            const scrollHeight = document.body.scrollHeight
            window.scrollBy(0, step)
            totalHeight += step
            if (totalHeight >= scrollHeight) {
                clearInterval(timer)
                resolve()
            }
        }, interval)
    }), distance, delay)
}