如何在 Node 中启动 puppeteer.launch() 的单个进程并向其发送页面?

How can I launch a single process of puppeteer.launch() and just send pages to it in Node?

以下代码在我的每个请求上运行,我担心它每次都会尝试启动浏览器并导致 Heroku 上的服务器出现问题。我想像 Singleton 实例一样启动 puppeteer,我只启动一次,然后我的请求将只触发 browser.newPage()。我没有 JS 经验来解决这个问题。

 (async () => {
      // Every run of this snippet launches its own headless browser and
      // opens a single tab in it.
      const browser = await puppeteer.launch({ headless: true });
      const page = await browser.newPage();

      // Body arrived: pass it on, then close the browser.
      const onBody = (body) => {
        handleResponse(body);
        browser.close();
      };

      // Anything in the chain failed: log it, then close the browser.
      const onBodyError = (err) => {
        console.error(`interceptedResponse error: ${err}`);
        browser.close();
      };

      // Runs once for each 'response' event emitted by the page.
      const onResponse = (response) => {
        let status = response.status();
        response.text().then(onBody).catch(onBodyError);
      };

      await page.on('response', onResponse);

      await page.goto(url);
    })();

您可以创建一个 class 来为您处理这个问题。它可能算不上“正规的单例”,但能做到您想要的事:

请看 browser.js:

var puppeteer = require('puppeteer')

/**
 * Lazily launches one shared puppeteer browser and hands out pages from it.
 *
 * The browser is started by the first call to getBrowser()/newPage() and is
 * reused by every later call, so a request handler only opens a new page.
 */
class PuppeteerApi {

    // The launched browser, or null while none is running.
    browser = null
    // Promise of a launch that is still in flight, or null.
    launching = null

    /**
     * @param {object} config - Options handed to puppeteer.launch().
     */
    constructor(config) {
        this.config = config
    }

    /**
     * Replace the launch options. Only launches started afterwards use them.
     * @param {object} config - Options handed to puppeteer.launch().
     */
    setConfig(config) {
        this.config = config
    }

    /**
     * Launch a new browser with the current config.
     * @returns {Promise<object>} Whatever puppeteer.launch() resolves to.
     */
    async newBrowser() {
        return await puppeteer.launch(this.config)
    }

    /**
     * Return the shared browser, launching it on first use.
     *
     * Callers that arrive while the launch is still in progress wait for that
     * same launch instead of starting a browser of their own.
     *
     * @returns {Promise<object>} The shared browser.
     * @throws Whatever newBrowser() rejects with; the next call retries.
     */
    async getBrowser() {
        if (this.browser) {
            return this.browser
        }

        // Start a launch only if none is in flight, otherwise join it.
        const launch = this.launching ??= this.newBrowser()
        try {
            this.browser = await launch
        } finally {
            // Clear the marker only if it is still ours, so a retry started
            // by another caller in the meantime is not forgotten.
            if (this.launching === launch) {
                this.launching = null
            }
        }

        return this.browser
    }

    /**
     * Open a new page in the shared browser.
     * @returns {Promise<object>} The page returned by browser.newPage().
     */
    async newPage() {
        const browser = await this.getBrowser()
        return await browser.newPage()
    }

    /**
     * Give a page back once the caller is done with it.
     * @param {object} page - A page obtained from newPage().
     */
    async handBack(page) {

        // close the page or even reuse it?.
        await page.close()

        // you could add logic for closing the whole browser instance depending what
        // you want.
    }

    /**
     * Close the shared browser, if there is one.
     *
     * Safe to call when nothing was ever launched. Afterwards the next
     * getBrowser() call launches a fresh browser.
     */
    async shutdown() {
        if (this.launching) {
            // Let an in-flight launch settle so its browser is closed too.
            // A launch failure is reported to the getBrowser() callers.
            await this.launching.catch(() => {})
        }

        const browser = this.browser
        if (!browser) {
            return
        }

        // Forget the instance first so nobody is handed a closed browser.
        this.browser = null
        await browser.close()
    }


}

// Default launch options; users of the instance can replace them with
// setConfig().
const config = {
    headless: false
}

// One instance for the whole process. It is exported CommonJS-style, matching
// the `require` used to load puppeteer in this file, so that
// `require('./browser.js')` yields the instance itself.
const browserApi = new PuppeteerApi(config)
module.exports = browserApi

// use it like:

// import and set config once!.
const browserApi = require('./browser.js')
const config = { headless: true }
browserApi.setConfig(config)

// in a request handler you could do this:
// (the leading `;` keeps the parser from reading the parenthesised function
// as a call on the result of the setConfig(...) statement above)
;(async () => {

    const page = await browserApi.newPage()

    try {
        // do some stuff..
    } finally {
        // in the end hand the page back, either for closing it
        // or maybe for putting it in a pool - even if the work above threw.
        await browserApi.handBack(page)
    }
})()

我不清楚 puppeteer 在同时打开(例如)30 个页面时的表现如何。下面是一个示例,它可以并行使用给定数量的 browser 实例。

var puppeteer = require('puppeteer')

/**
 * Round-robin pool of at most `browserLimit` puppeteer browsers.
 *
 * A slot's browser is launched the first time that slot is handed out;
 * successive getBrowser() calls cycle through the slots in order.
 */
class PuppeteerApi {

    // Launched browsers, indexed by slot.
    browsers = []
    // In-flight launch promises, indexed by slot.
    pending = []
    // Slot the next getBrowser() call will use.
    index = 0

    /**
     * @param {number} browserLimit - Maximum number of browsers, at least 1.
     * @param {object} config - Options handed to puppeteer.launch().
     * @throws {Error} If browserLimit is not a number that is >= 1.
     */
    constructor(browserLimit, config) {
        this.config = config
        this.browserLimit = browserLimit

        // `!(x >= 1)` instead of `x < 1` so that NaN is rejected as well.
        if (typeof this.browserLimit !== 'number' || !(this.browserLimit >= 1)) {
            throw new Error('BrowserLimit needs atleast to be 1!!')
        }
    }

    /**
     * Replace the launch options. Only launches started afterwards use them.
     * @param {object} config - Options handed to puppeteer.launch().
     */
    setConfig(config) {
        this.config = config
    }

    /**
     * Launch a new browser with the current config.
     * @returns {Promise<object>} Whatever puppeteer.launch() resolves to.
     */
    async newBrowser() {
        return await puppeteer.launch(this.config)
    }

    /**
     * Return the browser of the next slot, launching it on first use.
     *
     * @returns {Promise<object>} The browser assigned to the claimed slot.
     * @throws Whatever newBrowser() rejects with; the slot stays empty.
     */
    async getBrowser() {
        if (this.index >= this.browserLimit) {
            this.index = 0
        }

        // Claim the slot before the first await. Reading this.index again
        // after the launch would see the value moved on by other callers.
        const slot = this.index++

        if (this.browsers[slot]) {
            return this.browsers[slot]
        }

        // Callers that hit the same empty slot share a single launch.
        const launch = this.pending[slot] ??= this.newBrowser()
        try {
            this.browsers[slot] = await launch
        } finally {
            if (this.pending[slot] === launch) {
                this.pending[slot] = undefined
            }
        }

        return this.browsers[slot]
    }

    /**
     * Open a new page in the next browser of the rotation.
     * @returns {Promise<object>} The page returned by browser.newPage().
     */
    async newPage() {
        const browser = await this.getBrowser()
        return await browser.newPage()
    }

    /**
     * Give a page back once the caller is done with it; the page is closed.
     * @param {object} page - A page obtained from newPage().
     */
    async handBack(page) {
        await page.close()
    }

    /**
     * Close every browser in the pool and empty it.
     *
     * Afterwards getBrowser() starts again at slot 0 with fresh browsers.
     */
    async shutdown() {
        // Let in-flight launches settle so their browsers are closed too;
        // launch failures are reported to the getBrowser() callers.
        await Promise.allSettled(this.pending.filter(Boolean))

        const open = this.browsers.filter(Boolean)
        this.browsers = []
        this.index = 0
        await Promise.all(open.map(b => b.close()))
    }

}

// Default launch options; users of the instance can replace them with
// setConfig().
const config = {
    headless: false
}

// Upper bound on the number of browsers the pool will launch.
const limit = 5

// One pool for the whole process. It is exported CommonJS-style, matching the
// `require` used to load puppeteer in this file.
const browserApi = new PuppeteerApi(limit, config)
module.exports = browserApi

如果你更喜欢函数式风格(代码更少),改写起来也很快。下面是第一个例子的函数式版本:

var puppeteer = require('puppeteer')

// The shared browser once launched, or null while none is running.
let browser = null
// Promise of a launch that is still in flight, or null.
let launching = null

// Options for the next launch; replace them with setPuppeteerConfig().
let config = {
    headless: false
}

// Launch a browser with the module-level `config`. An arrow function has no
// `this` of its own, so the options cannot be read from `this.config` here.
const newBrowser = async() => {
    return await puppeteer.launch(config)
}

/**
 * Replace the launch options. Only launches started afterwards use them.
 * @param {object} _config - Options handed to puppeteer.launch().
 */
export const setPuppeteerConfig = (_config) => {
    config = _config
}

// Return the shared browser, launching it on first use. Callers that arrive
// while the launch is in progress wait for that same launch.
const getBrowser = async() => {
    if (browser) {
        return browser
    }

    const launch = launching ??= newBrowser()
    try {
        browser = await launch
    } finally {
        // Clear the marker only if it is still ours, so a retry started by
        // another caller in the meantime is not forgotten.
        if (launching === launch) {
            launching = null
        }
    }
    return browser
}

/**
 * Open a new page in the shared browser.
 * @returns {Promise<object>} The page returned by browser.newPage().
 */
export const getPage = async() => {
    const instance = await getBrowser()
    return await instance.newPage()
}

/**
 * Give a page back once the caller is done with it; the page is closed.
 * @param {object} page - A page obtained from getPage().
 */
export const handback = async(page) => {
    await page.close()
}

/**
 * Close the shared browser, if there is one. Safe to call when nothing was
 * launched; afterwards the next getPage() launches a fresh browser.
 */
export const shutdown = async() => {
    if (launching) {
        // Let an in-flight launch settle so its browser is closed too.
        // A launch failure is reported to the getPage() callers.
        await launching.catch(() => {})
    }

    const instance = browser
    if (!instance) {
        return
    }

    // Forget the instance first so nobody is handed a closed browser.
    browser = null
    await instance.close()
}

// usage:

const { setPuppeteerConfig , shutdown, getPage, handback } = require('./browser')
// setconfig..
// (the leading `;` keeps the parser from reading the parenthesised function
// as a call on the result of require(...) above)
;(async () => {
    const page = await getPage()

    try {
        // do some stuff..
    } finally {
        // hand the page back even if the work above threw.
        await handback(page)
    }
})() // the trailing () actually runs the function

如果有任何内容无法正常工作,请随时发表评论。