如何在 Node 中启动 puppeteer.launch() 的单个进程并向其发送页面?
How can I launch a single process of puppeteer.launch() and just send pages to it in Node?
以下代码在我的每个请求上运行,我担心它每次都会尝试启动浏览器并导致 Heroku 上的服务器出现问题。我想像 Singleton 实例一样启动 puppeteer,我只启动一次,然后我的请求将只触发 browser.newPage()。我没有 JS 经验来解决这个问题。
(async () => {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.on('response', interceptedResponse =>{
let status = interceptedResponse.status();
interceptedResponse.text()
.then((text) => {
handleResponse(text)
browser.close();
})
.catch(err => {
console.error(`interceptedResponse error: ${err}`)
browser.close();
});
});
await page.goto(url);
})();
您可以创建一个 class 来为您处理这个问题。它可能不是“官方单例”,但 id 做你想做的事:
结帐browser.js
:
var puppeteer = require('puppeteer')
class PuppeteerApi {
browser = null
constructor(config) {
this.config = config
}
setConfig(config) {
this.config = config
}
async newBrowser() {
return await puppeteer.launch(this.config)
}
async getBrowser() {
if (!this.browser) {
this.browser = await this.newBrowser()
}
return this.browser
}
async newPage() {
const browser = await this.getBrowser()
const page = await browser.newPage()
return page
}
async handBack(page) {
// close the page or even reuse it?.
await page.close()
// you could add logic for closing the whole browser instance depending what
// you want.
}
async shutdown() {
await this.browser.close()
}
}
const config = {
headless: false
}
const browserApi = new PuppeteerApi(config)
export default browserApi
// use it like:
// import and set config once!.
var browserApi = require('./browser.js')
const config = { headless: true }
browserApi.setConfig(config)
// in an request handler you could do this:
(async () => {
var page = await browserApi.newPage()
// do some stuff..
// in the end hand the page back for eitehr closing it
// or maybe putting it in a pool? .
await browser.handBack(page)
})()
我不知道 puppeteer
例如打开 30 页时的行为。这是一个示例,可以并行打开给定数量的 browser
个实例。
var puppeteer = require('puppeteer')
class PuppeteerApi {
browsers = []
index = 0
constructor(browserLimit, config) {
this.config = config
this.browserLimit = browserLimit
if (typeof this.browserLimit !== 'number' || this.browserLimit < 1) {
throw 'BrowserLimit needs atleast to be 1!!'
}
}
setConfig(config) {
this.config = config
}
async newBrowser() {
return await puppeteer.launch(this.config)
}
async getBrowser() {
if (this.index >= this.browserLimit) {
this.index = 0
}
if (!this.browsers[this.index]) {
this.browsers[this.index] = await this.newBrowser()
}
// iterate through browsers.
return this.browsers[this.index++]
}
async newPage() {
const browser = await this.getBrowser()
const page = await browser.newPage()
return page
}
async handBack(page) {
await page.close()
}
async shutdown() {
const proms = this.browsers.map(b => b.close())
await Promise.all(proms)
}
}
const config = {
headless: false
}
const limit = 5
const browserApi = new PuppeteerApi(limit, config)
export default browserApi
如果你喜欢函数式风格(代码更少),适应起来很快。这是第一个例子:
var puppeteer = require('puppeteer')
let browser = null
let config = {
headless: false
}
const newBrowser = async() => {
return await puppeteer.launch(this.config)
}
export const setPuppeteerConfig = (_config) => {
config = _config
}
export const getPage = async() => {
const browser = await getBrowser()
return await browser.newPage()
}
const getBrowser = async() => {
if (!browser) {
browser = await newBrowser()
}
return browser
}
export const handback = async(page) => {
await page.close()
}
export const shutdown = async() => {
await browser.close()
}
// usage:
const { setPuppeteerConfig , shutdown, getPage, handback } = require('./browser')
// setconfig..
(async () => {
const page = await getPage()
// do some stuff..
await handback(page)
})
如果有任何内容无法正常工作,请随时发表评论。
以下代码在我的每个请求上运行,我担心它每次都会尝试启动浏览器并导致 Heroku 上的服务器出现问题。我想像 Singleton 实例一样启动 puppeteer,我只启动一次,然后我的请求将只触发 browser.newPage()。我没有 JS 经验来解决这个问题。
(async () => {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
await page.on('response', interceptedResponse =>{
let status = interceptedResponse.status();
interceptedResponse.text()
.then((text) => {
handleResponse(text)
browser.close();
})
.catch(err => {
console.error(`interceptedResponse error: ${err}`)
browser.close();
});
});
await page.goto(url);
})();
您可以创建一个 class 来为您处理这个问题。它可能不是“官方单例”,但 id 做你想做的事:
结帐browser.js
:
var puppeteer = require('puppeteer')
class PuppeteerApi {
browser = null
constructor(config) {
this.config = config
}
setConfig(config) {
this.config = config
}
async newBrowser() {
return await puppeteer.launch(this.config)
}
async getBrowser() {
if (!this.browser) {
this.browser = await this.newBrowser()
}
return this.browser
}
async newPage() {
const browser = await this.getBrowser()
const page = await browser.newPage()
return page
}
async handBack(page) {
// close the page or even reuse it?.
await page.close()
// you could add logic for closing the whole browser instance depending what
// you want.
}
async shutdown() {
await this.browser.close()
}
}
const config = {
headless: false
}
const browserApi = new PuppeteerApi(config)
export default browserApi
// use it like:
// import and set config once!.
var browserApi = require('./browser.js')
const config = { headless: true }
browserApi.setConfig(config)
// in an request handler you could do this:
(async () => {
var page = await browserApi.newPage()
// do some stuff..
// in the end hand the page back for eitehr closing it
// or maybe putting it in a pool? .
await browser.handBack(page)
})()
我不知道 puppeteer
例如打开 30 页时的行为。这是一个示例,可以并行打开给定数量的 browser
个实例。
var puppeteer = require('puppeteer')
class PuppeteerApi {
browsers = []
index = 0
constructor(browserLimit, config) {
this.config = config
this.browserLimit = browserLimit
if (typeof this.browserLimit !== 'number' || this.browserLimit < 1) {
throw 'BrowserLimit needs atleast to be 1!!'
}
}
setConfig(config) {
this.config = config
}
async newBrowser() {
return await puppeteer.launch(this.config)
}
async getBrowser() {
if (this.index >= this.browserLimit) {
this.index = 0
}
if (!this.browsers[this.index]) {
this.browsers[this.index] = await this.newBrowser()
}
// iterate through browsers.
return this.browsers[this.index++]
}
async newPage() {
const browser = await this.getBrowser()
const page = await browser.newPage()
return page
}
async handBack(page) {
await page.close()
}
async shutdown() {
const proms = this.browsers.map(b => b.close())
await Promise.all(proms)
}
}
const config = {
headless: false
}
const limit = 5
const browserApi = new PuppeteerApi(limit, config)
export default browserApi
如果你喜欢函数式风格(代码更少),适应起来很快。这是第一个例子:
var puppeteer = require('puppeteer')
let browser = null
let config = {
headless: false
}
const newBrowser = async() => {
return await puppeteer.launch(this.config)
}
export const setPuppeteerConfig = (_config) => {
config = _config
}
export const getPage = async() => {
const browser = await getBrowser()
return await browser.newPage()
}
const getBrowser = async() => {
if (!browser) {
browser = await newBrowser()
}
return browser
}
export const handback = async(page) => {
await page.close()
}
export const shutdown = async() => {
await browser.close()
}
// usage:
const { setPuppeteerConfig , shutdown, getPage, handback } = require('./browser')
// setconfig..
(async () => {
const page = await getPage()
// do some stuff..
await handback(page)
})
如果有任何内容无法正常工作,请随时发表评论。