如何检查网络流量并获取资源请求的URL?
How to inspect network traffic and get the URL of resource requests?
我想监控某个页面的网络活动,并获取所有 JavaScript 网络事件的 URL,类似于 PhantomJS 的 page.onResourceRequested
所做的事情,但我不知道如何用 Google Chrome 的 Puppeteer 实现这一点。
我一直在尝试 Google Chrome 的 Puppeteer,但始终没弄明白该怎么用,因为它的输出看起来像这样:
Page {
domain: null,
_events: {
request: [Function]
},
_eventsCount: 1,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: {
'Page.frameAttached': [Function],
'Page.frameNavigated': [Function],
'Page.frameDetached': [Function],
'Runtime.executionContextCreated': [Function],
'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
'Network.responseReceived': [Function: bound _onResponseReceived],
'Network.loadingFinished': [Function: bound _onLoadingFinished],
'Network.loadingFailed': [Function: bound _onLoadingFailed],
'Page.loadEventFired': [Function],
'Runtime.consoleAPICalled': [Function],
'Page.javascriptDialogOpening': [Function],
'Runtime.exceptionThrown': [Function],
'Security.certificateError': [Function],
'Inspector.targetCrashed': [Function]
},
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: Connection {
domain: null,
_events: {},
_eventsCount: 0,
_maxListeners: undefined,
_url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
_lastId: 17,
_callbacks: Map {},
_delay: 0,
_ws: [Object],
_sessions: [Object]
},
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_modifiers: 0,
_pressedKeys: Set {}
},
_mouse: Mouse {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: [Object],
_modifiers: 0,
_pressedKeys: Set {}
},
_x: 0,
_y: 0,
_button: 'none'
},
_frameManager: FrameManager {
domain: null,
_events: {
frameattached: [Function],
framedetached: [Function],
framenavigated: [Function]
},
_eventsCount: 3,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_mouse: Mouse {
_client: [Object],
_keyboard: [Object],
_x: 0,
_y: 0,
_button: 'none'
},
_frames: Map {
'232.1' => [Object]
},
_mainFrame: Frame {
_client: [Object],
_mouse: [Object],
_parentFrame: null,
_url: 'http://mytestdomain.com/',
_id: '232.1',
_defaultContextId: 4,
_waitTasks: Set {},
_childFrames: Set {},
_name: undefined,
_loadingFailed: false
}
},
_networkManager: NetworkManager {
domain: null,
_events: {
request: [Function],
response: [Function],
requestfailed: [Function],
requestfinished: [Function]
},
_eventsCount: 4,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_requestIdToRequest: Map {},
_interceptionIdToRequest: Map {
null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
},
_extraHTTPHeaders: Map {},
_requestInterceptionEnabled: true,
_requestHashToRequestIds: Multimap {
_map: [Object]
},
_requestHashToInterceptions: Multimap {
_map: Map {}
}
},
_emulationManager: EmulationManager {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_emulatingMobile: false,
_injectedTouchScriptId: null
},
_tracing: Tracing {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_recording: false,
_path: ''
},
_pageBindings: Map {},
_ignoreHTTPSErrors: false,
_screenshotTaskQueue: TaskQueue {
_chain: Promise {
undefined
}
},
_viewport: {
width: 800,
height: 600
}
}
你能告诉我如何使用 Puppeteer 获取 JavaScript 网络事件的所有 URL 吗?
可以参考下面这个拦截图片请求的示例(sample),稍作修改即可用于查看其他类型的资源请求:
// Enable request interception before navigating; each intercepted request
// must then be explicitly continued by the listener below.
// NOTE: this method has since been renamed to page.setRequestInterception(value).
await page.setRequestInterceptionEnabled(true);

page.on('request', (interceptedRequest) => {
  const isScript = /\.js$/i.test(interceptedRequest.url);
  if (isScript) {
    // request for js resource
  }
  // Let every request proceed, whether or not it matched.
  interceptedRequest.continue();
});

await page.goto('https://example.com');
我偶然看到这篇帖子,需要补充的是:setRequestInterceptionEnabled 已重命名为
page.setRequestInterception(value)
下面是我在文档中找到的一段代码:
const puppeteer = require('puppeteer');

// Launches a browser, aborts every request whose URL ends in .png or .jpg,
// lets all other requests through, then loads https://example.com.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // Older Puppeteer releases expose `url` as a plain property, newer ones
      // as a method; accept both so `.endsWith` is always called on a string.
      const url = typeof interceptedRequest.url === 'function'
        ? interceptedRequest.url()
        : interceptedRequest.url;
      if (url.endsWith('.png') || url.endsWith('.jpg')) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto('https://example.com');
  } finally {
    // Always shut the browser down, even if page creation or navigation
    // fails, so no browser process is left running.
    await browser.close();
  }
});
NOTE Enabling request interception disables page caching.
这里是 puppeteer 文档的 URL:Puppeteer Documentation
我认为使用 page.on() 事件侦听器
可以更准确地解决这个问题,而且不会干扰网络流量。
类似于:
// Log every request and response object the page emits.
// 'requestfinished' and 'requestfailed' are other events that can be listened to.
page.on('request', (request) => {
  console.log(request);
});
page.on('response', (response) => {
  console.log(response);
});
我想监控某个页面的网络活动,并获取所有 JavaScript 网络事件的 URL,类似于 PhantomJS 的 page.onResourceRequested
所做的事情,但我不知道如何用 Google Chrome 的 Puppeteer 实现这一点。
我一直在尝试 Google Chrome 的 Puppeteer,但始终没弄明白该怎么用,因为它的输出看起来像这样:
Page {
domain: null,
_events: {
request: [Function]
},
_eventsCount: 1,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: {
'Page.frameAttached': [Function],
'Page.frameNavigated': [Function],
'Page.frameDetached': [Function],
'Runtime.executionContextCreated': [Function],
'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
'Network.responseReceived': [Function: bound _onResponseReceived],
'Network.loadingFinished': [Function: bound _onLoadingFinished],
'Network.loadingFailed': [Function: bound _onLoadingFailed],
'Page.loadEventFired': [Function],
'Runtime.consoleAPICalled': [Function],
'Page.javascriptDialogOpening': [Function],
'Runtime.exceptionThrown': [Function],
'Security.certificateError': [Function],
'Inspector.targetCrashed': [Function]
},
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: Connection {
domain: null,
_events: {},
_eventsCount: 0,
_maxListeners: undefined,
_url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
_lastId: 17,
_callbacks: Map {},
_delay: 0,
_ws: [Object],
_sessions: [Object]
},
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_modifiers: 0,
_pressedKeys: Set {}
},
_mouse: Mouse {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_keyboard: Keyboard {
_client: [Object],
_modifiers: 0,
_pressedKeys: Set {}
},
_x: 0,
_y: 0,
_button: 'none'
},
_frameManager: FrameManager {
domain: null,
_events: {
frameattached: [Function],
framedetached: [Function],
framenavigated: [Function]
},
_eventsCount: 3,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_mouse: Mouse {
_client: [Object],
_keyboard: [Object],
_x: 0,
_y: 0,
_button: 'none'
},
_frames: Map {
'232.1' => [Object]
},
_mainFrame: Frame {
_client: [Object],
_mouse: [Object],
_parentFrame: null,
_url: 'http://mytestdomain.com/',
_id: '232.1',
_defaultContextId: 4,
_waitTasks: Set {},
_childFrames: Set {},
_name: undefined,
_loadingFailed: false
}
},
_networkManager: NetworkManager {
domain: null,
_events: {
request: [Function],
response: [Function],
requestfailed: [Function],
requestfinished: [Function]
},
_eventsCount: 4,
_maxListeners: undefined,
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_requestIdToRequest: Map {},
_interceptionIdToRequest: Map {
null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
},
_extraHTTPHeaders: Map {},
_requestInterceptionEnabled: true,
_requestHashToRequestIds: Multimap {
_map: [Object]
},
_requestHashToInterceptions: Multimap {
_map: Map {}
}
},
_emulationManager: EmulationManager {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_emulatingMobile: false,
_injectedTouchScriptId: null
},
_tracing: Tracing {
_client: Session {
domain: null,
_events: [Object],
_eventsCount: 15,
_maxListeners: undefined,
_lastId: 14,
_callbacks: Map {},
_connection: [Object],
_targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
_sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
},
_recording: false,
_path: ''
},
_pageBindings: Map {},
_ignoreHTTPSErrors: false,
_screenshotTaskQueue: TaskQueue {
_chain: Promise {
undefined
}
},
_viewport: {
width: 800,
height: 600
}
}
你能告诉我如何使用 Puppeteer 获取 JavaScript 网络事件的所有 URL 吗?
可以参考下面这个拦截图片请求的示例(sample),稍作修改即可用于查看其他类型的资源请求:
// Enable request interception before navigating; each intercepted request
// must then be explicitly continued by the listener below.
// NOTE: this method has since been renamed to page.setRequestInterception(value).
await page.setRequestInterceptionEnabled(true);

page.on('request', (interceptedRequest) => {
  const isScript = /\.js$/i.test(interceptedRequest.url);
  if (isScript) {
    // request for js resource
  }
  // Let every request proceed, whether or not it matched.
  interceptedRequest.continue();
});

await page.goto('https://example.com');
我偶然看到这篇帖子,需要补充的是:setRequestInterceptionEnabled 已重命名为
page.setRequestInterception(value)
下面是我在文档中找到的一段代码:
const puppeteer = require('puppeteer');

// Launches a browser, aborts every request whose URL ends in .png or .jpg,
// lets all other requests through, then loads https://example.com.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // Older Puppeteer releases expose `url` as a plain property, newer ones
      // as a method; accept both so `.endsWith` is always called on a string.
      const url = typeof interceptedRequest.url === 'function'
        ? interceptedRequest.url()
        : interceptedRequest.url;
      if (url.endsWith('.png') || url.endsWith('.jpg')) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto('https://example.com');
  } finally {
    // Always shut the browser down, even if page creation or navigation
    // fails, so no browser process is left running.
    await browser.close();
  }
});
NOTE Enabling request interception disables page caching.
这里是 puppeteer 文档的 URL:Puppeteer Documentation
我认为使用 page.on() 事件侦听器
可以更准确地解决这个问题,而且不会干扰网络流量。
类似于:
// Log every request and response object the page emits.
// 'requestfinished' and 'requestfailed' are other events that can be listened to.
page.on('request', (request) => {
  console.log(request);
});
page.on('response', (response) => {
  console.log(response);
});