如何检查网络流量并获取资源请求的URL?

How to inspect network traffic and get the URL of resource requests?

我想监控一个页面的网络流量,并获取所有 JavaScript 网络事件的 URL,类似于 PhantomJS 的 page.onResourceRequested 所做的事情,但我无法弄清楚如何用 Google Chrome 的 Puppeteer 做到这一点。

我一直在尝试 Google Chrome's puppeteer,但我不知道如何让它工作,因为它的输出看起来像这样:

Page {
    domain: null,
    _events: {
        request: [Function]
    },
    _eventsCount: 1,
    _maxListeners: undefined,
    _client: Session {
        domain: null,
        _events: {
            'Page.frameAttached': [Function],
            'Page.frameNavigated': [Function],
            'Page.frameDetached': [Function],
            'Runtime.executionContextCreated': [Function],
            'Network.requestWillBeSent': [Function: bound _onRequestWillBeSent],
            'Network.requestIntercepted': [Function: bound _onRequestIntercepted],
            'Network.responseReceived': [Function: bound _onResponseReceived],
            'Network.loadingFinished': [Function: bound _onLoadingFinished],
            'Network.loadingFailed': [Function: bound _onLoadingFailed],
            'Page.loadEventFired': [Function],
            'Runtime.consoleAPICalled': [Function],
            'Page.javascriptDialogOpening': [Function],
            'Runtime.exceptionThrown': [Function],
            'Security.certificateError': [Function],
            'Inspector.targetCrashed': [Function]
        },
        _eventsCount: 15,
        _maxListeners: undefined,
        _lastId: 14,
        _callbacks: Map {},
        _connection: Connection {
            domain: null,
            _events: {},
            _eventsCount: 0,
            _maxListeners: undefined,
            _url: 'ws://127.0.0.1:65343/devtools/browser/ca214df4-4357-4b8f-8552-a1524d6652ff',
            _lastId: 17,
            _callbacks: Map {},
            _delay: 0,
            _ws: [Object],
            _sessions: [Object]
        },
        _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
        _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
    },
    _keyboard: Keyboard {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _modifiers: 0,
        _pressedKeys: Set {}
    },
    _mouse: Mouse {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _keyboard: Keyboard {
            _client: [Object],
            _modifiers: 0,
            _pressedKeys: Set {}
        },
        _x: 0,
        _y: 0,
        _button: 'none'
    },
    _frameManager: FrameManager {
        domain: null,
        _events: {
            frameattached: [Function],
            framedetached: [Function],
            framenavigated: [Function]
        },
        _eventsCount: 3,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _mouse: Mouse {
            _client: [Object],
            _keyboard: [Object],
            _x: 0,
            _y: 0,
            _button: 'none'
        },
        _frames: Map {
            '232.1' => [Object]
        },
        _mainFrame: Frame {
            _client: [Object],
            _mouse: [Object],
            _parentFrame: null,
            _url: 'http://mytestdomain.com/',
            _id: '232.1',
            _defaultContextId: 4,
            _waitTasks: Set {},
            _childFrames: Set {},
            _name: undefined,
            _loadingFailed: false
        }
    },
    _networkManager: NetworkManager {
        domain: null,
        _events: {
            request: [Function],
            response: [Function],
            requestfailed: [Function],
            requestfinished: [Function]
        },
        _eventsCount: 4,
        _maxListeners: undefined,
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _requestIdToRequest: Map {},
        _interceptionIdToRequest: Map {
            null => [Object], 'id-1' => [Object], 'id-2' => [Object], 'id-3' => [Object]
        },
        _extraHTTPHeaders: Map {},
        _requestInterceptionEnabled: true,
        _requestHashToRequestIds: Multimap {
            _map: [Object]
        },
        _requestHashToInterceptions: Multimap {
            _map: Map {}
        }
    },
    _emulationManager: EmulationManager {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _emulatingMobile: false,
        _injectedTouchScriptId: null
    },
    _tracing: Tracing {
        _client: Session {
            domain: null,
            _events: [Object],
            _eventsCount: 15,
            _maxListeners: undefined,
            _lastId: 14,
            _callbacks: Map {},
            _connection: [Object],
            _targetId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923',
            _sessionId: 'bbd35bf4-d3ce-4497-a2a0-8cc98b4f0923:1'
        },
        _recording: false,
        _path: ''
    },
    _pageBindings: Map {},
    _ignoreHTTPSErrors: false,
    _screenshotTaskQueue: TaskQueue {
        _chain: Promise {
            undefined
        }
    },
    _viewport: {
        width: 800,
        height: 600
    }
}

你能告诉我如何使用 Puppeteer 获取 JavaScript 网络事件的所有 URL 吗?

可以参考拦截图像请求的示例(sample),它很容易修改为查看其他类型的资源请求:

// Turn on request interception before the 'request' handler is registered.
await page.setRequestInterceptionEnabled(true);

// Matches URLs that end in ".js" (case-insensitive).
const JS_RESOURCE_PATTERN = /\.js$/i;

page.on('request', (req) => {
  const isScriptRequest = JS_RESOURCE_PATTERN.test(req.url);
  if (isScriptRequest) {
    // request for js resource
  }
  // Every intercepted request is let through unchanged.
  req.continue();
});

await page.goto('https://example.com');

我看到了这个帖子,需要注意的是 setRequestInterceptionEnabled 已重命名为:

page.setRequestInterception(value)

这是我在文档中找到的一段代码:

const puppeteer = require('puppeteer');

// Requests whose URL ends with one of these extensions are aborted.
const BLOCKED_EXTENSIONS = ['.png', '.jpg'];

puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    // Interception has to be switched on for abort()/continue() to be usable in the handler.
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      // Newer Puppeteer releases expose url() as a method, while early releases
      // exposed a plain string property; support both so endsWith() never
      // gets called on a function.
      const url = typeof interceptedRequest.url === 'function'
        ? interceptedRequest.url()
        : interceptedRequest.url;
      if (BLOCKED_EXTENSIONS.some(ext => url.endsWith(ext))) {
        interceptedRequest.abort();
      } else {
        interceptedRequest.continue();
      }
    });
    await page.goto('https://example.com');
  } finally {
    // Close the browser even if newPage()/goto() rejects, so no browser
    // process is left running after a failure.
    await browser.close();
  }
});

NOTE Enabling request interception disables page caching.

这里是 puppeteer 文档的 URL:Puppeteer Documentation

我认为使用 page.on() 侦听器可以更准确地解决问题,并且不会干扰流量。

类似于:

// Print each event object the page emits; no interception is enabled here.
const logEvent = (evt) => console.log(evt);
page.on('request', logEvent); // 'requestFinished' and 'requestFailed' are other options
page.on('response', logEvent);