如何使用 chrome-remote-interface 获取 iframe 内容?

how to get iframe content with chrome-remote-interface?

我正在构建一个爬虫,但需要爬取 iframe 的内容。chrome-remote-interface 不会转储 iframe 的内容,有什么办法可以做到这一点吗?

代码

     // Asker's attempt: open a new target for `url`, attach a client to it, and once the
     // page's load event fires, evaluate an expression in the page and pass the result to `cb`.
     // NOTE(review): the snippet is truncated — the closers for `if(!err){` and
     // `CDP.New(` are not shown, and `url` / `cb` come from code outside this snippet.
     CDP.New({'url':url},(err,target) => {
                if(!err){
                    CDP({target},(client) => {
                        const {Network, Page, Runtime} = client;
                        Network.setUserAgentOverride({'userAgent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'});
                        Network.enable();
                        Page.enable();
                        Runtime.enable();
                        // NOTE(review): none of the calls above are awaited before navigating — confirm ordering is not required.
                        Page.navigate({url});
                            Page.loadEventFired(() => {
                                Runtime.evaluate({
//I have no idea what to do ..
                                    // NOTE(review): this evaluates to the root element object, not an HTML string.
                                    expression:'document.documentElement',
                                    // NOTE(review): the later example in this file passes `returnByValue`; `returnValue` is presumably a typo — confirm.
                                    returnValue:true})
                                    .then(({result}) => {
                                        // Hand the evaluated value to the caller, then close the target opened above.
                                        cb(null,{html:result.value})
                                        CDP.Close({id:target.id})
                                    })
                            })


                    })

我认为这是被禁止的。你可以使用 DOM.getDocument 来获取整棵 DOM 树,不过无法将它直接转换为 HTML 字符串:

// Fetch the document root via DOM.getDocument. Per the surrounding answer this retrieves the
// whole tree; presumably `depth: -1` requests the full subtree and `pierce: true` descends
// into iframes — confirm against the DOM.getDocument protocol reference.
const {root} = await DOM.getDocument({depth: -1, pierce: true});

使用 chrome-launcher

https://www.npmjs.com/package/chrome-launcher

如果您使用 chromeLauncher 并传入 '--disable-web-security' 标志,这实际上很容易做到。这是一个如何设置的示例。

const chromeLauncher = require('chrome-launcher');
const CDP = require('chrome-remote-interface');


/**
 * Launches Chrome through chrome-launcher with `--disable-web-security` so that
 * scripts evaluated in the page can query inside iframes.
 *
 * A launch failure is logged and then rethrown. Previously the error was
 * swallowed and the promise resolved to `undefined`, which made callers fail
 * later with an unrelated TypeError when reading `chrome.port`.
 *
 * @returns {Promise<object>} Whatever `chromeLauncher.launch` resolves with.
 * @throws The original launch error, after it has been logged.
 */
const launchChrome = async () => {
  console.log('launchChrome..');
  try {
    return await chromeLauncher.launch({
      chromeFlags: [
        '--disable-web-security', // Query within iframes
      ],
      logLevel: 'error',
    });
  } catch (e) {
    console.log('Error launching chrome: ' + e);
    // Propagate so the caller sees the real cause instead of an undefined handle.
    throw e;
  }
};

/**
 * Launches Chrome, connects a chrome-remote-interface client to it, enables the
 * Page and Runtime domains and overrides the user agent.
 *
 * If anything fails after Chrome has been launched (connecting, enabling a
 * domain, setting the user agent), the client is closed if it was opened and
 * the Chrome process is killed before the error is rethrown, so a failed
 * initialisation does not leave a browser process behind.
 *
 * @returns {Promise<{chrome: object, protocol: object, Page: object, Runtime: object}>}
 *   The launched Chrome handle, the connected client and its Page/Runtime domains.
 * @throws {Error} When Chrome could not be launched, or the original error from
 *   the connection / setup step.
 */
const initChrome = async () => {
  console.log('initChrome..');
  const chrome = await launchChrome();
  if (!chrome) {
    // launchChrome may resolve without a handle if it handled a launch error itself.
    throw new Error('Chrome failed to launch');
  }

  let protocol;
  try {
    protocol = await CDP({port: chrome.port});

    const {Page, Runtime, Network} = protocol;
    const userAgent = 'Mozilla/5.0 (X11; Linux x86_64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
    await Promise.all([Page.enable(), Runtime.enable(), Network.setUserAgentOverride({userAgent})]);

    return {chrome, protocol, Page, Runtime};
  } catch (err) {
    // Release whatever was acquired so far; kill Chrome even if closing the client fails.
    try {
      if (protocol) {
        await protocol.close();
      }
    } finally {
      await chrome.kill();
    }
    throw err;
  }
};

/**
 * Example entry point: initialises Chrome, navigates to https://www.example.com/,
 * waits for the load event, evaluates `document.querySelector('iframe')` in the
 * page and logs the result.
 *
 * Errors raised while navigating or evaluating are logged, not rethrown.
 * Cleanup runs in `finally` and is awaited: the client is closed before Chrome
 * is killed, and Chrome is killed even if closing the client fails.
 *
 * @returns {Promise<void>} Resolves once the client is closed and Chrome is killed.
 * @throws Errors from `initChrome` or from cleanup propagate to the caller.
 */
const run = async () => {
  const {chrome, protocol, Page, Runtime} = await initChrome();

  try {
    await Page.navigate({url: 'https://www.example.com/'});
    await Page.loadEventFired();

    //////////////////////////////////
    // YOU CAN NOW QUERY IN IFRAMES //
    console.log(await Runtime.evaluate({expression: `document.querySelector('iframe')`, returnByValue: true}));
    //////////////////////////////////

    console.log('..Finished');
  } catch (err) {
    console.log(err);
  } finally {
    // Wait for the client to close before killing the browser it is attached to.
    try {
      await protocol.close();
    } finally {
      await chrome.kill();
    }
  }
};