Chrome 无头下载pdf

Chrome headless download pdf

我有一个脚本,可以从一个每月更新的网站下载 pdf,我想把这个操作自动化。脚本在有界面模式下可以正常工作,但我无法让它在无头(headless)模式下工作,我认为这是因为无头模式没有正确处理下载。它似乎能以无头方式启动 Chrome,我的导航命令似乎也有效,但是到了下载那一步就没有任何反应。

#!/usr/bin/env ruby
#
require 'capybara'
require 'rb-inotify'
require 'webdrivers/chromedriver'

# Registers a Selenium-backed Chrome driver with Capybara under the name
# :chrome, then stores a new Capybara session using that driver in @session.
# The driver options come from chrome_options.
def initialise
  driver_name = :chrome
  Capybara.register_driver(driver_name) do |rack_app|
    Capybara::Selenium::Driver.new(rack_app, browser: :chrome, options: chrome_options)
  end
  @session = Capybara::Session.new(driver_name)
end

# Settings and profile for the Chrome Browser.
#
# Builds the Selenium Chrome options: command-line switches plus the
# download/plugin preferences. Headless mode is enabled unless the UI
# environment variable is set.
#
# Returns the populated Selenium::WebDriver::Chrome::Options instance.
# NOTE: still cannot get headless working
def chrome_options
  opts = Selenium::WebDriver::Chrome::Options.new
  opts.add_argument('--headless') unless ENV['UI']
  opts.add_argument('--no-sandbox')
  opts.add_argument('--disable-gpu')
  opts.add_argument('--disable-dev-shm-usage')
  opts.add_argument('--window-size=1920,1080')

  # "~" is expanded in Ruby so the browser receives an absolute path
  # instead of a literal tilde.
  opts.add_preference(:download,
                      directory_upgrade: true,
                      prompt_for_download: false,
                      default_directory: File.expand_path('~/Downloads'))

  opts.add_preference(:plugins,
                      plugins_disabled: ["Chrome PDF Viewer"])

  opts.add_preference(:browser, set_download_behavior: { behavior: 'allow' })
  opts
end

更新:我正在使用 Chrome 版本 81.0.4044.113-1

在不同版本的 Chrome 和 selenium-webdriver 中,让下载正常工作所需的设置一直在变化,而且越来越多。看来你少了一个。

# "~" is expanded in Ruby so the preference holds an absolute path.
opts.add_preference('download.default_directory', File.expand_path('~/Downloads'))

根据版本,您还可以做的另一件事是

# Registers a Selenium-backed Chrome driver with Capybara under :chrome and
# stores a new session using it in @session.
#
# Each driver built by the registration block has its browser's download
# path set before the driver is returned to Capybara.
def initialise
  Capybara.register_driver :chrome do |app|
    Capybara::Selenium::Driver.new(app, browser: :chrome, options: chrome_options).tap do |driver|
      # "~" is expanded in Ruby so the browser receives an absolute path.
      driver.browser.download_path = File.expand_path('~/Downloads')
    end
  end
  @session = Capybara::Session.new(:chrome)
end

我在你的代码中添加了一些配置,可能会起作用:

#!/usr/bin/env ruby
#
require 'capybara'
require 'rb-inotify'
require 'webdrivers/chromedriver'

# Registers a Selenium-backed Chrome driver with Capybara under :chrome and
# stores a new session using it in @session.
#
# Each driver built by the registration block is also sent the DevTools
# Page.setDownloadBehavior command (see allow_downloads) before it is
# returned to Capybara.
def initialise
  Capybara.register_driver :chrome do |app|
    driver = Capybara::Selenium::Driver.new(app, browser: :chrome, options: chrome_options)
    allow_downloads(driver, download_directory)
    driver
  end
  @session = Capybara::Session.new(:chrome)
end

# Absolute path of the directory downloads are saved to. "~" is expanded in
# Ruby so the browser receives an absolute path instead of a literal tilde.
def download_directory
  File.expand_path('~/Downloads')
end

# Posts the DevTools command Page.setDownloadBehavior (behavior: 'allow')
# to the driver's session through chromedriver's send_command endpoint.
#
# driver    - a Capybara::Selenium::Driver whose browser exposes a bridge.
# directory - the path passed as downloadPath.
def allow_downloads(driver, directory)
  # bridge is not part of the browser's public interface, hence #send.
  bridge = driver.browser.send(:bridge)
  path = "/session/#{bridge.session_id}/chromium/send_command"

  bridge.http.call(:post, path, cmd: 'Page.setDownloadBehavior',
                   params: {
                     behavior: 'allow',
                     downloadPath: directory
                   })
end

# Settings and profile for the Chrome Browser.
#
# Builds the Selenium Chrome options: command-line switches plus the
# download/plugin preferences. Headless mode is enabled unless the UI
# environment variable is set. This method only builds options; the
# driver itself is created in initialise.
#
# Returns the populated Selenium::WebDriver::Chrome::Options instance.
def chrome_options
  opts = Selenium::WebDriver::Chrome::Options.new
  opts.add_argument('--headless') unless ENV['UI']
  opts.add_argument('--no-sandbox')
  opts.add_argument('--disable-gpu')
  opts.add_argument('--disable-dev-shm-usage')
  opts.add_argument('--window-size=1920,1080')

  opts.add_preference(:download,
                      directory_upgrade: true,
                      prompt_for_download: false,
                      default_directory: download_directory)
  opts.add_preference(:plugins,
                      always_open_pdf_externally: true)
  opts.add_preference(:browser, set_download_behavior: { behavior: 'allow' })
  opts
end