有没有办法让这个 python selenium 代码在无头模式下工作?
Is there a way to make this python selenium code work in headless mode?
我之前已经问过这个问题,并且已经设法做到了目前这一步。后来我才意识到,代码之所以无法正常工作,是因为它运行在无头(headless)模式下。
在之前的帖子中,我还提到会尝试使用 requests 库来获取文件,但在这种情况下,该 csv 文件似乎没有可以直接访问的链接。
代码的做法基本上是:打开 https://www.macrotrends.net/1476/copper-prices-historical-chart-data ,单击“All Years”(所有年份)按钮,然后单击“下载历史数据”按钮,之后 selenium 会尝试保存文件。
但正如我所说,只有在正常(非无头)模式下才会下载文件,在无头模式下似乎不起作用。这是什么原因?有没有办法让它在无头模式下工作?我找了很久,但一直没有找到答案。
# Question script: open the copper-price chart page, select "All Years" and
# click the download button so that Chrome saves the CSV file.
import time  # required by time.time() / time.sleep() below

from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

start_time = time.time()
options = Options()
# Headless is switched off here; the download only happens in this mode.
#options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--disable-extensions")
options.add_experimental_option("prefs", {
    # NOTE(review): the string value itself contains the single quotes,
    # so they are sent as part of the path -- confirm this is intended.
    "download.default_directory": r"'/home/Documents/testing/macrotrends'",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": False
})
driver = webdriver.Chrome(executable_path=r'/home/chromedriver/chromedriver',options=options)
driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
time.sleep(5)  # fixed pause before looking up the chart iframe

# The buttons are looked up after switching into the chart iframe.
iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
driver.switch_to.frame(iframe)
xpath = "//a[text()='All Years']"
driver.find_element_by_xpath(xpath).click()
xpath = "//button[@id='dataDownload']"
driver.find_element_by_xpath(xpath).click()
time.sleep(10)  # fixed pause after clicking download, before closing
driver.close()
print("--- %s seconds ---" % (time.time() - start_time))
screenshot of the website in chrome
在 headless 模式下,下载默认是被禁用的。您可以通过执行如下的 Chrome DevTools Protocol(开发者工具协议)命令来允许下载:
# Allow file downloads in a headless Chrome session via a DevTools command.
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

options = Options()
# Pass the flag directly instead of using the `options.headless` property,
# which newer Selenium releases deprecate.
options.add_argument("--headless")
driver = Chrome(options=options)

# 'behavior': 'allow' permits downloads; 'downloadPath' is where files are saved.
params = {'behavior': 'allow', 'downloadPath': '/path/for/download'}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
# downloads are now enabled for this driver instance
您也可以使用 pyvirtualdisplay 模块创建一个虚拟显示器,Chrome 或 Firefox(无需启用 headless)会自动使用它,从而把浏览器窗口隐藏起来。
Chrome:
# Download the chart CSV with a regular (non-headless) Chrome that renders
# into a pyvirtualdisplay virtual display, so no window appears on screen.
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

display = Display(visible=0, size=(1920,1080))
display.start()
start_time = time.time()

try:
    options = Options()
    # No "--headless" here: the virtual display hides the window instead.
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("prefs", {
        "download.default_directory": "/home/Documents/testing/macrotrends",  # without `r` and `' '`, only `" "`
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": False
    })

    # If chromedriver's folder is in PATH, `webdriver.Chrome(options=options)`
    # works without `executable_path`.
    driver = webdriver.Chrome(executable_path=r'/home/chromedriver/chromedriver',options=options)
    try:
        driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
        print('[INFO] loaded', time.time() - start_time)
        time.sleep(5)  # fixed pause before looking up the chart iframe

        # The buttons are looked up after switching into the chart iframe.
        iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
        driver.switch_to.frame(iframe)
        print('[INFO] switched', time.time() - start_time)

        xpath = "//a[text()='All Years']"
        driver.find_element_by_xpath(xpath).click()
        xpath = "//button[@id='dataDownload']"
        driver.find_element_by_xpath(xpath).click()
        print('[INFO] clicked', time.time() - start_time)

        time.sleep(10)  # fixed pause after clicking download, before closing
        print('[INFO] closing', time.time() - start_time)
    finally:
        # quit() ends the whole WebDriver session, even when a step above fails.
        driver.quit()
finally:
    # Always stop the virtual display so it is not left running after an error.
    display.stop()

print('[INFO] end', time.time() - start_time)
Firefox:
# Download the chart CSV with a regular (non-headless) Firefox that renders
# into a pyvirtualdisplay virtual display, so no window appears on screen.
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options

display = Display(visible=0, size=(1920,1080))
display.start()
start_time = time.time()

try:
    options = Options()
    # No "--headless" here: the virtual display hides the window instead.
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-extensions")
    # Download preferences: target folder and the "text/csv" type to save without asking.
    options.set_preference("browser.download.folderList", 2)
    options.set_preference("browser.download.dir", "/home/Documents/testing/macrotrends")  # without `r` and `' '`, only `" "`
    options.set_preference("browser.download.useDownloadDir", True)
    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

    # If geckodriver's folder is in PATH, `webdriver.Firefox(options=options)`
    # works without `executable_path`.
    driver = webdriver.Firefox(executable_path="...", options=options)
    try:
        driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
        print('[INFO] loaded', time.time() - start_time)
        time.sleep(5)  # fixed pause before looking up the chart iframe

        # The buttons are looked up after switching into the chart iframe.
        iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
        driver.switch_to.frame(iframe)
        print('[INFO] switched', time.time() - start_time)

        xpath = "//a[text()='All Years']"
        driver.find_element_by_xpath(xpath).click()
        xpath = "//button[@id='dataDownload']"
        driver.find_element_by_xpath(xpath).click()
        print('[INFO] clicked', time.time() - start_time)

        time.sleep(10)  # fixed pause after clicking download, before closing
        print('[INFO] closing', time.time() - start_time)
    finally:
        # quit() ends the whole WebDriver session, even when a step above fails.
        driver.quit()
finally:
    # Always stop the virtual display so it is not left running after an error.
    display.stop()

print('[INFO] end', time.time() - start_time)
我之前已经问过这个问题,并且已经设法做到了目前这一步。后来我才意识到,代码之所以无法正常工作,是因为它运行在无头(headless)模式下。
在之前的帖子中,我还提到会尝试使用 requests 库来获取文件,但在这种情况下,该 csv 文件似乎没有可以直接访问的链接。
代码的做法基本上是:打开 https://www.macrotrends.net/1476/copper-prices-historical-chart-data ,单击“All Years”(所有年份)按钮,然后单击“下载历史数据”按钮,之后 selenium 会尝试保存文件。
但正如我所说,只有在正常(非无头)模式下才会下载文件,在无头模式下似乎不起作用。这是什么原因?有没有办法让它在无头模式下工作?我找了很久,但一直没有找到答案。
# Question script: open the copper-price chart page, select "All Years" and
# click the download button so that Chrome saves the CSV file.
import time  # required by time.time() / time.sleep() below

from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

start_time = time.time()
options = Options()
# Headless is switched off here; the download only happens in this mode.
#options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--disable-extensions")
options.add_experimental_option("prefs", {
    # NOTE(review): the string value itself contains the single quotes,
    # so they are sent as part of the path -- confirm this is intended.
    "download.default_directory": r"'/home/Documents/testing/macrotrends'",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": False
})
driver = webdriver.Chrome(executable_path=r'/home/chromedriver/chromedriver',options=options)
driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
time.sleep(5)  # fixed pause before looking up the chart iframe

# The buttons are looked up after switching into the chart iframe.
iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
driver.switch_to.frame(iframe)
xpath = "//a[text()='All Years']"
driver.find_element_by_xpath(xpath).click()
xpath = "//button[@id='dataDownload']"
driver.find_element_by_xpath(xpath).click()
time.sleep(10)  # fixed pause after clicking download, before closing
driver.close()
print("--- %s seconds ---" % (time.time() - start_time))
screenshot of the website in chrome
在 headless 模式下,下载默认是被禁用的。您可以通过执行如下的 Chrome DevTools Protocol(开发者工具协议)命令来允许下载:
# Allow file downloads in a headless Chrome session via a DevTools command.
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

options = Options()
# Pass the flag directly instead of using the `options.headless` property,
# which newer Selenium releases deprecate.
options.add_argument("--headless")
driver = Chrome(options=options)

# 'behavior': 'allow' permits downloads; 'downloadPath' is where files are saved.
params = {'behavior': 'allow', 'downloadPath': '/path/for/download'}
driver.execute_cdp_cmd('Page.setDownloadBehavior', params)
# downloads are now enabled for this driver instance
您也可以使用 pyvirtualdisplay 模块创建一个虚拟显示器,Chrome 或 Firefox(无需启用 headless)会自动使用它,从而把浏览器窗口隐藏起来。
Chrome:
# Download the chart CSV with a regular (non-headless) Chrome that renders
# into a pyvirtualdisplay virtual display, so no window appears on screen.
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

display = Display(visible=0, size=(1920,1080))
display.start()
start_time = time.time()

try:
    options = Options()
    # No "--headless" here: the virtual display hides the window instead.
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("prefs", {
        "download.default_directory": "/home/Documents/testing/macrotrends",  # without `r` and `' '`, only `" "`
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": False
    })

    # If chromedriver's folder is in PATH, `webdriver.Chrome(options=options)`
    # works without `executable_path`.
    driver = webdriver.Chrome(executable_path=r'/home/chromedriver/chromedriver',options=options)
    try:
        driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
        print('[INFO] loaded', time.time() - start_time)
        time.sleep(5)  # fixed pause before looking up the chart iframe

        # The buttons are looked up after switching into the chart iframe.
        iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
        driver.switch_to.frame(iframe)
        print('[INFO] switched', time.time() - start_time)

        xpath = "//a[text()='All Years']"
        driver.find_element_by_xpath(xpath).click()
        xpath = "//button[@id='dataDownload']"
        driver.find_element_by_xpath(xpath).click()
        print('[INFO] clicked', time.time() - start_time)

        time.sleep(10)  # fixed pause after clicking download, before closing
        print('[INFO] closing', time.time() - start_time)
    finally:
        # quit() ends the whole WebDriver session, even when a step above fails.
        driver.quit()
finally:
    # Always stop the virtual display so it is not left running after an error.
    display.stop()

print('[INFO] end', time.time() - start_time)
Firefox:
# Download the chart CSV with a regular (non-headless) Firefox that renders
# into a pyvirtualdisplay virtual display, so no window appears on screen.
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options

display = Display(visible=0, size=(1920,1080))
display.start()
start_time = time.time()

try:
    options = Options()
    # No "--headless" here: the virtual display hides the window instead.
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-extensions")
    # Download preferences: target folder and the "text/csv" type to save without asking.
    options.set_preference("browser.download.folderList", 2)
    options.set_preference("browser.download.dir", "/home/Documents/testing/macrotrends")  # without `r` and `' '`, only `" "`
    options.set_preference("browser.download.useDownloadDir", True)
    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

    # If geckodriver's folder is in PATH, `webdriver.Firefox(options=options)`
    # works without `executable_path`.
    driver = webdriver.Firefox(executable_path="...", options=options)
    try:
        driver.get('https://www.macrotrends.net/1476/copper-prices-historical-chart-data')
        print('[INFO] loaded', time.time() - start_time)
        time.sleep(5)  # fixed pause before looking up the chart iframe

        # The buttons are looked up after switching into the chart iframe.
        iframe = driver.find_element_by_xpath("//iframe[@id='chart_iframe']")
        driver.switch_to.frame(iframe)
        print('[INFO] switched', time.time() - start_time)

        xpath = "//a[text()='All Years']"
        driver.find_element_by_xpath(xpath).click()
        xpath = "//button[@id='dataDownload']"
        driver.find_element_by_xpath(xpath).click()
        print('[INFO] clicked', time.time() - start_time)

        time.sleep(10)  # fixed pause after clicking download, before closing
        print('[INFO] closing', time.time() - start_time)
    finally:
        # quit() ends the whole WebDriver session, even when a step above fails.
        driver.quit()
finally:
    # Always stop the virtual display so it is not left running after an error.
    display.stop()

print('[INFO] end', time.time() - start_time)