元素在无头模式下不可点击
Element isn't clickable in headless mode
我的代码非常简单:点击 href 链接下载文件。它一直工作正常,直到我添加了无头(headless)参数,之后点击就不再有任何效果。不确定这是 Selenium 的问题还是 Chromedriver 的问题?我在网上找到的解决方案都没有奏效,因此如能提供任何帮助,我将不胜感激。这是我的代码:
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
class Scraper(object):
    """Open the ABS release page in headless Chrome and click a spreadsheet link.

    Call ``run()`` to start the browser and perform the click. The browser is
    not closed by this class; the caller owns ``self.driver`` afterwards.
    """

    def __init__(self, cursor):
        """Create a scraper with no browser attached yet.

        Args:
            cursor: Accepted by the constructor but not used anywhere in the
                code shown here.
        """
        self.driver = None

    def create_driver(self):
        """Start headless Chrome and store the WebDriver on ``self.driver``."""
        # Set up Headless Chrome
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--start-maximized")
        # Chrome expects "width,height" for this switch; "1920x1080" is not
        # a valid value for it.
        chrome_options.add_argument("--window-size=1920,1080")
        # NOTE(review): ``executable_path`` is itself deprecated in Selenium 4
        # in favour of a Service object; migrate once the Selenium version is
        # pinned.
        self.driver = webdriver.Chrome(
            executable_path=os.path.abspath("path to chromedriver"),
            # ``options`` is the supported spelling of the deprecated
            # ``chrome_options`` keyword.
            options=chrome_options,
        )
        self.driver.maximize_window()

    def go_to_website(self):
        """Load the release page and click the link whose href names 6202012.xls.

        NOTE(review): in headless mode the click alone may not start a
        download unless Chrome's download behaviour has been configured.
        """
        # Imported locally so this block does not depend on edits to the
        # file's import section.
        from selenium.webdriver.common.by import By

        self.driver.get('https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Nov%202019?OpenDocument')
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # Selenium 4 removed.
        link_to_click = self.driver.find_element(
            By.XPATH, "//a[contains(@href,'/log?openagent&6202012.xls&6202.0')]"
        )
        link_to_click.click()

    def run(self):
        """Create a fresh driver, then visit the page and click the link."""
        # set a new driver
        self.create_driver()
        self.go_to_website()
如果您的用例是单击文本为“...Table 12. Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original...”的元素旁边的 .xls 元素,则需要配合 element_to_be_clickable() 使用 WebDriverWait(显式等待),并且可以使用以下任一定位器策略:
使用 CSS_SELECTOR:
# Wait up to 20 seconds until the image inside the link in the 13th table row
# of div#details is clickable, then click it.
css_locator = (By.CSS_SELECTOR, "div#details tbody tr:nth-of-type(13) td>a>img")
clickable_image = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(css_locator))
clickable_image.click()
使用 XPATH:
# Wait up to 20 seconds until the image in the cell following the one that
# contains the table title is clickable, then click it.
xpath_locator = (By.XPATH, "//tr[@class='listentry']/td[contains(., 'Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original')]//following::td[1]/a/img")
clickable_image = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(xpath_locator))
clickable_image.click()
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
更新
但是,仅通过定位器策略单击元素可能不会启动下载。要在 google-chrome-headless 中开始下载,您必须通过 execute_cdp_cmd() 配置 Page.setDownloadBehavior,详细讨论可参阅“Download file through Google Chrome in headless mode”。
在 chromedriver 中使用无头模式时必须指定下载路径。您还必须等到文件下载完毕。在下面的代码中,您可以找到如何等待文件下载的简单示例。我使用正则表达式来获取文件的名称。
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
# Directory Chrome should save into; replace the placeholder with a real path.
download_path = "your_download_path"

options = webdriver.ChromeOptions()
prefs = {
    "profile.default_content_settings.popups": 0,
    "download.prompt_for_download": False,
    # The key must match Chrome's preference name exactly (no trailing space).
    "download.directory_upgrade": True,
    'download.default_directory': download_path,
}
options.add_experimental_option('prefs', prefs)
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Pass the switch directly; the ``options.headless`` setter is deprecated in
# newer Selenium releases.
options.add_argument('--headless')

driver = webdriver.Chrome(options=options)
driver.set_window_size(1920, 1080)
driver.maximize_window()
wait = WebDriverWait(driver, 10)

spreadsheet_name = "Table 12. Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original"
# Link wrapping an image whose alt text mentions Excel, in the row for this table.
excel_xpath = f"//tr[contains(., '{spreadsheet_name}') and @class='listentry']//a[./img[contains(@alt, 'Excel')]]"

# Leaving the ``with`` block ends the browser session, so the wait for the
# download stays inside it.
with driver:
    driver.get('https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Nov%202019?OpenDocument')
    download_button = wait.until(EC.element_to_be_clickable((By.XPATH, excel_xpath)))
    href = download_button.get_attribute("href")
    # href of the file
    # https://www.abs.gov.au/ausstats/meisubs.nsf/log?openagent&6202012.xls&6202.0&Time%20Series%20Spreadsheet&053D25DD395DF901CA2584D4001C70A5&0&Nov%202019&19.12.2019&Latest"
    # The file name is the first "&"-delimited field after "openagent&";
    # indexing [0] raises IndexError if the href does not have that shape.
    file_name = re.findall(r"(?<=openagent&)(.*?)(?=&)", href)[0]
    download_button.click()

    # Poll once per second, for at most 60 checks, and stop as soon as the
    # file shows up.
    downloaded_file = os.path.join(download_path, file_name)
    for _ in range(60):
        if os.path.exists(downloaded_file):
            break
        time.sleep(1)
    if not os.path.exists(downloaded_file):
        print("Failed to download", file_name, href)
我的代码非常简单:点击 href 链接下载文件。它一直工作正常,直到我添加了无头(headless)参数,之后点击就不再有任何效果。不确定这是 Selenium 的问题还是 Chromedriver 的问题?我在网上找到的解决方案都没有奏效,因此如能提供任何帮助,我将不胜感激。这是我的代码:
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
class Scraper(object):
    """Open the ABS release page in headless Chrome and click a spreadsheet link.

    Call ``run()`` to start the browser and perform the click. The browser is
    not closed by this class; the caller owns ``self.driver`` afterwards.
    """

    def __init__(self, cursor):
        """Create a scraper with no browser attached yet.

        Args:
            cursor: Accepted by the constructor but not used anywhere in the
                code shown here.
        """
        self.driver = None

    def create_driver(self):
        """Start headless Chrome and store the WebDriver on ``self.driver``."""
        # Set up Headless Chrome
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--start-maximized")
        # Chrome expects "width,height" for this switch; "1920x1080" is not
        # a valid value for it.
        chrome_options.add_argument("--window-size=1920,1080")
        # NOTE(review): ``executable_path`` is itself deprecated in Selenium 4
        # in favour of a Service object; migrate once the Selenium version is
        # pinned.
        self.driver = webdriver.Chrome(
            executable_path=os.path.abspath("path to chromedriver"),
            # ``options`` is the supported spelling of the deprecated
            # ``chrome_options`` keyword.
            options=chrome_options,
        )
        self.driver.maximize_window()

    def go_to_website(self):
        """Load the release page and click the link whose href names 6202012.xls.

        NOTE(review): in headless mode the click alone may not start a
        download unless Chrome's download behaviour has been configured.
        """
        # Imported locally so this block does not depend on edits to the
        # file's import section.
        from selenium.webdriver.common.by import By

        self.driver.get('https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Nov%202019?OpenDocument')
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # Selenium 4 removed.
        link_to_click = self.driver.find_element(
            By.XPATH, "//a[contains(@href,'/log?openagent&6202012.xls&6202.0')]"
        )
        link_to_click.click()

    def run(self):
        """Create a fresh driver, then visit the page and click the link."""
        # set a new driver
        self.create_driver()
        self.go_to_website()
如果您的用例是单击文本为“...Table 12. Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original...”的元素旁边的 .xls 元素,则需要配合 element_to_be_clickable() 使用 WebDriverWait(显式等待),并且可以使用以下任一定位器策略:
使用 CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div#details tbody tr:nth-of-type(13) td>a>img"))).click()
使用 XPATH:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//tr[@class='listentry']/td[contains(., 'Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original')]//following::td[1]/a/img"))).click()
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
更新
但是,仅通过定位器策略单击元素可能不会启动下载。要在 google-chrome-headless 中开始下载,您必须通过 execute_cdp_cmd() 配置 Page.setDownloadBehavior,详细讨论可参阅“Download file through Google Chrome in headless mode”。
在 chromedriver 中使用无头模式时必须指定下载路径。您还必须等到文件下载完毕。在下面的代码中,您可以找到如何等待文件下载的简单示例。我使用正则表达式来获取文件的名称。
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import re
# Directory Chrome should save into; replace the placeholder with a real path.
download_path = "your_download_path"

options = webdriver.ChromeOptions()
prefs = {
    "profile.default_content_settings.popups": 0,
    "download.prompt_for_download": False,
    # The key must match Chrome's preference name exactly (no trailing space).
    "download.directory_upgrade": True,
    'download.default_directory': download_path,
}
options.add_experimental_option('prefs', prefs)
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Pass the switch directly; the ``options.headless`` setter is deprecated in
# newer Selenium releases.
options.add_argument('--headless')

driver = webdriver.Chrome(options=options)
driver.set_window_size(1920, 1080)
driver.maximize_window()
wait = WebDriverWait(driver, 10)

spreadsheet_name = "Table 12. Labour force status by Sex, State and Territory - Trend, Seasonally adjusted and Original"
# Link wrapping an image whose alt text mentions Excel, in the row for this table.
excel_xpath = f"//tr[contains(., '{spreadsheet_name}') and @class='listentry']//a[./img[contains(@alt, 'Excel')]]"

# Leaving the ``with`` block ends the browser session, so the wait for the
# download stays inside it.
with driver:
    driver.get('https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/6202.0Nov%202019?OpenDocument')
    download_button = wait.until(EC.element_to_be_clickable((By.XPATH, excel_xpath)))
    href = download_button.get_attribute("href")
    # href of the file
    # https://www.abs.gov.au/ausstats/meisubs.nsf/log?openagent&6202012.xls&6202.0&Time%20Series%20Spreadsheet&053D25DD395DF901CA2584D4001C70A5&0&Nov%202019&19.12.2019&Latest"
    # The file name is the first "&"-delimited field after "openagent&";
    # indexing [0] raises IndexError if the href does not have that shape.
    file_name = re.findall(r"(?<=openagent&)(.*?)(?=&)", href)[0]
    download_button.click()

    # Poll once per second, for at most 60 checks, and stop as soon as the
    # file shows up.
    downloaded_file = os.path.join(download_path, file_name)
    for _ in range(60):
        if os.path.exists(downloaded_file):
            break
        time.sleep(1)
    if not os.path.exists(downloaded_file):
        print("Failed to download", file_name, href)