使用 Selenium 通过 CssSelector 将所有元素的 id 属性存储到列表中
Storing the id attribute of all elements using CssSelector in a list using Selenium
我这里的 Python selenium 代码将我们带到一个页面,其中包含一些我有兴趣抓取的数据集。这是我的代码。
import time, os
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
# Configure Chrome so that downloads land in the current working directory.
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": os.getcwd(), "directory_upgrade": True}
options.add_experimental_option("prefs", preferences)
#options.headless = True
options.add_experimental_option('excludeSwitches', ['enable-logging'])

url = "http://www.ssp.sp.gov.br/transparenciassp/"

# The chromedriver path is supplied by webdriver_manager.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
# Shared explicit wait: up to 10 seconds per condition.
wait = WebDriverWait(driver, 10)

# Maximize the browser window, then load the start page.
driver.maximize_window()
driver.get(url)

# Click the element with id "cphBody_btnHomicicio" once it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#cphBody_btnHomicicio"))).click()

# range(18, 19) yields only 18, so this clicks "#cphBody_lkAno18".
for x in range(18, 19):
    year = '#cphBody_lkAno'+str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # range(1, 2) yields only 1, so this clicks "#cphBody_lkMes1".
    # NOTE: the inner loop reuses the name `x` of the outer loop.
    for x in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes'+str(x)))).click()
到目前为止这是可行的，但我的问题出在这段代码的最后一个循环上，尽管它确实完成了我想要的操作。我想要做的是选取页面上 CSS 选择器中包含文本“cphBody_lkMes”的所有元素，并将它们放在一个列表中，例如：
['#cphBody_lkMes1','#cphBody_lkMes2', ...]
这样我就可以相应地遍历它们。当前代码显得凌乱。
当我尝试时
# Attempt that fails with "An invalid or illegal selector was specified."
for x in range(18, 19):
    year = '#cphBody_lkAno'+str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # NOTE: the argument below is an XPath expression, but it is handed to a
    # CSS-selector lookup; this is the call the question is about.
    meses = driver.find_elements_by_css_selector("//*[contains(text(),'cphBody_lkMes')]")
    meses
    # NOTE: the inner loop reuses the name `x` of the outer loop.
    for x in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes'+str(x)))).click()
列表显示为空,并显示错误
An invalid or illegal selector was specified.
我该如何解决这个问题?
要通过 CSS 选择器选取页面上 id 属性以 cphBody_lkMes 开头的所有元素，您需要使用 WebDriverWait 等待 visibility_of_all_elements_located()，并且可以使用以下任一定位方式：
使用CSS_SELECTOR
# Wait up to 20 seconds until all matching links are visible, then print
# the id of each link whose id starts with "cphBody_lkMes" (CSS selector).
print(
    [
        link.get_attribute("id")
        for link in WebDriverWait(driver, 20).until(
            EC.visibility_of_all_elements_located(
                (By.CSS_SELECTOR, "ul.nav.nav-pills.mesNav li > a[id^='cphBody_lkMes']")
            )
        )
    ]
)
使用 XPATH:
# Wait up to 20 seconds until all matching links are visible, then print
# the id of each link whose id starts with "cphBody_lkMes" (XPath).
print(
    [
        link.get_attribute("id")
        for link in WebDriverWait(driver, 20).until(
            EC.visibility_of_all_elements_located(
                (By.XPATH, "//ul[@class='nav nav-pills mesNav']//li/a[starts-with(@id, 'cphBody_lkMes')]")
            )
        )
    ]
)
更新
迭代列表:
# Collect the id of every visible link whose id starts with "cphBody_lkMes",
# waiting up to 20 seconds for the links to become visible.
months = [
    my_elem.get_attribute("id")
    for my_elem in WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, "ul.nav.nav-pills.mesNav li > a[id^='cphBody_lkMes']")
        )
    )
]
# Iterate over the collected ids.
for month in months:
    print(month)
我在这里看到几个问题:
- 您在外层和内层 for 循环中都使用了 x 作为循环索引
- 那里应该使用 meses = driver.find_elements_by_xpath，而不是 meses = driver.find_elements_by_css_selector
- 最后，cphBody_lkMes 是元素 id 值的子串，而不是 text 值的子串。
因此，下面的代码给了我一个非空的 meses 列表：
# Corrected version: distinct loop variables and an XPath lookup on @id.
for x in range(18, 19):
    year = '#cphBody_lkAno' + str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # Match every element whose id attribute contains "cphBody_lkMes".
    # NOTE(review): find_elements_by_xpath is the legacy Selenium API; newer
    # Selenium 4 releases expect driver.find_elements(By.XPATH, ...) - confirm
    # against the installed version.
    meses = driver.find_elements_by_xpath("//*[contains(@id,'cphBody_lkMes')]")
    for y in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes' + str(y)))).click()
我这里的 Python selenium 代码将我们带到一个页面,其中包含一些我有兴趣抓取的数据集。这是我的代码。
import time, os
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
# Configure Chrome so that downloads land in the current working directory.
options = webdriver.ChromeOptions()
preferences = {"download.default_directory": os.getcwd(), "directory_upgrade": True}
options.add_experimental_option("prefs", preferences)
#options.headless = True
options.add_experimental_option('excludeSwitches', ['enable-logging'])

url = "http://www.ssp.sp.gov.br/transparenciassp/"

# The chromedriver path is supplied by webdriver_manager.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
# Shared explicit wait: up to 10 seconds per condition.
wait = WebDriverWait(driver, 10)

# Maximize the browser window, then load the start page.
driver.maximize_window()
driver.get(url)

# Click the element with id "cphBody_btnHomicicio" once it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#cphBody_btnHomicicio"))).click()

# range(18, 19) yields only 18, so this clicks "#cphBody_lkAno18".
for x in range(18, 19):
    year = '#cphBody_lkAno'+str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # range(1, 2) yields only 1, so this clicks "#cphBody_lkMes1".
    # NOTE: the inner loop reuses the name `x` of the outer loop.
    for x in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes'+str(x)))).click()
到目前为止这是可行的，但我的问题出在这段代码的最后一个循环上，尽管它确实完成了我想要的操作。我想要做的是选取页面上 CSS 选择器中包含文本“cphBody_lkMes”的所有元素，并将它们放在一个列表中，例如：
['#cphBody_lkMes1','#cphBody_lkMes2', ...]
这样我就可以相应地遍历它们。当前代码显得凌乱。
当我尝试时
# Attempt that fails with "An invalid or illegal selector was specified."
for x in range(18, 19):
    year = '#cphBody_lkAno'+str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # NOTE: the argument below is an XPath expression, but it is handed to a
    # CSS-selector lookup; this is the call the question is about.
    meses = driver.find_elements_by_css_selector("//*[contains(text(),'cphBody_lkMes')]")
    meses
    # NOTE: the inner loop reuses the name `x` of the outer loop.
    for x in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes'+str(x)))).click()
列表显示为空,并显示错误
An invalid or illegal selector was specified.
我该如何解决这个问题?
要通过 CSS 选择器选取页面上 id 属性以 cphBody_lkMes 开头的所有元素，您需要使用 WebDriverWait 等待 visibility_of_all_elements_located()，并且可以使用以下任一定位方式：
使用CSS_SELECTOR
# Wait up to 20 seconds until all matching links are visible, then print
# the id of each link whose id starts with "cphBody_lkMes" (CSS selector).
print(
    [
        link.get_attribute("id")
        for link in WebDriverWait(driver, 20).until(
            EC.visibility_of_all_elements_located(
                (By.CSS_SELECTOR, "ul.nav.nav-pills.mesNav li > a[id^='cphBody_lkMes']")
            )
        )
    ]
)
使用 XPATH:
# Wait up to 20 seconds until all matching links are visible, then print
# the id of each link whose id starts with "cphBody_lkMes" (XPath).
print(
    [
        link.get_attribute("id")
        for link in WebDriverWait(driver, 20).until(
            EC.visibility_of_all_elements_located(
                (By.XPATH, "//ul[@class='nav nav-pills mesNav']//li/a[starts-with(@id, 'cphBody_lkMes')]")
            )
        )
    ]
)
更新
迭代列表:
# Collect the id of every visible link whose id starts with "cphBody_lkMes",
# waiting up to 20 seconds for the links to become visible.
months = [
    my_elem.get_attribute("id")
    for my_elem in WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, "ul.nav.nav-pills.mesNav li > a[id^='cphBody_lkMes']")
        )
    )
]
# Iterate over the collected ids.
for month in months:
    print(month)
我在这里看到几个问题:
- 您在外层和内层 for 循环中都使用了 x 作为循环索引
- 那里应该使用 meses = driver.find_elements_by_xpath，而不是 meses = driver.find_elements_by_css_selector
- 最后，cphBody_lkMes 是元素 id 值的子串，而不是 text 值的子串。
因此，下面的代码给了我一个非空的 meses 列表：
# Corrected version: distinct loop variables and an XPath lookup on @id.
for x in range(18, 19):
    year = '#cphBody_lkAno' + str(x)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, year))).click()
    # Match every element whose id attribute contains "cphBody_lkMes".
    # NOTE(review): find_elements_by_xpath is the legacy Selenium API; newer
    # Selenium 4 releases expect driver.find_elements(By.XPATH, ...) - confirm
    # against the installed version.
    meses = driver.find_elements_by_xpath("//*[contains(@id,'cphBody_lkMes')]")
    for y in range(1, 2):
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#cphBody_lkMes' + str(y)))).click()