使用 Python Selenium 抓取空 Webelement 时出现 TimeoutException
Getting TimeoutException when scraping an empty Webelement using Python Selenium
我正在尝试通过循环国家/地区和城市来抓取网页以获取宠物店名称和相应地址。然后将其导出到 Excel 文件。
我发现当有一个城市不包含任何宠物店时,我会收到一条 TimeoutException 消息。我试图避开它,但没有成功。我需要一些帮助。
这是我的代码:(我绕过了状态循环以使其更短)
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
driver_path_location = "C:\Users\aclav\CHROMEDRIVER\chromedriver.exe"
driver = webdriver.Chrome(driver_path_location)
driver.get("https://granplus.com.br/onde-comprar-lojas-fisicas/")
# Getting inside the iFrame
driver.switch_to.frame(driver.find_element_by_xpath("//iframe[@class='cz-map-frame']"))
# Click on Button "Por Cidade"
driver.find_element_by_xpath("//input[@id='searchType_2']//following::span[@class=\
'radio-checkmark'][1]").click()
# Click on Menu "Estado"
driver.find_element_by_css_selector('[name="estado"]').click()
# Select a State
driver.find_element_by_xpath('//option[@value="RN"]').click()
Petshops = []
Addresses = []
# Getting the cities names
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,\
'//select[@name="cidade"]'))).click()
selcidade = driver.find_element_by_xpath('//select[@name="cidade"]')
cidades = selcidade.find_elements_by_tag_name('option')
cidadeList = []
# Recording the cities in cidadeList
for cidade in cidades:
cidadeList.append(cidade.get_attribute("value"))
cidadeList.remove('0') # Remove option 0 "Escolha uma cidade"
# Select a city
for cidadeValue in cidadeList:
selcidade = Select(driver.find_element_by_xpath('//select[@name="cidade"]'))
cidadeSelecionada = selcidade.select_by_value(cidadeValue)
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Petshop in List_Petshops:
Petshops.append(Petshop.text) # Records the Petshops names in a list
# Getting the Petshops addresses
List_Addresses = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="p-t-5"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
# Transforming in a Data Frame and exporting to Excel
df1 = pd.DataFrame(Petshops, columns=['Petshops'])
df1.to_excel("Petshops.xlsx",sheet_name="Petshops")
df2 = pd.DataFrame(Addresses, columns=['Addresses'])
df2.to_excel("Enderecos.xlsx",sheet_name="Enderecos")
df = df1.merge(df2, left_index=True, right_index=True)
driver.quit()
正如您在代码中提到的,此处:
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
您正在为每个选定的城市开设宠物店。
您正在等待宠物店出现,超时 10 秒。
那么,如果在那个城市找不到宠物店会怎样呢?
Selenium 将无法在定义的 10 秒超时内找到此类元素,并且将抛出 TimeoutException
。
要解决此问题,您可以使用 try-except
,如下所示:
try:
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
except:
#Do what you think will be logically correct here or at least print
print("No pet shops found in this city")
我正在尝试通过循环国家/地区和城市来抓取网页以获取宠物店名称和相应地址。然后将其导出到 Excel 文件。 我发现当有一个城市不包含任何宠物店时,我会收到一条 TimeoutException 消息。我试图避开它,但没有成功。我需要一些帮助。
这是我的代码:(我绕过了状态循环以使其更短)
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
driver_path_location = "C:\Users\aclav\CHROMEDRIVER\chromedriver.exe"
driver = webdriver.Chrome(driver_path_location)
driver.get("https://granplus.com.br/onde-comprar-lojas-fisicas/")
# Getting inside the iFrame
driver.switch_to.frame(driver.find_element_by_xpath("//iframe[@class='cz-map-frame']"))
# Click on Button "Por Cidade"
driver.find_element_by_xpath("//input[@id='searchType_2']//following::span[@class=\
'radio-checkmark'][1]").click()
# Click on Menu "Estado"
driver.find_element_by_css_selector('[name="estado"]').click()
# Select a State
driver.find_element_by_xpath('//option[@value="RN"]').click()
Petshops = []
Addresses = []
# Getting the cities names
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,\
'//select[@name="cidade"]'))).click()
selcidade = driver.find_element_by_xpath('//select[@name="cidade"]')
cidades = selcidade.find_elements_by_tag_name('option')
cidadeList = []
# Recording the cities in cidadeList
for cidade in cidades:
cidadeList.append(cidade.get_attribute("value"))
cidadeList.remove('0') # Remove option 0 "Escolha uma cidade"
# Select a city
for cidadeValue in cidadeList:
selcidade = Select(driver.find_element_by_xpath('//select[@name="cidade"]'))
cidadeSelecionada = selcidade.select_by_value(cidadeValue)
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Petshop in List_Petshops:
Petshops.append(Petshop.text) # Records the Petshops names in a list
# Getting the Petshops addresses
List_Addresses = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="p-t-5"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
# Transforming in a Data Frame and exporting to Excel
df1 = pd.DataFrame(Petshops, columns=['Petshops'])
df1.to_excel("Petshops.xlsx",sheet_name="Petshops")
df2 = pd.DataFrame(Addresses, columns=['Addresses'])
df2.to_excel("Enderecos.xlsx",sheet_name="Enderecos")
df = df1.merge(df2, left_index=True, right_index=True)
driver.quit()
正如您在代码中提到的,此处:
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
您正在为每个选定的城市开设宠物店。
您正在等待宠物店出现,超时 10 秒。
那么,如果在那个城市找不到宠物店会怎样呢?
Selenium 将无法在定义的 10 秒超时内找到此类元素,并且将抛出 TimeoutException
。
要解决此问题,您可以使用 try-except
,如下所示:
try:
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
except:
#Do what you think will be logically correct here or at least print
print("No pet shops found in this city")