使用 selenium 的 xpath 问题
xpath issue using selenium
我正在尝试抓取 title,
但得到的提示是我的 xpath 有误。
from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
driver = webdriver.Chrome(PATH)
driver.get(url)
sleep(2)  # hard-coded pause before the page is queried


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every matched block.

    Reads the module-level ``driver``. Each block is looked up with
    ``find_element``, so a block without such an ``h3`` raises.
    """
    # Locator as posted in the question: divs whose class is exactly 'row'.
    vid = driver.find_elements(By.XPATH, "//div[@class='row']")
    for item in vid:
        # Use the same By-based call style as find_elements above; the
        # find_element_by_xpath helper is not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()
下面的 xpath 对我有效,你可以试一试。如果需要,还可以进一步缩短 xpath 的长度。
# Collect the blocks whose class attribute is exactly this value.
vid = driver.find_elements(
    By.XPATH,
    "//div[@class='directory-item directory-item-feature-toggled exhibitor-category']",
)
for item in vid:
    # No leading '//' here: the path is evaluated relative to the current item.
    title = item.find_element(
        By.XPATH,
        "div[@class='row']/div[2]/div//div[@class='company-info']/div/a/h3",
    )
    print(title.text)
您在这里使用了错误的定位器。
每个 vid 块都可以用此 XPath 定位: //div[contains(@class,'directory-item directory-item-feature-toggled')]。
这样您的代码将如下所示:
from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
driver = webdriver.Chrome(PATH)
driver.get(url)
sleep(2)  # hard-coded pause before the page is queried


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every directory item.

    Reads the module-level ``driver``. Each item is looked up with
    ``find_element``, so an item without such an ``h3`` raises.
    """
    # contains() matches on a substring of the class attribute, so items
    # carrying additional classes are still selected.
    vid = driver.find_elements(
        By.XPATH,
        "//div[contains(@class,'directory-item directory-item-feature-toggled')]",
    )
    for item in vid:
        # Use the same By-based call style as find_elements above; the
        # find_element_by_xpath helper is not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()
我建议您使用基于预期条件(expected conditions)的显式等待,而不是硬编码的暂停。
这样,您的代码将是:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
# One locator shared by the explicit wait and the lookup so they cannot drift.
item_locator = (
    By.XPATH,
    "//div[contains(@class,'directory-item directory-item-feature-toggled')]",
)
driver = webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 20)
driver.get(url)
# Block (up to the 20 s configured above) until a matching item is visible.
wait.until(EC.visibility_of_element_located(item_locator))
sleep(0.3)  # short delay left in so that more than just the first item is visible


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every directory item.

    Reads the module-level ``driver`` and ``item_locator``. Each item is
    looked up with ``find_element``, so an item without such an ``h3`` raises.
    """
    vid = driver.find_elements(*item_locator)
    for item in vid:
        # Use the By-based call style; the find_element_by_xpath helper is
        # not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()
我正在尝试抓取 title,
但得到的提示是我的 xpath 有误。
from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
driver = webdriver.Chrome(PATH)
driver.get(url)
sleep(2)  # hard-coded pause before the page is queried


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every matched block.

    Reads the module-level ``driver``. Each block is looked up with
    ``find_element``, so a block without such an ``h3`` raises.
    """
    # Locator as posted in the question: divs whose class is exactly 'row'.
    vid = driver.find_elements(By.XPATH, "//div[@class='row']")
    for item in vid:
        # Use the same By-based call style as find_elements above; the
        # find_element_by_xpath helper is not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()
下面的 xpath 对我有效,你可以试一试。如果需要,还可以进一步缩短 xpath 的长度。
# Collect the blocks whose class attribute is exactly this value.
vid = driver.find_elements(
    By.XPATH,
    "//div[@class='directory-item directory-item-feature-toggled exhibitor-category']",
)
for item in vid:
    # No leading '//' here: the path is evaluated relative to the current item.
    title = item.find_element(
        By.XPATH,
        "div[@class='row']/div[2]/div//div[@class='company-info']/div/a/h3",
    )
    print(title.text)
您在这里使用了错误的定位器。
每个 vid 块都可以用此 XPath 定位: //div[contains(@class,'directory-item directory-item-feature-toggled')]。
这样您的代码将如下所示:
from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
driver = webdriver.Chrome(PATH)
driver.get(url)
sleep(2)  # hard-coded pause before the page is queried


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every directory item.

    Reads the module-level ``driver``. Each item is looked up with
    ``find_element``, so an item without such an ``h3`` raises.
    """
    # contains() matches on a substring of the class attribute, so items
    # carrying additional classes are still selected.
    vid = driver.find_elements(
        By.XPATH,
        "//div[contains(@class,'directory-item directory-item-feature-toggled')]",
    )
    for item in vid:
        # Use the same By-based call style as find_elements above; the
        # find_element_by_xpath helper is not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()
我建议您使用基于预期条件(expected conditions)的显式等待,而不是硬编码的暂停。
这样,您的代码将是:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep
# Location of the ChromeDriver binary. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
# One locator shared by the explicit wait and the lookup so they cannot drift.
item_locator = (
    By.XPATH,
    "//div[contains(@class,'directory-item directory-item-feature-toggled')]",
)
driver = webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 20)
driver.get(url)
# Block (up to the 20 s configured above) until a matching item is visible.
wait.until(EC.visibility_of_element_located(item_locator))
sleep(0.3)  # short delay left in so that more than just the first item is visible


def searchplace():
    """Print the ``h3`` text under ``company-info`` for every directory item.

    Reads the module-level ``driver`` and ``item_locator``. Each item is
    looked up with ``find_element``, so an item without such an ``h3`` raises.
    """
    vid = driver.find_elements(*item_locator)
    for item in vid:
        # Use the By-based call style; the find_element_by_xpath helper is
        # not available in current Selenium.
        title = item.find_element(
            By.XPATH, ".//div[@class='company-info']//h3"
        ).text
        print(title)


searchplace()