使用 selenium 的 xpath 问题

Question

我正在尝试抓取标题（title），但程序报错，提示我的 xpath 是错误的。

from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep

# Location of the ChromeDriver executable handed to Selenium below.
# NOTE(review): this is not a raw string; "\P" and "\c" are not recognized
# escape sequences, so Python currently keeps the backslashes as written.
PATH="C:\Program Files (x86)\chromedriver.exe"
# Page that the script loads.
url='https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
# Start Chrome using the driver executable at PATH, then open the page.
driver =webdriver.Chrome(PATH)
driver.get(url)
# Fixed two-second pause after loading the page, before any element lookup.
sleep(2)
def searchplace():
    """Print the <h3> text found under each <div class='row'> on the page.

    Reads the module-level ``driver``; returns nothing.
    """
    # Every div whose class attribute is exactly 'row'.
    vid = driver.find_elements(By.XPATH, "//div[@class='row']")
    for item in vid:
        # The leading "./" makes the search relative to the current row.
        # NOTE(review): presumably this raises for rows that contain no
        # 'company-info' div -- confirm against the page markup.
        title=item.find_element_by_xpath(".//div[@class='company-info']//h3").text
        print(title)
searchplace()

Answer 1

下面的 xpath 对我有效，请试一试。如果需要，还可以进一步缩短 xpath 的长度。

# XPath matching every div whose class attribute is exactly this value.
block_xpath = "//div[@class='directory-item directory-item-feature-toggled exhibitor-category']"
# Path, relative to one matched div, down to the <h3> under 'company-info'.
heading_xpath = "div[@class='row']/div[2]/div//div[@class='company-info']/div/a/h3"

vid = driver.find_elements(By.XPATH, block_xpath)
for item in vid:
    # Look up the heading inside the current block and print its text.
    title = item.find_element(By.XPATH, heading_xpath)
    print(title.text)

Answer 2

您在这里使用了错误的定位器。
每个 vid 块都可以通过以下 XPath 定位：//div[contains(@class,'directory-item directory-item-feature-toggled')]。
这样您的代码将如下所示：

from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep

# Location of the ChromeDriver executable. A raw string keeps the Windows
# backslashes literal without relying on unrecognized escape sequences
# ("\P", "\c"), which newer Python versions warn about.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Page that the script loads.
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
# Start Chrome using the driver executable at PATH, then open the page.
driver = webdriver.Chrome(PATH)
driver.get(url)
# Fixed two-second pause after loading the page, before any element lookup.
sleep(2)
def searchplace():
    """Print the <h3> text inside the 'company-info' div of each matched block.

    Reads the module-level ``driver``; returns nothing.
    """
    # Match on a substring of the class attribute so blocks that carry
    # additional classes after these two are still selected.
    vid = driver.find_elements(By.XPATH, "//div[contains(@class,'directory-item directory-item-feature-toggled')]")
    for item in vid:
        # Use find_element(By.XPATH, ...) rather than the deprecated
        # find_element_by_xpath helper, which current Selenium 4 releases no
        # longer provide; this also matches the find_elements call above.
        # The leading "./" keeps the search inside the current block.
        title = item.find_element(By.XPATH, ".//div[@class='company-info']//h3").text
        print(title)
searchplace()

我建议您使用基于预期条件（expected conditions）的显式等待，而不是硬编码的暂停。
有了它，您的代码将是：

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from time import sleep

# Location of the ChromeDriver executable. A raw string keeps the Windows
# backslashes literal without relying on unrecognized escape sequences
# ("\P", "\c"), which newer Python versions warn about.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Page that the script loads.
url = 'https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html'
driver = webdriver.Chrome(PATH)
# Explicit wait bound to this driver with a 20-second timeout.
wait = WebDriverWait(driver, 20)

driver.get(url)
# Block until an element matching the locator is visible, instead of pausing
# for a fixed amount of time.
wait.until(EC.visibility_of_element_located((By.XPATH, "//div[contains(@class,'directory-item directory-item-feature-toggled')]")))

# Short extra delay so that more than just the first item has become visible.
sleep(0.3)
def searchplace():
    """Print the <h3> text inside the 'company-info' div of each matched block.

    Reads the module-level ``driver``; returns nothing.
    """
    # Match on a substring of the class attribute so blocks that carry
    # additional classes after these two are still selected.
    vid = driver.find_elements(By.XPATH, "//div[contains(@class,'directory-item directory-item-feature-toggled')]")
    for item in vid:
        # Use find_element(By.XPATH, ...) rather than the deprecated
        # find_element_by_xpath helper, which current Selenium 4 releases no
        # longer provide; this also matches the find_elements call above.
        # The leading "./" keeps the search inside the current block.
        title = item.find_element(By.XPATH, ".//div[@class='company-info']//h3").text
        print(title)
searchplace()

使用 selenium 的 xpath 问题

xpath issue using selenium

python

selenium

xpath

web-scraping