如何使用 Python 和 Selenium 从网站上提取图书价格
How to extract the price of the book from website using Selenium Python
我正试图从这个链接中获取这本书的价格(我的项目使用的是 Google Colab)
这是我编写的代码:
import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
from selenium.webdriver.common.by import By
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',options=options)
wd.get("https://www.amazon.fr/dp/000101742X")
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
product_title = wd.find_element(By.CLASS_NAME, 'a-size-extra-large')
print(product_title.text)
product_image_url = wd.find_element(By.ID, 'imgBlkFront')
print(product_image_url.get_attribute('src'))
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
当我运行这段代码时,出现了如下错误:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".a-size-base a-color-price a-color-price"}
(Session info: headless chrome=99.0.4844.84)
Stacktrace:
#0 0x561c75bf5b63 <unknown>
#1 0x561c758ebc93 <unknown>
#2 0x561c75921ba0 <unknown>
#3 0x561c75921dc1 <unknown>
#4 0x561c75956267 <unknown>
#5 0x561c7593f33d <unknown>
#6 0x561c75953fac <unknown>
#7 0x561c7593f683 <unknown>
#8 0x561c75915c7c <unknown>
#9 0x561c75917145 <unknown>
#10 0x561c75c19fe0 <unknown>
#11 0x561c75c2b17f <unknown>
#12 0x561c75c2af19 <unknown>
#13 0x561c75c2b6e2 <unknown>
#14 0x561c75c643cb <unknown>
#15 0x561c75c2b941 <unknown>
#16 0x561c75c0ed13 <unknown>
#17 0x561c75c35098 <unknown>
#18 0x561c75c3522a <unknown>
#19 0x561c75c4e711 <unknown>
#20 0x7f37b1a316db <unknown>
我搜索了一下,也尝试了其他几种方法(以下各段是相互独立的尝试):
wd.implicitly_wait(20)
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
###########################################################
try:
product_price = WebDriverWait(wd, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, 'a-size-base a-color-price a-color-price'))
) # wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
finally:
wd.quit()
#################################################
button = wd.find_element(By.CLASS_NAME, 'a-button a-button-selected a-spacing-mini a-button-toggle format')
button.click()
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
所有这些方法都给我错误。有人可以帮忙吗?谢谢。
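原代码失败的原因是:By.CLASS_NAME 只接受单个类名。传入以空格分隔的多个类名时,Selenium 会把它转换成 CSS 选择器 ".a-size-base a-color-price a-color-price"(见上面的错误信息),这会被当作后代选择器解析,因此匹配不到任何元素。
要打印价格文本(即 23,72),您需要使用 WebDriverWait 等待 visibility_of_element_located() 条件成立,并可以使用以下定位策略: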
使用 XPATH 和 text 属性:
driver.get("https://www.amazon.fr/dp/000101742X")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='accept']"))).click()
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[text()='Partition']//following::span[1]"))).text.split(" ")[3])
driver.quit()
控制台输出:
23,72
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
相关讨论可参见:(原文此处的链接已丢失)
我正试图从这个链接中获取这本书的价格(我的项目使用的是 Google Colab)
这是我编写的代码:
import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
from selenium.webdriver.common.by import By
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',options=options)
wd.get("https://www.amazon.fr/dp/000101742X")
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
product_title = wd.find_element(By.CLASS_NAME, 'a-size-extra-large')
print(product_title.text)
product_image_url = wd.find_element(By.ID, 'imgBlkFront')
print(product_image_url.get_attribute('src'))
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
当我运行这段代码时,出现了如下错误:
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".a-size-base a-color-price a-color-price"}
(Session info: headless chrome=99.0.4844.84)
Stacktrace:
#0 0x561c75bf5b63 <unknown>
#1 0x561c758ebc93 <unknown>
#2 0x561c75921ba0 <unknown>
#3 0x561c75921dc1 <unknown>
#4 0x561c75956267 <unknown>
#5 0x561c7593f33d <unknown>
#6 0x561c75953fac <unknown>
#7 0x561c7593f683 <unknown>
#8 0x561c75915c7c <unknown>
#9 0x561c75917145 <unknown>
#10 0x561c75c19fe0 <unknown>
#11 0x561c75c2b17f <unknown>
#12 0x561c75c2af19 <unknown>
#13 0x561c75c2b6e2 <unknown>
#14 0x561c75c643cb <unknown>
#15 0x561c75c2b941 <unknown>
#16 0x561c75c0ed13 <unknown>
#17 0x561c75c35098 <unknown>
#18 0x561c75c3522a <unknown>
#19 0x561c75c4e711 <unknown>
#20 0x7f37b1a316db <unknown>
我搜索了一下,也尝试了其他几种方法(以下各段是相互独立的尝试):
wd.implicitly_wait(20)
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
###########################################################
try:
product_price = WebDriverWait(wd, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, 'a-size-base a-color-price a-color-price'))
) # wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
finally:
wd.quit()
#################################################
button = wd.find_element(By.CLASS_NAME, 'a-button a-button-selected a-spacing-mini a-button-toggle format')
button.click()
product_price = wd.find_element(By.CLASS_NAME, 'a-size-base a-color-price a-color-price')
print(product_price.text)
所有这些方法都给我错误。有人可以帮忙吗?谢谢。
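原代码失败的原因是:By.CLASS_NAME 只接受单个类名。传入以空格分隔的多个类名时,Selenium 会把它转换成 CSS 选择器 ".a-size-base a-color-price a-color-price"(见上面的错误信息),这会被当作后代选择器解析,因此匹配不到任何元素。
要打印价格文本(即 23,72),您需要使用 WebDriverWait 等待 visibility_of_element_located() 条件成立,并可以使用以下定位策略: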
使用 XPATH 和 text 属性:
driver.get("https://www.amazon.fr/dp/000101742X")
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='accept']"))).click()
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[text()='Partition']//following::span[1]"))).text.split(" ")[3])
driver.quit()
控制台输出:
23,72
注意:您必须添加以下导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
相关讨论可参见:(原文此处的链接已丢失)