如何使用 Python/Selenium 获取 table 及其元素

How to get a table and its elements with Python/Selenium

我正在尝试获取以下 URL 页面中 table 里的所有价格: https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01 table 中的元素是带有相应价格的日期。

这是我尝试获取 table 的方式:

# Attempt 1
# NOTE(review): `table` is used here before it is assigned anywhere in this
# snippet (it is first assigned in Attempt 2 below).
# NOTE(review): the locator value contains a space, i.e. two class names;
# By.CLASS_NAME is meant for a single class name, which is presumably why
# this lookup fails -- confirm against the Selenium locator documentation.
week = table.find_element(By.CLASS_NAME, "BpkCalendarGrid_bpk-calendar-grid__NzBmM month-view-grid--data-loaded")

# Attempt 2
# NOTE(review): the closing parenthesis of this call is missing, so the line
# is a syntax error as written; the XPath value is only a placeholder.
table = driver.find_element(by=By.XPATH, value="Xpath copied using Crhome inspector"

但是我无法得到它。 从这个 table 中提取所有价格的正确方法是什么?谢谢!

您可以使用 selenium 配合 pandas DataFrame 来获取 table 数据,也就是所有价格(prices)。

页面中存在两个 table:
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


# Scrape both calendar tables from the Skyscanner month view and print each
# one as a pandas DataFrame.
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")

# Keep the Chrome window open after the script finishes.
option.add_experimental_option("detach", True)

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 seconds per condition) shared by every lookup.
wait = WebDriverWait(driver, 20)

# Capture the raw HTML of the first and second <table> once each is visible.
table = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[1]'))).get_attribute("outerHTML")
table_2 = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[2]'))).get_attribute("outerHTML")
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()

# Passing literal HTML to read_html is deprecated since pandas 2.1, so the
# markup is wrapped in a file-like StringIO object. read_html returns a list
# of DataFrames; each snippet holds exactly one table, hence [0].
df1 = pd.read_html(StringIO(table))[0]
print(df1)

df2 = pd.read_html(StringIO(table_2))[0]
print(df2)

输出:

  lun     mar     mer     gio     ven     sab     dom
0   1€ 40   2€ 28   3€ 32   4€ 37   5€ 34   6€ 35   7€ 34
1   8€ 34   9€ 28  10€ 27  11€ 26  12€ 26  13€ 46  14€ 35
2  15€ 35  16€ 40  17€ 36  18€ 51  19€ 28  20€ 33  21€ 36
3  22€ 38  23€ 38  24€ 30  25€ 50  26€ 43  27€ 50  28€ 51
4  29€ 38  30€ 36  31€ 58      1-      2-      3-      4-
5      5-      6-      7-      8-      9-     10-     11-
      lun     mar     mer     gio     ven     sab     dom
0   1€ 40   2€ 28   3€ 32   4€ 37   5€ 34   6€ 35   7€ 34
1   8€ 34   9€ 28  10€ 27  11€ 26  12€ 26  13€ 46  14€ 35
2  15€ 35  16€ 40  17€ 36  18€ 51  19€ 28  20€ 33  21€ 36
3  22€ 38  23€ 38  24€ 30  25€ 50  26€ 43  27€ 50  28€ 51
4  29€ 38  30€ 36  31€ 58      1-      2-      3-      4-
5      5-      6-      7-      8-      9-     10-     11-

webdriverManager

替代方案(Table-1):用同样的方式,您也可以从第二个 table 中提取价格。

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.chrome.options import Options

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC


# Print the price shown in each cell of the first calendar table.
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")

# Keep the Chrome window open after the script finishes.
option.add_experimental_option("detach", True)

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 seconds per condition) shared by every lookup.
wait = WebDriverWait(driver, 20)

# Dismiss the cookie banner before reading the table.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()

# Every <td> in the body of the first table.
cells = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '(//table)[1]/tbody/tr/td')))

for cell in cells:
    # find_elements returns an empty list instead of raising
    # NoSuchElementException, so a cell without a price element is skipped
    # rather than aborting the whole loop.
    price_nodes = cell.find_elements(By.XPATH, './/div[@class="price"]')
    if not price_nodes:
        continue
    # Drop the currency sign and surrounding whitespace, keeping the number.
    price = price_nodes[0].text.replace('€', '').strip()
    print(price)

输出:

39
30
32
37
34
35
34
34
28
27
26
26
46
35
35
40
36
52
29
34
37
39
39
30
50
44
50
52
38
36
58