如何使用 Python/Selenium 获取 table 及其元素
How to get a table and its elements with Python/Selenium
我正在尝试获取以下 URL 页面的 table 中的所有价格:
https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01
table 的元素是带有对应价格的日期。
这是我尝试获取 table 的方式:
# Attempt 1: look the calendar grid up by its class attribute.
# NOTE(review): the value passed holds two space-separated class names;
# By.CLASS_NAME presumably expects a single class name — confirm against the
# Selenium locator documentation. `table` is also not defined before this line
# in the snippet as shown.
week = table.find_element(By.CLASS_NAME, "BpkCalendarGrid_bpk-calendar-grid__NzBmM month-view-grid--data-loaded")
# Attempt 2: look the table up by an XPath copied from the browser inspector
# (the string below is a placeholder for that XPath).
# NOTE(review): the call is missing its closing parenthesis as written.
table = driver.find_element(by=By.XPATH, value="Xpath copied using Crhome inspector"
但是我无法得到它。
从这个 table 中提取所有价格的正确方法是什么?谢谢!
您可以结合使用 selenium 和 pandas DataFrame 来获取 table 数据,也就是所有价格。
页面中存在两个包含价格数据的 table。
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# pandas.read_html expects a path, URL or file-like object; passing raw HTML
# text directly is deprecated in recent pandas, so the markup is wrapped in
# StringIO before parsing.
from io import StringIO

# Configure Chrome: start maximized and keep the browser window open after the
# script finishes ("detach").
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 s per condition) reused for every lookup.
wait = WebDriverWait(driver, 20)

# Capture the rendered markup of the first and second <table> once each one
# is visible on the page.
table = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[1]'))).get_attribute("outerHTML")
table_2 = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[2]'))).get_attribute("outerHTML")

# Accept the cookie banner once its button becomes clickable.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()

# read_html returns a list of DataFrames; each captured snippet holds exactly
# one <table>, hence index 0.
df1 = pd.read_html(StringIO(table))[0]
print(df1)
df2 = pd.read_html(StringIO(table_2))[0]
print(df2)
输出:
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
替代方案(Table-1):用同样的方式,你也可以从第二个 table 中提取价格。
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: start maximized and keep the browser window open after the
# script finishes ("detach").
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 s per condition) reused for every lookup.
wait = WebDriverWait(driver, 20)

# Accept the cookie banner first, then collect every <td> of the first table.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()
table = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '(//table)[1]/tbody/tr/td')))

for cell in table:
    # find_elements returns an empty list when nothing matches, so a cell
    # without a price <div> is skipped instead of raising
    # NoSuchElementException and aborting the loop.
    price_nodes = cell.find_elements(By.XPATH, './/div[@class="price"]')
    if not price_nodes:
        continue
    # Strip the currency sign and surrounding whitespace, leaving the number.
    price = price_nodes[0].text.replace('€', '').strip()
    print(price)
输出:
39
30
32
37
34
35
34
34
28
27
26
26
46
35
35
40
36
52
29
34
37
39
39
30
50
44
50
52
38
36
58
我正在尝试获取以下 URL 页面的 table 中的所有价格:
https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01
table 的元素是带有对应价格的日期。
这是我尝试获取 table 的方式:
# Attempt 1: look the calendar grid up by its class attribute.
# NOTE(review): the value passed holds two space-separated class names;
# By.CLASS_NAME presumably expects a single class name — confirm against the
# Selenium locator documentation. `table` is also not defined before this line
# in the snippet as shown.
week = table.find_element(By.CLASS_NAME, "BpkCalendarGrid_bpk-calendar-grid__NzBmM month-view-grid--data-loaded")
# Attempt 2: look the table up by an XPath copied from the browser inspector
# (the string below is a placeholder for that XPath).
# NOTE(review): the call is missing its closing parenthesis as written.
table = driver.find_element(by=By.XPATH, value="Xpath copied using Crhome inspector"
但是我无法得到它。 从这个 table 中提取所有价格的正确方法是什么?谢谢!
您可以结合使用 selenium 和 pandas DataFrame 来获取 table 数据,也就是所有价格。
页面中存在两个包含价格数据的 table。
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# pandas.read_html expects a path, URL or file-like object; passing raw HTML
# text directly is deprecated in recent pandas, so the markup is wrapped in
# StringIO before parsing.
from io import StringIO

# Configure Chrome: start maximized and keep the browser window open after the
# script finishes ("detach").
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 s per condition) reused for every lookup.
wait = WebDriverWait(driver, 20)

# Capture the rendered markup of the first and second <table> once each one
# is visible on the page.
table = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[1]'))).get_attribute("outerHTML")
table_2 = wait.until(EC.visibility_of_element_located((By.XPATH, '(//table)[2]'))).get_attribute("outerHTML")

# Accept the cookie banner once its button becomes clickable.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()

# read_html returns a list of DataFrames; each captured snippet holds exactly
# one <table>, hence index 0.
df1 = pd.read_html(StringIO(table))[0]
print(df1)
df2 = pd.read_html(StringIO(table_2))[0]
print(df2)
输出:
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
lun mar mer gio ven sab dom
0 1€ 40 2€ 28 3€ 32 4€ 37 5€ 34 6€ 35 7€ 34
1 8€ 34 9€ 28 10€ 27 11€ 26 12€ 26 13€ 46 14€ 35
2 15€ 35 16€ 40 17€ 36 18€ 51 19€ 28 20€ 33 21€ 36
3 22€ 38 23€ 38 24€ 30 25€ 50 26€ 43 27€ 50 28€ 51
4 29€ 38 30€ 36 31€ 58 1- 2- 3- 4-
5 5- 6- 7- 8- 9- 10- 11-
替代方案(Table-1):用同样的方式,你也可以从第二个 table 中提取价格。
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: start maximized and keep the browser window open after the
# script finishes ("detach").
option = webdriver.ChromeOptions()
option.add_argument("start-maximized")
option.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=option)
driver.get('https://www.skyscanner.it/trasporti/voli/bud/rome/?adults=1&adultsv2=1&cabinclass=economy&children=0&childrenv2=&destinationentityid=27539793&inboundaltsenabled=true&infants=0&iym=2208&originentityid=27539604&outboundaltsenabled=true&oym=2208&preferdirects=false&ref=home&rtn=1&selectedoday=01&selectediday=01')

# One explicit wait (up to 20 s per condition) reused for every lookup.
wait = WebDriverWait(driver, 20)

# Accept the cookie banner first, then collect every <td> of the first table.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="acceptCookieButton"]'))).click()
table = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '(//table)[1]/tbody/tr/td')))

for cell in table:
    # find_elements returns an empty list when nothing matches, so a cell
    # without a price <div> is skipped instead of raising
    # NoSuchElementException and aborting the loop.
    price_nodes = cell.find_elements(By.XPATH, './/div[@class="price"]')
    if not price_nodes:
        continue
    # Strip the currency sign and surrounding whitespace, leaving the number.
    price = price_nodes[0].text.replace('€', '').strip()
    print(price)
输出:
39
30
32
37
34
35
34
34
28
27
26
26
46
35
35
40
36
52
29
34
37
39
39
30
50
44
50
52
38
36
58