如何使用 selenium python 从 table 捕获数据?
How to capture data from table using selenium python?
我需要从以下 link 中捕获 table:
https://fr.tradingeconomics.com/country-list/rating
我尝试了以下代码,但没有得到任何响应
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from webdriver_manager.chrome import ChromeDriverManager
# Asker's attempt: open the ratings page in Chrome and try to build one dict per table row.
# NOTE(review): the original indentation was lost in this paste, so the intended loop
# nesting (in particular where countries.append / print belong) cannot be determined here.
# Start a Chrome session using the driver returned by ChromeDriverManager().install().
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://fr.tradingeconomics.com/country-list/rating"
driver.get(my_url)
#actions = ActionChains(driver)
# NOTE(review): "table table-hover" contains a space, i.e. two class names; By.CLASS_NAME
# is meant for a single class name, so this wait probably never matches -- confirm.
WebDriverWait(driver, 50).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "table table-hover")))
# Searches from the driver, so this collects every <tr> in the page, not only the ratings table's.
trs = driver.find_elements(By.TAG_NAME, "tr")
print(len(trs))
countries = []
for tr in trs:
country = {}
# A row that contains no <td> yields an empty list here, so the inner loop does nothing for it.
items= tr.find_elements(By.TAG_NAME, "td")
for item in items:
# NOTE(review): each XPath below starts with "//" and hard-codes tr[2], so it is resolved
# against the whole document rather than relative to `item`; every iteration would read
# the same three cells -- confirm.
# NOTE(review): 'text' is passed to get_attribute; 'innerText' is likely what was
# intended for reading a cell's text -- confirm.
country_name = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[1]")
country['country_name'] = country_name.get_attribute('text')
s_and_p = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[2]")
country['S&P']= s_and_p.get_attribute("text")
moodys = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[3]")
country['Moody\'s'] = moodys.get_attribute("text")
countries.append(country)
print(country)
如有任何帮助,我们将不胜感激。谢谢。
由于 url 不是动态的,因此您也可以仅使用 pandas 轻松获取 table data。
import pandas as pd
# Download the page and let pandas parse every HTML <table> it finds into a DataFrame.
url = 'https://fr.tradingeconomics.com/country-list/rating'
tables = pd.read_html(url)
# Keep only the first parsed table and display it.
df = tables[0]
print(df)
输出:
Unnamed: 0 S&P Moody's Fitch DBRS TE
0 Albanie B+ B1 NaN NaN 35.0
1 Andorre BBB Baa2 BBB+ NaN 62.0
2 Angola B- B3 B- NaN 23.0
3 Argentine CCC+ Ca CCC CCC 15.0
4 Arménie B+ Ba3 B+ NaN 14.0
.. ... ... ... ... ... ...
151 Uruguay BBB Baa2 BBB- BBB (low) 55.0
152 Ouzbékistan BB- B1 BB- NaN 38.0
153 Venezuela NaN C RD NaN 11.0
154 Vietnam BB Ba3 BB NaN 43.0
155 Zambie SD Ca RD NaN 30.0
[156 rows x 6 columns]
你必须使用 innerText 而不是 text,而且第一个 tr 没有 td,这就是你没有得到任何响应的原因。
Selenium 解决方案:
代码:
# Selenium solution: wait for the ratings table, then print the text of every data cell.
# `driver` must already be a live WebDriver instance; it is not created in this snippet.
driver.maximize_window()
# One explicit wait object is created and actually used below (50 s is the timeout
# that was effectively applied to the table lookup).
wait = WebDriverWait(driver, 50)
my_url = "https://fr.tradingeconomics.com/country-list/rating"
driver.get(my_url)
# Block until the table element is present and visible.
table = wait.until(
    EC.visibility_of_element_located((By.XPATH, "//table[@class='table table-hover']"))
)
# Search relative to the table (leading ".") so rows elsewhere in the page are ignored.
trs = table.find_elements(By.XPATH, ".//tr")
print(len(trs))
countries = []  # one list of cell texts per data row
for tr in trs:
    tds = tr.find_elements(By.XPATH, ".//td")
    if not tds:
        # Rows without any <td> (such as the header row) carry no data cells.
        continue
    # Read each cell's text through the innerText property.
    row = [td.get_attribute('innerText') for td in tds]
    countries.append(row)
    for cell_text in row:
        print(cell_text)
导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
我需要从以下 link 中捕获 table:
https://fr.tradingeconomics.com/country-list/rating
我尝试了以下代码,但没有得到任何响应
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
from webdriver_manager.chrome import ChromeDriverManager
# Asker's attempt: open the ratings page in Chrome and try to build one dict per table row.
# NOTE(review): the original indentation was lost in this paste, so the intended loop
# nesting (in particular where countries.append / print belong) cannot be determined here.
# Start a Chrome session using the driver returned by ChromeDriverManager().install().
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
my_url= "https://fr.tradingeconomics.com/country-list/rating"
driver.get(my_url)
#actions = ActionChains(driver)
# NOTE(review): "table table-hover" contains a space, i.e. two class names; By.CLASS_NAME
# is meant for a single class name, so this wait probably never matches -- confirm.
WebDriverWait(driver, 50).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "table table-hover")))
# Searches from the driver, so this collects every <tr> in the page, not only the ratings table's.
trs = driver.find_elements(By.TAG_NAME, "tr")
print(len(trs))
countries = []
for tr in trs:
country = {}
# A row that contains no <td> yields an empty list here, so the inner loop does nothing for it.
items= tr.find_elements(By.TAG_NAME, "td")
for item in items:
# NOTE(review): each XPath below starts with "//" and hard-codes tr[2], so it is resolved
# against the whole document rather than relative to `item`; every iteration would read
# the same three cells -- confirm.
# NOTE(review): 'text' is passed to get_attribute; 'innerText' is likely what was
# intended for reading a cell's text -- confirm.
country_name = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[1]")
country['country_name'] = country_name.get_attribute('text')
s_and_p = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[2]")
country['S&P']= s_and_p.get_attribute("text")
moodys = item.find_element(By.XPATH, "//*[@id='ctl00_ContentPlaceHolder1_ctl01_GridView1']/tbody/tr[2]/td[3]")
country['Moody\'s'] = moodys.get_attribute("text")
countries.append(country)
print(country)
如有任何帮助,我们将不胜感激。谢谢。
由于 url 不是动态的,因此您也可以仅使用 pandas 轻松获取 table data。
import pandas as pd
# Download the page and let pandas parse every HTML <table> it finds into a DataFrame.
url = 'https://fr.tradingeconomics.com/country-list/rating'
tables = pd.read_html(url)
# Keep only the first parsed table and display it.
df = tables[0]
print(df)
输出:
Unnamed: 0 S&P Moody's Fitch DBRS TE
0 Albanie B+ B1 NaN NaN 35.0
1 Andorre BBB Baa2 BBB+ NaN 62.0
2 Angola B- B3 B- NaN 23.0
3 Argentine CCC+ Ca CCC CCC 15.0
4 Arménie B+ Ba3 B+ NaN 14.0
.. ... ... ... ... ... ...
151 Uruguay BBB Baa2 BBB- BBB (low) 55.0
152 Ouzbékistan BB- B1 BB- NaN 38.0
153 Venezuela NaN C RD NaN 11.0
154 Vietnam BB Ba3 BB NaN 43.0
155 Zambie SD Ca RD NaN 30.0
[156 rows x 6 columns]
你必须使用 innerText 而不是 text,而且第一个 tr 没有 td,这就是你没有得到任何响应的原因。
Selenium 解决方案:
代码:
# Selenium solution: wait for the ratings table, then print the text of every data cell.
# `driver` must already be a live WebDriver instance; it is not created in this snippet.
driver.maximize_window()
# One explicit wait object is created and actually used below (50 s is the timeout
# that was effectively applied to the table lookup).
wait = WebDriverWait(driver, 50)
my_url = "https://fr.tradingeconomics.com/country-list/rating"
driver.get(my_url)
# Block until the table element is present and visible.
table = wait.until(
    EC.visibility_of_element_located((By.XPATH, "//table[@class='table table-hover']"))
)
# Search relative to the table (leading ".") so rows elsewhere in the page are ignored.
trs = table.find_elements(By.XPATH, ".//tr")
print(len(trs))
countries = []  # one list of cell texts per data row
for tr in trs:
    tds = tr.find_elements(By.XPATH, ".//td")
    if not tds:
        # Rows without any <td> (such as the header row) carry no data cells.
        continue
    # Read each cell's text through the innerText property.
    row = [td.get_attribute('innerText') for td in tds]
    countries.append(row)
    for cell_text in row:
        print(cell_text)
导入:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC