Selenium,Python 动态表格
Selenium, python dynamic table
我正在用 Selenium 编写一个机器人,用来抓取巴西邮政(Correios)各营业网点的全部信息。我已经实现了遍历所有州和市并依次在下拉框中选中它们,现在需要做的是点击每一个网点并获取其信息,例如:"telefone"(电话)、"endereço"(地址)、"HORÁRIO DE ATENDIMENTO"(营业时间)。我该怎样做到这一点?以下是我目前的代码:
# -*- coding: UTF-8 -*-
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.select import Select
import time
import urllib
import urllib.request
from bs4 import BeautifulSoup
import request
def make_soup(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built with the ``'html.parser'`` backend.
    """
    # The context manager closes the HTTP response once parsing has
    # consumed it, instead of leaving the connection open.
    with urllib.request.urlopen(url) as thepage:
        soupdata = BeautifulSoup(thepage, 'html.parser')
    return soupdata
# Drive Chrome through every state / municipality combination offered by the
# two drop-downs on the agency page and print the text of every <span> found
# inside the 'dadosAgencia' tables for each combination.
chrome_path = r"D:\Users\gbenites\Desktop\Inovação\arquivos py\WebDriver\chromedriver.exe"
url = 'http://www2.correios.com.br/sistemas/agencias/'
driver = webdriver.Chrome(chrome_path)
driver.get(url)
#WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'municipioAgencia')))
driver.maximize_window()
time.sleep(3)

estadoList = Select(driver.find_element_by_name('estadoAgencia'))
# The state loop starts at option index 3 (the earlier options are skipped).
for index in range(3, len(estadoList.options)):
    # The <select> is looked up again on every pass rather than reusing
    # estadoList -- presumably to avoid stale element references after the
    # page updates; verify against the live page.
    select = Select(driver.find_element_by_name('estadoAgencia'))
    print("selecting Estado: ", select.options[index].text)
    print("selecting Index: ", index)
    select.select_by_index(index)
    time.sleep(2)

    municípioList = Select(driver.find_element_by_name('municipioAgencia'))
    # Option 0 of the municipality list is skipped.
    for index_b in range(1, len(municípioList.options)):
        select_b = Select(driver.find_element_by_name('municipioAgencia'))
        print("...selecting Municipio", select_b.options[index_b].text.replace("'", ""))
        print("...selecting Municipio Index: ", index_b)
        select_b.select_by_index(index_b)
        time.sleep(3)

        # Block (up to 10 s) until the result table cells are in the DOM
        # before collecting the spans that hold the agency details.
        WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[@class = 'dadosAgencia']//td")))
        info_list = driver.find_elements_by_xpath("//table[@class = 'dadosAgencia']//span")
        print(range(len(info_list)))
        # Iterate over the elements themselves: looping over
        # range(len(info_list)) yields ints, which have no get_attribute().
        for record in info_list:
            print(record.get_attribute('innerText'))
这就是我的输出:
selecting Estado: AMAPÁ
selecting Index: 3
...selecting Municipio AMAPA
...selecting Municipio Index: 1
range(0, 27)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
试试这个 xPath:
//table[@class = 'dadosAgencia']//tr
会是这样的:
# Wait up to 10 seconds for the rows of the 'dadosAgencia' table(s) to be
# present, and keep the located row elements.
row_locator = (By.XPATH, "//table[@class = 'dadosAgencia']//tr")
elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(row_locator)
)
它为您提供了所有找到的元素的列表。要打印每个元素的文本,您可以使用:
# Print the visible text of each located row.
for row_element in elements:
    print(row_element.text)
注意:您必须添加一些导入:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
为 table 尝试此 XPath:
//table[@class = 'dadosAgencia']
首先,找到所有 table 元素:
# Wait up to 10 seconds for the 'dadosAgencia' tables to be present and
# keep the located table elements.
table_locator = (By.XPATH, "//table[@class = 'dadosAgencia']")
table_elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(table_locator)
)
遍历 table:
# For every table, look up its rows relative to that table and print each
# row's visible text.
for table in table_elements:
    table_rows = table.find_elements_by_xpath(".//tr")
    for table_row in table_rows:
        print(table_row.text)
我正在用 Selenium 编写一个机器人,用来抓取巴西邮政(Correios)各营业网点的全部信息。我已经实现了遍历所有州和市并依次在下拉框中选中它们,现在需要做的是点击每一个网点并获取其信息,例如:"telefone"(电话)、"endereço"(地址)、"HORÁRIO DE ATENDIMENTO"(营业时间)。我该怎样做到这一点?以下是我目前的代码:
# -*- coding: UTF-8 -*-
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.select import Select
import time
import urllib
import urllib.request
from bs4 import BeautifulSoup
import request
def make_soup(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built with the ``'html.parser'`` backend.
    """
    # The context manager closes the HTTP response once parsing has
    # consumed it, instead of leaving the connection open.
    with urllib.request.urlopen(url) as thepage:
        soupdata = BeautifulSoup(thepage, 'html.parser')
    return soupdata
# Drive Chrome through every state / municipality combination offered by the
# two drop-downs on the agency page and print the text of every <span> found
# inside the 'dadosAgencia' tables for each combination.
chrome_path = r"D:\Users\gbenites\Desktop\Inovação\arquivos py\WebDriver\chromedriver.exe"
url = 'http://www2.correios.com.br/sistemas/agencias/'
driver = webdriver.Chrome(chrome_path)
driver.get(url)
#WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'municipioAgencia')))
driver.maximize_window()
time.sleep(3)

estadoList = Select(driver.find_element_by_name('estadoAgencia'))
# The state loop starts at option index 3 (the earlier options are skipped).
for index in range(3, len(estadoList.options)):
    # The <select> is looked up again on every pass rather than reusing
    # estadoList -- presumably to avoid stale element references after the
    # page updates; verify against the live page.
    select = Select(driver.find_element_by_name('estadoAgencia'))
    print("selecting Estado: ", select.options[index].text)
    print("selecting Index: ", index)
    select.select_by_index(index)
    time.sleep(2)

    municípioList = Select(driver.find_element_by_name('municipioAgencia'))
    # Option 0 of the municipality list is skipped.
    for index_b in range(1, len(municípioList.options)):
        select_b = Select(driver.find_element_by_name('municipioAgencia'))
        print("...selecting Municipio", select_b.options[index_b].text.replace("'", ""))
        print("...selecting Municipio Index: ", index_b)
        select_b.select_by_index(index_b)
        time.sleep(3)

        # Block (up to 10 s) until the result table cells are in the DOM
        # before collecting the spans that hold the agency details.
        WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//table[@class = 'dadosAgencia']//td")))
        info_list = driver.find_elements_by_xpath("//table[@class = 'dadosAgencia']//span")
        print(range(len(info_list)))
        # Iterate over the elements themselves: looping over
        # range(len(info_list)) yields ints, which have no get_attribute().
        for record in info_list:
            print(record.get_attribute('innerText'))
这就是我的输出:
selecting Estado: AMAPÁ
selecting Index: 3
...selecting Municipio AMAPA
...selecting Municipio Index: 1
range(0, 27)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
试试这个 xPath:
//table[@class = 'dadosAgencia']//tr
会是这样的:
# Wait up to 10 seconds for the rows of the 'dadosAgencia' table(s) to be
# present, and keep the located row elements.
row_locator = (By.XPATH, "//table[@class = 'dadosAgencia']//tr")
elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(row_locator)
)
它为您提供了所有找到的元素的列表。要打印每个元素的文本,您可以使用:
# Print the visible text of each located row.
for row_element in elements:
    print(row_element.text)
注意:您必须添加一些导入:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
为 table 尝试此 XPath:
//table[@class = 'dadosAgencia']
首先,找到所有 table 元素:
# Wait up to 10 seconds for the 'dadosAgencia' tables to be present and
# keep the located table elements.
table_locator = (By.XPATH, "//table[@class = 'dadosAgencia']")
table_elements = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(table_locator)
)
遍历 table:
# For every table, look up its rows relative to that table and print each
# row's visible text.
for table in table_elements:
    table_rows = table.find_elements_by_xpath(".//tr")
    for table_row in table_rows:
        print(table_row.text)