Selenium web scraping under a dropdown list
Selenium web scraping:
- change the state in the dropdown list
- try to scrape the updated results
- it fails
Code:
'''
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
import time

driver = webdriver.Chrome(executable_path=r'C:\Program Files\Python39\chromedriver.exe')
driver.maximize_window()
driver.get("https://www.gastite.com/locator/?cats=109")

for i in range(1, 3, 1):
    state = driver.find_element(By.NAME, 'state')
    stateDD = Select(state)
    stateDD.select_by_index(i)
    driver.find_element(By.XPATH, '//*[@id="content"]/div[3]/form/input[2]')
    time.sleep(2)
    lists = driver.find_elements_by_css_selector("div.repcontent > a")
    # print(lists)
    for list in lists:
        company = list.find_element_by_class_name('namelink company_title').text
        address = list.find_element_by_class_name('address').text
        address1 = list.find_element_by_class_name('address2').text
        tel = list.find_element_by_tag_name('span').text
        fax = list.find_element_by_tag_name('span').text
        web = list.get_attribute('href')
        print(company, address, address1, tel, fax, web)
'''
I selected one state from the dropdown list as an example; please try the rest yourself (a sketch that loops over all the states follows the code below).
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.select import Select
from bs4 import BeautifulSoup
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
url = 'https://www.gastite.com/locator/?cats=109'
driver.maximize_window()
driver.get(url)  # navigate to the locator page (the url was defined but never opened)

# Select a state once the dropdown is visible
Select(WebDriverWait(driver, 20).until(EC.visibility_of_element_located(
    (By.XPATH, "//select[@name='state']")))).select_by_value("AL")

# Give the result cards a chance to render before grabbing the page source
WebDriverWait(driver, 20).until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, '#resultscontainer > ul > li > div.repcontent')))

soup = BeautifulSoup(driver.page_source, 'lxml')
data = []
for card in soup.select('#resultscontainer > ul > li > div.repcontent'):
    company = card.select_one('h3.company_title').text
    print(company)
    address = card.select_one('div.address').text
    address2 = card.select_one('div.address2').text
    spans = card.select('span')  # phone and fax live in separate <span> tags
    phone = spans[0].get_text(strip=True)
    fax = spans[1].get_text(strip=True) if len(spans) > 1 else ''
    data.append({
        'company': company,
        'address': address,
        'address2': address2,
        'phone': phone,
        'fax': fax
    })

print(data)
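
To collect the remaining states, one option is to loop over every value of the same <select> element and re-parse the results after each selection. The sketch below is only that, a sketch: it reuses the selectors from the code above, assumes the results list refreshes after each selection (if the page needs the Search button clicked first, add that click before the wait), and treats an empty option value as the placeholder entry. The text_of helper is a convenience added here, not part of the original answer.

'''
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

STATE_DROPDOWN = (By.XPATH, "//select[@name='state']")
RESULT_CARDS = '#resultscontainer > ul > li > div.repcontent'

def text_of(card, selector):
    # Return the stripped text of the first match, or '' if the element is missing
    node = card.select_one(selector)
    return node.get_text(strip=True) if node else ''

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
driver.get('https://www.gastite.com/locator/?cats=109')

# Grab the state values up front; re-locate the <select> on each pass because
# the element can go stale once the results refresh.
dropdown = Select(WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(STATE_DROPDOWN)))
state_values = [o.get_attribute('value') for o in dropdown.options if o.get_attribute('value')]

data = []
for value in state_values:
    Select(WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located(STATE_DROPDOWN))).select_by_value(value)
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, RESULT_CARDS)))
    except TimeoutException:
        continue  # no dealers listed for this state
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for card in soup.select(RESULT_CARDS):
        spans = card.select('span')
        data.append({
            'state': value,
            'company': text_of(card, 'h3.company_title'),
            'address': text_of(card, 'div.address'),
            'address2': text_of(card, 'div.address2'),
            'phone': spans[0].get_text(strip=True) if spans else '',
            'fax': spans[1].get_text(strip=True) if len(spans) > 1 else ''
        })

print(data)
'''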