与来自 python/selenium 的 javascript 可滚动容器交互
Interacting with javascript scrollable container from python/selenium
我正在尝试使用 Selenium/Python 从 http://factfinder.census.gov 自动下载数据集。我是 Javascript 的新手,如果这是一个容易解决的问题,我深表歉意。我现在正在处理代码的开头部分,它应该:
- 去here
- 单击 "Topics" 按钮
- 单击 "Topics" 并加载新页面后,单击 "Dataset"
- Select 我需要的数据集,最好是通过索引 (sub) table.
我卡在了第3步,这是截图;似乎我想访问 div w/id "scrollable_container_topics" 然后遍历或索引以获取其子节点(在这种情况下,我想要最后一个子节点)。我试过使用 script_execute 然后通过 id 和 class 名称定位元素,但到目前为止没有任何效果。如果有任何指点,我将不胜感激。
这是我的代码:
import os
import re
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
# A list of all the variables we want to extract; corresponds to "Topics" field on site
topics = ["B03003", "B05001"]
# A list of all the states we want to extract data for (currently, strings; is there a numeric code?)
states = ["New Jersey", "Georgia"]
# A vector of all the years we want to extract data for [lower, upper) *Note* this != range of years covered by data
years = range(2009, 2010)
# Define the class
class CensusSearch:
# Initialize and set attributes of the query
def __init__(self, topic, state, year):
"""
:type topic: str
:type state: str
:type year: int
"""
self.topic = topic
self.state = state
self.year = year
def setUp(self):
# self.driver = webdriver.Chrome("C:/Python34/Scripts/chromedriver.exe")
self.driver = webdriver.Firefox()
def extractData(self):
driver = self.driver
driver.set_page_load_timeout(1000000000000)
driver.implicitly_wait(100)
# Navigate to site; this url = after you have already chosen "Advanced Search"
driver.get("http://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t")
driver.implicitly_wait(10)
# FIlter by dataset (want the ACS 1, 3, and 5-year estimates)
driver.execute_script("document.getElementsByClassName('leftnav_btn')[0].click()") # click the "Topics" button
driver.implicitly_wait(20)
# This is where I am stuck; I've tried the following:
getData = driver.find_element_by_id("ygtvlabelel172")
getData.click()
driver.implicitly_wait(10)
# Filter geographically: select all counties in the United States and Puerto Rico
# Click "Geographies" button
driver.execute_script("document.getElementsByClassName('leftnav_btn')[1].click()")
driver.implicitly_wait(10)
drop_down = driver.find_element_by_class_name("popular_summarylevel")
select_box = Select(drop_down)
select_box.select_by_value("050")
# Once "Geography" is clicked, select "County - 050" from the drop-down menu; then select "All US + Puerto Rico"
drop_down_counties = driver.find_element_by_id("geoAssistList")
select_box_counties = Select(drop_down_counties)
select_box_counties.select_by_index(1)
# Click the "ADD TO YOUR SELECTIONS" button
driver.execute_script("document.getElementsByClassName('button-g')[0].click()")
driver.implicitly_wait(10)
def tearDown(self):
self.driver.quit()
def main(self):
#print(getattr(self))
print(self.state)
print(self.topic)
print(self.year)
self.setUp()
self.extractData()
self.tearDown()
for a in topics:
for b in states:
for c in years:
query = CensusSearch(a, b, c)
query.main()
print("done")
需要解决的几件事:
- 您不必使用
document.getElement..
方法 - selenium 有自己的方法来定位页面上的元素
- 无需操纵隐式等待(另外,请确保您了解调用
implicitly_wait()
不会像 time.sleep()
一样运行 - 您不会立即延迟)或页面加载在这种情况下超时 - 在页面上执行操作之前只需使用 Explicit Waits
这是一个工作代码,点击 "Topics" 然后 "Dataset":
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("http://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t")
wait = WebDriverWait(driver, 10)
actions = ActionChains(driver)
# click "Topics"
topics = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a#topic-overlay-btn")))
driver.execute_script("arguments[0].click();", topics)
# click "Dataset"
dataset = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span[title=Dataset]")))
dataset.click()
我正在尝试使用 Selenium/Python 从 http://factfinder.census.gov 自动下载数据集。我是 Javascript 的新手,如果这是一个容易解决的问题,我深表歉意。我现在正在处理代码的开头部分,它应该:
- 去here
- 单击 "Topics" 按钮
- 单击 "Topics" 并加载新页面后,单击 "Dataset"
- Select 我需要的数据集,最好是通过索引 (sub) table.
我卡在了第3步,这是截图;似乎我想访问 div w/id "scrollable_container_topics" 然后遍历或索引以获取其子节点(在这种情况下,我想要最后一个子节点)。我试过使用 script_execute 然后通过 id 和 class 名称定位元素,但到目前为止没有任何效果。如果有任何指点,我将不胜感激。
这是我的代码:
import os
import re
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.select import Select
# A list of all the variables we want to extract; corresponds to "Topics" field on site
topics = ["B03003", "B05001"]
# A list of all the states we want to extract data for (currently, strings; is there a numeric code?)
states = ["New Jersey", "Georgia"]
# A vector of all the years we want to extract data for [lower, upper) *Note* this != range of years covered by data
years = range(2009, 2010)
# Define the class
class CensusSearch:
# Initialize and set attributes of the query
def __init__(self, topic, state, year):
"""
:type topic: str
:type state: str
:type year: int
"""
self.topic = topic
self.state = state
self.year = year
def setUp(self):
# self.driver = webdriver.Chrome("C:/Python34/Scripts/chromedriver.exe")
self.driver = webdriver.Firefox()
def extractData(self):
driver = self.driver
driver.set_page_load_timeout(1000000000000)
driver.implicitly_wait(100)
# Navigate to site; this url = after you have already chosen "Advanced Search"
driver.get("http://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t")
driver.implicitly_wait(10)
# FIlter by dataset (want the ACS 1, 3, and 5-year estimates)
driver.execute_script("document.getElementsByClassName('leftnav_btn')[0].click()") # click the "Topics" button
driver.implicitly_wait(20)
# This is where I am stuck; I've tried the following:
getData = driver.find_element_by_id("ygtvlabelel172")
getData.click()
driver.implicitly_wait(10)
# Filter geographically: select all counties in the United States and Puerto Rico
# Click "Geographies" button
driver.execute_script("document.getElementsByClassName('leftnav_btn')[1].click()")
driver.implicitly_wait(10)
drop_down = driver.find_element_by_class_name("popular_summarylevel")
select_box = Select(drop_down)
select_box.select_by_value("050")
# Once "Geography" is clicked, select "County - 050" from the drop-down menu; then select "All US + Puerto Rico"
drop_down_counties = driver.find_element_by_id("geoAssistList")
select_box_counties = Select(drop_down_counties)
select_box_counties.select_by_index(1)
# Click the "ADD TO YOUR SELECTIONS" button
driver.execute_script("document.getElementsByClassName('button-g')[0].click()")
driver.implicitly_wait(10)
def tearDown(self):
self.driver.quit()
def main(self):
#print(getattr(self))
print(self.state)
print(self.topic)
print(self.year)
self.setUp()
self.extractData()
self.tearDown()
for a in topics:
for b in states:
for c in years:
query = CensusSearch(a, b, c)
query.main()
print("done")
需要解决的几件事:
- 您不必使用
document.getElement..
方法 - selenium 有自己的方法来定位页面上的元素 - 无需操纵隐式等待(另外,请确保您了解调用
implicitly_wait()
不会像time.sleep()
一样运行 - 您不会立即延迟)或页面加载在这种情况下超时 - 在页面上执行操作之前只需使用 Explicit Waits
这是一个工作代码,点击 "Topics" 然后 "Dataset":
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("http://factfinder.census.gov/faces/nav/jsf/pages/searchresults.xhtml?refresh=t")
wait = WebDriverWait(driver, 10)
actions = ActionChains(driver)
# click "Topics"
topics = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a#topic-overlay-btn")))
driver.execute_script("arguments[0].click();", topics)
# click "Dataset"
dataset = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span[title=Dataset]")))
dataset.click()