使用 Selenium 提取自动完成搜索提供的数据
Using Selenium to extract data provided by an autocomplete search
我想提取网站搜索栏的自动完成功能所给出的部分结果,但在提取时遇到了问题。我可以输入想要的查询,却无法保存自动建议的内容。似乎每当我在下拉建议上单击“检查元素”(inspect element)想找出应该选择哪个元素时,下拉菜单就会消失!
这是我正在使用的代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
from scrapy.selector import HtmlXPathSelector
# Launch chromedriver.
# NOTE(review): no `driver = webdriver...` assignment appears in this snippet;
# presumably it was omitted when the code was posted -- confirm.
driver.get("http://www.marinetraffic.com/en/ais/index/ports/all")
# Wait up to 10 seconds for the input with id "portname" to become visible.
searchBox = WebDriverWait(driver, 10).until(
EC.visibility_of_element_located(
(By.XPATH, '//input[@id= "portname"]')
)
)
searchBox.click()
searchBox.clear()
a = searchBox.send_keys('Belawan') #so far so good
# Take the inner HTML of an element with class "input-group" and pass it to
# Scrapy's selector for XPath querying.
selen_html = driver.find_element_by_class_name('input-group').get_attribute('innerHTML')
hxs = HtmlXPathSelector(text=selen_html)
# NOTE(review): the predicate `[@class= "input-group"` is never closed with `]`,
# which is consistent with an "Invalid predicate" XPath error. `.extract` is
# also referenced here without being called.
suggests = hxs.select('//div[@class= "input-group"/Belawan/@title').extract
driver.close()
不出所料,错误是 ValueError: XPath error: Invalid predicate in //div[@....[etc]
。如何找到正确的名称以放入我的 XPath?
自动完成采用 BELAWAN - Port [ID]
的形式,最终目标是提取 ID
。
编辑:
screenshot
这应该有效。
基本上你需要找到那些网页元素的 XPath 定位器。
你的情况就像
<ul class="ui-autocomplete ui-front ui-menu ui-widget ui-widget-content ui-corner-all" id="ui-id-3" tabindex="0" style="display: none; top: 375px; left: 63px; width: 306px;">
<li class="ui-menu-item" role="presentation"><a id="ui-id-7" class="ui-corner-all" tabindex="-1"><b>BELA</b>WAN - Port [ID]</a></li>
<li class="ui-menu-item" role="presentation"><a id="ui-id-8" class="ui-corner-all" tabindex="-1"><b>BELA</b>WAN ANCH - Ancorage [ID]</a></li>
</ul>
所以我先通过 id 获取外层的 ul,
然后使用 find_elements_by_xpath
获取与该 XPath 匹配的子元素列表。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
# Launch chromedriver.
driver = webdriver.Chrome()
try:
    driver.get("http://www.marinetraffic.com/en/ais/index/ports/all")
    wait = WebDriverWait(driver, 10)

    # Wait for the port-name search input before interacting with it.
    searchBox = wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, '//input[@id= "portname"]')
        )
    )
    searchBox.click()
    searchBox.clear()
    searchBox.send_keys('Belawan')

    # The suggestion list is rendered only after typing, so wait until the
    # dropdown <ul> is actually displayed instead of reading it immediately.
    # NOTE(review): "ui-id-3" looks like an auto-generated id; the
    # "ui-autocomplete" class on the same <ul> may be a more stable locator.
    menu = wait.until(
        EC.visibility_of_element_located((By.ID, "ui-id-3"))
    )

    # The leading "." keeps the XPath relative to the dropdown; a bare "//li"
    # would search the whole document even when called on an element.
    # find_elements(By.XPATH, ...) is the equivalent of find_elements_by_xpath.
    web_elem_list = menu.find_elements(
        By.XPATH, ".//li[@role='presentation']/a"
    )
    suggests = [web_elem.text for web_elem in web_elem_list]
finally:
    # quit() ends the whole driver session, even if a step above failed.
    driver.quit()
print(suggests)
# Will Give o/p
[u'BELAWAN - Port [ID]', u'BELAWAN ANCH - Ancorage [ID]']
我想提取网站搜索栏的自动完成功能所给出的部分结果,但在提取时遇到了问题。我可以输入想要的查询,却无法保存自动建议的内容。似乎每当我在下拉建议上单击“检查元素”(inspect element)想找出应该选择哪个元素时,下拉菜单就会消失!
这是我正在使用的代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
from scrapy.selector import HtmlXPathSelector
# Launch chromedriver.
# NOTE(review): no `driver = webdriver...` assignment appears in this snippet;
# presumably it was omitted when the code was posted -- confirm.
driver.get("http://www.marinetraffic.com/en/ais/index/ports/all")
# Wait up to 10 seconds for the input with id "portname" to become visible.
searchBox = WebDriverWait(driver, 10).until(
EC.visibility_of_element_located(
(By.XPATH, '//input[@id= "portname"]')
)
)
searchBox.click()
searchBox.clear()
a = searchBox.send_keys('Belawan') #so far so good
# Take the inner HTML of an element with class "input-group" and pass it to
# Scrapy's selector for XPath querying.
selen_html = driver.find_element_by_class_name('input-group').get_attribute('innerHTML')
hxs = HtmlXPathSelector(text=selen_html)
# NOTE(review): the predicate `[@class= "input-group"` is never closed with `]`,
# which is consistent with an "Invalid predicate" XPath error. `.extract` is
# also referenced here without being called.
suggests = hxs.select('//div[@class= "input-group"/Belawan/@title').extract
driver.close()
不出所料,错误是 ValueError: XPath error: Invalid predicate in //div[@....[etc]
。如何找到正确的名称以放入我的 XPath?
自动完成采用 BELAWAN - Port [ID]
的形式,最终目标是提取 ID
。
编辑: screenshot
这应该有效。 基本上你需要找到那些网页元素的 XPath 定位器。
你的情况就像
<ul class="ui-autocomplete ui-front ui-menu ui-widget ui-widget-content ui-corner-all" id="ui-id-3" tabindex="0" style="display: none; top: 375px; left: 63px; width: 306px;">
<li class="ui-menu-item" role="presentation"><a id="ui-id-7" class="ui-corner-all" tabindex="-1"><b>BELA</b>WAN - Port [ID]</a></li>
<li class="ui-menu-item" role="presentation"><a id="ui-id-8" class="ui-corner-all" tabindex="-1"><b>BELA</b>WAN ANCH - Ancorage [ID]</a></li>
</ul>
所以我先通过 id 获取外层的 ul,
然后使用 find_elements_by_xpath
获取与该 XPath 匹配的子元素列表。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
# Launch chromedriver.
driver = webdriver.Chrome()
try:
    driver.get("http://www.marinetraffic.com/en/ais/index/ports/all")
    wait = WebDriverWait(driver, 10)

    # Wait for the port-name search input before interacting with it.
    searchBox = wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, '//input[@id= "portname"]')
        )
    )
    searchBox.click()
    searchBox.clear()
    searchBox.send_keys('Belawan')

    # The suggestion list is rendered only after typing, so wait until the
    # dropdown <ul> is actually displayed instead of reading it immediately.
    # NOTE(review): "ui-id-3" looks like an auto-generated id; the
    # "ui-autocomplete" class on the same <ul> may be a more stable locator.
    menu = wait.until(
        EC.visibility_of_element_located((By.ID, "ui-id-3"))
    )

    # The leading "." keeps the XPath relative to the dropdown; a bare "//li"
    # would search the whole document even when called on an element.
    # find_elements(By.XPATH, ...) is the equivalent of find_elements_by_xpath.
    web_elem_list = menu.find_elements(
        By.XPATH, ".//li[@role='presentation']/a"
    )
    suggests = [web_elem.text for web_elem in web_elem_list]
finally:
    # quit() ends the whole driver session, even if a step above failed.
    driver.quit()
print(suggests)
# Will Give o/p
[u'BELAWAN - Port [ID]', u'BELAWAN ANCH - Ancorage [ID]']