如何使用 selenium python 抓取雅虎财经搜索自动建议结果?
How to scrape the yahoo finance search auto suggestion result with selenium python?
我正在尝试使用 selenium python 在 yahoo finance 上自动搜索。当我输入一些单词时,建议会像 google 建议一样弹出。
我找到了一个列表元素,它应该就是 Yahoo 的建议列表,其 XPath 如下:
//*[@id="search-assist-input"]/div[2]/ul
建议内容似乎隐藏在这个列表中,但它是不可见的:当我在开发者工具中点击展开该节点时,它就消失了。我不知道 Firefox 或 Chrome 中是否有类似"始终展开节点"(always unfold nodes)的选项,但这些元素似乎很难获取。
我试图获取该元素下的所有子元素,但结果显示找不到任何元素:
from chrome_driver.chrome import Chrome
# Obtain a driver from the project-local Chrome helper and open Yahoo Finance.
driver = Chrome().get_driver()
driver.get('https://finance.yahoo.com/')

# Type the query into the first matching search input.
search_inputs = driver.find_elements_by_xpath(
    "//div[@id='search-assist-input']/div/input"
)
search_inputs[0].send_keys('goog')

# Try to collect every child of the suggestion list. According to the
# question text, this lookup reports that no elements are found.
x = driver.find_elements_by_xpath(
    "//div[@data-reactid='56']/ul[@data-reactid='57']/*"
)
如何从搜索框中找到这些自动建议?
要提取在 https://finance.yahoo.com/ 的搜索框中输入搜索文本(例如 GOOG)后出现的自动建议,您必须使用 WebDriverWait 等待自动建议变为可见。您可以使用以下解决方案:
代码块:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: maximized window, no info bars, no extensions.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(
    options=options,
    executable_path=r'C:\WebDrivers\ChromeDriver\chromedriver_win32\chromedriver.exe',
)
try:
    driver.get('https://finance.yahoo.com/')

    # Wait (up to 20 s) until the search box is clickable, then type the query.
    search_box = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='p']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until every suggestion <li> after the search box is visible.
    yahoo_fin_auto_suggestions = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, "//input[@name='p']//following::div[1]/ul//li")
        )
    )
    for item in yahoo_fin_auto_suggestions:
        print(item.text)
finally:
    # Always end the browser session, even if one of the waits times out.
    driver.quit()
控制台输出:
GOOG
Alphabet Inc.Equity - NASDAQ
GOOGL
Alphabet Inc.Equity - NASDAQ
GOOGL-USD.SW
AlphabetEquity - Swiss
GOOGL180518C01080000
GOOGL May 2018 call 1080.000Option - OPR
GOOG.MX
Alphabet Inc.Equity - Mexico
GOOG180525C01075000
GOOG May 2018 call 1075.000Option - OPR
GOOG180518C00720000
GOOG May 2018 call 720.000Option - OPR
GOOGL180518C01120000
GOOGL May 2018 call 1120.000Option - OPR
GOOGL.MX
Alphabet Inc.Equity - Mexico
GOOGL190621C01500000
GOOGL Jun 2019 call 1500.000Option - OPR
由于https://finance.yahoo.com/网站的源代码可能被更改,我对@DebanjanB的回答进行了三点调整:
- 点击接受cookies/提交同意书
- 搜索字段的 Xpath(至少 Germany/EU)
- 建议列表的 Xpath
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: maximized window, no info bars, no extensions.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
#options.add_argument('headless') #optional for headless driver

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(
    options=options,
    executable_path=r'C:\Program Files (x86)\Google\Chrome\Chromedriver\chromedriver.exe',
)
try:
    driver.get('https://finance.yahoo.com/')

    # Accept the cookie-consent form. Wait for the button to become clickable
    # instead of looking it up immediately, and use find-by-locator via
    # `By.XPATH` rather than the removed `find_element_by_xpath` helper.
    consent_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable(
            (By.XPATH, "//button[@type='submit' and @value='agree']")
        )
    )
    consent_button.click()

    # Wait (up to 5 s) until the search field is clickable, then type the query.
    search_box = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='yfin-usr-qry']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until all suggestion containers are visible.
    yahoo_fin_auto_suggestions = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, '(//div[@class="_0ea0377c _4343c2a0 _50f34a35"])')
        )
    )
    for item in yahoo_fin_auto_suggestions:
        print(item.text)
finally:
    # Always end the browser session, even if one of the waits times out.
    driver.quit()
以下是为响应雅虎财经的最新变化而修改的版本。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: window/extension flags, 'eager' page-load strategy,
# certificate/SSL errors ignored, and log level 3.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.page_load_strategy = 'eager'
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('log-level=3')

# Text of a list that holds only the "latest news" link instead of suggestions.
latest_news = ['Go to Latest News']

# Raw string: the backslashes in the Windows path are literal, not escapes.
chrome_path = r"C:\Python\SYS\chromedriver.exe"

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options, executable_path=chrome_path)
try:
    driver.get('https://finance.yahoo.com/')

    # Wait (up to 5 s) until the search field is clickable, then type the query.
    search_box = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='yfin-usr-qry']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until the heading inside the search form reads 'Symbols'.
    WebDriverWait(driver, 20).until(
        EC.text_to_be_present_in_element(
            (By.XPATH, '//*[@id="header-search-form"]/div[2]/div[1]/div/div[1]/h3'),
            'Symbols',
        )
    )

    # Read the first module list; if it only contains the latest-news link,
    # the suggestions are taken from the second list instead.
    module_lists = driver.find_elements(By.CLASS_NAME, 'modules_list__1zFHY')
    yahoo_fin_auto_suggestions = module_lists[0].text.split('\n')
    if yahoo_fin_auto_suggestions == latest_news:
        yahoo_fin_auto_suggestions = module_lists[1].text.split('\n')
    print(yahoo_fin_auto_suggestions)
finally:
    # Always end the browser session, even if a wait or lookup fails.
    driver.quit()
我正在尝试使用 selenium python 在 yahoo finance 上自动搜索。当我输入一些单词时,建议会像 google 建议一样弹出。
我找到了一个列表元素,它应该就是 Yahoo 的建议列表,其 XPath 如下:
//*[@id="search-assist-input"]/div[2]/ul
建议内容似乎隐藏在这个列表中,但它是不可见的:当我在开发者工具中点击展开该节点时,它就消失了。我不知道 Firefox 或 Chrome 中是否有类似"始终展开节点"(always unfold nodes)的选项,但这些元素似乎很难获取。我试图获取该元素下的所有子元素,但结果显示找不到任何元素:
from chrome_driver.chrome import Chrome
# Obtain a driver from the project-local Chrome helper and open Yahoo Finance.
driver = Chrome().get_driver()
driver.get('https://finance.yahoo.com/')

# Type the query into the first matching search input.
search_inputs = driver.find_elements_by_xpath(
    "//div[@id='search-assist-input']/div/input"
)
search_inputs[0].send_keys('goog')

# Try to collect every child of the suggestion list. According to the
# question text, this lookup reports that no elements are found.
x = driver.find_elements_by_xpath(
    "//div[@data-reactid='56']/ul[@data-reactid='57']/*"
)
如何从搜索框中找到这些自动建议?
要提取在 https://finance.yahoo.com/ 的搜索框中输入搜索文本(例如 GOOG)后出现的自动建议,您必须使用 WebDriverWait 等待自动建议变为可见。您可以使用以下解决方案:
代码块:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

# Configure Chrome: maximized window, no info bars, no extensions.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(
    options=options,
    executable_path=r'C:\WebDrivers\ChromeDriver\chromedriver_win32\chromedriver.exe',
)
try:
    driver.get('https://finance.yahoo.com/')

    # Wait (up to 20 s) until the search box is clickable, then type the query.
    search_box = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='p']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until every suggestion <li> after the search box is visible.
    yahoo_fin_auto_suggestions = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, "//input[@name='p']//following::div[1]/ul//li")
        )
    )
    for item in yahoo_fin_auto_suggestions:
        print(item.text)
finally:
    # Always end the browser session, even if one of the waits times out.
    driver.quit()
控制台输出:
GOOG
Alphabet Inc.Equity - NASDAQ
GOOGL
Alphabet Inc.Equity - NASDAQ
GOOGL-USD.SW
AlphabetEquity - Swiss
GOOGL180518C01080000
GOOGL May 2018 call 1080.000Option - OPR
GOOG.MX
Alphabet Inc.Equity - Mexico
GOOG180525C01075000
GOOG May 2018 call 1075.000Option - OPR
GOOG180518C00720000
GOOG May 2018 call 720.000Option - OPR
GOOGL180518C01120000
GOOGL May 2018 call 1120.000Option - OPR
GOOGL.MX
Alphabet Inc.Equity - Mexico
GOOGL190621C01500000
GOOGL Jun 2019 call 1500.000Option - OPR
由于https://finance.yahoo.com/网站的源代码可能被更改,我对@DebanjanB的回答进行了三点调整:
- 点击接受cookies/提交同意书
- 搜索字段的 Xpath(至少 Germany/EU)
- 建议列表的 Xpath
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: maximized window, no info bars, no extensions.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
#options.add_argument('headless') #optional for headless driver

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(
    options=options,
    executable_path=r'C:\Program Files (x86)\Google\Chrome\Chromedriver\chromedriver.exe',
)
try:
    driver.get('https://finance.yahoo.com/')

    # Accept the cookie-consent form. Wait for the button to become clickable
    # instead of looking it up immediately, and use find-by-locator via
    # `By.XPATH` rather than the removed `find_element_by_xpath` helper.
    consent_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable(
            (By.XPATH, "//button[@type='submit' and @value='agree']")
        )
    )
    consent_button.click()

    # Wait (up to 5 s) until the search field is clickable, then type the query.
    search_box = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='yfin-usr-qry']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until all suggestion containers are visible.
    yahoo_fin_auto_suggestions = WebDriverWait(driver, 20).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, '(//div[@class="_0ea0377c _4343c2a0 _50f34a35"])')
        )
    )
    for item in yahoo_fin_auto_suggestions:
        print(item.text)
finally:
    # Always end the browser session, even if one of the waits times out.
    driver.quit()
以下是为响应雅虎财经的最新变化而修改的版本。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: window/extension flags, 'eager' page-load strategy,
# certificate/SSL errors ignored, and log level 3.
options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
options.page_load_strategy = 'eager'
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('log-level=3')

# Text of a list that holds only the "latest news" link instead of suggestions.
latest_news = ['Go to Latest News']

# Raw string: the backslashes in the Windows path are literal, not escapes.
chrome_path = r"C:\Python\SYS\chromedriver.exe"

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options, executable_path=chrome_path)
try:
    driver.get('https://finance.yahoo.com/')

    # Wait (up to 5 s) until the search field is clickable, then type the query.
    search_box = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@name='yfin-usr-qry']"))
    )
    search_box.send_keys("goog")

    # Wait (up to 20 s) until the heading inside the search form reads 'Symbols'.
    WebDriverWait(driver, 20).until(
        EC.text_to_be_present_in_element(
            (By.XPATH, '//*[@id="header-search-form"]/div[2]/div[1]/div/div[1]/h3'),
            'Symbols',
        )
    )

    # Read the first module list; if it only contains the latest-news link,
    # the suggestions are taken from the second list instead.
    module_lists = driver.find_elements(By.CLASS_NAME, 'modules_list__1zFHY')
    yahoo_fin_auto_suggestions = module_lists[0].text.split('\n')
    if yahoo_fin_auto_suggestions == latest_news:
        yahoo_fin_auto_suggestions = module_lists[1].text.split('\n')
    print(yahoo_fin_auto_suggestions)
finally:
    # Always end the browser session, even if a wait or lookup fails.
    driver.quit()