如何从金融代码(股票代码)获取公司网站?
How to get company website from a finance ticker (stock symbol)?
我知道如何使用 API 和雅虎财经等服务获取股票代码、公司名称和统计信息。
但是,我想从股票代码中获取公司的官方网站。会有 API 或服务吗?
此外,我可以先把股票代码转换为公司名称(例如使用 Yahoo Finance),然后再进行 Google 搜索。但这并不能满足我的需求,因为我有数千个公司名称,而 Google 不允许这种自动化搜索。
关于如何从数千个股票代码中获取网站有什么想法吗?
下面的代码使用 DuckDuckGo,从属于 bloomberg.com 的搜索结果中提取公司网址。它的命中率约为 97%。它可能找不到的情况包括:网址前面没有 "www." 的网站、股票代码只有一个字母的公司以及私人公司。此外,如果该公司是并购(merger/acquisition)或控股(holding)公司,Bloomberg 中可能没有对应的股票代码。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def _extract_domain(snippet):
    """Return the first ``www.`` host name mentioned in *snippet*.

    The text is cut at the first whitespace after ``www.``, anything from the
    first ``/`` onwards is dropped, and trailing punctuation is removed.
    Returns ``''`` when *snippet* contains no ``www.`` at all.
    """
    start = snippet.find('www.')
    if start == -1:
        # str.find reports "not found" as -1; slicing from -1 would yield the
        # last character of the snippet instead of an empty result.
        return ''
    # The slice starts with "www.", so split() always yields at least one token.
    token = snippet[start:].split(None, 1)[0]
    host = token.split('/', 1)[0]
    return host.rstrip('.,;:)')


# Workbook with a "ticker" column on its first sheet.
path = 'C:/Users/user/Documents/tickers.xlsx'
# Raw string: in a normal literal "\U" starts a Unicode escape, which makes
# this Windows path a SyntaxError on Python 3.
chromedriver = r'C:\Users\user\Downloads\chromedriver_win32\chromedriver.exe'

# Read the tickers before starting a browser so a bad workbook fails early.
# Empty cells (NaN) are dropped and values are forced to str because each
# ticker is concatenated into the search query below.
df = pd.read_excel(path, sheet_name=0)
data = [t for t in df["ticker"].dropna().astype(str).str.strip().tolist() if t]

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1200x600')
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
# NOTE(review): these keyword arguments are kept exactly as the script had
# them; newer Selenium releases may expect different ones - confirm against
# the installed version.
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)

try:
    for item in data:
        browser.get('https://duckduckgo.com/')
        search_box = WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable((By.NAME, "q")))
        search_box.send_keys(
            'region:us site:https://www.bloomberg.com/ inurl:' + item + ':US "www."')
        search_box.submit()

        # URL fragments that mark a search hit as belonging to this ticker.
        lists = ['companies/' + item + ':US', 'companies/' + item + '/A:US',
                 'quote/' + item + ':US', 'quote/' + item + '/A:US',
                 'company/' + item + ':US', 'company/' + item + '/A:US']

        try:
            # Block until the result snippets are rendered (up to 60 s).
            results2 = WebDriverWait(browser, 60).until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "//div[@id='links']/div/div/div[2]")))
        except Exception:
            print(item + ": no website showed up search in results")
            continue

        # Collect the link and displayed-URL elements only after the wait
        # above, so they come from the loaded result page rather than from a
        # page that is still rendering.
        results = browser.find_elements(
            By.XPATH, "//div[@id='links']/div/div/h2/a[@class='result__a']")
        results3 = browser.find_elements(
            By.XPATH, "//div[@id='links']/div/div/div[1]/div/a/span[2]")

        counter = 0       # number of hits that match this ticker
        description = []  # distinct domains, in result order
        # zip() stops at the shortest list, so a mismatch between the three
        # element lists cannot raise IndexError.
        for link, shown_url, snippet in zip(results, results3, results2):
            if not any(x in shown_url.text for x in lists):
                continue
            counter += 1
            s = _extract_domain(str(snippet.text))
            href = link.get_attribute("href") or ''
            if item + ":US" in href:
                # The link itself contains "<ticker>:US": trust this hit
                # alone and stop scanning the remaining results.
                description = [s] if s else []
                break
            if s and s not in description:
                description.append(s)

        if counter == 0:
            print(item + ": search results did not find any relevant result(s)")
            continue
        if not description:
            print(item + ": relevant result(s) found but company website not found/listed")
            continue
        # Separate multiple candidates so they do not run together.
        print(item + ":" + ", ".join(description))
finally:
    # Always shut the headless Chrome process down, even if a ticker fails.
    browser.quit()
我知道如何使用 API 和雅虎财经等服务获取股票代码、公司名称和统计信息。
但是,我想从股票代码中获取公司的官方网站。会有 API 或服务吗?
此外,我可以先把股票代码转换为公司名称(例如使用 Yahoo Finance),然后再进行 Google 搜索。但这并不能满足我的需求,因为我有数千个公司名称,而 Google 不允许这种自动化搜索。
关于如何从数千个股票代码中获取网站有什么想法吗?
下面的代码使用 DuckDuckGo,从属于 bloomberg.com 的搜索结果中提取公司网址。它的命中率约为 97%。它可能找不到的情况包括:网址前面没有 "www." 的网站、股票代码只有一个字母的公司以及私人公司。此外,如果该公司是并购(merger/acquisition)或控股(holding)公司,Bloomberg 中可能没有对应的股票代码。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def _extract_domain(snippet):
    """Return the first ``www.`` host name mentioned in *snippet*.

    The text is cut at the first whitespace after ``www.``, anything from the
    first ``/`` onwards is dropped, and trailing punctuation is removed.
    Returns ``''`` when *snippet* contains no ``www.`` at all.
    """
    start = snippet.find('www.')
    if start == -1:
        # str.find reports "not found" as -1; slicing from -1 would yield the
        # last character of the snippet instead of an empty result.
        return ''
    # The slice starts with "www.", so split() always yields at least one token.
    token = snippet[start:].split(None, 1)[0]
    host = token.split('/', 1)[0]
    return host.rstrip('.,;:)')


# Workbook with a "ticker" column on its first sheet.
path = 'C:/Users/user/Documents/tickers.xlsx'
# Raw string: in a normal literal "\U" starts a Unicode escape, which makes
# this Windows path a SyntaxError on Python 3.
chromedriver = r'C:\Users\user\Downloads\chromedriver_win32\chromedriver.exe'

# Read the tickers before starting a browser so a bad workbook fails early.
# Empty cells (NaN) are dropped and values are forced to str because each
# ticker is concatenated into the search query below.
df = pd.read_excel(path, sheet_name=0)
data = [t for t in df["ticker"].dropna().astype(str).str.strip().tolist() if t]

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1200x600')
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
# NOTE(review): these keyword arguments are kept exactly as the script had
# them; newer Selenium releases may expect different ones - confirm against
# the installed version.
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=options)

try:
    for item in data:
        browser.get('https://duckduckgo.com/')
        search_box = WebDriverWait(browser, 10).until(
            EC.element_to_be_clickable((By.NAME, "q")))
        search_box.send_keys(
            'region:us site:https://www.bloomberg.com/ inurl:' + item + ':US "www."')
        search_box.submit()

        # URL fragments that mark a search hit as belonging to this ticker.
        lists = ['companies/' + item + ':US', 'companies/' + item + '/A:US',
                 'quote/' + item + ':US', 'quote/' + item + '/A:US',
                 'company/' + item + ':US', 'company/' + item + '/A:US']

        try:
            # Block until the result snippets are rendered (up to 60 s).
            results2 = WebDriverWait(browser, 60).until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "//div[@id='links']/div/div/div[2]")))
        except Exception:
            print(item + ": no website showed up search in results")
            continue

        # Collect the link and displayed-URL elements only after the wait
        # above, so they come from the loaded result page rather than from a
        # page that is still rendering.
        results = browser.find_elements(
            By.XPATH, "//div[@id='links']/div/div/h2/a[@class='result__a']")
        results3 = browser.find_elements(
            By.XPATH, "//div[@id='links']/div/div/div[1]/div/a/span[2]")

        counter = 0       # number of hits that match this ticker
        description = []  # distinct domains, in result order
        # zip() stops at the shortest list, so a mismatch between the three
        # element lists cannot raise IndexError.
        for link, shown_url, snippet in zip(results, results3, results2):
            if not any(x in shown_url.text for x in lists):
                continue
            counter += 1
            s = _extract_domain(str(snippet.text))
            href = link.get_attribute("href") or ''
            if item + ":US" in href:
                # The link itself contains "<ticker>:US": trust this hit
                # alone and stop scanning the remaining results.
                description = [s] if s else []
                break
            if s and s not in description:
                description.append(s)

        if counter == 0:
            print(item + ": search results did not find any relevant result(s)")
            continue
        if not description:
            print(item + ": relevant result(s) found but company website not found/listed")
            continue
        # Separate multiple candidates so they do not run together.
        print(item + ":" + ", ".join(description))
finally:
    # Always shut the headless Chrome process down, even if a ticker fails.
    browser.quit()