使用 requests 模块同时发出多个请求,并用 Python 将输出写入 Excel
making multiple request at the same time with the module requests and print the output on excel(python)
嗨,这是一个相当复杂的问题
来自这个脚本:
workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Placeholder: in the real script this is a list of 2000+ ticker strings.
stock = []  # here goes a list of 2000+ stock tickers as strings

sector = []
peg_ratio = []
foward_eps = []

# Loop invariants hoisted out of the loop: the URL templates, the headers
# dict and the compiled regex never change between iterations, so there is
# no reason to rebuild them 2000+ times.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
pattern = re.compile(r'\s--\sData\s--\s')

# enumerate instead of range(len(...)): idx is still needed for the 1-based
# spreadsheet row numbers.
for idx, ticker in enumerate(stock):
    # --- profile page -> industry/sector ---
    response = requests.get(url_profile.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])
    try:
        sector.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry'])
    except (KeyError, TypeError):
        # Narrowed from a bare `except:` so real bugs are not swallowed;
        # only missing/None JSON levels fall back to the placeholder.
        sector.append("Error")

    # --- key-statistics page -> PEG ratio and forward EPS ---
    response = requests.get(url_stats.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])
    try:
        peg_ratio.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt'])
    except (KeyError, TypeError):
        peg_ratio.append("Error")
    try:
        foward_eps.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt'])
    except (KeyError, TypeError):
        foward_eps.append("Error")

    # Spreadsheet rows are 1-based, hence idx + 1.
    worksheet.write("A" + str(idx + 1), ticker)
    worksheet.write("B" + str(idx + 1), sector[idx])
    worksheet.write("C" + str(idx + 1), foward_eps[idx])
    worksheet.write("D" + str(idx + 1), peg_ratio[idx])

workbook.close()
代码本身能完成它该做的事情(获取数据:forward EPS、PEG 比率、所属行业,并把它们写入 Excel 文件),但问题是它耗时很长,而且股票列表非常长(2531 个元素)。有没有办法让这段代码更高效或更快?
我已尝试按照此视频中的说明进行操作:https://www.youtube.com/watch?v=nFn4_nA_yk8
但我仍然需要在 excel 文件中写入任何单个股票的信息,有没有办法优化所有这些?
也许通过同时发送多个请求并在不同时间将数据写入 excel ?
唯一的最终目标是使整个过程尽可能快。
在此先感谢(如果您需要任何其他信息,请发表评论,我会尽快答复)
首先你必须把代码放在函数中
# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers, which cannot return values)
    and also returns it (for sequential callers).
    """
    # Single print call with explicit flush so output from concurrent
    # threads is less likely to interleave mid-line.
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`: only missing/None JSON levels
        # should fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple was (stock_name, sector, foward_eps,
    # foward_eps, peg_ratio) -- forward EPS stored twice -- so index 3,
    # which callers write into the "peg_ratio" column, actually held the
    # forward EPS. Store each value exactly once.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # for normal sequential execution
接下来你可以用原来的方式按顺序运行它
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Sequential version: fetch one ticker at a time and write its row
# immediately. Columns A-D take the first four fields of the tuple
# returned by process().
row = 1
for ticker in stock:
    info = process(row, ticker)
    for col, value in zip("ABCD", info):
        worksheet.write(f"{col}{row}", value)
    row += 1

workbook.close()

_end = time.time()
print(_end - _start)
这给了我时间 ~15s
,(但有时甚至 ~32s
)
现在您可以使用 threading,以不同的参数值同时运行同一个函数。因为线程无法直接 return 结果,所以我使用了全局字典 results(线程共享内存,都能写入它)。
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()

threads = []
results = {}  # filled in by process(); all threads share this dict

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Launch one worker thread per ticker.
for row, ticker in enumerate(stock, start=1):
    worker = threading.Thread(target=process, args=(row, ticker))
    worker.start()
    threads.append(worker)

# Block until every download has finished.
for worker in threads:
    worker.join()

# Write the collected rows; columns A-D take the first four fields of
# each result tuple stored in `results`.
for row, info in results.items():
    for col, value in zip("ABCD", info):
        worksheet.write(f"{col}{row}", value)

workbook.close()

_end = time.time()
print(_end - _start)
这给了我时间~6s
对于更多的股票,最好使用线程池(multiprocessing.pool.ThreadPool),这样同一时间只会运行少数几个线程——同时运行 2000+ 个线程不是好主意。
完整的工作代码
import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading

# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers, which cannot return values)
    and also returns it (for sequential callers).
    """
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`; only missing/None JSON levels
        # fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple repeated foward_eps twice, so index 3
    # (written to the "peg_ratio" column) actually held the forward EPS.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # for normal sequential execution

# --- main ---

# Small demo list (repeated tickers) used for timing comparisons.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

# --- old version: sequential ---

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

for number, stock_name in enumerate(stock, 1):
    data = process(number, stock_name)
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)

# --- new version: one thread per ticker ---

_start = time.time()
threads = []
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# start all threads
for number, stock_name in enumerate(stock, 1):
    t = threading.Thread(target=process, args=(number, stock_name))
    t.start()
    threads.append(t)

# wait for end of all threads
for t in threads:
    t.join()

# use results collected by the threads
for number, data in results.items():
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)
版本 Pool
import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading  # deduplicated: the original imported this twice
from multiprocessing.pool import ThreadPool

# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers) and also returns it, which is
    what the ThreadPool below relies on.
    """
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`; only missing/None JSON levels
        # fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple repeated foward_eps twice, so index 3
    # (written to the "peg_ratio" column) actually held the forward EPS.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # consumed via starmap_async().get()

# --- main ---

# Small demo list (repeated tickers) used for timing comparisons.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# A pool of 10 worker threads caps concurrency, so a 2000+ ticker list
# does not spawn 2000+ simultaneous threads.
with ThreadPool(processes=10) as pool:
    pool_results = pool.starmap_async(process, enumerate(stock, 1))
    pool_results = pool_results.get()

# starmap_async preserves input order, so row numbers line up with `stock`.
for number, data in enumerate(pool_results, 1):
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)
嗨,这是一个相当复杂的问题 来自这个脚本:
workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Placeholder: in the real script this is a list of 2000+ ticker strings.
stock = []  # here goes a list of 2000+ stock tickers as strings

sector = []
peg_ratio = []
foward_eps = []

# Loop invariants hoisted out of the loop: the URL templates, the headers
# dict and the compiled regex never change between iterations, so there is
# no reason to rebuild them 2000+ times.
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"}
pattern = re.compile(r'\s--\sData\s--\s')

# enumerate instead of range(len(...)): idx is still needed for the 1-based
# spreadsheet row numbers.
for idx, ticker in enumerate(stock):
    # --- profile page -> industry/sector ---
    response = requests.get(url_profile.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])
    try:
        sector.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry'])
    except (KeyError, TypeError):
        # Narrowed from a bare `except:` so real bugs are not swallowed;
        # only missing/None JSON levels fall back to the placeholder.
        sector.append("Error")

    # --- key-statistics page -> PEG ratio and forward EPS ---
    response = requests.get(url_stats.format(ticker, ticker), headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    json_data = json.loads(script_data[start:-12])
    try:
        peg_ratio.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt'])
    except (KeyError, TypeError):
        peg_ratio.append("Error")
    try:
        foward_eps.append(json_data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt'])
    except (KeyError, TypeError):
        foward_eps.append("Error")

    # Spreadsheet rows are 1-based, hence idx + 1.
    worksheet.write("A" + str(idx + 1), ticker)
    worksheet.write("B" + str(idx + 1), sector[idx])
    worksheet.write("C" + str(idx + 1), foward_eps[idx])
    worksheet.write("D" + str(idx + 1), peg_ratio[idx])

workbook.close()
代码本身能完成它该做的事情(获取数据:forward EPS、PEG 比率、所属行业,并把它们写入 Excel 文件),但问题是它耗时很长,而且股票列表非常长(2531 个元素)。有没有办法让这段代码更高效或更快?
我已尝试按照此视频中的说明进行操作:https://www.youtube.com/watch?v=nFn4_nA_yk8 但我仍然需要在 excel 文件中写入任何单个股票的信息,有没有办法优化所有这些? 也许通过同时发送多个请求并在不同时间将数据写入 excel ? 唯一的最终目标是使整个过程尽可能快。 在此先感谢(如果您需要任何其他信息,请发表评论,我会尽快答复)
首先你必须把代码放在函数中
# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers, which cannot return values)
    and also returns it (for sequential callers).
    """
    # Single print call with explicit flush so output from concurrent
    # threads is less likely to interleave mid-line.
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`: only missing/None JSON levels
        # should fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple was (stock_name, sector, foward_eps,
    # foward_eps, peg_ratio) -- forward EPS stored twice -- so index 3,
    # which callers write into the "peg_ratio" column, actually held the
    # forward EPS. Store each value exactly once.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # for normal sequential execution
接下来你可以用原来的方式按顺序运行它
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Sequential version: fetch one ticker at a time and write its row
# immediately. Columns A-D take the first four fields of the tuple
# returned by process().
row = 1
for ticker in stock:
    info = process(row, ticker)
    for col, value in zip("ABCD", info):
        worksheet.write(f"{col}{row}", value)
    row += 1

workbook.close()

_end = time.time()
print(_end - _start)
这给了我时间 ~15s
,(但有时甚至 ~32s
)
现在您可以使用 threading,以不同的参数值同时运行同一个函数。因为线程无法直接 return 结果,所以我使用了全局字典 results(线程共享内存,都能写入它)。
stock = ['AAPL', 'GOOG', 'TESL', 'MSFT', 'AAPL', 'GOOG', 'TESL', 'MSFT']

_start = time.time()

threads = []
results = {}  # filled in by process(); all threads share this dict

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# Launch one worker thread per ticker.
for row, ticker in enumerate(stock, start=1):
    worker = threading.Thread(target=process, args=(row, ticker))
    worker.start()
    threads.append(worker)

# Block until every download has finished.
for worker in threads:
    worker.join()

# Write the collected rows; columns A-D take the first four fields of
# each result tuple stored in `results`.
for row, info in results.items():
    for col, value in zip("ABCD", info):
        worksheet.write(f"{col}{row}", value)

workbook.close()

_end = time.time()
print(_end - _start)
这给了我时间~6s
对于更多的股票,最好使用线程池(multiprocessing.pool.ThreadPool),这样同一时间只会运行少数几个线程——同时运行 2000+ 个线程不是好主意。
完整的工作代码
import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading

# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers, which cannot return values)
    and also returns it (for sequential callers).
    """
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`; only missing/None JSON levels
        # fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple repeated foward_eps twice, so index 3
    # (written to the "peg_ratio" column) actually held the forward EPS.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # for normal sequential execution

# --- main ---

# Small demo list (repeated tickers) used for timing comparisons.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

# --- old version: sequential ---

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

for number, stock_name in enumerate(stock, 1):
    data = process(number, stock_name)
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)

# --- new version: one thread per ticker ---

_start = time.time()
threads = []
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# start all threads
for number, stock_name in enumerate(stock, 1):
    t = threading.Thread(target=process, args=(number, stock_name))
    t.start()
    threads.append(t)

# wait for end of all threads
for t in threads:
    t.join()

# use results collected by the threads
for number, data in results.items():
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)
版本 Pool
import requests
import time
import xlsxwriter
import re
from bs4 import BeautifulSoup
import json
import threading  # deduplicated: the original imported this twice
from multiprocessing.pool import ThreadPool

# --- globals ---

# URL templates; the ticker is interpolated twice (path segment and ?p= query).
url_profile = 'https://finance.yahoo.com/quote/{}/profile?p={}'
url_stats = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'

# Browser-like User-Agent: Yahoo rejects the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# Matches the inline <script> tag whose body holds the page's JSON data blob.
pattern = re.compile(r'\s--\sData\s--\s')

# --- functions ---

def process(number, stock_name):
    """Fetch sector, forward EPS and PEG ratio for one ticker.

    Writes the result tuple into the global ``results`` dict under
    ``number`` (for threaded callers) and also returns it, which is
    what the ThreadPool below relies on.
    """
    print(f'{number} {stock_name}\n', end='', flush=True)

    # --- profile page -> industry/sector ---
    url = url_profile.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    # start/-12 slice carves the JSON object out of the JS assignment
    # (offsets are empirical for this Yahoo page layout).
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        sector = data['context']['dispatcher']['stores']['QuoteSummaryStore']['assetProfile']['industry']
    except (KeyError, TypeError):
        # Narrowed from a bare `except:`; only missing/None JSON levels
        # fall back to the "Error" placeholder.
        sector = "Error"

    # --- key-statistics page -> PEG ratio and forward EPS ---
    url = url_stats.format(stock_name, stock_name)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    script_data = soup.find('script', text=pattern).contents[0]
    start = script_data.find("context") - 2
    data = json.loads(script_data[start:-12])
    try:
        peg_ratio = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['pegRatio']['fmt']
    except (KeyError, TypeError):
        peg_ratio = "Error"
    try:
        foward_eps = data['context']['dispatcher']['stores']['QuoteSummaryStore']['defaultKeyStatistics']['forwardEps']['fmt']
    except (KeyError, TypeError):
        foward_eps = "Error"

    # BUG FIX: the original tuple repeated foward_eps twice, so index 3
    # (written to the "peg_ratio" column) actually held the forward EPS.
    result = (stock_name, sector, foward_eps, peg_ratio)
    results[number] = result  # for threads (shared global dict)
    return result             # consumed via starmap_async().get()

# --- main ---

# Small demo list (repeated tickers) used for timing comparisons.
stock = [
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
    'AAPL', 'GOOG', 'TESL', 'MSFT',
]

_start = time.time()
results = {}

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet("Stocks")

# A pool of 10 worker threads caps concurrency, so a 2000+ ticker list
# does not spawn 2000+ simultaneous threads.
with ThreadPool(processes=10) as pool:
    pool_results = pool.starmap_async(process, enumerate(stock, 1))
    pool_results = pool_results.get()

# starmap_async preserves input order, so row numbers line up with `stock`.
for number, data in enumerate(pool_results, 1):
    worksheet.write(f"A{number}", data[0])  # stock_name
    worksheet.write(f"B{number}", data[1])  # sector
    worksheet.write(f"C{number}", data[2])  # foward_eps
    worksheet.write(f"D{number}", data[3])  # peg_ratio

workbook.close()

_end = time.time()
print(_end - _start)