当我使用 concurrent.futures 时,无法弄清楚如何将结果写回同一个工作表
Unable to figure out how I can write the results back to the same worksheet when I go for concurrent.futures
我使用 openpyxl 库从工作表中读取不同的股票代码,然后用这些代码在网站上抓取结果,最后将结果写回到同一个工作表的相关单元格中。
当我运行其中没有实现并发的脚本时,它运行得很完美。
但是,当我使用 concurrent.futures 库时,我不知道如何将结果写回工作表中的相关单元格。
我目前的尝试:
import requests
from openpyxl import load_workbook
import concurrent.futures as futures
# Load the workbook and select the sheet whose column A holds the ticker
# symbols to scrape.
wb = load_workbook('Screener.xlsx')
ws = wb['Screener-1']
# OTC Markets company-profile endpoint; the ticker is substituted for '{}'.
link = 'https://backend.otcmarkets.com/otcapi/company/profile/full/{}?'
# Desktop-browser User-Agent so the API serves the request normally.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
}
# NOTE(review): this shared dict is mutated by get_info() from worker
# threads (see below), which races under ThreadPoolExecutor -- each call
# should build its own params dict instead.
params = {
'symbol': ''
}
def get_info(ticker):
    """Fetch the company profile for *ticker*.

    Returns an ``(address, website)`` tuple; either element falls back to
    ``""`` when the field is missing or the response is not valid JSON.
    """
    target_link = link.format(ticker)
    # Build a per-call params dict instead of mutating the shared
    # module-level `params`: that mutation races when get_info runs
    # concurrently in worker threads.
    r = requests.get(target_link, params={'symbol': ticker}, headers=headers)
    # Decode the body once -- r.json() re-parses the payload on every call --
    # and treat a non-JSON or non-object response as "no data".
    try:
        data = r.json()
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}
    address = data.get('address', '')
    website = data.get('website', '')
    return address, website
if __name__ == '__main__':
    # Remember which worksheet row each ticker came from so the scraped
    # values can be written back to the matching cells regardless of the
    # order in which the futures complete.
    ticker_rows = []
    for row in range(2, ws.max_row + 1):
        ticker = ws.cell(row=row, column=1).value
        if ticker is None:
            break
        ticker_rows.append((ticker, row))
    with futures.ThreadPoolExecutor(max_workers=6) as executor:
        # Map each future to its source row (not just its ticker): this is
        # what routes each result back to the right cells.
        future_to_row = {
            executor.submit(get_info, ticker): row for ticker, row in ticker_rows
        }
        for future in futures.as_completed(future_to_row):
            # Call result() once; the original called it twice per future.
            address, website = future.result()
            print(address, website)
            row = future_to_row[future]
            ws.cell(row=row, column=2).value = address
            ws.cell(row=row, column=3).value = website
    # Save once, after every result has been written.
    wb.save('Screener.xlsx')
您测试的代码很少:
# Sample ticker symbols for a quick test run.
tickers = ['URBT','TPRP','CRBO','PVSP','TSPG','VMHG','MRTI','VTMC','TORM','SORT']
How can I write the results back to the same worksheet while doing reverse search using concurrent.futures?
如果您想知道我正在尝试将数据写入的确切位置,这就是 the worksheet 看起来像。
由于您已经在使用 openpyxl,我建议您改用 pandas,因为这样操作工作簿可能会更容易一些。pandas 的 read_excel 底层正是由 openpyxl 提供支持的。
假设您有一个包含列 Symbol
的文件 Screener.xlsx
,如下所示:
您可以获取丢失的数据并更新工作簿。
方法如下:
import concurrent.futures as futures
import pandas as pd
import requests
# Endpoint template for the OTC Markets full company profile; the ticker
# symbol is substituted for the '{}' placeholder.
link = "https://backend.otcmarkets.com/otcapi/company/profile/full/{}"
# Desktop-browser User-Agent so the API serves the request normally.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36"
    ),
}
def get_info(ticker) -> dict:
    """Fetch the OTC Markets profile for *ticker*.

    Returns ``{"ticker": ..., "address": ..., "website": ...}``; address and
    website fall back to the string ``"None"`` when the field is absent or
    the response body is not valid JSON.
    """
    r = requests.get(link.format(ticker), headers=headers)
    print(f"Fetching data for {ticker}...")
    # Parse the body once -- r.json() re-decodes the payload on each call --
    # and treat a non-JSON or non-object response as "no data".
    try:
        payload = r.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}
    return {
        "ticker": ticker,
        # The string "None" (not the None object) is kept for backward
        # compatibility with the cells the original code wrote.
        "address": payload.get("address1", "None"),
        "website": payload.get("website", "None"),
    }
if __name__ == "__main__":
    df = pd.read_excel("Screener.xlsx")
    tickers = df["Symbol"].to_list()
    with futures.ThreadPoolExecutor(max_workers=6) as executor:
        future_to_url = {
            executor.submit(get_info, ticker): ticker for ticker in tickers
        }
        # as_completed yields in completion order, not submission order.
        tickers_scraped = [
            future.result() for future in futures.as_completed(future_to_url)
        ]
    # Restore the original spreadsheet order. A precomputed position map
    # keeps the sort O(n log n); calling tickers.index() inside the key is
    # an O(n) lookup per record (O(n^2) overall).
    position = {ticker: i for i, ticker in enumerate(tickers)}
    sorted_tickers = sorted(
        tickers_scraped, key=lambda rec: position[rec["ticker"]]
    )
    df.loc[:, ["Address", "Website"]] = [
        [rec["address"], rec["website"]] for rec in sorted_tickers
    ]
    df.to_excel("Screener.xlsx", index=False)
得到这个:
编辑:
这是一个纯 pandas 的方法,无需先对抓取的数据进行排序。
# Pure-pandas alternative: instead of sorting the scraped records back into
# spreadsheet order, both frames are indexed by ticker symbol and pandas
# aligns the rows during the column assignment.
# NOTE(review): indentation was lost in this listing -- the lines below
# belong inside the `if` block (and the `with` block) when run.
if __name__ == "__main__":
df = pd.read_excel("Screener.xlsx")
tickers = df["Symbol"].to_list()
with futures.ThreadPoolExecutor(max_workers=6) as executor:
# One future per ticker; the mapping keeps each ticker associated with it.
future_to_url = {
executor.submit(get_info, ticker): ticker for ticker in tickers
}
# Results arrive in completion order; ordering is handled by index
# alignment below, so no explicit sort is needed.
tickers_scraped = [
future.result() for future in futures.as_completed(future_to_url)
]
# Index the scraped frame by ticker and the sheet frame by Symbol so the
# assignment lines rows up by symbol, not by position.
df_scraped = pd.DataFrame(tickers_scraped).set_index("ticker")
df = df.set_index("Symbol")
df[["Address", "Website"]] = df_scraped[["address", "website"]]
df = df.reset_index()
df.to_excel("Screener.xlsx", index=False)
我使用 openpyxl 库从工作表中读取不同的股票代码,然后用这些代码在网站上抓取结果,最后将结果写回到同一个工作表的相关单元格中。
当我运行其中没有实现并发的脚本时,它运行得很完美。
但是,当我使用 concurrent.futures 库时,我不知道如何将结果写回工作表中的相关单元格。
我目前的尝试:
import requests
from openpyxl import load_workbook
import concurrent.futures as futures
# Load the workbook and select the sheet whose column A holds the ticker
# symbols to scrape.
wb = load_workbook('Screener.xlsx')
ws = wb['Screener-1']
# OTC Markets company-profile endpoint; the ticker is substituted for '{}'.
link = 'https://backend.otcmarkets.com/otcapi/company/profile/full/{}?'
# Desktop-browser User-Agent so the API serves the request normally.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
}
# NOTE(review): this shared dict is mutated by get_info() from worker
# threads (see below), which races under ThreadPoolExecutor -- each call
# should build its own params dict instead.
params = {
'symbol': ''
}
def get_info(ticker):
    """Fetch the company profile for *ticker*.

    Returns an ``(address, website)`` tuple; either element falls back to
    ``""`` when the field is missing or the response is not valid JSON.
    """
    target_link = link.format(ticker)
    # Build a per-call params dict instead of mutating the shared
    # module-level `params`: that mutation races when get_info runs
    # concurrently in worker threads.
    r = requests.get(target_link, params={'symbol': ticker}, headers=headers)
    # Decode the body once -- r.json() re-parses the payload on every call --
    # and treat a non-JSON or non-object response as "no data".
    try:
        data = r.json()
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}
    address = data.get('address', '')
    website = data.get('website', '')
    return address, website
if __name__ == '__main__':
    # Remember which worksheet row each ticker came from so the scraped
    # values can be written back to the matching cells regardless of the
    # order in which the futures complete.
    ticker_rows = []
    for row in range(2, ws.max_row + 1):
        ticker = ws.cell(row=row, column=1).value
        if ticker is None:
            break
        ticker_rows.append((ticker, row))
    with futures.ThreadPoolExecutor(max_workers=6) as executor:
        # Map each future to its source row (not just its ticker): this is
        # what routes each result back to the right cells.
        future_to_row = {
            executor.submit(get_info, ticker): row for ticker, row in ticker_rows
        }
        for future in futures.as_completed(future_to_row):
            # Call result() once; the original called it twice per future.
            address, website = future.result()
            print(address, website)
            row = future_to_row[future]
            ws.cell(row=row, column=2).value = address
            ws.cell(row=row, column=3).value = website
    # Save once, after every result has been written.
    wb.save('Screener.xlsx')
您测试的代码很少:
# Sample ticker symbols for a quick test run.
tickers = ['URBT','TPRP','CRBO','PVSP','TSPG','VMHG','MRTI','VTMC','TORM','SORT']
How can I write the results back to the same worksheet while doing reverse search using concurrent.futures?
如果您想知道我正在尝试将数据写入的确切位置,这就是 the worksheet 看起来像。
由于您已经在使用 openpyxl,我建议您改用 pandas,因为这样操作工作簿可能会更容易一些。pandas 的 read_excel 底层正是由 openpyxl 提供支持的。
假设您有一个包含列 Symbol
的文件 Screener.xlsx
,如下所示:
您可以获取丢失的数据并更新工作簿。
方法如下:
import concurrent.futures as futures
import pandas as pd
import requests
# Endpoint template for the OTC Markets full company profile; the ticker
# symbol is substituted for the '{}' placeholder.
link = "https://backend.otcmarkets.com/otcapi/company/profile/full/{}"
# Desktop-browser User-Agent so the API serves the request normally.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36"
    ),
}
def get_info(ticker) -> dict:
    """Fetch the OTC Markets profile for *ticker*.

    Returns ``{"ticker": ..., "address": ..., "website": ...}``; address and
    website fall back to the string ``"None"`` when the field is absent or
    the response body is not valid JSON.
    """
    r = requests.get(link.format(ticker), headers=headers)
    print(f"Fetching data for {ticker}...")
    # Parse the body once -- r.json() re-decodes the payload on each call --
    # and treat a non-JSON or non-object response as "no data".
    try:
        payload = r.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}
    return {
        "ticker": ticker,
        # The string "None" (not the None object) is kept for backward
        # compatibility with the cells the original code wrote.
        "address": payload.get("address1", "None"),
        "website": payload.get("website", "None"),
    }
if __name__ == "__main__":
    df = pd.read_excel("Screener.xlsx")
    tickers = df["Symbol"].to_list()
    with futures.ThreadPoolExecutor(max_workers=6) as executor:
        future_to_url = {
            executor.submit(get_info, ticker): ticker for ticker in tickers
        }
        # as_completed yields in completion order, not submission order.
        tickers_scraped = [
            future.result() for future in futures.as_completed(future_to_url)
        ]
    # Restore the original spreadsheet order. A precomputed position map
    # keeps the sort O(n log n); calling tickers.index() inside the key is
    # an O(n) lookup per record (O(n^2) overall).
    position = {ticker: i for i, ticker in enumerate(tickers)}
    sorted_tickers = sorted(
        tickers_scraped, key=lambda rec: position[rec["ticker"]]
    )
    df.loc[:, ["Address", "Website"]] = [
        [rec["address"], rec["website"]] for rec in sorted_tickers
    ]
    df.to_excel("Screener.xlsx", index=False)
得到这个:
编辑:
这是一个纯 pandas 的方法,无需先对抓取的数据进行排序。
# Pure-pandas alternative: instead of sorting the scraped records back into
# spreadsheet order, both frames are indexed by ticker symbol and pandas
# aligns the rows during the column assignment.
# NOTE(review): indentation was lost in this listing -- the lines below
# belong inside the `if` block (and the `with` block) when run.
if __name__ == "__main__":
df = pd.read_excel("Screener.xlsx")
tickers = df["Symbol"].to_list()
with futures.ThreadPoolExecutor(max_workers=6) as executor:
# One future per ticker; the mapping keeps each ticker associated with it.
future_to_url = {
executor.submit(get_info, ticker): ticker for ticker in tickers
}
# Results arrive in completion order; ordering is handled by index
# alignment below, so no explicit sort is needed.
tickers_scraped = [
future.result() for future in futures.as_completed(future_to_url)
]
# Index the scraped frame by ticker and the sheet frame by Symbol so the
# assignment lines rows up by symbol, not by position.
df_scraped = pd.DataFrame(tickers_scraped).set_index("ticker")
df = df.set_index("Symbol")
df[["Address", "Website"]] = df_scraped[["address", "website"]]
df = df.reset_index()
df.to_excel("Screener.xlsx", index=False)