如何将 asyncio 与现有的阻塞库一起使用?
How to use asyncio with existing blocking library?
我有几个阻塞函数 foo、bar,并且我无法更改它们(它们来自一些我无法控制的内部库,会与一个或多个网络服务通信)。我该如何以异步方式使用它们?例如,我不想像下面这样写:
# Sequential baseline: every input goes through foo and then bar before the
# next input is started, so the waits are never overlapped.
results = []
for item in inps:
    results.append(bar(foo(item)))
这样做效率很低,因为在等待第一个输入的结果时,我本可以为第二个输入调用 foo,bar 也是如此。我该如何包装它们,使其可以与 asyncio 一起使用(即新的 async、await 语法)?
假设这些函数是可重入的,也就是说,在前一次 foo 调用仍在处理时再次调用 foo 是可以的。
更新
用一个可复用的装饰器扩展答案,示例如下:
def run_in_executor(f):
    """Decorate blocking callable *f* so that calling it schedules it on an executor.

    The wrapper has to be called while an event loop is running, because it
    looks the loop up with ``asyncio.get_running_loop()``. It does not block:
    it returns the awaitable produced by ``loop.run_in_executor``, whose
    result is ``f(*args, **kwargs)``.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        running_loop = asyncio.get_running_loop()
        # Keyword arguments are bound here with functools.partial so the
        # executor receives a single ready-to-call object.
        bound_call = functools.partial(f, *args, **kwargs)
        # Passing None as the executor selects the loop's default executor.
        return running_loop.run_in_executor(None, bound_call)

    return wrapper
这里(某种程度上)其实有两个问题:第一,如何异步地运行阻塞代码;第二,如何并发地运行异步代码(asyncio 是单线程的,因此 GIL 仍然适用,所以它并不是真正的并行,不过这有些离题了)。
可以使用 asyncio.ensure_future 创建并发任务,详见官方文档。
要运行同步代码,需要在执行器(executor)中运行阻塞代码。示例:
import concurrent.futures
import asyncio
import time
def blocking(delay):
    """Block the calling thread for *delay* seconds, then print a completion message."""
    # time.sleep stands in for a blocking library call.
    time.sleep(delay)
    print('Completed.')
async def non_blocking(loop, executor):
    """Run three ``blocking`` calls concurrently on *executor* and wait for all of them.

    Args:
        loop: Event loop whose ``run_in_executor`` is used to submit the calls.
        executor: Executor the blocking calls are submitted to.

    Returns:
        None. The ``(done, pending)`` pair produced by ``asyncio.wait`` is
        discarded.
    """
    # run_in_executor submits each call as soon as it is invoked and returns
    # a future, so the three sleeps overlap instead of running back to back.
    futures = {
        # Returns after delay=12 seconds
        loop.run_in_executor(executor, blocking, 12),
        # Returns after delay=14 seconds
        loop.run_in_executor(executor, blocking, 14),
        # Returns after delay=16 seconds
        loop.run_in_executor(executor, blocking, 16),
    }
    # The collection is passed positionally: the asyncio documentation names
    # this parameter ``aws``, so spelling it as the keyword ``fs`` depends on
    # an undocumented name. If the individual futures are needed afterwards,
    # capture them with "done, pending = await asyncio.wait(...)".
    await asyncio.wait(futures, return_when=asyncio.ALL_COMPLETED)
# Create the loop explicitly: calling asyncio.get_event_loop() with no running
# loop is deprecated as a way of implicitly creating one.
loop = asyncio.new_event_loop()
try:
    # The context manager shuts the worker threads down once the run finishes.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        loop.run_until_complete(non_blocking(loop, executor))
finally:
    # Release the loop's resources even if the run raised.
    loop.close()
如果想用 for 循环来调度这些任务(如您的示例所示),有几种不同的策略,但基本做法是:先用 for 循环(或列表推导式等)调度任务,再用 asyncio.wait 等待它们完成,最后取回结果。示例:
done, pending = await asyncio.wait(
fs=[loop.run_in_executor(executor, blocking_foo, *args) for args in inps],
return_when=asyncio.ALL_COMPLETED
)
# Note that any errors raise during the above will be raised here; to
# handle errors you will need to call task.exception() and check if it
# is not None before calling task.result()
results = [task.result() for task in done]
扩展已接受的答案以实际解决问题。
注意:需要 python 3.7+
import functools
from urllib.request import urlopen
import asyncio
def legacy_blocking_function():  # You cannot change this function
    """Fetch https://example.com synchronously and return the decoded body.

    Stands in for a blocking library call: it performs network I/O on the
    calling thread and only returns once the whole response has been read.

    Returns:
        The response body decoded with ``bytes.decode()``'s default codec.
    """
    # The with-block closes the HTTP response (and its socket) once the body
    # has been read; without it the connection is left for the garbage collector.
    with urlopen("https://example.com") as r:
        return r.read().decode()
def run_in_executor(f):
    """Decorate blocking callable *f* so that calling it schedules it on an executor.

    The wrapper has to be called while an event loop is running, because it
    looks the loop up with ``asyncio.get_running_loop()``. It returns the
    awaitable produced by ``loop.run_in_executor``; awaiting it yields
    ``f(*args, **kwargs)``.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        def invoke():
            # Zero-argument closure carrying both positional and keyword
            # arguments over to the worker thread.
            return f(*args, **kwargs)

        # Passing None as the executor selects the loop's default executor.
        return asyncio.get_running_loop().run_in_executor(None, invoke)

    return wrapper
@run_in_executor
def foo(arg):  # Your wrapper for async use
    """Call the legacy function and return *arg* followed by the response length."""
    body = legacy_blocking_function()
    return f"{arg}{len(body)}"
@run_in_executor
def bar(arg):  # Another wrapper
    """Call the legacy function and return the response length followed by *arg*."""
    body = legacy_blocking_function()
    return f"{len(body)}{arg}"
async def process_input(inp):  # Modern async function (coroutine)
    """Pipe *inp* through ``foo``, wrap the result in ``XXX`` markers, then through ``bar``."""
    foo_result = await foo(inp)
    marked = f"XXX{foo_result}XXX"
    bar_result = await bar(marked)
    return bar_result
async def main():
    """Process three sample inputs concurrently and print the results.

    Results are collected in completion order, as yielded by
    ``asyncio.as_completed``, so the printed list need not follow the order
    of ``inputs``.
    """
    inputs = ["one", "two", "three"]
    # create_task schedules every pipeline immediately so they overlap.
    tasks = [asyncio.create_task(process_input(inp)) for inp in inputs]
    finished = []
    for next_done in asyncio.as_completed(tasks):
        finished.append(await next_done)
    print(finished)
    # NOTE: the original author reported that the variant below "doesn't work
    # as expected"; it passes the Task objects, not the raw inputs, to
    # process_input:
    # print([await t for t in asyncio.as_completed([process_input(inp) for inp in input_tasks])])


if __name__ == '__main__':
    asyncio.run(main())
单击 here 获取此示例的最新版本并发送拉取请求。
import asyncio
from time import sleep
import logging
# Configure the root logger at DEBUG level. Each record shows the timestamp,
# the id of the emitting thread, the calling function's name and the message,
# which makes it visible which thread ran which step.
logging.basicConfig(
level=logging.DEBUG, format="%(asctime)s %(thread)s %(funcName)s %(message)s")
def long_task(t):
    """Stand in for a slow I/O-bound call.

    Logs step "2." on entry, sleeps for *t* seconds on the calling thread,
    logs step "4." and returns ``t ** 2``.
    """
    logging.info("2. t: %s", t)
    sleep(t)
    logging.info("4. t: %s", t)
    squared = t ** 2
    return squared
async def main():
    """Run ``long_task`` for 1..4 on the default executor and log each result.

    All calls are submitted before any of them is awaited, so they run
    concurrently; ``asyncio.gather`` returns the results in submission order,
    which lets them be paired with ``inputs`` again.
    """
    loop = asyncio.get_running_loop()
    inputs = range(1, 5)
    logging.info("1.")
    futures = []
    for seconds in inputs:
        # None selects the loop's default executor.
        futures.append(loop.run_in_executor(None, long_task, seconds))
    logging.info("3.")
    results = await asyncio.gather(*futures)
    logging.info("5.")
    for pair in zip(inputs, results):
        logging.info("6. Result: %s, %s", *pair)


if __name__ == "__main__":
    asyncio.run(main())
输出:
2020-03-18 17:13:07,523 23964 main 1.
2020-03-18 17:13:07,524 5008 long_task 2. t: 1
2020-03-18 17:13:07,525 21232 long_task 2. t: 2
2020-03-18 17:13:07,525 22048 long_task 2. t: 3
2020-03-18 17:13:07,526 25588 long_task 2. t: 4
2020-03-18 17:13:07,526 23964 main 3.
2020-03-18 17:13:08,526 5008 long_task 4. t: 1
2020-03-18 17:13:09,526 21232 long_task 4. t: 2
2020-03-18 17:13:10,527 22048 long_task 4. t: 3
2020-03-18 17:13:11,527 25588 long_task 4. t: 4
2020-03-18 17:13:11,527 23964 main 5.
2020-03-18 17:13:11,528 23964 main 6. Result: 1, 1
2020-03-18 17:13:11,528 23964 main 6. Result: 2, 4
2020-03-18 17:13:11,529 23964 main 6. Result: 3, 9
2020-03-18 17:13:11,529 23964 main 6. Result: 4, 16
我有几个阻塞函数 foo、bar,并且我无法更改它们(它们来自一些我无法控制的内部库,会与一个或多个网络服务通信)。我该如何以异步方式使用它们?例如,我不想像下面这样写:
# Sequential baseline: every input goes through foo and then bar before the
# next input is started, so the waits are never overlapped.
results = []
for item in inps:
    results.append(bar(foo(item)))
这样做效率很低,因为在等待第一个输入的结果时,我本可以为第二个输入调用 foo,bar 也是如此。我该如何包装它们,使其可以与 asyncio 一起使用(即新的 async、await 语法)?
假设这些函数是可重入的,也就是说,在前一次 foo 调用仍在处理时再次调用 foo 是可以的。
更新
用一个可复用的装饰器扩展答案,示例如下:
def run_in_executor(f):
    """Decorate blocking callable *f* so that calling it schedules it on an executor.

    The wrapper has to be called while an event loop is running, because it
    looks the loop up with ``asyncio.get_running_loop()``. It does not block:
    it returns the awaitable produced by ``loop.run_in_executor``, whose
    result is ``f(*args, **kwargs)``.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        running_loop = asyncio.get_running_loop()
        # Keyword arguments are bound here with functools.partial so the
        # executor receives a single ready-to-call object.
        bound_call = functools.partial(f, *args, **kwargs)
        # Passing None as the executor selects the loop's default executor.
        return running_loop.run_in_executor(None, bound_call)

    return wrapper
这里(某种程度上)其实有两个问题:第一,如何异步地运行阻塞代码;第二,如何并发地运行异步代码(asyncio 是单线程的,因此 GIL 仍然适用,所以它并不是真正的并行,不过这有些离题了)。
可以使用 asyncio.ensure_future 创建并发任务,详见官方文档。
要运行同步代码,需要在执行器(executor)中运行阻塞代码。示例:
import concurrent.futures
import asyncio
import time
def blocking(delay):
    """Block the calling thread for *delay* seconds, then print a completion message."""
    # time.sleep stands in for a blocking library call.
    time.sleep(delay)
    print('Completed.')
async def non_blocking(loop, executor):
    """Run three ``blocking`` calls concurrently on *executor* and wait for all of them.

    Args:
        loop: Event loop whose ``run_in_executor`` is used to submit the calls.
        executor: Executor the blocking calls are submitted to.

    Returns:
        None. The ``(done, pending)`` pair produced by ``asyncio.wait`` is
        discarded.
    """
    # run_in_executor submits each call as soon as it is invoked and returns
    # a future, so the three sleeps overlap instead of running back to back.
    futures = {
        # Returns after delay=12 seconds
        loop.run_in_executor(executor, blocking, 12),
        # Returns after delay=14 seconds
        loop.run_in_executor(executor, blocking, 14),
        # Returns after delay=16 seconds
        loop.run_in_executor(executor, blocking, 16),
    }
    # The collection is passed positionally: the asyncio documentation names
    # this parameter ``aws``, so spelling it as the keyword ``fs`` depends on
    # an undocumented name. If the individual futures are needed afterwards,
    # capture them with "done, pending = await asyncio.wait(...)".
    await asyncio.wait(futures, return_when=asyncio.ALL_COMPLETED)
# Create the loop explicitly: calling asyncio.get_event_loop() with no running
# loop is deprecated as a way of implicitly creating one.
loop = asyncio.new_event_loop()
try:
    # The context manager shuts the worker threads down once the run finishes.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        loop.run_until_complete(non_blocking(loop, executor))
finally:
    # Release the loop's resources even if the run raised.
    loop.close()
如果想用 for 循环来调度这些任务(如您的示例所示),有几种不同的策略,但基本做法是:先用 for 循环(或列表推导式等)调度任务,再用 asyncio.wait 等待它们完成,最后取回结果。示例:
done, pending = await asyncio.wait(
fs=[loop.run_in_executor(executor, blocking_foo, *args) for args in inps],
return_when=asyncio.ALL_COMPLETED
)
# Note that any errors raise during the above will be raised here; to
# handle errors you will need to call task.exception() and check if it
# is not None before calling task.result()
results = [task.result() for task in done]
扩展已接受的答案以实际解决问题。
注意:需要 python 3.7+
import functools
from urllib.request import urlopen
import asyncio
def legacy_blocking_function():  # You cannot change this function
    """Fetch https://example.com synchronously and return the decoded body.

    Stands in for a blocking library call: it performs network I/O on the
    calling thread and only returns once the whole response has been read.

    Returns:
        The response body decoded with ``bytes.decode()``'s default codec.
    """
    # The with-block closes the HTTP response (and its socket) once the body
    # has been read; without it the connection is left for the garbage collector.
    with urlopen("https://example.com") as r:
        return r.read().decode()
def run_in_executor(f):
    """Decorate blocking callable *f* so that calling it schedules it on an executor.

    The wrapper has to be called while an event loop is running, because it
    looks the loop up with ``asyncio.get_running_loop()``. It returns the
    awaitable produced by ``loop.run_in_executor``; awaiting it yields
    ``f(*args, **kwargs)``.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        def invoke():
            # Zero-argument closure carrying both positional and keyword
            # arguments over to the worker thread.
            return f(*args, **kwargs)

        # Passing None as the executor selects the loop's default executor.
        return asyncio.get_running_loop().run_in_executor(None, invoke)

    return wrapper
@run_in_executor
def foo(arg):  # Your wrapper for async use
    """Call the legacy function and return *arg* followed by the response length."""
    body = legacy_blocking_function()
    return f"{arg}{len(body)}"
@run_in_executor
def bar(arg):  # Another wrapper
    """Call the legacy function and return the response length followed by *arg*."""
    body = legacy_blocking_function()
    return f"{len(body)}{arg}"
async def process_input(inp):  # Modern async function (coroutine)
    """Pipe *inp* through ``foo``, wrap the result in ``XXX`` markers, then through ``bar``."""
    foo_result = await foo(inp)
    marked = f"XXX{foo_result}XXX"
    bar_result = await bar(marked)
    return bar_result
async def main():
    """Process three sample inputs concurrently and print the results.

    Results are collected in completion order, as yielded by
    ``asyncio.as_completed``, so the printed list need not follow the order
    of ``inputs``.
    """
    inputs = ["one", "two", "three"]
    # create_task schedules every pipeline immediately so they overlap.
    tasks = [asyncio.create_task(process_input(inp)) for inp in inputs]
    finished = []
    for next_done in asyncio.as_completed(tasks):
        finished.append(await next_done)
    print(finished)
    # NOTE: the original author reported that the variant below "doesn't work
    # as expected"; it passes the Task objects, not the raw inputs, to
    # process_input:
    # print([await t for t in asyncio.as_completed([process_input(inp) for inp in input_tasks])])


if __name__ == '__main__':
    asyncio.run(main())
单击 here 获取此示例的最新版本并发送拉取请求。
import asyncio
from time import sleep
import logging
# Configure the root logger at DEBUG level. Each record shows the timestamp,
# the id of the emitting thread, the calling function's name and the message,
# which makes it visible which thread ran which step.
logging.basicConfig(
level=logging.DEBUG, format="%(asctime)s %(thread)s %(funcName)s %(message)s")
def long_task(t):
    """Stand in for a slow I/O-bound call.

    Logs step "2." on entry, sleeps for *t* seconds on the calling thread,
    logs step "4." and returns ``t ** 2``.
    """
    logging.info("2. t: %s", t)
    sleep(t)
    logging.info("4. t: %s", t)
    squared = t ** 2
    return squared
async def main():
    """Run ``long_task`` for 1..4 on the default executor and log each result.

    All calls are submitted before any of them is awaited, so they run
    concurrently; ``asyncio.gather`` returns the results in submission order,
    which lets them be paired with ``inputs`` again.
    """
    loop = asyncio.get_running_loop()
    inputs = range(1, 5)
    logging.info("1.")
    futures = []
    for seconds in inputs:
        # None selects the loop's default executor.
        futures.append(loop.run_in_executor(None, long_task, seconds))
    logging.info("3.")
    results = await asyncio.gather(*futures)
    logging.info("5.")
    for pair in zip(inputs, results):
        logging.info("6. Result: %s, %s", *pair)


if __name__ == "__main__":
    asyncio.run(main())
输出:
2020-03-18 17:13:07,523 23964 main 1.
2020-03-18 17:13:07,524 5008 long_task 2. t: 1
2020-03-18 17:13:07,525 21232 long_task 2. t: 2
2020-03-18 17:13:07,525 22048 long_task 2. t: 3
2020-03-18 17:13:07,526 25588 long_task 2. t: 4
2020-03-18 17:13:07,526 23964 main 3.
2020-03-18 17:13:08,526 5008 long_task 4. t: 1
2020-03-18 17:13:09,526 21232 long_task 4. t: 2
2020-03-18 17:13:10,527 22048 long_task 4. t: 3
2020-03-18 17:13:11,527 25588 long_task 4. t: 4
2020-03-18 17:13:11,527 23964 main 5.
2020-03-18 17:13:11,528 23964 main 6. Result: 1, 1
2020-03-18 17:13:11,528 23964 main 6. Result: 2, 4
2020-03-18 17:13:11,529 23964 main 6. Result: 3, 9
2020-03-18 17:13:11,529 23964 main 6. Result: 4, 16