Requests + tqdm:将下载内容直接保存到变量

Requests + Tqdm to a variable

我从这个问题(question)中得到了下面这个函数(在该页面按 Ctrl+F 搜索提到 requests 和 tqdm 的那个回答即可找到):

import requests
from tqdm import tqdm
def download(url: str, fname: str, chunk_size: int = 1024):
    """Download ``url`` into the file ``fname`` with a tqdm progress bar.

    Args:
        url: Address passed to ``requests.get``.
        fname: Path of the output file. It is opened in ``'wb'`` mode, so an
            existing file is overwritten. Also used as the bar's label.
        chunk_size: Number of bytes requested from the response per
            iteration.

    Note:
        The HTTP status code is not checked: the body of an error response
        is written to ``fname`` like any other body.
    """
    # A streamed response keeps its connection until it is fully consumed or
    # closed; using it as a context manager releases the connection even when
    # writing fails part-way through.
    with requests.get(url, stream=True) as resp:
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with open(fname, 'wb') as file, tqdm(
            desc=fname,
            total=total,
            unit='B',  # progress is counted in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in resp.iter_content(chunk_size=chunk_size):
                # Advance the bar by the number of bytes actually written.
                size = file.write(data)
                bar.update(size)

它的作用基本上就是下载一个文件并把它写入磁盘上的文件。我希望它改为返回(return)一个保存了下载内容的变量,所以我改成了这样:

def download(url: str, fname: str, chunk_size: int = 1024):
    """Download ``url`` via a temporary file and return its content as bytes.

    The body is streamed into ``fname`` with a tqdm progress bar, read back
    into memory, and ``fname`` is then deleted.

    Args:
        url: Address passed to ``requests.get``.
        fname: Path of the intermediate file. It is opened in ``'wb'`` mode
            (an existing file is overwritten) and removed before returning.
            Also used as the bar's label.
        chunk_size: Number of bytes requested from the response per
            iteration.

    Returns:
        The bytes that were written to ``fname``.

    Note:
        The HTTP status code is not checked: the body of an error response
        is returned like any other body.
    """
    import requests
    from tqdm import tqdm
    import os

    # A streamed response keeps its connection until it is fully consumed or
    # closed; the context manager releases it on every exit path.
    with requests.get(url, stream=True) as resp:
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        # Open the file before entering the try block so that the cleanup
        # below only ever removes a file this call has itself opened.
        file = open(fname, 'wb')
        try:
            with file, tqdm(
                desc=fname,
                total=total,
                unit='B',  # progress is counted in bytes, not bits
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in resp.iter_content(chunk_size=chunk_size):
                    size = file.write(data)
                    bar.update(size)
            with open(fname, "rb") as f:
                returned = f.read()
        finally:
            # Remove the intermediate file even if the download or the
            # read-back failed, so no partial file is left behind.
            os.remove(fname)
    return returned

现在它会先把文件保存到磁盘,再读取该文件并把内容存入变量,然后删除文件并返回这个变量。有没有办法不经过磁盘、直接把内容保存到变量中?

你可以把函数改写成一个生成器:它在更新 tqdm 进度条的同时逐块 yield 数据,这样你就可以对每个数据块做任何想做的处理:

import requests
import tqdm
import io

def download(url: str, chunk_size: int = 65536):
    """Stream ``url`` chunk by chunk while showing a tqdm progress bar.

    This is a generator: nothing is requested until iteration starts.

    Args:
        url: Address passed to ``requests.get``; also used as the bar's label.
        chunk_size: Number of bytes requested from the response per
            iteration.

    Yields:
        Each chunk produced by ``resp.iter_content``; the bar has already
        been advanced by the chunk's length when it is yielded.

    Note:
        The HTTP status code is not checked: the body of an error response
        is streamed like any other body.
    """
    # A streamed response keeps its connection until it is fully consumed or
    # closed. The context manager also runs when the consumer stops iterating
    # early and the generator is closed, so the connection is not leaked.
    with requests.get(url, stream=True) as resp:
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with tqdm.tqdm(
            desc=url,
            total=total,
            unit='B',  # progress is counted in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                bar.update(len(chunk))
                yield chunk

# Example usage: collect every streamed chunk in an in-memory buffer.
bio = io.BytesIO()

# 'http://...' is a placeholder; substitute the real address to download.
for chunk in download('http://...'):
    # Do something with the chunk; this just stores it in memory.
    bio.write(chunk)

content = bio.getvalue()  # Get the whole buffer as a single bytes object.

当然,你也可以把它重构成一个直接返回 bytes 的函数:

import requests
import tqdm
import io


def download_as_bytes_with_progress(url: str, chunk_size: int = 65536) -> bytes:
    """Download ``url`` into memory while showing a tqdm progress bar.

    Args:
        url: Address passed to ``requests.get``; also used as the bar's label.
        chunk_size: Number of bytes requested from the response per
            iteration.

    Returns:
        All received chunks concatenated into one ``bytes`` object.

    Note:
        The HTTP status code is not checked: the body of an error response
        is returned like any other body.
    """
    bio = io.BytesIO()
    # A streamed response keeps its connection until it is fully consumed or
    # closed; the context manager releases it even if the download fails
    # part-way through.
    with requests.get(url, stream=True) as resp:
        # A missing Content-Length header is treated as 0.
        total = int(resp.headers.get('content-length', 0))
        with tqdm.tqdm(
            desc=url,
            total=total,
            unit='B',  # progress is counted in bytes, not bits
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                bar.update(len(chunk))
                bio.write(chunk)
    return bio.getvalue()