在 Flask Response 中 yield Google 的 MediaIoBaseDownload
Yield google's MediaIoBaseDownload inside Flask Response
我编写了一个小型 Flask 应用程序来从 Google Drive 下载文件。
@app.route("/downloadFile/<id>")
def downloadFile(id):
    """Fetch a Google Drive file and return it to the client as an attachment.

    ``id`` is the Drive file id captured from the URL. The parameter name has
    to match the ``<id>`` placeholder in the route, which is why it shadows
    the ``id`` builtin.
    """
    # The helper returns the file content together with its name and MIME type.
    ioBytes, name, mime = gdrive.downloadFile(id)
    # Positional arguments: mimetype, as_attachment, attachment file name.
    return send_file(ioBytes, mime, True, name)
我使用了示例 here 中的下载方法,稍作改动
def downloadFile(self, file_id):
    """Download a Drive file completely into memory.

    Args:
        file_id: Id of the Drive file to fetch.

    Returns:
        A ``(buffer, name, mime_type)`` tuple: an ``io.BytesIO`` rewound to
        the start, followed by the ``name`` and ``mimeType`` fields of the
        file's metadata.
    """
    file = self.drive.files().get(fileId=file_id).execute()
    request = self.drive.files().get_media(fileId=file_id)
    name = file.get('name')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        # Each call writes the next chunk into ``fh`` and reports progress.
        status, done = downloader.next_chunk()
        print("Downloading {} - {}%".format(name, int(status.progress() * 100)))
    # Rewind so the caller reads from the beginning of the buffer.
    fh.seek(0)
    return (fh, name, file.get('mimeType'))
它按预期运行并将文件下载到我的计算机上。
现在,我想将这个 Flask 应用程序部署到 Heroku。我的问题是 HTTP 超时,如 here 所述:
HTTP requests have an initial 30 second window in which the web process must return response data
由于我的一些文件可能需要超过 30 秒才能下载,这最终成为一个大问题。
我尝试使用 Response class 和 yield 语句来继续发送空字节,直到我使用以下函数下载并发送了文件:
def sendUntilEndOfRequest(func, args=()):
    """Run ``func(*args)`` in a worker thread and stream its result when ready.

    The returned streaming ``Response`` yields an empty chunk once per second
    while the worker is still running, then yields the worker's result.

    Args:
        func: Callable to run in the background.
        args: Positional arguments passed to ``func``.

    Returns:
        A streaming ``Response``. If ``func`` returns a response object, only
        its body is forwarded; its headers are not copied onto the streaming
        response.

    An exception raised by ``func`` is re-raised while the body is being
    iterated instead of leaving the stream waiting forever.
    """
    def run_in_context():
        # The worker thread has no Flask context of its own, so give it one.
        with app.app_context(), app.test_request_context():
            return func(*args)

    # Deliberately no ``with`` block: leaving it calls shutdown(wait=True),
    # which blocks until ``func`` has finished, so the Response could never
    # be returned while the work is still in progress.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(run_in_context)
    executor.shutdown(wait=False)

    def generate():
        # Polling the future (instead of comparing against a "" sentinel)
        # also terminates when ``func`` returns "" or raises.
        while not future.done():
            yield b""
            time.sleep(1)
        result = future.result()  # re-raises any exception from ``func``
        if hasattr(result, "iter_encoded"):
            # A response object (e.g. from send_file) is not a valid body
            # chunk itself; forward the bytes of its body instead.
            yield from result.iter_encoded()
        else:
            yield result

    return Response(stream_with_context(generate()))
我试着让它有点通用,这样如果我有任何其他函数需要超过 30 秒的执行时间,我就可以使用它。
现在,我的下载代码是:
@app.route("/downloadFile/<id>")
def downloadFile(id):
    """Download a Drive file through ``sendUntilEndOfRequest``.

    ``id`` is the Drive file id captured from the URL; the name must match
    the ``<id>`` placeholder in the route.
    """
    def downloadAndSendFile():
        # Fetch the file, then wrap it in an attachment response.
        ioBytes, name, mime = gdrive.downloadFile(id)
        return send_file(ioBytes, mime, True, name)

    return sendUntilEndOfRequest(downloadAndSendFile)
但是每次我尝试运行这段代码时,它都会给出这个错误:
127.0.0.1 - - [15/Jan/2020 20:38:06] "[37mGET /downloadFile/1heeoEBZrhW0crgDSLbhLpcyMfvXqSmqi HTTP/1.1[0m" 200 -
Error on request:
Traceback (most recent call last):
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 303, in run_wsgi
execute(self.server.app)
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 294, in execute
write(data)
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 274, in write
assert isinstance(data, bytes), "applications must write bytes"
AssertionError: applications must write bytes
显然,文件下载正确。我测试了用 render_template 命令替换 send_file,以检查是否可以 yield Flask 对象,结果它工作得很好。我还测试了返回字符串的情况,它也能正常工作。
请问如何找回下载的文件?
MediaIoBaseDownload 所做的只是调用文件句柄的 write 方法。
所以你可以像这样实现你自己的 IO:
import io
from googleapiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
from googleapiclient.http import MediaIoBaseDownload
from flask import Flask
from flask import Response
app = Flask(__name__)

# OAuth scope requested for Drive access.
SCOPES = 'https://www.googleapis.com/auth/drive.readonly'

# Load stored credentials; run the OAuth flow when they are missing or invalid.
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store)

# Drive v3 client built on an HTTP object authorized with those credentials.
drive_service = discovery.build('drive', 'v3', http=creds.authorize(Http()))
class ChunkHolder(object):
    """Write-only sink that keeps just the most recently written chunk.

    It exposes the ``write`` method a downloader calls on its file object,
    but stores only the latest chunk instead of accumulating all of them.
    """

    def __init__(self):
        # Nothing has been written yet.
        self.chunk = None

    def write(self, chunk):
        """Save current chunk, replacing the previously stored one."""
        self.chunk = chunk
@app.route('/<file_id>')
def download_file(file_id):
    """Stream a Drive file to the client one downloaded chunk at a time.

    ``file_id`` is the Drive file id captured from the URL.
    """
    request = drive_service.files().get_media(fileId=file_id)

    def download_stream():
        done = False
        fh = ChunkHolder()
        downloader = MediaIoBaseDownload(fh, request)
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
            # A step that wrote nothing (e.g. an empty file) leaves the
            # holder at None, which is not a valid body chunk.
            if fh.chunk is not None:
                yield fh.chunk

    return Response(download_stream())
if __name__ == '__main__':
    # Start the Flask server on port 5000 when executed as a script.
    app.run(port=5000)
每下载完一个数据块,我们就立即将其 yield 出去,并且不会在内存中保留之前的数据块。
我编写了一个小型 Flask 应用程序来从 Google Drive 下载文件。
@app.route("/downloadFile/<id>")
def downloadFile(id):
    """Fetch a Google Drive file and return it to the client as an attachment.

    ``id`` is the Drive file id captured from the URL. The parameter name has
    to match the ``<id>`` placeholder in the route, which is why it shadows
    the ``id`` builtin.
    """
    # The helper returns the file content together with its name and MIME type.
    ioBytes, name, mime = gdrive.downloadFile(id)
    # Positional arguments: mimetype, as_attachment, attachment file name.
    return send_file(ioBytes, mime, True, name)
我使用了示例 here 中的下载方法,稍作改动
def downloadFile(self, file_id):
    """Download a Drive file completely into memory.

    Args:
        file_id: Id of the Drive file to fetch.

    Returns:
        A ``(buffer, name, mime_type)`` tuple: an ``io.BytesIO`` rewound to
        the start, followed by the ``name`` and ``mimeType`` fields of the
        file's metadata.
    """
    file = self.drive.files().get(fileId=file_id).execute()
    request = self.drive.files().get_media(fileId=file_id)
    name = file.get('name')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        # Each call writes the next chunk into ``fh`` and reports progress.
        status, done = downloader.next_chunk()
        print("Downloading {} - {}%".format(name, int(status.progress() * 100)))
    # Rewind so the caller reads from the beginning of the buffer.
    fh.seek(0)
    return (fh, name, file.get('mimeType'))
它按预期运行并将文件下载到我的计算机上。
现在,我想将这个 Flask 应用程序部署到 Heroku。我的问题是 HTTP 超时,如 here 所述:
HTTP requests have an initial 30 second window in which the web process must return response data
由于我的一些文件可能需要超过 30 秒才能下载,这最终成为一个大问题。
我尝试使用 Response class 和 yield 语句来继续发送空字节,直到我使用以下函数下载并发送了文件:
def sendUntilEndOfRequest(func, args=()):
    """Run ``func(*args)`` in a worker thread and stream its result when ready.

    The returned streaming ``Response`` yields an empty chunk once per second
    while the worker is still running, then yields the worker's result.

    Args:
        func: Callable to run in the background.
        args: Positional arguments passed to ``func``.

    Returns:
        A streaming ``Response``. If ``func`` returns a response object, only
        its body is forwarded; its headers are not copied onto the streaming
        response.

    An exception raised by ``func`` is re-raised while the body is being
    iterated instead of leaving the stream waiting forever.
    """
    def run_in_context():
        # The worker thread has no Flask context of its own, so give it one.
        with app.app_context(), app.test_request_context():
            return func(*args)

    # Deliberately no ``with`` block: leaving it calls shutdown(wait=True),
    # which blocks until ``func`` has finished, so the Response could never
    # be returned while the work is still in progress.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(run_in_context)
    executor.shutdown(wait=False)

    def generate():
        # Polling the future (instead of comparing against a "" sentinel)
        # also terminates when ``func`` returns "" or raises.
        while not future.done():
            yield b""
            time.sleep(1)
        result = future.result()  # re-raises any exception from ``func``
        if hasattr(result, "iter_encoded"):
            # A response object (e.g. from send_file) is not a valid body
            # chunk itself; forward the bytes of its body instead.
            yield from result.iter_encoded()
        else:
            yield result

    return Response(stream_with_context(generate()))
我试着让它有点通用,这样如果我有任何其他函数需要超过 30 秒的执行时间,我就可以使用它。
现在,我的下载代码是:
@app.route("/downloadFile/<id>")
def downloadFile(id):
    """Download a Drive file through ``sendUntilEndOfRequest``.

    ``id`` is the Drive file id captured from the URL; the name must match
    the ``<id>`` placeholder in the route.
    """
    def downloadAndSendFile():
        # Fetch the file, then wrap it in an attachment response.
        ioBytes, name, mime = gdrive.downloadFile(id)
        return send_file(ioBytes, mime, True, name)

    return sendUntilEndOfRequest(downloadAndSendFile)
但是每次我尝试运行这段代码时,它都会给出这个错误:
127.0.0.1 - - [15/Jan/2020 20:38:06] "[37mGET /downloadFile/1heeoEBZrhW0crgDSLbhLpcyMfvXqSmqi HTTP/1.1[0m" 200 -
Error on request:
Traceback (most recent call last):
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 303, in run_wsgi
execute(self.server.app)
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 294, in execute
write(data)
File "C:\Users\fsvic\AppData\Local\Programs\Python\Python37\lib\site-packages\werkzeug\serving.py", line 274, in write
assert isinstance(data, bytes), "applications must write bytes"
AssertionError: applications must write bytes
显然,文件下载正确。我测试了用 render_template 命令替换 send_file,以检查是否可以 yield Flask 对象,结果它工作得很好。我还测试了返回字符串的情况,它也能正常工作。
请问如何找回下载的文件?
MediaIoBaseDownload 所做的只是调用文件句柄的 write 方法。
所以你可以像这样实现你自己的 IO:
import io
from googleapiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
from googleapiclient.http import MediaIoBaseDownload
from flask import Flask
from flask import Response
app = Flask(__name__)

# OAuth scope requested for Drive access.
SCOPES = 'https://www.googleapis.com/auth/drive.readonly'

# Load stored credentials; run the OAuth flow when they are missing or invalid.
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store)

# Drive v3 client built on an HTTP object authorized with those credentials.
drive_service = discovery.build('drive', 'v3', http=creds.authorize(Http()))
class ChunkHolder(object):
    """Write-only sink that keeps just the most recently written chunk.

    It exposes the ``write`` method a downloader calls on its file object,
    but stores only the latest chunk instead of accumulating all of them.
    """

    def __init__(self):
        # Nothing has been written yet.
        self.chunk = None

    def write(self, chunk):
        """Save current chunk, replacing the previously stored one."""
        self.chunk = chunk
@app.route('/<file_id>')
def download_file(file_id):
    """Stream a Drive file to the client one downloaded chunk at a time.

    ``file_id`` is the Drive file id captured from the URL.
    """
    request = drive_service.files().get_media(fileId=file_id)

    def download_stream():
        done = False
        fh = ChunkHolder()
        downloader = MediaIoBaseDownload(fh, request)
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
            # A step that wrote nothing (e.g. an empty file) leaves the
            # holder at None, which is not a valid body chunk.
            if fh.chunk is not None:
                yield fh.chunk

    return Response(download_stream())
if __name__ == '__main__':
    # Start the Flask server on port 5000 when executed as a script.
    app.run(port=5000)
每下载完一个数据块,我们就立即将其 yield 出去,并且不会在内存中保留之前的数据块。