在 Python 中使用 aiohttp 上传文件到 Google Drive

google drive file upload with aiohttp in Python

我正在尝试编写一个将文件上传到我的 Google Drive(云端硬盘)的函数。

下面的代码使用 requests 库上传单个文件。

import requests
import json

# Read the OAuth access token from the locally stored credentials file.
with open('storage.json', 'r') as file:
    token = json.load(file)["access_token"]

url = "https://www.googleapis.com/upload/drive/v3/files"
file_metadata = {"name": "test.jpg",
                 "parents": [],
                 }
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}

# Open the image inside a context manager so the handle is closed once the
# request has been sent; passing a bare open() to `files=` leaves it open.
with open("test2.jpg", "rb") as media:
    data = {
        # (filename, content, content type) tuple for the metadata part.
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        "Media": media,
    }
    res = requests.post(url, files=data, params=params, headers=headers)
print(res.text)

现在,我想用 aiohttp 把它改写成异步函数,但不知道该怎么做。下面是我当前的代码,但它会抛出 aiohttp.payload.LookupError。

async def upload_file(session, file_path, folder_id):
    r'''
    * uploads a single file to the designated folder with one multipart POST
    * the uploaded name is the last path component of file_path
    * returns nothing; the response's `text` attribute is printed
    * args:
    - session: aiohttp session used to issue the POST
    - file_path : absolute path of a file (e.g.) C:\Git\GoogleDriveAPI\test2.jpg
    - folder_id: folder id of the designated folder in google drive (e.g.) '1Q6gaU4kHaLRN5psS4S_2Yx_*******'
    '''
    # Keep only the last component after splitting on the OS path separator.
    file_name = file_path.split(os.path.sep)[-1]
    url = "https://www.googleapis.com/upload/drive/v3/files"
    file_metadata = {"name": file_name,
                     "parents": [folder_id],
                     }
    # NOTE: this is the value aiohttp rejects. Posting a dict whose entry is a
    # (name, content, content-type) tuple fails with
    # "TypeError: Can not serialize value type: <class 'tuple'>".
    data = {
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        # Opened in binary mode; never explicitly closed in this function.
        "Media": open(file_path, "rb"),
    }
    # Reads the module-level `token`; it must be assigned before this runs.
    global token
    headers = {"Authorization": "Bearer {}".format(token)}
    params = {"uploadType": "multipart"}
    async with session.post(url, data=data, params=params, headers=headers) as resp:
        # NOTE(review): `resp.text` is neither called nor awaited here; in
        # aiohttp it is presumably a coroutine method, so this would print the
        # method object rather than the response body -- confirm.
        print(resp.text)

这是我的完整代码,以供您进行实验。

import os
import sys
import argparse
import asyncio
import json

import aiohttp
from aiohttp import web

import googleapiclient.errors
from googleapiclient.discovery import build

from httplib2 import Http
from oauth2client import file, client, tools
from tqdm import tqdm


# Concurrency limits; neither constant is referenced by the code shown here.
# NOTE(review): presumably meant to cap simultaneous uploads -- confirm use.
DEFAULT_CONCUR_REQ = 20
MAX_CONCUR_REQ = 1000


def get_token():
    '''
    * obtains authorization for the user's google drive and returns the
      access token string
    * credentials are cached in 'storage.json'; when they are missing or
      invalid the OAuth flow is run from 'client_secret_drive.json'
    * returns: the "access_token" value read back from 'storage.json'
    '''
    scopes = 'https://www.googleapis.com/auth/drive.file'
    storage = file.Storage('storage.json')
    credentials = storage.get()
    if not credentials or credentials.invalid:
        print("make new storage data file ")
        credentials = tools.run_flow(
            client.flow_from_clientsecrets('client_secret_drive.json', scopes),
            storage,
        )
    # The service object is discarded; the call is kept for whatever side
    # effects building the authorized client has.
    authorized_http = credentials.authorize(Http())
    build('drive', 'v3', http=authorized_http)
    # The token is taken from the storage file rather than from `credentials`.
    with open('storage.json', 'r') as storage_file:
        return json.load(storage_file)["access_token"]


async def upload_file(session, file_path, folder_id):
    r'''
    * uploads a single file to the designated folder with one multipart POST
    * the uploaded name is the last path component of file_path
    * returns nothing; the response's `text` attribute is printed
    * args:
    - session: aiohttp session used to issue the POST
    - file_path : absolute path of a file (e.g.) C:\Git\GoogleDriveAPI\test2.jpg
    - folder_id: folder id of the designated folder in google drive (e.g.) '1Q6gaU4kHaLRN5psS4S_2Yx_*******'
    '''
    # Keep only the last component after splitting on the OS path separator.
    file_name = file_path.split(os.path.sep)[-1]
    url = "https://www.googleapis.com/upload/drive/v3/files"
    file_metadata = {"name": file_name,
                     "parents": [folder_id],
                     }
    # NOTE: this is the value aiohttp rejects. Posting a dict whose entry is a
    # (name, content, content-type) tuple fails with
    # "TypeError: Can not serialize value type: <class 'tuple'>".
    data = {
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        # Opened in binary mode; never explicitly closed in this function.
        "Media": open(file_path, "rb"),
    }
    # Reads the module-level `token` assigned in the __main__ section.
    global token
    headers = {"Authorization": "Bearer {}".format(token)}
    params = {"uploadType": "multipart"}
    async with session.post(url, data=data, params=params, headers=headers) as resp:
        # NOTE(review): `resp.text` is neither called nor awaited here; in
        # aiohttp it is presumably a coroutine method, so this would print the
        # method object rather than the response body -- confirm.
        print(resp.text)

async def upload_files(file_paths, folder_id):
    '''
    * uploads every path in file_paths to the designated folder
    * one aiohttp session is shared by all uploads
    * uploads are awaited in completion order; the first failure propagates
    * args:
    - file_paths: iterable of local file paths
    - folder_id: folder id forwarded unchanged to upload_file
    '''
    async with aiohttp.ClientSession() as session:
        uploads = [upload_file(session, path, folder_id) for path in file_paths]
        for finished in asyncio.as_completed(uploads):
            await finished

def main():
    '''
    * uploads every entry of a hard-coded local folder
    * the folder id handed to upload_files is None
    * drives the coroutine to completion on the current event loop
    '''
    source_dir = r'C:\Git\GoogleDriveAPI\test2'
    target_folder_id = None
    paths = [os.path.join(source_dir, entry) for entry in os.listdir(source_dir)]
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(upload_files(paths, target_folder_id))



if __name__ == '__main__':
    # Command-line handling is currently disabled; the commented-out parser
    # below is kept for reference only.
    # parser = argparse.ArgumentParser(
    #     description='Upload folder including all sub-folders to google drive.')
    # parser.add_argument('folder_path',
    #                     help='folder_path: local folder path to upload'
    #                          'e.g. C:\Git\PytorchBasic')
    # parser.add_argument('folder_id',
    #                     help='folder_id: target folder\'s id in google drive'
    #                          'e.g. 1FzI5QChbh4Q-nEQGRu8D-********')
    # args = parser.parse_args()
    # if not os.path.isdir(args.folder_path):
    #     print('*** Folder path error: invalid path')
    #     parser.print_usage()
    #     sys.exit(1)
    # folder_path = args.folder_path
    # folder_id = args.folder_id

    # WindowsSelectorEventLoopPolicy is only defined on Windows builds of
    # Python; select it there and keep the default policy elsewhere instead of
    # failing with AttributeError.
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # Must stay a module-level name: upload_file() reads it via `global token`.
    token = get_token()
    main()




错误

C:\VirtualEnv\basic\Scripts\python.exe C:/Git/GoogleDriveAPI/googledriveapi_async.py
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 103, in <module>
    main()
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 81, in main
    loop.run_until_complete(upload_files(file_paths, folder_id))
  File "C:\Users\chsze\AppData\Local\Programs\Python\Python38\lib\asyncio\base_events.py", line 616, in run_until_complete
    return future.result()
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 73, in upload_files
    await job
  File "C:\Users\chsze\AppData\Local\Programs\Python\Python38\lib\asyncio\tasks.py", line 619, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000064.jpg", "parents": [null]}', 'application/json; charset=UTF-8')
Task exception was never retrieved
future: <Task finished name='Task-4' coro=<upload_file() done, defined at C:/Git/GoogleDriveAPI/googledriveapi_async.py:41> exception=TypeError('Can not serialize value type: <class \'tuple\'>\n headers: {}\n value: (\'metadata\', \'{"name": "COCO_train2014_000000000061.jpg", "parents": [null]}\', \'application/json; charset=UTF-8\')')>
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000061.jpg", "parents": [null]}', 'application/json; charset=UTF-8')
Task exception was never retrieved
future: <Task finished name='Task-3' coro=<upload_file() done, defined at C:/Git/GoogleDriveAPI/googledriveapi_async.py:41> exception=TypeError('Can not serialize value type: <class \'tuple\'>\n headers: {}\n value: (\'metadata\', \'{"name": "COCO_train2014_000000000049.jpg", "parents": [null]}\', \'application/json; charset=UTF-8\')')>
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000049.jpg", "parents": [null]}', 'application/json; charset=UTF-8')

Process finished with exit code 1

查看您的脚本后,我认为要以 multipart/form-data 方式发送请求,需要先用 aiohttp.FormData 构造请求正文;直接传入包含元组的字典很可能就是出错的原因。将这一点体现到您的脚本中,修改如下。

修改前:

# Original request body: a plain dict whose "MetaData" entry is a tuple.
# aiohttp fails on it with
# "TypeError: Can not serialize value type: <class 'tuple'>".
data = {
    "MetaData": (
        "metadata",
        json.dumps(file_metadata),
        "application/json; charset=UTF-8",
    ),
    "Media": open(file_path, "rb"),
}
global token
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}
async with session.post(url, data=data, params=params, headers=headers) as resp:
    print(resp.text)

修改后:

# Build the multipart body explicitly with aiohttp.FormData instead of
# passing a dict that contains a tuple.
data = aiohttp.FormData()
# Metadata part: the JSON text, sent with an explicit content type.
data.add_field(
    "metadata",
    json.dumps(file_metadata),
    content_type="application/json; charset=UTF-8",
)
# File part: the open binary file object is handed over directly; it is not
# explicitly closed in this snippet.
data.add_field("file", open(file_path, "rb"))
# Reads the module-level `token`.
global token
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}
async with session.post(url, data=data, params=params, headers=headers) as resp:
    # Await the decoded JSON body and print it.
    r = await resp.json()
    print(r)

测试:

运行修改后的脚本,会得到如下结果:

{'kind': 'drive#file', 'id': '###', 'name': '###', 'mimeType': '###'}
,
,
,

注:

  • 此修改后的脚本假定您的访问令牌可用于将文件上传到 Google 云端硬盘。请注意这一点。

参考: