python 中的驱动器 API 有问题

Problems with Drive API in python

我用 Python 和 Streamlit 制作了一个应用程序,并添加了驱动器 API。我拥有在官方 Google 页面上找到的所有代码,一开始它可以工作。

我在 google 驱动器上有一个 .csv,因为我无法在 Heroku 中保存文件,所以我将它保存在驱动器中,然后每次我在应用程序中需要它时下载它。 起初下载代码有效,.csv 被正确下载,但在一些上传和下载之后,下载代码显示此错误

TypeError: expected string or bytes-like object

Traceback:
File "D:\Users\***\drive.py", line 121, in save_response_content
    filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
File "C:\Python\lib\re.py", line 241, in findall
    return _compile(pattern, flags).findall(string)

如果我再次 运行 streamlit 然后它再次工作。但是在一些后续的上传和下载之后它又坏了。如果我检查驱动器,则文件已正确保存。我找不到问题。

这是代码:

import os
import re
import io
import pickle
# Gmail API utils
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
import requests
from tqdm import tqdm

SCOPES = ['https://www.googleapis.com/auth/drive.metadata',
          'https://www.googleapis.com/auth/drive.metadata.readonly',
          'https://www.googleapis.com/auth/drive',
          'https://www.googleapis.com/auth/drive.file'
          ]


def drive_authenticate():
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token_drive.pickle"):
        with open("token_drive.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials availablle, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('token.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token_drive.pickle", "wb") as token:
            pickle.dump(creds, token)
    return build('drive', 'v3', credentials=creds)

# get the Gmail API service
service = drive_authenticate()


def delete_file(filename):
    search_result = search(query=f"name='{filename}'")
    for i in range(len(search_result)):
        print('deleted: ' + search_result[i][0])
        file_id = search_result[i][0]
        file = service.files().delete(fileId=file_id).execute()


def create_folder(folder_name):
    """
    Creates a folder and upload a file to it
    """
    # folder details we want to make
    folder_metadata = {
        "name": folder_name,
        "mimeType": "application/vnd.google-apps.folder"
    }
    # create the folder
    file = service.files().create(body=folder_metadata, fields="id").execute()


def upload_file (file_name, file_mimetype): #csv is text/csv
    file_metadata = {'name': file_name}
    media = MediaFileUpload(file_name, mimetype= file_mimetype)
    file = service.files().create(body=file_metadata,
                                        media_body=media,
                                        fields='id').execute()


def search(query): #example "mimeType='image/jpeg'" .. or .. "name='token.json'"
    result = []
    page_token = None
    while True:
        response = service.files().list(q=query,
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        for file in response.get('files', []):
            # Process change
            print ('Found file: %s (%s)' % (file.get('name'), file.get('id')))
            result.append((file.get('id'), file.get('name')))
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break

    return result


def download(filename):
    # search for the file by name
    search_result = search(query=f"name='{filename}'")
    # get the GDrive ID of the file
    file_id = search_result[0][0]
    # make it shareable
    service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
    # download file
    download_file_from_google_drive(file_id, filename)


def download_file_from_google_drive(id, destination):
    def get_confirm_token(response):
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        CHUNK_SIZE = 32768
        # get the file size from Content-length response header
        file_size = int(response.headers.get("Content-Length", 0))
        # extract Content disposition from response headers
        content_disposition = response.headers.get("content-disposition")
        # parse filename
        filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
        print("[+] File size:", file_size)
        print("[+] File name:", filename)
        progress = tqdm(response.iter_content(CHUNK_SIZE), f"Downloading {filename}", total=file_size, unit="Byte", unit_scale=True, unit_divisor=1024)
        with open(destination, "wb") as f:
            for chunk in progress:
                if chunk: # filter out keep-alive new chunks
                    f.write(chunk)
                    # update the progress bar
                    progress.update(len(chunk))
        progress.close()

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"
    # init a HTTP session
    session = requests.Session()
    # make a request
    response = session.get(URL, params = {'id': id}, stream=True)
    print("[+] Downloading", response.url)
    # get confirmation token
    token = get_confirm_token(response)
    if token:
        params = {'id': id, 'confirm':token}
        response = session.get(URL, params=params, stream=True)
    # download to disk
    save_response_content(response, destination)

似乎下载连接有时会中断,之后您只能等待。

当我查看 pycharm 中的整个代码时,除了这部分代码外,其他所有内容都显示相同:

#When it works
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x1efb3332880>

response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [200]>

response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/68igeel9a4bfatoar25e8t8qm0g67tfi/1629226275000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'

response.headers
Out[]: {'X-GUploader-UploadID': '***', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'false', 'Access-Control-Allow-Headers': 'Accept, Accept-Language, Authorization, Cache-Control, Content-Disposition, Content-Encoding, Content-Language, Content-Length, Content-MD5, Content-Range, Content-Type, Date, X-Goog-Sn-Metadata, X-Goog-Sn-PatientId, GData-Version, google-cloud-resource-prefix, x-goog-request-params, Host, If-Match, If-Modified-Since, If-None-Match, If-Unmodified-Since, Origin, OriginToken, Pragma, Range, Slug, Transfer-Encoding, hotrod-board-name, hotrod-chrome-cpu-model, hotrod-chrome-processors, Want-Digest, x-chrome-connected, X-ClientDetails, X-Client-Version, X-Firebase-Locale, X-Goog-Firebase-Installations-Auth, X-Firebase-Client, X-Firebase-Client-Log-Type, X-Firebase-GMPID, X-Firebase-Auth-Token, X-Goog-Drive-Client-Version, X-Goog-Drive-Resource-Keys, X-GData-Client, X-GData-Key, X-GoogApps-Allowed-Domains, X-Goog-AdX-Buyer-Impersonation, X-Goog-Api-Client, X-Goog-AuthUser, ***, ***, ***, X-Goog-PageId, X-Goog-Encode-Response-If-Executable, X-Goog-Correlation-Id, X-Goog-Request-Info, X-Goog-Request-Reason, X-Goog-Experiments, x-goog-iam-authority-selector, x-goog-iam-authorization-token, X-Goog-Spatula, X-Goog-Travel-Bgr, X-Goog-Travel-Settings, X-Goog-Upload-Command, X-Goog-Upload-Content-Disposition, X-Goog-Upload-Content-Length, X-Goog-Upload-Content-Type, X-Goog-Upload-File-Name, X-Goog-Upload-Header-Content-Encoding, X-Goog-Upload-Header-Content-Length, X-Goog-Upload-Header-Content-Type, X-Goog-Upload-Header-Transfer-Encoding, X-Goog-Upload-Offset, X-Goog-Upload-Protocol, x-goog-user-project, X-Goog-Visitor-Id, X-Goog-FieldMask, X-Google-Project-Override, X-Goog-Api-Key, X-HTTP-Method-Override, X-JavaScript-User-Agent, X-Pan-Versionid, X-Proxied-User-IP, X-Origin, X-Referer, X-Requested-With, X-Stadia-Client-Context, X-Upload-Content-Length, X-Upload-Content-Type, X-Use-HTTP-Status-Code-Override, X-Ios-Bundle-Identifier, X-Android-Package, X-Ariane-Xsrf-Token, X-YouTube-VVT, X-YouTube-Page-CL, X-YouTube-Page-Timestamp, X-Compass-Routing-Destination, X-Goog-Meeting-ABR, X-Goog-Meeting-Botguardid, X-Goog-Meeting-ClientInfo, X-Goog-Meeting-ClientVersion, X-Goog-Meeting-Debugid, X-Goog-Meeting-Identifier, X-Goog-Meeting-RtcClient, X-Goog-Meeting-StartSource, X-Goog-Meeting-Token, X-Client-Data, x-sdm-id-token, X-Sfdc-Authorization, MIME-Version, Content-Transfer-Encoding, X-Earth-Engine-App-ID-Token, X-Earth-Engine-Computation-Profile, X-Earth-Engine-Computation-Profiling, X-Play-Console-Experiments-Override, X-Play-Console-Session-Id, x-alkali-account-key, x-alkali-application-key, x-alkali-auth-apps-namespace, x-alkali-auth-entities-namespace, x-alkali-auth-entity, x-alkali-client-locale, EES-S7E-MODE, cast-device-capabilities, X-Server-Timeout', 'Access-Control-Allow-Methods': 'GET,OPTIONS', 'Content-Type': 'test/csv', 'Content-Disposition': 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv', 'Date': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Expires': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Cache-Control': 'private, max-age=0', 'X-Goog-Hash': 'crc32c=KcvnQQ==', 'Content-Length': '62', 'Server': 'UploadServer', 'Alt-Svc': '***"; ma=**...'}

file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 93

content_disposition = response.headers.get("content-disposition")
content_disposition
Out[]: 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv'


#When it does not work
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x16b6d381430>

response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [403]>

response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/2uvue873p1tjsu872vt03liosc91rssf/1629226875000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'

response.headers
Out[]: {'Content-Length': '1103', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Tue, 17 Aug 2021 19:02:25 GMT', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; ma=2592000; v="46,43"'}

file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 1103

content_disposition = response.headers.get("content-disposition")
content_disposition
#Nothing out

有谁知道如果我不做任何不同的事情,为什么会得到不同的响应?

当您从服务获得响应时,最好先检查响应代码,然后您尝试使用您希望在该响应中包含的数据。

如果您查看响应 objects,您会发现有效时为 <Response [200]>,无效时为 <Response [403]>

403 表示“禁止”。服务器没有 return 给你你期望的数据,这就是为什么没有 content-disposition header,你的正则表达式失败。