python 中的驱动器 API 有问题
Problems with Drive API in python
我用 Python 和 Streamlit 制作了一个应用程序,并添加了驱动器 API。我拥有在官方 Google 页面上找到的所有代码,一开始它可以工作。
我在 google 驱动器上有一个 .csv,因为我无法在 Heroku 中保存文件,所以我将它保存在驱动器中,然后每次我在应用程序中需要它时下载它。
起初下载代码有效,.csv 被正确下载,但在一些上传和下载之后,下载代码显示此错误
TypeError: expected string or bytes-like object
Traceback:
File "D:\Users\***\drive.py", line 121, in save_response_content
filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
File "C:\Python\lib\re.py", line 241, in findall
return _compile(pattern, flags).findall(string)
如果我再次 运行 streamlit 然后它再次工作。但是在一些后续的上传和下载之后它又坏了。如果我检查驱动器,则文件已正确保存。我找不到问题。
这是代码:
import os
import re
import io
import pickle
# Gmail API utils
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
import requests
from tqdm import tqdm
SCOPES = ['https://www.googleapis.com/auth/drive.metadata',
'https://www.googleapis.com/auth/drive.metadata.readonly',
'https://www.googleapis.com/auth/drive',
'https://www.googleapis.com/auth/drive.file'
]
def drive_authenticate():
creds = None
# the file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first time
if os.path.exists("token_drive.pickle"):
with open("token_drive.pickle", "rb") as token:
creds = pickle.load(token)
# if there are no (valid) credentials availablle, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file('token.json', SCOPES)
creds = flow.run_local_server(port=0)
# save the credentials for the next run
with open("token_drive.pickle", "wb") as token:
pickle.dump(creds, token)
return build('drive', 'v3', credentials=creds)
# get the Gmail API service
service = drive_authenticate()
def delete_file(filename):
search_result = search(query=f"name='{filename}'")
for i in range(len(search_result)):
print('deleted: ' + search_result[i][0])
file_id = search_result[i][0]
file = service.files().delete(fileId=file_id).execute()
def create_folder(folder_name):
"""
Creates a folder and upload a file to it
"""
# folder details we want to make
folder_metadata = {
"name": folder_name,
"mimeType": "application/vnd.google-apps.folder"
}
# create the folder
file = service.files().create(body=folder_metadata, fields="id").execute()
def upload_file (file_name, file_mimetype): #csv is text/csv
file_metadata = {'name': file_name}
media = MediaFileUpload(file_name, mimetype= file_mimetype)
file = service.files().create(body=file_metadata,
media_body=media,
fields='id').execute()
def search(query): #example "mimeType='image/jpeg'" .. or .. "name='token.json'"
result = []
page_token = None
while True:
response = service.files().list(q=query,
spaces='drive',
fields='nextPageToken, files(id, name)',
pageToken=page_token).execute()
for file in response.get('files', []):
# Process change
print ('Found file: %s (%s)' % (file.get('name'), file.get('id')))
result.append((file.get('id'), file.get('name')))
page_token = response.get('nextPageToken', None)
if page_token is None:
break
return result
def download(filename):
# search for the file by name
search_result = search(query=f"name='{filename}'")
# get the GDrive ID of the file
file_id = search_result[0][0]
# make it shareable
service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
# download file
download_file_from_google_drive(file_id, filename)
def download_file_from_google_drive(id, destination):
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith('download_warning'):
return value
return None
def save_response_content(response, destination):
CHUNK_SIZE = 32768
# get the file size from Content-length response header
file_size = int(response.headers.get("Content-Length", 0))
# extract Content disposition from response headers
content_disposition = response.headers.get("content-disposition")
# parse filename
filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
print("[+] File size:", file_size)
print("[+] File name:", filename)
progress = tqdm(response.iter_content(CHUNK_SIZE), f"Downloading {filename}", total=file_size, unit="Byte", unit_scale=True, unit_divisor=1024)
with open(destination, "wb") as f:
for chunk in progress:
if chunk: # filter out keep-alive new chunks
f.write(chunk)
# update the progress bar
progress.update(len(chunk))
progress.close()
# base URL for download
URL = "https://docs.google.com/uc?export=download"
# init a HTTP session
session = requests.Session()
# make a request
response = session.get(URL, params = {'id': id}, stream=True)
print("[+] Downloading", response.url)
# get confirmation token
token = get_confirm_token(response)
if token:
params = {'id': id, 'confirm':token}
response = session.get(URL, params=params, stream=True)
# download to disk
save_response_content(response, destination)
似乎下载连接有时会中断,之后您只能等待。
当我查看 pycharm 中的整个代码时,除了这部分代码外,其他所有内容都显示相同:
#When it works
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x1efb3332880>
response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [200]>
response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/68igeel9a4bfatoar25e8t8qm0g67tfi/1629226275000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'
response.headers
Out[]: {'X-GUploader-UploadID': '***', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'false', 'Access-Control-Allow-Headers': 'Accept, Accept-Language, Authorization, Cache-Control, Content-Disposition, Content-Encoding, Content-Language, Content-Length, Content-MD5, Content-Range, Content-Type, Date, X-Goog-Sn-Metadata, X-Goog-Sn-PatientId, GData-Version, google-cloud-resource-prefix, x-goog-request-params, Host, If-Match, If-Modified-Since, If-None-Match, If-Unmodified-Since, Origin, OriginToken, Pragma, Range, Slug, Transfer-Encoding, hotrod-board-name, hotrod-chrome-cpu-model, hotrod-chrome-processors, Want-Digest, x-chrome-connected, X-ClientDetails, X-Client-Version, X-Firebase-Locale, X-Goog-Firebase-Installations-Auth, X-Firebase-Client, X-Firebase-Client-Log-Type, X-Firebase-GMPID, X-Firebase-Auth-Token, X-Goog-Drive-Client-Version, X-Goog-Drive-Resource-Keys, X-GData-Client, X-GData-Key, X-GoogApps-Allowed-Domains, X-Goog-AdX-Buyer-Impersonation, X-Goog-Api-Client, X-Goog-AuthUser, ***, ***, ***, X-Goog-PageId, X-Goog-Encode-Response-If-Executable, X-Goog-Correlation-Id, X-Goog-Request-Info, X-Goog-Request-Reason, X-Goog-Experiments, x-goog-iam-authority-selector, x-goog-iam-authorization-token, X-Goog-Spatula, X-Goog-Travel-Bgr, X-Goog-Travel-Settings, X-Goog-Upload-Command, X-Goog-Upload-Content-Disposition, X-Goog-Upload-Content-Length, X-Goog-Upload-Content-Type, X-Goog-Upload-File-Name, X-Goog-Upload-Header-Content-Encoding, X-Goog-Upload-Header-Content-Length, X-Goog-Upload-Header-Content-Type, X-Goog-Upload-Header-Transfer-Encoding, X-Goog-Upload-Offset, X-Goog-Upload-Protocol, x-goog-user-project, X-Goog-Visitor-Id, X-Goog-FieldMask, X-Google-Project-Override, X-Goog-Api-Key, X-HTTP-Method-Override, X-JavaScript-User-Agent, X-Pan-Versionid, X-Proxied-User-IP, X-Origin, X-Referer, X-Requested-With, X-Stadia-Client-Context, X-Upload-Content-Length, X-Upload-Content-Type, X-Use-HTTP-Status-Code-Override, X-Ios-Bundle-Identifier, X-Android-Package, X-Ariane-Xsrf-Token, X-YouTube-VVT, X-YouTube-Page-CL, X-YouTube-Page-Timestamp, X-Compass-Routing-Destination, X-Goog-Meeting-ABR, X-Goog-Meeting-Botguardid, X-Goog-Meeting-ClientInfo, X-Goog-Meeting-ClientVersion, X-Goog-Meeting-Debugid, X-Goog-Meeting-Identifier, X-Goog-Meeting-RtcClient, X-Goog-Meeting-StartSource, X-Goog-Meeting-Token, X-Client-Data, x-sdm-id-token, X-Sfdc-Authorization, MIME-Version, Content-Transfer-Encoding, X-Earth-Engine-App-ID-Token, X-Earth-Engine-Computation-Profile, X-Earth-Engine-Computation-Profiling, X-Play-Console-Experiments-Override, X-Play-Console-Session-Id, x-alkali-account-key, x-alkali-application-key, x-alkali-auth-apps-namespace, x-alkali-auth-entities-namespace, x-alkali-auth-entity, x-alkali-client-locale, EES-S7E-MODE, cast-device-capabilities, X-Server-Timeout', 'Access-Control-Allow-Methods': 'GET,OPTIONS', 'Content-Type': 'test/csv', 'Content-Disposition': 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv', 'Date': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Expires': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Cache-Control': 'private, max-age=0', 'X-Goog-Hash': 'crc32c=KcvnQQ==', 'Content-Length': '62', 'Server': 'UploadServer', 'Alt-Svc': '***"; ma=**...'}
file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 93
content_disposition = response.headers.get("content-disposition")
content_disposition
Out[]: 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv'
#When it does not work
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x16b6d381430>
response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [403]>
response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/2uvue873p1tjsu872vt03liosc91rssf/1629226875000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'
response.headers
Out[]: {'Content-Length': '1103', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Tue, 17 Aug 2021 19:02:25 GMT', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; ma=2592000; v="46,43"'}
file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 1103
content_disposition = response.headers.get("content-disposition")
content_disposition
#Nothing out
有谁知道如果我不做任何不同的事情,为什么会得到不同的响应?
当您从服务获得响应时,最好先检查响应代码,然后您尝试使用您希望在该响应中包含的数据。
如果您查看响应 objects,您会发现有效时为 <Response [200]>
,无效时为 <Response [403]>
。
403
表示“禁止”。服务器没有 return 给你你期望的数据,这就是为什么没有 content-disposition
header,你的正则表达式失败。
我用 Python 和 Streamlit 制作了一个应用程序,并添加了驱动器 API。我拥有在官方 Google 页面上找到的所有代码,一开始它可以工作。
我在 google 驱动器上有一个 .csv,因为我无法在 Heroku 中保存文件,所以我将它保存在驱动器中,然后每次我在应用程序中需要它时下载它。 起初下载代码有效,.csv 被正确下载,但在一些上传和下载之后,下载代码显示此错误
TypeError: expected string or bytes-like object
Traceback:
File "D:\Users\***\drive.py", line 121, in save_response_content
filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
File "C:\Python\lib\re.py", line 241, in findall
return _compile(pattern, flags).findall(string)
如果我再次 运行 streamlit 然后它再次工作。但是在一些后续的上传和下载之后它又坏了。如果我检查驱动器,则文件已正确保存。我找不到问题。
这是代码:
import os
import re
import io
import pickle
# Gmail API utils
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
import requests
from tqdm import tqdm
SCOPES = ['https://www.googleapis.com/auth/drive.metadata',
'https://www.googleapis.com/auth/drive.metadata.readonly',
'https://www.googleapis.com/auth/drive',
'https://www.googleapis.com/auth/drive.file'
]
def drive_authenticate():
creds = None
# the file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first time
if os.path.exists("token_drive.pickle"):
with open("token_drive.pickle", "rb") as token:
creds = pickle.load(token)
# if there are no (valid) credentials availablle, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file('token.json', SCOPES)
creds = flow.run_local_server(port=0)
# save the credentials for the next run
with open("token_drive.pickle", "wb") as token:
pickle.dump(creds, token)
return build('drive', 'v3', credentials=creds)
# get the Gmail API service
service = drive_authenticate()
def delete_file(filename):
search_result = search(query=f"name='{filename}'")
for i in range(len(search_result)):
print('deleted: ' + search_result[i][0])
file_id = search_result[i][0]
file = service.files().delete(fileId=file_id).execute()
def create_folder(folder_name):
"""
Creates a folder and upload a file to it
"""
# folder details we want to make
folder_metadata = {
"name": folder_name,
"mimeType": "application/vnd.google-apps.folder"
}
# create the folder
file = service.files().create(body=folder_metadata, fields="id").execute()
def upload_file (file_name, file_mimetype): #csv is text/csv
file_metadata = {'name': file_name}
media = MediaFileUpload(file_name, mimetype= file_mimetype)
file = service.files().create(body=file_metadata,
media_body=media,
fields='id').execute()
def search(query): #example "mimeType='image/jpeg'" .. or .. "name='token.json'"
result = []
page_token = None
while True:
response = service.files().list(q=query,
spaces='drive',
fields='nextPageToken, files(id, name)',
pageToken=page_token).execute()
for file in response.get('files', []):
# Process change
print ('Found file: %s (%s)' % (file.get('name'), file.get('id')))
result.append((file.get('id'), file.get('name')))
page_token = response.get('nextPageToken', None)
if page_token is None:
break
return result
def download(filename):
# search for the file by name
search_result = search(query=f"name='{filename}'")
# get the GDrive ID of the file
file_id = search_result[0][0]
# make it shareable
service.permissions().create(body={"role": "reader", "type": "anyone"}, fileId=file_id).execute()
# download file
download_file_from_google_drive(file_id, filename)
def download_file_from_google_drive(id, destination):
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith('download_warning'):
return value
return None
def save_response_content(response, destination):
CHUNK_SIZE = 32768
# get the file size from Content-length response header
file_size = int(response.headers.get("Content-Length", 0))
# extract Content disposition from response headers
content_disposition = response.headers.get("content-disposition")
# parse filename
filename = re.findall("filename=\"(.+)\"", content_disposition)[0]
print("[+] File size:", file_size)
print("[+] File name:", filename)
progress = tqdm(response.iter_content(CHUNK_SIZE), f"Downloading {filename}", total=file_size, unit="Byte", unit_scale=True, unit_divisor=1024)
with open(destination, "wb") as f:
for chunk in progress:
if chunk: # filter out keep-alive new chunks
f.write(chunk)
# update the progress bar
progress.update(len(chunk))
progress.close()
# base URL for download
URL = "https://docs.google.com/uc?export=download"
# init a HTTP session
session = requests.Session()
# make a request
response = session.get(URL, params = {'id': id}, stream=True)
print("[+] Downloading", response.url)
# get confirmation token
token = get_confirm_token(response)
if token:
params = {'id': id, 'confirm':token}
response = session.get(URL, params=params, stream=True)
# download to disk
save_response_content(response, destination)
似乎下载连接有时会中断,之后您只能等待。
当我查看 pycharm 中的整个代码时,除了这部分代码外,其他所有内容都显示相同:
#When it works
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x1efb3332880>
response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [200]>
response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/68igeel9a4bfatoar25e8t8qm0g67tfi/1629226275000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'
response.headers
Out[]: {'X-GUploader-UploadID': '***', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'false', 'Access-Control-Allow-Headers': 'Accept, Accept-Language, Authorization, Cache-Control, Content-Disposition, Content-Encoding, Content-Language, Content-Length, Content-MD5, Content-Range, Content-Type, Date, X-Goog-Sn-Metadata, X-Goog-Sn-PatientId, GData-Version, google-cloud-resource-prefix, x-goog-request-params, Host, If-Match, If-Modified-Since, If-None-Match, If-Unmodified-Since, Origin, OriginToken, Pragma, Range, Slug, Transfer-Encoding, hotrod-board-name, hotrod-chrome-cpu-model, hotrod-chrome-processors, Want-Digest, x-chrome-connected, X-ClientDetails, X-Client-Version, X-Firebase-Locale, X-Goog-Firebase-Installations-Auth, X-Firebase-Client, X-Firebase-Client-Log-Type, X-Firebase-GMPID, X-Firebase-Auth-Token, X-Goog-Drive-Client-Version, X-Goog-Drive-Resource-Keys, X-GData-Client, X-GData-Key, X-GoogApps-Allowed-Domains, X-Goog-AdX-Buyer-Impersonation, X-Goog-Api-Client, X-Goog-AuthUser, ***, ***, ***, X-Goog-PageId, X-Goog-Encode-Response-If-Executable, X-Goog-Correlation-Id, X-Goog-Request-Info, X-Goog-Request-Reason, X-Goog-Experiments, x-goog-iam-authority-selector, x-goog-iam-authorization-token, X-Goog-Spatula, X-Goog-Travel-Bgr, X-Goog-Travel-Settings, X-Goog-Upload-Command, X-Goog-Upload-Content-Disposition, X-Goog-Upload-Content-Length, X-Goog-Upload-Content-Type, X-Goog-Upload-File-Name, X-Goog-Upload-Header-Content-Encoding, X-Goog-Upload-Header-Content-Length, X-Goog-Upload-Header-Content-Type, X-Goog-Upload-Header-Transfer-Encoding, X-Goog-Upload-Offset, X-Goog-Upload-Protocol, x-goog-user-project, X-Goog-Visitor-Id, X-Goog-FieldMask, X-Google-Project-Override, X-Goog-Api-Key, X-HTTP-Method-Override, X-JavaScript-User-Agent, X-Pan-Versionid, X-Proxied-User-IP, X-Origin, X-Referer, X-Requested-With, X-Stadia-Client-Context, X-Upload-Content-Length, X-Upload-Content-Type, X-Use-HTTP-Status-Code-Override, X-Ios-Bundle-Identifier, X-Android-Package, X-Ariane-Xsrf-Token, X-YouTube-VVT, X-YouTube-Page-CL, X-YouTube-Page-Timestamp, X-Compass-Routing-Destination, X-Goog-Meeting-ABR, X-Goog-Meeting-Botguardid, X-Goog-Meeting-ClientInfo, X-Goog-Meeting-ClientVersion, X-Goog-Meeting-Debugid, X-Goog-Meeting-Identifier, X-Goog-Meeting-RtcClient, X-Goog-Meeting-StartSource, X-Goog-Meeting-Token, X-Client-Data, x-sdm-id-token, X-Sfdc-Authorization, MIME-Version, Content-Transfer-Encoding, X-Earth-Engine-App-ID-Token, X-Earth-Engine-Computation-Profile, X-Earth-Engine-Computation-Profiling, X-Play-Console-Experiments-Override, X-Play-Console-Session-Id, x-alkali-account-key, x-alkali-application-key, x-alkali-auth-apps-namespace, x-alkali-auth-entities-namespace, x-alkali-auth-entity, x-alkali-client-locale, EES-S7E-MODE, cast-device-capabilities, X-Server-Timeout', 'Access-Control-Allow-Methods': 'GET,OPTIONS', 'Content-Type': 'test/csv', 'Content-Disposition': 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv', 'Date': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Expires': 'Tue, 17 Aug 2021 19:27:57 GMT', 'Cache-Control': 'private, max-age=0', 'X-Goog-Hash': 'crc32c=KcvnQQ==', 'Content-Length': '62', 'Server': 'UploadServer', 'Alt-Svc': '***"; ma=**...'}
file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 93
content_disposition = response.headers.get("content-disposition")
content_disposition
Out[]: 'attachment;filename="df_emails.csv";filename*=UTF-8\'\'df_emails.csv'
#When it does not work
session = requests.Session()
session
Out[]: <requests.sessions.Session at 0x16b6d381430>
response = session.get(URL, params = {'id': id}, stream=True)
response
Out[]: <Response [403]>
response.url
Out[]: 'https://doc-0c-2c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/2uvue873p1tjsu872vt03liosc91rssf/1629226875000/01179996486350361096/*/1xFI06MKFhWtkZbQM8NCgur5SOlkERq-W?e=download'
response.headers
Out[]: {'Content-Length': '1103', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Tue, 17 Aug 2021 19:02:25 GMT', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000,h3-T051=":443"; ma=2592000,h3-Q050=":443"; ma=2592000,h3-Q046=":443"; ma=2592000,h3-Q043=":443"; ma=2592000,quic=":443"; ma=2592000; v="46,43"'}
file_size = int(response.headers.get("Content-Length", 0))
file_size
Out[]: 1103
content_disposition = response.headers.get("content-disposition")
content_disposition
#Nothing out
有谁知道如果我不做任何不同的事情,为什么会得到不同的响应?
当您从服务获得响应时,最好先检查响应代码,然后您尝试使用您希望在该响应中包含的数据。
如果您查看响应 objects,您会发现有效时为 <Response [200]>
,无效时为 <Response [403]>
。
403
表示“禁止”。服务器没有 return 给你你期望的数据,这就是为什么没有 content-disposition
header,你的正则表达式失败。