使用 PyDrive 访问文件夹、子文件夹和子文件 (Python)
Accessing folders, subfolders and subfiles using PyDrive (Python)
我有 PyDrive 文档中的以下代码,允许访问我的 Google 驱动器中的顶级文件夹。我想访问其中的所有文件夹、子文件夹和文件。我该怎么做(我刚开始使用 PyDrive)?
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Authenticate through a local web server that handles the OAuth flow automatically.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()

# Make a GoogleDrive instance from the authenticated GoogleAuth instance.
drive = GoogleDrive(gauth)

# Auto-iterate through all files that match this query
# (non-trashed items whose parent is the Drive root).
top_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()
for file in top_list:
    # print() with a single pre-formatted string behaves the same on Python 2 and 3.
    print('title: %s, id: %s' % (file['title'], file['id']))
print("---------------------------------------------")

# Paginate file lists by specifying the maximum number of results per page.
for file_list in drive.ListFile({'q': 'trashed=true', 'maxResults': 10}):
    print('Received %s files from Files.list()' % len(file_list))  # <= 10
    for file1 in file_list:
        print('title: %s, id: %s' % (file1['title'], file1['id']))
我查看了下页 How to list all files, folders, subfolders and subfiles of a Google drive folder ,这似乎是我正在寻找的答案,但代码已经不存在了。
这需要对文件列表进行迭代。基于 this,下面的代码会获取文件夹中每个文件的标题和 URL 链接。通过传入文件夹的 id(例如 ListFolder('id')),可以调整代码以获取特定文件夹。下面给出的示例查询的是 root。
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Authenticate first; the GoogleDrive client below is built from this GoogleAuth object.
gauth = GoogleAuth()
gauth.LocalWebserverAuth() # Creates local webserver and auto handles authentication
# Make GoogleDrive instance with the authenticated GoogleAuth instance.
# ListFolder() reads this module-level `drive` name.
drive = GoogleDrive(gauth)
def ListFolder(parent):
    """Return the contents of a Drive folder as a nested list of dicts.

    Uses the module-level ``drive`` client and issues one ``ListFile``
    request per folder visited.

    Args:
        parent: Id of the folder to list (``'root'`` for the Drive root).

    Returns:
        A list with one dict per non-trashed child of ``parent``:
        folders become ``{"id", "title", "list"}`` where ``"list"`` holds
        the recursively listed children; every other item becomes
        ``{"title", "title1"}`` where ``"title1"`` carries the item's
        ``alternateLink``.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    children = drive.ListFile({'q': "'%s' in parents and trashed=false" % parent}).GetList()
    entries = []
    for child in children:
        if child['mimeType'] != folder_mime:
            # Plain file: record its title and link only.
            entries.append({"title": child['title'], "title1": child['alternateLink']})
            continue
        # Folder: descend and embed the sub-listing.
        entries.append({"id": child['id'], "title": child['title'], "list": ListFolder(child['id'])})
    return entries
# Walk the whole Drive starting at the root folder; the returned nested list is discarded here.
ListFolder('root')
您的代码完全正确。但是使用 PyDrive 的默认设置,您只能访问根级别的文件和文件夹。
更改 settings.yaml 文件中的 oauth_scope 可修复此问题。
# Read the OAuth client configuration from this file.
client_config_backend: settings
client_config:
  # Nested under client_config; replace the placeholders with your own credentials.
  client_id: XXX
  client_secret: XXXX

# Store the obtained credentials in a local file.
save_credentials: True
save_credentials_backend: file
save_credentials_file: credentials.json

get_refresh_token: True

# Scopes requested during authentication.
oauth_scope:
  - https://www.googleapis.com/auth/drive
  - https://www.googleapis.com/auth/drive.metadata
这是我对获取子文件夹中的所有文件的看法...
这使您可以按您设置的路径进行查询。这是不同的,因为它不会为每个文件夹发出 1 个请求。它创建了一批要查询的文件夹。
批量代码段:
'some_id_1234' in parents or 'some_id_1235' in parents or 'some_id_1236' in parents or 'some_id_1237' in parents or 'some_id_1238' in parents or 'some_id_1239' in parents or 'some_id_1240' in parents and trashed=false
您可以一次查询多个文件夹中的文件。但查询不能太长:一旦超过约 300 个文件夹条件(即 300 个类似 'some_id_1234' in parents 的子句),就会开始出现错误,因此请将批量大小保持在 250 左右。
假设您要检查的文件夹有 1,110 个文件夹,并且您将批量大小设置为 250。
然后它将发出 5 个单独的请求来查询所有文件夹。
-请求1查询250个文件夹
-请求2查询250个文件夹
-请求3查询250个文件夹
-请求4查询250个文件夹
-请求5查询110个文件夹
然后其中的任何子文件夹将被创建成批次并递归查询。
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
def parse_gdrive_path(gd_path):
    """Split a Drive-style path into its folder-name components.

    Both ``/`` and ``\\`` are accepted as separators. An optional
    ``prefix:`` in front of the path (everything up to the first colon) is
    dropped. Leading, trailing and repeated separators are ignored.

    Args:
        gd_path: Path string such as ``'/Docs/Notes'`` or ``'gdrive:/Docs/Notes/'``.

    Returns:
        List of folder names from outermost to innermost; an empty list
        when the path names the root itself (e.g. ``'/'``).
    """
    if ':' in gd_path:
        # Strip only the prefix before the first colon and keep the rest intact.
        gd_path = gd_path.split(':', 1)[1]
    # Normalise Windows-style separators, then drop the empty segments produced
    # by leading/trailing/duplicate slashes.
    return [part for part in gd_path.replace('\\', '/').split('/') if part]
def resolve_path_to_id(folder_path):
    """Resolve a Drive folder path such as ``'/Docs/Notes'`` to a folder id.

    Walks the path one component at a time starting from ``'root'``, issuing
    one ``ListFile`` request per component through the module-level
    ``gdrive`` client. When several folders share a title under the same
    parent, the first one returned is used.

    Args:
        folder_path: Path string understood by ``parse_gdrive_path``.

    Returns:
        The id of the innermost folder of the path.

    Raises:
        IndexError: If a path component matches no non-trashed folder under
            its parent.
    """
    _id = 'root'
    for folder in parse_gdrive_path(folder_path):
        # Escape backslashes and single quotes so a title such as
        # "Valentine's Day" cannot terminate the quoted query value early.
        safe_title = folder.replace('\\', '\\\\').replace("'", "\\'")
        matches = gdrive.ListFile({
            'q': (f"'{_id}' in parents and title='{safe_title}' and trashed=false "
                  "and mimeType='application/vnd.google-apps.folder'"),
            'fields': 'items(id, title, mimeType)',
        }).GetList()
        if not matches:
            # Same exception type the bare matches[0] lookup would raise,
            # but with a message that names the missing component.
            raise IndexError(f"Folder {folder!r} not found under Drive folder id {_id!r}")
        _id = matches[0]['id']
    return _id
def get_folder_files(folder_ids, batch_size=100):
    """Yield the non-trashed children of several folders, querying in batches.

    The folder ids are split into chunks of at most ``batch_size``. Each
    chunk becomes one ``ListFile`` request through the module-level
    ``gdrive`` client, of the form
    ``('<id1>' in parents or '<id2>' in parents ...) and trashed=false``.

    Args:
        folder_ids: Iterable of folder ids whose direct children are wanted.
        batch_size: Maximum number of folder ids combined into one query.

    Yields:
        The items returned by each request, restricted to the fields
        ``id``, ``title``, ``mimeType`` and ``version``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    folder_ids = list(folder_ids)
    for start in range(0, len(folder_ids), batch_size):
        batch = folder_ids[start:start + batch_size]
        parents_clause = " or ".join(f"'{folder_id}' in parents" for folder_id in batch)
        listing = gdrive.ListFile({
            # Parenthesised so the trashed filter applies to the whole batch
            # and does not depend on and/or precedence.
            'q': f"({parents_clause}) and trashed=false",
            'fields': 'items(id, title, mimeType, version)',
        })
        yield from listing.GetList()
def get_files(folder_path=None, target_ids=None, files=None):
    """Collect the titles of all files below a folder, including subfolders.

    The tree is walked level by level: the children of every folder on the
    current level are fetched with ``get_folder_files`` (batch size 250),
    file titles are recorded, and the subfolders found form the next level.

    Args:
        folder_path: Drive path of the starting folder; used only when
            ``target_ids`` is not given.
        target_ids: Folder ids to start from instead of resolving a path.
        files: Optional list to append titles to. A fresh list is created
            per call when omitted, so results never leak between calls.

    Returns:
        The list of file titles (the same object as ``files`` when given).

    Raises:
        ValueError: If neither ``folder_path`` nor ``target_ids`` is given.
    """
    if files is None:
        files = []
    if target_ids is None:
        if folder_path is None:
            raise ValueError("either folder_path or target_ids must be provided")
        target_ids = [resolve_path_to_id(folder_path)]
    # Iterate instead of recursing: each pass handles one depth level.
    while target_ids:
        subfolder_ids = []
        for f in get_folder_files(folder_ids=target_ids, batch_size=250):
            if f['mimeType'] == 'application/vnd.google-apps.folder':
                subfolder_ids.append(f['id'])
            else:
                files.append(f['title'])
        target_ids = subfolder_ids
    return files
# Authenticate and build the Drive client; resolve_path_to_id() and
# get_folder_files() read it through the module-level name `gdrive`.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
gdrive = GoogleDrive(gauth)
# Collect the file titles below the given folder path and print one per line.
file_list = get_files('/Some/Folder/Path')
for f in file_list:
    print(f)
例如:
您的 google 驱动器包含:
(folder) Root
(folder) Docs
(subfolder) Notes
(subfolder) School
(file) notes_1.txt
(file) notes_2.txt
(file) notes_3.txt
(file) notes_4.txt
(file) notes_5.txt
(subfolder) Important
(file) important_notes_1.txt
(file) important_notes_2.txt
(file) important_notes_3.txt
(subfolder) Old Notes
(file) old_1.txt
(file) old_2.txt
(file) old_3.txt
(subfolder) Secrets
(file) secret_1.txt
(file) secret_2.txt
(file) secret_3.txt
(folder) Stuff
(file) nothing.txt
(file) this-will-not-be-found.txt
并且您想获取 “Notes” 文件夹及其所有子文件夹中的全部文件,
你会做:
# Print every file title found under /Docs/Notes, including its subfolders.
file_list = get_files('/Docs/Notes')
for f in file_list:
    print(f)
Output:
>> notes_1.txt
>> notes_2.txt
>> notes_3.txt
>> notes_4.txt
>> notes_5.txt
>> important_notes_1.txt
>> important_notes_2.txt
>> important_notes_3.txt
>> old_1.txt
>> old_2.txt
>> old_3.txt
>> secret_1.txt
>> secret_2.txt
>> secret_3.txt
希望这对某人有所帮助:)
我有 PyDrive 文档中的以下代码,允许访问我的 Google 驱动器中的顶级文件夹。我想访问其中的所有文件夹、子文件夹和文件。我该怎么做(我刚开始使用 PyDrive)?
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Authenticate through a local web server that handles the OAuth flow automatically.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()

# Make a GoogleDrive instance from the authenticated GoogleAuth instance.
drive = GoogleDrive(gauth)

# Auto-iterate through all files that match this query
# (non-trashed items whose parent is the Drive root).
top_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()
for file in top_list:
    # print() with a single pre-formatted string behaves the same on Python 2 and 3.
    print('title: %s, id: %s' % (file['title'], file['id']))
print("---------------------------------------------")

# Paginate file lists by specifying the maximum number of results per page.
for file_list in drive.ListFile({'q': 'trashed=true', 'maxResults': 10}):
    print('Received %s files from Files.list()' % len(file_list))  # <= 10
    for file1 in file_list:
        print('title: %s, id: %s' % (file1['title'], file1['id']))
我查看了下页 How to list all files, folders, subfolders and subfiles of a Google drive folder ,这似乎是我正在寻找的答案,但代码已经不存在了。
这需要对文件列表进行迭代。基于 this,下面的代码会获取文件夹中每个文件的标题和 URL 链接。通过传入文件夹的 id(例如 ListFolder('id')),可以调整代码以获取特定文件夹。下面给出的示例查询的是 root。
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Authenticate first; the GoogleDrive client below is built from this GoogleAuth object.
gauth = GoogleAuth()
gauth.LocalWebserverAuth() # Creates local webserver and auto handles authentication
# Make GoogleDrive instance with the authenticated GoogleAuth instance.
# ListFolder() reads this module-level `drive` name.
drive = GoogleDrive(gauth)
def ListFolder(parent):
    """Return the contents of a Drive folder as a nested list of dicts.

    Uses the module-level ``drive`` client and issues one ``ListFile``
    request per folder visited.

    Args:
        parent: Id of the folder to list (``'root'`` for the Drive root).

    Returns:
        A list with one dict per non-trashed child of ``parent``:
        folders become ``{"id", "title", "list"}`` where ``"list"`` holds
        the recursively listed children; every other item becomes
        ``{"title", "title1"}`` where ``"title1"`` carries the item's
        ``alternateLink``.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    children = drive.ListFile({'q': "'%s' in parents and trashed=false" % parent}).GetList()
    entries = []
    for child in children:
        if child['mimeType'] != folder_mime:
            # Plain file: record its title and link only.
            entries.append({"title": child['title'], "title1": child['alternateLink']})
            continue
        # Folder: descend and embed the sub-listing.
        entries.append({"id": child['id'], "title": child['title'], "list": ListFolder(child['id'])})
    return entries
# Walk the whole Drive starting at the root folder; the returned nested list is discarded here.
ListFolder('root')
您的代码完全正确。但是使用 PyDrive 的默认设置,您只能访问根级别的文件和文件夹。 更改 settings.yaml 文件中的 oauth_scope 可修复此问题。
# Read the OAuth client configuration from this file.
client_config_backend: settings
client_config:
  # Nested under client_config; replace the placeholders with your own credentials.
  client_id: XXX
  client_secret: XXXX

# Store the obtained credentials in a local file.
save_credentials: True
save_credentials_backend: file
save_credentials_file: credentials.json

get_refresh_token: True

# Scopes requested during authentication.
oauth_scope:
  - https://www.googleapis.com/auth/drive
  - https://www.googleapis.com/auth/drive.metadata
这是我对获取子文件夹中的所有文件的看法... 这使您可以按您设置的路径进行查询。这是不同的,因为它不会为每个文件夹发出 1 个请求。它创建了一批要查询的文件夹。
批量代码段:
'some_id_1234' in parents or 'some_id_1235' in parents or 'some_id_1236' in parents or 'some_id_1237' in parents or 'some_id_1238' in parents or 'some_id_1239' in parents or 'some_id_1240' in parents and trashed=false
您可以一次查询多个文件夹中的文件。但查询不能太长:一旦超过约 300 个文件夹条件(即 300 个类似 'some_id_1234' in parents 的子句),就会开始出现错误,因此请将批量大小保持在 250 左右。
假设您要检查的文件夹有 1,110 个文件夹,并且您将批量大小设置为 250。 然后它将发出 5 个单独的请求来查询所有文件夹。
-请求1查询250个文件夹
-请求2查询250个文件夹
-请求3查询250个文件夹
-请求4查询250个文件夹
-请求5查询110个文件夹
然后其中的任何子文件夹将被创建成批次并递归查询。
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
def parse_gdrive_path(gd_path):
    """Split a Drive-style path into its folder-name components.

    Both ``/`` and ``\\`` are accepted as separators. An optional
    ``prefix:`` in front of the path (everything up to the first colon) is
    dropped. Leading, trailing and repeated separators are ignored.

    Args:
        gd_path: Path string such as ``'/Docs/Notes'`` or ``'gdrive:/Docs/Notes/'``.

    Returns:
        List of folder names from outermost to innermost; an empty list
        when the path names the root itself (e.g. ``'/'``).
    """
    if ':' in gd_path:
        # Strip only the prefix before the first colon and keep the rest intact.
        gd_path = gd_path.split(':', 1)[1]
    # Normalise Windows-style separators, then drop the empty segments produced
    # by leading/trailing/duplicate slashes.
    return [part for part in gd_path.replace('\\', '/').split('/') if part]
def resolve_path_to_id(folder_path):
    """Resolve a Drive folder path such as ``'/Docs/Notes'`` to a folder id.

    Walks the path one component at a time starting from ``'root'``, issuing
    one ``ListFile`` request per component through the module-level
    ``gdrive`` client. When several folders share a title under the same
    parent, the first one returned is used.

    Args:
        folder_path: Path string understood by ``parse_gdrive_path``.

    Returns:
        The id of the innermost folder of the path.

    Raises:
        IndexError: If a path component matches no non-trashed folder under
            its parent.
    """
    _id = 'root'
    for folder in parse_gdrive_path(folder_path):
        # Escape backslashes and single quotes so a title such as
        # "Valentine's Day" cannot terminate the quoted query value early.
        safe_title = folder.replace('\\', '\\\\').replace("'", "\\'")
        matches = gdrive.ListFile({
            'q': (f"'{_id}' in parents and title='{safe_title}' and trashed=false "
                  "and mimeType='application/vnd.google-apps.folder'"),
            'fields': 'items(id, title, mimeType)',
        }).GetList()
        if not matches:
            # Same exception type the bare matches[0] lookup would raise,
            # but with a message that names the missing component.
            raise IndexError(f"Folder {folder!r} not found under Drive folder id {_id!r}")
        _id = matches[0]['id']
    return _id
def get_folder_files(folder_ids, batch_size=100):
    """Yield the non-trashed children of several folders, querying in batches.

    The folder ids are split into chunks of at most ``batch_size``. Each
    chunk becomes one ``ListFile`` request through the module-level
    ``gdrive`` client, of the form
    ``('<id1>' in parents or '<id2>' in parents ...) and trashed=false``.

    Args:
        folder_ids: Iterable of folder ids whose direct children are wanted.
        batch_size: Maximum number of folder ids combined into one query.

    Yields:
        The items returned by each request, restricted to the fields
        ``id``, ``title``, ``mimeType`` and ``version``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    folder_ids = list(folder_ids)
    for start in range(0, len(folder_ids), batch_size):
        batch = folder_ids[start:start + batch_size]
        parents_clause = " or ".join(f"'{folder_id}' in parents" for folder_id in batch)
        listing = gdrive.ListFile({
            # Parenthesised so the trashed filter applies to the whole batch
            # and does not depend on and/or precedence.
            'q': f"({parents_clause}) and trashed=false",
            'fields': 'items(id, title, mimeType, version)',
        })
        yield from listing.GetList()
def get_files(folder_path=None, target_ids=None, files=None):
    """Collect the titles of all files below a folder, including subfolders.

    The tree is walked level by level: the children of every folder on the
    current level are fetched with ``get_folder_files`` (batch size 250),
    file titles are recorded, and the subfolders found form the next level.

    Args:
        folder_path: Drive path of the starting folder; used only when
            ``target_ids`` is not given.
        target_ids: Folder ids to start from instead of resolving a path.
        files: Optional list to append titles to. A fresh list is created
            per call when omitted, so results never leak between calls.

    Returns:
        The list of file titles (the same object as ``files`` when given).

    Raises:
        ValueError: If neither ``folder_path`` nor ``target_ids`` is given.
    """
    if files is None:
        files = []
    if target_ids is None:
        if folder_path is None:
            raise ValueError("either folder_path or target_ids must be provided")
        target_ids = [resolve_path_to_id(folder_path)]
    # Iterate instead of recursing: each pass handles one depth level.
    while target_ids:
        subfolder_ids = []
        for f in get_folder_files(folder_ids=target_ids, batch_size=250):
            if f['mimeType'] == 'application/vnd.google-apps.folder':
                subfolder_ids.append(f['id'])
            else:
                files.append(f['title'])
        target_ids = subfolder_ids
    return files
# Authenticate and build the Drive client; resolve_path_to_id() and
# get_folder_files() read it through the module-level name `gdrive`.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
gdrive = GoogleDrive(gauth)
# Collect the file titles below the given folder path and print one per line.
file_list = get_files('/Some/Folder/Path')
for f in file_list:
    print(f)
例如:
您的 google 驱动器包含:
(folder) Root
(folder) Docs
(subfolder) Notes
(subfolder) School
(file) notes_1.txt
(file) notes_2.txt
(file) notes_3.txt
(file) notes_4.txt
(file) notes_5.txt
(subfolder) Important
(file) important_notes_1.txt
(file) important_notes_2.txt
(file) important_notes_3.txt
(subfolder) Old Notes
(file) old_1.txt
(file) old_2.txt
(file) old_3.txt
(subfolder) Secrets
(file) secret_1.txt
(file) secret_2.txt
(file) secret_3.txt
(folder) Stuff
(file) nothing.txt
(file) this-will-not-be-found.txt
并且您想获取 “Notes” 文件夹及其所有子文件夹中的全部文件,
你会做:
# Print every file title found under /Docs/Notes, including its subfolders.
file_list = get_files('/Docs/Notes')
for f in file_list:
    print(f)
Output:
>> notes_1.txt
>> notes_2.txt
>> notes_3.txt
>> notes_4.txt
>> notes_5.txt
>> important_notes_1.txt
>> important_notes_2.txt
>> important_notes_3.txt
>> old_1.txt
>> old_2.txt
>> old_3.txt
>> secret_1.txt
>> secret_2.txt
>> secret_3.txt
希望这对某人有所帮助:)