Pygsheets:无法使用 Google 服务帐户将超过 200 个文件上传到云端硬盘。得到 403 错误
Pygsheets: cannot upload more than 200-ish files to Drive with Google Service Account. Get 403 Error
我正在使用 pygsheets 来处理 Google 电子表格和云端硬盘。我需要上传超过10000个文件到公司Drive,但是每次我使用服务账户时,我在上传200个文件后就报错。报错如下:
{
"error": {
"errors": [
{
"domain": "usageLimits",
"reason": "dailyLimitExceededUnreg",
"message": "Daily Limit for Unauthenticated Use Exceeded. Continued use requires signup.",
"extendedHelp": "https://code.google.com/apis/console"
}
],
"code": 403,
"message": "Daily Limit for Unauthenticated Use Exceeded. Continued use requires signup."
}
}
我检查了云控制台:在任何时间段内,我的请求数量都没有超出配额限制。我在每次请求之间都会等待 10 秒。
附上相关代码如下:
def upload_cluster(sheets, cluster_array):
    """Create one spreadsheet for a keyword cluster and return its summary.

    The spreadsheet is titled after the keyword with the highest
    'Global volume' (the last such keyword when several tie), is created in
    the folder 'FOLDER_ID', and its first worksheet is filled with every row
    of ``cluster_array``.

    Args:
        sheets: Client object exposing ``create(title=..., folder=...)``;
            ``main`` passes the result of ``pygsheets.authorize``.
        cluster_array: List of dicts read through the keys 'Keyword',
            'Global volume', 'Volume' and 'Traffic potential'.

    Returns:
        dict with the keys 'Cluster' (a HYPERLINK expression pointing at the
        new spreadsheet), 'Volume', 'Global volume', 'Traffic potential'
        (sums), 'Max volume', 'Max global volume', 'Max traffic potential'
        (maxima, never below 0) and 'Queries' (number of rows).
    """
    max_global = max_local = max_traffic = 0
    sum_global = sum_local = sum_traffic = 0
    cluster_name = ''

    for elem in cluster_array:
        global_volume = elem.get('Global volume')
        local_volume = elem.get('Volume')
        traffic = elem.get('Traffic potential')

        # '>=' rather than '>': on a tie the later keyword names the cluster.
        if global_volume >= max_global:
            cluster_name = elem.get('Keyword')
            max_global = global_volume
        max_local = max(max_local, local_volume)
        max_traffic = max(max_traffic, traffic)

        sum_global += global_volume
        sum_local += local_volume
        sum_traffic += traffic

    # Strip both double quotes and apostrophes from the title: creating a
    # file whose name contained an apostrophe was observed to fail, and
    # removing the character made the upload succeed.
    # NOTE(review): the root cause is not visible here - presumably the name
    # ends up inside a single-quoted Drive query downstream; confirm.
    title = re.sub(r"[\"']", '', cluster_name)
    book = sheets.create(title=title, folder='FOLDER_ID')

    # NOTE(review): the raw cluster_name is embedded below, so a keyword
    # containing a double quote yields an unbalanced string literal - confirm
    # how the consumer of 'Cluster' wants such names escaped.
    link = f'HYPERLINK(\"https://docs.google.com/spreadsheets/d/{book.id}\",\"{cluster_name}\")'

    dataframe = pandas.DataFrame(cluster_array)
    out_sheet = book.worksheet(property='index', value=0)
    out_sheet.set_dataframe(df=dataframe, start='A1', extend=True, copy_head=True, copy_index=False)

    return {
        'Cluster': link,
        'Volume': sum_local,
        'Global volume': sum_global,
        'Traffic potential': sum_traffic,
        'Max volume': max_local,
        'Max global volume': max_global,
        'Max traffic potential': max_traffic,
        'Queries': len(cluster_array),
    }
def main():
    """Group unclustered keywords by shared URLs and upload each group.

    Every not-yet-clustered entry of ``output_keywords`` (except the last
    one) seeds a new cluster; any other unclustered entry sharing at least
    four URLs with the seed joins it. Each cluster is uploaded through
    ``upload_cluster``, its summary is appended to ``clusters``, and the
    loop pauses five seconds before continuing.
    """
    gsheets = pygsheets.authorize(service_account_file='service-account.json')

    # NOTE(review): the final entry is never used as a seed, so if nothing
    # earlier absorbs it, it is never uploaded - confirm this is intended.
    for seed in output_keywords[:-1]:
        if seed.get('Clustered'):
            continue

        cluster = [seed]
        # NOTE(review): the result of get_vol is never read; the call is kept
        # because get_vol is defined elsewhere - confirm it can be removed.
        get_vol(seed.get('Global volume'))
        cluster_urls = seed.get('URLs')
        seed['Clustered'] = True
        print(f'Added to cluster: {cluster[-1]}')

        for candidate in output_keywords:
            if candidate.get('Clustered'):
                continue
            shared_urls = set(cluster_urls) & set(candidate.get('URLs'))
            if len(shared_urls) >= 4:
                cluster.append(candidate)
                candidate['Clustered'] = True
                print(f'Added to cluster: {cluster[-1]}')

        print('Uploading cluster...')
        clusters.append(upload_cluster(gsheets, cluster))
        sleep(5)
        print(f'Uploaded: {clusters[-1]}')
我也试过通过client secret授权,好像没问题,可惜我在文件夹里看不到上传的文件。
服务帐户可以访问云端硬盘文件夹。
经过更多测试后,我注意到脚本是在某一个特定文件上卡住的,因为该文件的名称中包含单引号(')。使用正则表达式从文件名中删除 ' 之后,它就不再卡住了。很奇怪,但没关系。有人知道为什么会发生这种情况吗?我原以为 Google 云端硬盘对文件名没有任何禁用字符。
我正在使用 pygsheets 来处理 Google 电子表格和云端硬盘。我需要上传超过10000个文件到公司Drive,但是每次我使用服务账户时,我在上传200个文件后就报错。报错如下:
{
"error": {
"errors": [
{
"domain": "usageLimits",
"reason": "dailyLimitExceededUnreg",
"message": "Daily Limit for Unauthenticated Use Exceeded. Continued use requires signup.",
"extendedHelp": "https://code.google.com/apis/console"
}
],
"code": 403,
"message": "Daily Limit for Unauthenticated Use Exceeded. Continued use requires signup."
}
}
我检查了云控制台:在任何时间段内,我的请求数量都没有超出配额限制。我在每次请求之间都会等待 10 秒。
附上相关代码如下:
def upload_cluster(sheets, cluster_array):
    """Create one spreadsheet for a keyword cluster and return its summary.

    The spreadsheet is titled after the keyword with the highest
    'Global volume' (the last such keyword when several tie), is created in
    the folder 'FOLDER_ID', and its first worksheet is filled with every row
    of ``cluster_array``.

    Args:
        sheets: Client object exposing ``create(title=..., folder=...)``;
            ``main`` passes the result of ``pygsheets.authorize``.
        cluster_array: List of dicts read through the keys 'Keyword',
            'Global volume', 'Volume' and 'Traffic potential'.

    Returns:
        dict with the keys 'Cluster' (a HYPERLINK expression pointing at the
        new spreadsheet), 'Volume', 'Global volume', 'Traffic potential'
        (sums), 'Max volume', 'Max global volume', 'Max traffic potential'
        (maxima, never below 0) and 'Queries' (number of rows).
    """
    max_global = max_local = max_traffic = 0
    sum_global = sum_local = sum_traffic = 0
    cluster_name = ''

    for elem in cluster_array:
        global_volume = elem.get('Global volume')
        local_volume = elem.get('Volume')
        traffic = elem.get('Traffic potential')

        # '>=' rather than '>': on a tie the later keyword names the cluster.
        if global_volume >= max_global:
            cluster_name = elem.get('Keyword')
            max_global = global_volume
        max_local = max(max_local, local_volume)
        max_traffic = max(max_traffic, traffic)

        sum_global += global_volume
        sum_local += local_volume
        sum_traffic += traffic

    # Strip both double quotes and apostrophes from the title: creating a
    # file whose name contained an apostrophe was observed to fail, and
    # removing the character made the upload succeed.
    # NOTE(review): the root cause is not visible here - presumably the name
    # ends up inside a single-quoted Drive query downstream; confirm.
    title = re.sub(r"[\"']", '', cluster_name)
    book = sheets.create(title=title, folder='FOLDER_ID')

    # NOTE(review): the raw cluster_name is embedded below, so a keyword
    # containing a double quote yields an unbalanced string literal - confirm
    # how the consumer of 'Cluster' wants such names escaped.
    link = f'HYPERLINK(\"https://docs.google.com/spreadsheets/d/{book.id}\",\"{cluster_name}\")'

    dataframe = pandas.DataFrame(cluster_array)
    out_sheet = book.worksheet(property='index', value=0)
    out_sheet.set_dataframe(df=dataframe, start='A1', extend=True, copy_head=True, copy_index=False)

    return {
        'Cluster': link,
        'Volume': sum_local,
        'Global volume': sum_global,
        'Traffic potential': sum_traffic,
        'Max volume': max_local,
        'Max global volume': max_global,
        'Max traffic potential': max_traffic,
        'Queries': len(cluster_array),
    }
def main():
    """Group unclustered keywords by shared URLs and upload each group.

    Every not-yet-clustered entry of ``output_keywords`` (except the last
    one) seeds a new cluster; any other unclustered entry sharing at least
    four URLs with the seed joins it. Each cluster is uploaded through
    ``upload_cluster``, its summary is appended to ``clusters``, and the
    loop pauses five seconds before continuing.
    """
    gsheets = pygsheets.authorize(service_account_file='service-account.json')

    # NOTE(review): the final entry is never used as a seed, so if nothing
    # earlier absorbs it, it is never uploaded - confirm this is intended.
    for seed in output_keywords[:-1]:
        if seed.get('Clustered'):
            continue

        cluster = [seed]
        # NOTE(review): the result of get_vol is never read; the call is kept
        # because get_vol is defined elsewhere - confirm it can be removed.
        get_vol(seed.get('Global volume'))
        cluster_urls = seed.get('URLs')
        seed['Clustered'] = True
        print(f'Added to cluster: {cluster[-1]}')

        for candidate in output_keywords:
            if candidate.get('Clustered'):
                continue
            shared_urls = set(cluster_urls) & set(candidate.get('URLs'))
            if len(shared_urls) >= 4:
                cluster.append(candidate)
                candidate['Clustered'] = True
                print(f'Added to cluster: {cluster[-1]}')

        print('Uploading cluster...')
        clusters.append(upload_cluster(gsheets, cluster))
        sleep(5)
        print(f'Uploaded: {clusters[-1]}')
我也试过通过client secret授权,好像没问题,可惜我在文件夹里看不到上传的文件。
服务帐户可以访问云端硬盘文件夹。
经过更多测试后,我注意到脚本是在某一个特定文件上卡住的,因为该文件的名称中包含单引号(')。使用正则表达式从文件名中删除 ' 之后,它就不再卡住了。很奇怪,但没关系。有人知道为什么会发生这种情况吗?我原以为 Google 云端硬盘对文件名没有任何禁用字符。