在for循环中将文件上传到gs bucket
Upload files to gs bucket in for loop
在下面的代码中,一个 pdf 文档被拆分并保存在我的本地驱动器中,一旦拆分过程完成,就会进行上传过程。在上传过程中,所有拆分后的文件都会递归上传到gs bucket中。如何转换我的以下代码以将拆分后的文件直接上传到 gs 存储桶,而不是存储在本地然后再上传?我尝试了但没有成功
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
# Split "ace.pdf" into one single-page PDF per page under /home/playground/doc_pages.
# The source file is opened via a context manager so the handle is always closed
# (the original `open(...)` inside PdfFileReader leaked the file descriptor).
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Each page gets its own output file; the with-block closes it after writing.
        with open(r"/home/playground/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory to scan. Note: ``glob(path + '/**')``
            without ``recursive=True`` matches only direct children, so despite the
            ``**`` this is effectively non-recursive — TODO confirm intent.
        bucket: Name of the destination GCS bucket.
        gcs_path: Prefix ("folder") inside the bucket; '' uploads to the root.
    """
    # NOTE(review): assert is stripped under `python -O`; kept for interface
    # compatibility (callers may rely on AssertionError).
    assert os.path.isdir(local_path)
    # Create the client and bucket handle ONCE — the original rebuilt the
    # storage.Client() for every single file inside the loop, which is slow.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus the '/') to build the object key.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)
# Push the split pages from the local folder into the 'doc_pages' bucket root.
upload_local_directory_to_gcs('/home/playground/doc_pages', 'doc_pages', '')
使用临时文件看起来像这样:
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
# Create the scratch folder; exist_ok=True prevents FileExistsError on re-runs
# (the original os.makedirs crashed the second time the script was executed).
os.makedirs('/tmp/doc_pages', exist_ok=True)
# Open the source PDF with a context manager so the handle is closed when done.
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Write each page to the temporary folder instead of the home directory.
        with open(r"/tmp/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory to scan. Note: ``glob(path + '/**')``
            without ``recursive=True`` matches only direct children, so despite the
            ``**`` this is effectively non-recursive — TODO confirm intent.
        bucket: Name of the destination GCS bucket.
        gcs_path: Prefix ("folder") inside the bucket; '' uploads to the root.
    """
    # NOTE(review): assert is stripped under `python -O`; kept for interface
    # compatibility (callers may rely on AssertionError).
    assert os.path.isdir(local_path)
    # Create the client and bucket handle ONCE — the original rebuilt the
    # storage.Client() for every single file inside the loop, which is slow.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus the '/') to build the object key.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)
# Push the split pages from the temporary folder into the 'doc_pages' bucket root.
upload_local_directory_to_gcs('/tmp/doc_pages', 'doc_pages', '') # Change source
在下面的代码中,一个 pdf 文档被拆分并保存在我的本地驱动器中,一旦拆分过程完成,就会进行上传过程。在上传过程中,所有拆分后的文件都会递归上传到gs bucket中。如何转换我的以下代码以将拆分后的文件直接上传到 gs 存储桶,而不是存储在本地然后再上传?我尝试了但没有成功
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
# Split "ace.pdf" into one single-page PDF per page under /home/playground/doc_pages.
# The source file is opened via a context manager so the handle is always closed
# (the original `open(...)` inside PdfFileReader leaked the file descriptor).
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Each page gets its own output file; the with-block closes it after writing.
        with open(r"/home/playground/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory to scan. Note: ``glob(path + '/**')``
            without ``recursive=True`` matches only direct children, so despite the
            ``**`` this is effectively non-recursive — TODO confirm intent.
        bucket: Name of the destination GCS bucket.
        gcs_path: Prefix ("folder") inside the bucket; '' uploads to the root.
    """
    # NOTE(review): assert is stripped under `python -O`; kept for interface
    # compatibility (callers may rely on AssertionError).
    assert os.path.isdir(local_path)
    # Create the client and bucket handle ONCE — the original rebuilt the
    # storage.Client() for every single file inside the loop, which is slow.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus the '/') to build the object key.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)
# Push the split pages from the local folder into the 'doc_pages' bucket root.
upload_local_directory_to_gcs('/home/playground/doc_pages', 'doc_pages', '')
使用临时文件看起来像这样:
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
# Create the scratch folder; exist_ok=True prevents FileExistsError on re-runs
# (the original os.makedirs crashed the second time the script was executed).
os.makedirs('/tmp/doc_pages', exist_ok=True)
# Open the source PDF with a context manager so the handle is closed when done.
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Write each page to the temporary folder instead of the home directory.
        with open(r"/tmp/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory to scan. Note: ``glob(path + '/**')``
            without ``recursive=True`` matches only direct children, so despite the
            ``**`` this is effectively non-recursive — TODO confirm intent.
        bucket: Name of the destination GCS bucket.
        gcs_path: Prefix ("folder") inside the bucket; '' uploads to the root.
    """
    # NOTE(review): assert is stripped under `python -O`; kept for interface
    # compatibility (callers may rely on AssertionError).
    assert os.path.isdir(local_path)
    # Create the client and bucket handle ONCE — the original rebuilt the
    # storage.Client() for every single file inside the loop, which is slow.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus the '/') to build the object key.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)
# Push the split pages from the temporary folder into the 'doc_pages' bucket root.
upload_local_directory_to_gcs('/tmp/doc_pages', 'doc_pages', '') # Change source