无法将多个 python 数据帧上传到 s3
Unable to upload multiple python dataframes to s3
我正在尝试使用 boto3 将 google 播放控制台报告上传到 s3。当我尝试循环打印数据帧时,下面的代码运行良好,这意味着我成功获取了我需要的文件。
from io import StringIO # python3; python2: BytesIO
import boto3
import os
from google.cloud import storage
import pandas as pd
import io
jsonfile = os.path.join(
os.path.dirname(__file__), 'private_key.json')
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= jsonfile
# getting all file names from bucket
storage_client = storage.Client()
bucket_name = 'pubsite_prod_rev_1223445566778899'
bucket = storage_client.bucket(bucket_name)
#blob = bucket.blob(source_blob_name)
allblobs = storage_client.list_blobs(bucket_name)
# filtering out the files i need. for example: abc/123/game1/201801_channel.csv,abc/123/game1/202110_channel.csv
for blobfile in allblobs:
if "abc/123" in blobfile.name:
if "game1" in blobfile.name:
if "channel.csv" in blobfile.name:
#print(blobfile.name) # successfully getting all file names
source_blob_name = blobfile.name
blob = bucket.blob(source_blob_name)
data = blob.download_as_string()
df = pd.read_csv(io.BytesIO(data),encoding='utf-16')
print(df) # successfully printing dataframes for all of the files
#upload files to s3
bucket = 'the-knights-iaps-raw' # already created on S3
csv_buffer = StringIO()
df.to_csv(csv_buffer)
s3_resource = boto3.resource('s3', aws_access_key_id='JE4WNFJCN24JNJN13FC',aws_secret_access_key = 'jdsafjlhsafj34j32n4tj23nZ')
fileNamefors3 = source_blob_name.split("/", 2)
s3_resource.Object(bucket, "Acquisition/Buyers7d/StickmanAdventureGame/Channel/"+fileNamefors3[2]).put(Body=csv_buffer.getvalue())
但是将所有这些数据帧上传到 s3 导致错误:
File "C:\Users\USER\PycharmProjects\Gamexis_gpc\cvcv.py", line 28,
in
blob = bucket.blob(source_blob_name) AttributeError: 'str' object has no attribute 'blob'
我不是 python 专业人士,但如果有人能提供帮助,那就太好了。
您在此处创建存储桶:
bucket = storage_client.bucket(bucket_name)
.. 但是稍后在 for 循环中你覆盖了那个变量:
bucket = 'the-knights-iaps-raw'
为字符串使用不同的变量名。
我正在尝试使用 boto3 将 google 播放控制台报告上传到 s3。当我尝试循环打印数据帧时,下面的代码运行良好,这意味着我成功获取了我需要的文件。
from io import StringIO # python3; python2: BytesIO
import boto3
import os
from google.cloud import storage
import pandas as pd
import io
jsonfile = os.path.join(
os.path.dirname(__file__), 'private_key.json')
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]= jsonfile
# getting all file names from bucket
storage_client = storage.Client()
bucket_name = 'pubsite_prod_rev_1223445566778899'
bucket = storage_client.bucket(bucket_name)
#blob = bucket.blob(source_blob_name)
allblobs = storage_client.list_blobs(bucket_name)
# filtering out the files i need. for example: abc/123/game1/201801_channel.csv,abc/123/game1/202110_channel.csv
for blobfile in allblobs:
if "abc/123" in blobfile.name:
if "game1" in blobfile.name:
if "channel.csv" in blobfile.name:
#print(blobfile.name) # successfully getting all file names
source_blob_name = blobfile.name
blob = bucket.blob(source_blob_name)
data = blob.download_as_string()
df = pd.read_csv(io.BytesIO(data),encoding='utf-16')
print(df) # successfully printing dataframes for all of the files
#upload files to s3
bucket = 'the-knights-iaps-raw' # already created on S3
csv_buffer = StringIO()
df.to_csv(csv_buffer)
s3_resource = boto3.resource('s3', aws_access_key_id='JE4WNFJCN24JNJN13FC',aws_secret_access_key = 'jdsafjlhsafj34j32n4tj23nZ')
fileNamefors3 = source_blob_name.split("/", 2)
s3_resource.Object(bucket, "Acquisition/Buyers7d/StickmanAdventureGame/Channel/"+fileNamefors3[2]).put(Body=csv_buffer.getvalue())
但是将所有这些数据帧上传到 s3 导致错误:
File "C:\Users\USER\PycharmProjects\Gamexis_gpc\cvcv.py", line 28, in blob = bucket.blob(source_blob_name) AttributeError: 'str' object has no attribute 'blob'
我不是 python 专业人士,但如果有人能提供帮助,那就太好了。
您在此处创建存储桶:
bucket = storage_client.bucket(bucket_name)
.. 但是稍后在 for 循环中你覆盖了那个变量:
bucket = 'the-knights-iaps-raw'
为字符串使用不同的变量名。