Unable to read botocore.response.StreamingBody due to overflow
I am trying to get a file from S3 and read it into Python. The object is returned as a botocore.response.StreamingBody, which can normally be read with StreamingBody.read(). But when I try to use read, it throws OverflowError: Python int too large to convert to C long.
All the other solutions available on the internet suggest converting int to int64 or float64, but because of this error I cannot use .read() in the first place. We even tried pickling the csv and sending that, but it did not work either.
import boto3
import io
import pandas as pd
def get_cx_data():
    """ Get cx data

    Returns:
        Pandas DataFrame: CX index DataFrame
    """
    client = boto3.client('s3',
                          aws_access_key_id='key_id_here',
                          aws_secret_access_key='secret_key_here',
                          region_name='us-east-2')
    obj = client.get_object(
        Bucket='bucket name',
        Key='key_here')
    print(type(obj))
    print(obj['Body'])

    file_ = obj['Body'].read()  # throws OverflowError

    with open('training_data.csv', 'wb') as file:  # 'wb' because .read() returns bytes
        file.write(obj['Body'].read())  # throws OverflowError

    # combine_inde_dep_vars_featools.pkl
    # Read data from the S3 object
    # data = pandas.read_csv(obj['Body'])
    # df_cx_index = pd.read_pickle("combine_inde_dep_vars_featools.pkl")
    df_cx_index = pd.read_csv(io.BytesIO(obj['Body'].read()))  # throws OverflowError

    print(df_cx_index.head())
    return df_cx_index
The traceback is below:
<class 'dict'>
<botocore.response.StreamingBody object at 0x0000027EB0533A60>
Traceback (most recent call last):
File "C:/my_folder/git repos/collections_completed_checklist_items/save_csv.py", line 22, in <module>
get_cx_data()
File "C:/my_folder/git repos/collections_completed_checklist_items/save_csv.py", line 18, in get_cx_data
file_ = obj['Body'].read()
File "C:\CX_codes\environments\collections_completed_checklist_items\lib\site-packages\botocore\response.py", line 77, in read
chunk = self._raw_stream.read(amt)
File "C:\CX_codes\environments\collections_completed_checklist_items\lib\site-packages\urllib3\response.py", line 515, in read
data = self._fp.read() if not fp_closed else b""
File "C:\Users\a.mundachal\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 468, in read
s = self._safe_read(self.length)
File "C:\Users\a.mundachal\AppData\Local\Programs\Python\Python38\lib\http\client.py", line 609, in _safe_read
data = self.fp.read(amt)
File "C:\Users\a.mundachal\AppData\Local\Programs\Python\Python38\lib\socket.py", line 669, in readinto
return self._sock.recv_into(b)
File "C:\Users\a.mundachal\AppData\Local\Programs\Python\Python38\lib\ssl.py", line 1241, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\a.mundachal\AppData\Local\Programs\Python\Python38\lib\ssl.py", line 1099, in read
return self._sslobj.read(len, buffer)
OverflowError: Python int too large to convert to C long
Is there another way to read or save the botocore.response.StreamingBody object as a csv without using .read()? Or is there any workaround that lets me use .read() without getting the OverflowError?
df = pd.read_csv('s3://path_to_file/training_data.csv')
That should work; if s3fs is not installed, install it, and make sure your credentials are accessible.
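If the credentials are not configured in the environment, they can also be passed to pandas explicitly through storage_options, which is forwarded to s3fs. A minimal sketch, assuming s3fs is installed; the bucket path and credential strings below are placeholders:
import pandas as pd
# Hedged sketch: pandas forwards storage_options to s3fs when given an s3:// URL.
# The path and credential strings below are placeholders.
df = pd.read_csv(
    's3://bucket_name/path/to/training_data.csv',
    storage_options={
        'key': 'key_id_here',         # aws_access_key_id
        'secret': 'secret_key_here',  # aws_secret_access_key
    },
)
print(df.head())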
If that doesn't work, try:
import boto3
import pandas as pd
from io import StringIO
s3_root_bucket = 'the_main_bucket_you_start_in'
s3_path_to_file = 'the rest of the path from there to the csv file including the csv filename'
s3_client = boto3.client('s3') #add credentials if necessary
csv_object = s3_client.get_object(Bucket = s3_root_bucket, Key = s3_path_to_file)
csv_string = csv_object['Body'].read().decode('utf-8')
df = pd.read_csv(StringIO(csv_string))
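Note that this still calls .read() on the whole body, so a very large object could hit the same OverflowError. A workaround is to stream the object to a local file instead of reading it in a single call, either with boto3's managed transfer or by iterating the StreamingBody in chunks. A minimal sketch, using the same placeholder bucket and key names as above:
import boto3
import pandas as pd
s3_root_bucket = 'the_main_bucket_you_start_in'  # placeholder
s3_path_to_file = 'path/to/training_data.csv'    # placeholder
s3_client = boto3.client('s3')  # add credentials if necessary
# Option 1: managed transfer streams the object to disk for you.
s3_client.download_file(s3_root_bucket, s3_path_to_file, 'training_data.csv')
# Option 2: read the StreamingBody in fixed-size chunks instead of one big .read().
csv_object = s3_client.get_object(Bucket=s3_root_bucket, Key=s3_path_to_file)
with open('training_data.csv', 'wb') as f:
    for chunk in csv_object['Body'].iter_chunks(chunk_size=1024 * 1024):  # 1 MiB per read
        f.write(chunk)
df = pd.read_csv('training_data.csv')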