Python 内存错误保存拆分 csv python
Python memory error Save split csv python
我用 Python 从 Salesforce 获取了潜在客户(Lead)对象的全部数据,并保存为 CSV 文件。
但由于数据量很大,程序出现了 Python 内存错误(MemoryError)。
**This code raises a Python memory error**
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account
# Salesforce login credentials. The '123' values look like placeholders --
# TODO confirm they are replaced with real values before running.
SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'
# Export the CreatedDate field of every Lead record to output.csv.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
def main():
# Authentication settings: log in with the module-level credentials.
sf = Salesforce(username=SALESFORCE_USERNAME,
password=PASSWORD,
security_token=SECURITY_TOKEN)
# Lead columns to fetch; the same list is reused below as the CSV header.
columns = [
"CreatedDate"
]
# '{0[0]}' picks the first entry of `columns`, so only that one column is
# selected no matter how many names the list holds.
sosl = 'SELECT {0[0]} FROM Lead'.format(
columns)
# Run the query. Despite the variable name, a SELECT ... FROM statement is
# SOQL rather than SOSL.
# NOTE(review): query_all hands back the whole result as one object, which is
# the likely source of the reported memory error -- confirm against the
# simple_salesforce documentation.
data = sf.query_all(sosl)
# Delete the CSV file if it already exists.
output_csv = 'output.csv'
if os.path.exists(output_csv):
os.remove(output_csv)
# Write to the CSV file. Only list-valued entries of the result are written
# (presumably the list of records -- verify against the response layout).
for k, v in data.items():
if type(v) is list:
# Mode 'w' truncates the file, so every list-valued entry starts it over.
with open(output_csv, 'w', newline="") as f:
writer = csv.DictWriter(f, fieldnames=columns)
writer.writeheader()
for d in v:
# The JSON round trip produces a plain copy of the record. Note that this
# rebinds `data`, the name that held the query result.
data = json.loads(json.dumps(d))
# Drop the 'attributes' entry: it is not in `fieldnames`, and DictWriter
# rejects unknown keys by default.
del data['attributes']
writer.writerow(data)
# Run the export only when the file is executed as a script.
if __name__ == '__main__':
main()
因此,当记录超过 1000 行时,我希望把 CSV 拆分成多个文件。
拆分后的 CSV 文件如下:
1 output1.csv (1000 row)
2 output2.csv (1000 row)
3 output3.csv ......
我想拆分 CSV,于是在 open() 中加入了
iterator=True, chunksize=1000,
结果收到了下面的错误。要按这种方式输出,我应该怎么做?
代码
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account
# Salesforce login credentials. The '123' values look like placeholders --
# TODO confirm they are replaced with real values before running.
SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'
# Second attempt: same export as before, but trying to make open() write the
# CSV in chunks of 1000 rows.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
def main():
# Authentication settings: log in with the module-level credentials.
sf = Salesforce(username=SALESFORCE_USERNAME,
password=PASSWORD,
security_token=SECURITY_TOKEN)
# Lead columns to fetch; the same list is reused below as the CSV header.
columns = [
"CreatedDate"
]
# '{0[0]}' picks the first entry of `columns`, so only that one column is
# selected no matter how many names the list holds.
sosl = 'SELECT {0[0]} FROM Lead'.format(
columns)
# Run the query. Despite the variable name, a SELECT ... FROM statement is
# SOQL rather than SOSL.
# NOTE(review): query_all hands back the whole result as one object, so the
# memory problem is still present here -- confirm against the
# simple_salesforce documentation.
data = sf.query_all(sosl)
# Delete the CSV file if it already exists.
output_csv = 'output.csv'
if os.path.exists(output_csv):
os.remove(output_csv)
# Write to the CSV file. Only list-valued entries of the result are written
# (presumably the list of records -- verify against the response layout).
for k, v in data.items():
if type(v) is list:
# NOTE: the built-in open() has no `iterator` or `chunksize` parameters, so
# this call fails with
# "TypeError: 'iterator' is an invalid keyword argument for open()".
# Splitting the output into several files has to be done by the caller.
with open(output_csv, 'w', newline="",iterator=True,chunksize=1000) as f:
writer = csv.DictWriter(f, fieldnames=columns)
writer.writeheader()
for d in v:
# The JSON round trip produces a plain copy of the record. Note that this
# rebinds `data`, the name that held the query result.
data = json.loads(json.dumps(d))
# Drop the 'attributes' entry: it is not in `fieldnames`, and DictWriter
# rejects unknown keys by default.
del data['attributes']
writer.writerow(data)
# Run the export only when the file is executed as a script.
if __name__ == '__main__':
main()
错误信息
Traceback (most recent call last):
File "c:/Users/test/Documents/test/test5.py", line 44, in <module>
main()
File "c:/Users/test/Documents//test5.py", line 36, in main
with open(output_csv, 'w', newline="",iterator=True,chunksize=1000) as f:
TypeError: 'iterator' is an invalid keyword argument for open()
如果有其他不会触发 Python 内存错误的方法,请教教我。
如果有人知道,请告诉我。
data = sf.query_all(sosl)
此调用会把给定查询的全部结果一次性加载到内存中。另外,这条查询是 SOQL,而不是 SOSL。
改为使用
data = sf.query_all_iter(sosl)
然后遍历它返回的迭代器,而不是遍历 data.items()。
这样更节省内存,因为它不会尝试一次性取回所有记录。
我用 Python 从 Salesforce 获取了潜在客户(Lead)对象的全部数据,并保存为 CSV 文件。
但由于数据量很大,程序出现了 Python 内存错误(MemoryError)。
**This code raises a Python memory error**
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account
# Salesforce login credentials. The '123' values look like placeholders --
# TODO confirm they are replaced with real values before running.
SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'
# Export the CreatedDate field of every Lead record to output.csv.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
def main():
# Authentication settings: log in with the module-level credentials.
sf = Salesforce(username=SALESFORCE_USERNAME,
password=PASSWORD,
security_token=SECURITY_TOKEN)
# Lead columns to fetch; the same list is reused below as the CSV header.
columns = [
"CreatedDate"
]
# '{0[0]}' picks the first entry of `columns`, so only that one column is
# selected no matter how many names the list holds.
sosl = 'SELECT {0[0]} FROM Lead'.format(
columns)
# Run the query. Despite the variable name, a SELECT ... FROM statement is
# SOQL rather than SOSL.
# NOTE(review): query_all hands back the whole result as one object, which is
# the likely source of the reported memory error -- confirm against the
# simple_salesforce documentation.
data = sf.query_all(sosl)
# Delete the CSV file if it already exists.
output_csv = 'output.csv'
if os.path.exists(output_csv):
os.remove(output_csv)
# Write to the CSV file. Only list-valued entries of the result are written
# (presumably the list of records -- verify against the response layout).
for k, v in data.items():
if type(v) is list:
# Mode 'w' truncates the file, so every list-valued entry starts it over.
with open(output_csv, 'w', newline="") as f:
writer = csv.DictWriter(f, fieldnames=columns)
writer.writeheader()
for d in v:
# The JSON round trip produces a plain copy of the record. Note that this
# rebinds `data`, the name that held the query result.
data = json.loads(json.dumps(d))
# Drop the 'attributes' entry: it is not in `fieldnames`, and DictWriter
# rejects unknown keys by default.
del data['attributes']
writer.writerow(data)
# Run the export only when the file is executed as a script.
if __name__ == '__main__':
main()
因此,当记录超过 1000 行时,我希望把 CSV 拆分成多个文件。拆分后的 CSV 文件如下:
1 output1.csv (1000 row)
2 output2.csv (1000 row)
3 output3.csv ......
我想拆分 CSV,于是在 open() 中加入了
iterator=True, chunksize=1000,
结果收到了下面的错误。要按这种方式输出,我应该怎么做?
代码
from simple_salesforce import Salesforce
from datetime import datetime
import csv
import os
import json
import account
# Salesforce login credentials. The '123' values look like placeholders --
# TODO confirm they are replaced with real values before running.
SALESFORCE_USERNAME = '123'
PASSWORD = '123'
SECURITY_TOKEN = '123'
# Second attempt: same export as before, but trying to make open() write the
# CSV in chunks of 1000 rows.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
def main():
# Authentication settings: log in with the module-level credentials.
sf = Salesforce(username=SALESFORCE_USERNAME,
password=PASSWORD,
security_token=SECURITY_TOKEN)
# Lead columns to fetch; the same list is reused below as the CSV header.
columns = [
"CreatedDate"
]
# '{0[0]}' picks the first entry of `columns`, so only that one column is
# selected no matter how many names the list holds.
sosl = 'SELECT {0[0]} FROM Lead'.format(
columns)
# Run the query. Despite the variable name, a SELECT ... FROM statement is
# SOQL rather than SOSL.
# NOTE(review): query_all hands back the whole result as one object, so the
# memory problem is still present here -- confirm against the
# simple_salesforce documentation.
data = sf.query_all(sosl)
# Delete the CSV file if it already exists.
output_csv = 'output.csv'
if os.path.exists(output_csv):
os.remove(output_csv)
# Write to the CSV file. Only list-valued entries of the result are written
# (presumably the list of records -- verify against the response layout).
for k, v in data.items():
if type(v) is list:
# NOTE: the built-in open() has no `iterator` or `chunksize` parameters, so
# this call fails with
# "TypeError: 'iterator' is an invalid keyword argument for open()".
# Splitting the output into several files has to be done by the caller.
with open(output_csv, 'w', newline="",iterator=True,chunksize=1000) as f:
writer = csv.DictWriter(f, fieldnames=columns)
writer.writeheader()
for d in v:
# The JSON round trip produces a plain copy of the record. Note that this
# rebinds `data`, the name that held the query result.
data = json.loads(json.dumps(d))
# Drop the 'attributes' entry: it is not in `fieldnames`, and DictWriter
# rejects unknown keys by default.
del data['attributes']
writer.writerow(data)
# Run the export only when the file is executed as a script.
if __name__ == '__main__':
main()
错误信息
Traceback (most recent call last):
File "c:/Users/test/Documents/test/test5.py", line 44, in <module>
main()
File "c:/Users/test/Documents//test5.py", line 36, in main
with open(output_csv, 'w', newline="",iterator=True,chunksize=1000) as f:
TypeError: 'iterator' is an invalid keyword argument for open()
如果有其他不会触发 Python 内存错误的方法,请教教我。
如果有人知道,请告诉我。
data = sf.query_all(sosl)
此调用会把给定查询的全部结果一次性加载到内存中。另外,这条查询是 SOQL,而不是 SOSL。
改为使用
data = sf.query_all_iter(sosl)
然后遍历它返回的迭代器,而不是遍历 data.items()。
这样更节省内存,因为它不会尝试一次性取回所有记录。