Python:循环慢
Python: Slow for Loop
脚本的执行时间比预期长得多:仅 1250 条记录,循环并插入表就需要 20 多分钟。
请问这是否正常?
下面的脚本从 API(JSON)中提取 11 列,并将每一行加载到 Oracle 表中。
脚本:
# Fetch the branch records from the REST API; `url`, `user` and `passwd`
# are expected to be defined earlier in the script.
auth_values = (user, passwd)
response = requests.get(url, auth=auth_values)
json_data = json.loads(response.text)

# A table name cannot be supplied as a bind variable, so it is concatenated
# into the statement; every column value is bound positionally.
insert_sql = (
    "INSERT INTO " + PrestageTable + "(BRANCH_FULL_NAME, "
    "BRANCH_NUM, BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES "
    "(:1, :2, :3, :4, :5, :6)"
)

for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    # NOTE: one execute() call per record; presumably this per-row round
    # trip to the database is what makes the script slow.
    cursor.execute(
        insert_sql,
        # Bind order must follow the column list: SYS_CREATED_ON is :5 and
        # SYS_UPDATED_ON is :6.
        (branchFullName, branchNum, branchName, sysId,
         sys_created_on, sys_updated_on),
    )

# Commit once, after all rows have been inserted.
con.commit()
有没有办法利用索引来加快速度,或者有什么其他建议?
Updated:
# Collect one tuple of column values per record so that the rows can be
# sent to the database in a single statement.
insert_data = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_updated_on, sys_created_on)
    )

# NOTE(review): cursor.mogrify() is a psycopg2 (PostgreSQL) extension; an
# Oracle driver cursor is not expected to provide it. Confirm the driver,
# and on Oracle prefer cursor.executemany() with bind variables.
if insert_data:  # an empty VALUES list would be invalid SQL
    # mogrify() returns bytes, so join as bytes and decode once.
    # Assumes a UTF-8 client encoding; TODO confirm.
    args_str = b','.join(
        cursor.mogrify("(%s,%s,%s,%s,%s,%s)", x) for x in insert_data
    ).decode()
    cursor.execute(f"INSERT INTO {PrestageTable} VALUES " + args_str)
con.commit()
添加了 JSON 文件。
{
"result": [
{
"country": {
"link": "https://xyz.service-now.com/api/now/table/core_country/ebc0713f4",
"value": "9d38b7111b121100763d91eebc0713f4"
},
"parent": "",
"city": "trgtt",
"latitude": "",
"sys_updated_on": "2019-10-04 16:44:21",
"type": "",
"sys_class_name": "u_branch",
"sys_id": "565666",
"u_daily_limit": "0",
"sys_updated_by": "SYffffLVPAU",
"u_branch_name": "tyfhh",
"stock_room": "false",
"street": "KM 66 jhhhj jhg jhhhh - PORTÃO 01 - SALA DG",
"sys_created_on": "2019-07-25 16:26:18",
"contact": "",
"phone_territory": "",
"u_active": "true",
"company": "",
"lat_long_error": "",
"u_branch_id": "vfg",
"state": "",
"consumer": "",
"sys_created_by": "gghhh",
"longitude": "",
"u_region": "REGIONAL 1",
"zip": "",
"u_code": "",
"u_dmm": "false",
"sys_mod_count": "1",
"sys_tags": "",
"time_zone": "",
"full_name": "VCP - yghg",
"fax_phone": "",
"phone": "09-4321-3920",
"u_ddr": "false",
"name": "VCP - gfghj",
"u_cutoff_time": "",
"coordinates_retrieved_on": "",
"u_schedule": "",
"account": "",
"primary": "false"
},
{
"country": {
"link": "https://xyz.service-now.com/api/now/table/core_country/1eebc0713f4",
"value": "fgddddfghh"
},
"parent": "",
"city": "rerdr BRAZ",
"latitude": "",
"sys_updated_on": "2019-10-04 16:44:21",
"type": "",
"sys_class_name": "u_branch",
"sys_id": "5644",
"u_daily_limit": "0",
"sys_updated_by": "gg",
"u_branch_name": "hff BRAS",
"stock_room": "false",
"street": "302B RUA gffg ELIAS",
"sys_created_on": "2019-07-25 16:26:19",
"contact": "",
"phone_territory": "",
"u_active": "true",
"company": "",
"lat_long_error": "",
"u_branch_id": "frg",
"state": "",
"consumer": "",
"sys_created_by": "gtft",
"longitude": "",
"u_region": "REGIO 2",
"zip": "",
"u_code": "",
"u_dmm": "false",
"sys_mod_count": "1",
"sys_tags": "",
"time_zone": "",
"full_name": "WBR - yyjh BRAS",
"fax_phone": "",
"phone": "04-352-160",
"u_ddr": "false",
"name": "dfee - Wdd BRdS",
"u_cutoff_time": "",
"coordinates_retrieved_on": "",
"u_schedule": "",
"account": "",
"primary": "false"
} ]
}
此代码中缺少一些信息。循环本身不应该耗费这么多时间,所以问题要么出在检索数据上,要么出在向 Oracle 插入数据上。
首先,我建议先确定问题出在哪里,像 perf_tool 这样的分析工具会对你很有帮助。仅凭这些代码无法判断到底哪里出了问题,但我认为经过一些检查后,你会发现问题出在写入数据库的环节,所以解决方案可能是进行批量插入或调整索引。
正如我所说,您应该把多行数据合并起来,只执行一次插入。
试试这个:
# Build the full list of row tuples first, then issue one INSERT for all rows.
insert_data = []
for data in json_data['result']:
    ...  # branchFullName, branchNum, etc. variables
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_updated_on, sys_created_on)
    )

# NOTE(review): cursor.mogrify() is a psycopg2 (PostgreSQL) extension; an
# Oracle driver cursor is not expected to provide it. Confirm the driver,
# and on Oracle prefer cursor.executemany() with bind variables.
if insert_data:  # an empty VALUES list would be invalid SQL
    # mogrify() returns bytes, so join as bytes and decode once.
    # Assumes a UTF-8 client encoding; TODO confirm.
    args_str = b','.join(
        cursor.mogrify("(%s,%s,%s,%s,%s,%s)", x) for x in insert_data
    ).decode()
    cursor.execute(f"INSERT INTO {PrestageTable} VALUES " + args_str)
con.commit()
请注意,execute 调用位于循环之外。
使用 cursor.executemany() 一次插入所有行。这需要您为所有行构建一个二维参数列表(每行对应一个元组)。
# A table name cannot be supplied as a bind variable, so it is concatenated
# into the statement; every column value is bound positionally.
insert_sql = (
    "INSERT INTO " + PrestageTable + "(BRANCH_FULL_NAME, "
    "BRANCH_NUM, BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES "
    "(:1, :2, :3, :4, :5, :6)"
)

# One tuple per row, in the same order as the column list above.
params = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    # SYS_CREATED_ON is :5 and SYS_UPDATED_ON is :6.
    params.append(
        (branchFullName, branchNum, branchName, sysId, sys_created_on, sys_updated_on)
    )

# Send every row in a single batched call instead of one execute() per row.
if params:  # nothing to insert when the API returned no records
    cursor.executemany(insert_sql, params)
con.commit()
我按如下方式做了修改,现在运行快多了。
代码:
Updated:
# Explicit column list so the binds do not depend on the table's column
# order; the table name is concatenated because it cannot be bound.
insert_sql = (
    f"INSERT INTO {PrestageTable} (BRANCH_FULL_NAME, BRANCH_NUM, BRANCH_NAME, "
    "SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES (:1, :2, :3, :4, :5, :6)"
)

# One tuple per row, in the same order as the column list above.
insert_data = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_created_on, sys_updated_on)
    )

# executemany() takes the statement and the list of rows as two separate
# arguments; the rows are bound by the driver, not concatenated into the SQL.
if insert_data:  # nothing to insert when the API returned no records
    cursor.executemany(insert_sql, insert_data)
con.commit()
脚本的执行时间比预期长得多:仅 1250 条记录,循环并插入表就需要 20 多分钟。请问这是否正常?
下面的脚本从 API(JSON)中提取 11 列,并将每一行加载到 Oracle 表中。
脚本:
# Fetch the branch records from the REST API; `url`, `user` and `passwd`
# are expected to be defined earlier in the script.
auth_values = (user, passwd)
response = requests.get(url, auth=auth_values)
json_data = json.loads(response.text)

# A table name cannot be supplied as a bind variable, so it is concatenated
# into the statement; every column value is bound positionally.
insert_sql = (
    "INSERT INTO " + PrestageTable + "(BRANCH_FULL_NAME, "
    "BRANCH_NUM, BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES "
    "(:1, :2, :3, :4, :5, :6)"
)

for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    # NOTE: one execute() call per record; presumably this per-row round
    # trip to the database is what makes the script slow.
    cursor.execute(
        insert_sql,
        # Bind order must follow the column list: SYS_CREATED_ON is :5 and
        # SYS_UPDATED_ON is :6.
        (branchFullName, branchNum, branchName, sysId,
         sys_created_on, sys_updated_on),
    )

# Commit once, after all rows have been inserted.
con.commit()
有没有办法利用索引来加快速度,或者有什么其他建议?
Updated:
# Collect one tuple of column values per record so that the rows can be
# sent to the database in a single statement.
insert_data = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_updated_on, sys_created_on)
    )

# NOTE(review): cursor.mogrify() is a psycopg2 (PostgreSQL) extension; an
# Oracle driver cursor is not expected to provide it. Confirm the driver,
# and on Oracle prefer cursor.executemany() with bind variables.
if insert_data:  # an empty VALUES list would be invalid SQL
    # mogrify() returns bytes, so join as bytes and decode once.
    # Assumes a UTF-8 client encoding; TODO confirm.
    args_str = b','.join(
        cursor.mogrify("(%s,%s,%s,%s,%s,%s)", x) for x in insert_data
    ).decode()
    cursor.execute(f"INSERT INTO {PrestageTable} VALUES " + args_str)
con.commit()
添加了 JSON 文件。
{
"result": [
{
"country": {
"link": "https://xyz.service-now.com/api/now/table/core_country/ebc0713f4",
"value": "9d38b7111b121100763d91eebc0713f4"
},
"parent": "",
"city": "trgtt",
"latitude": "",
"sys_updated_on": "2019-10-04 16:44:21",
"type": "",
"sys_class_name": "u_branch",
"sys_id": "565666",
"u_daily_limit": "0",
"sys_updated_by": "SYffffLVPAU",
"u_branch_name": "tyfhh",
"stock_room": "false",
"street": "KM 66 jhhhj jhg jhhhh - PORTÃO 01 - SALA DG",
"sys_created_on": "2019-07-25 16:26:18",
"contact": "",
"phone_territory": "",
"u_active": "true",
"company": "",
"lat_long_error": "",
"u_branch_id": "vfg",
"state": "",
"consumer": "",
"sys_created_by": "gghhh",
"longitude": "",
"u_region": "REGIONAL 1",
"zip": "",
"u_code": "",
"u_dmm": "false",
"sys_mod_count": "1",
"sys_tags": "",
"time_zone": "",
"full_name": "VCP - yghg",
"fax_phone": "",
"phone": "09-4321-3920",
"u_ddr": "false",
"name": "VCP - gfghj",
"u_cutoff_time": "",
"coordinates_retrieved_on": "",
"u_schedule": "",
"account": "",
"primary": "false"
},
{
"country": {
"link": "https://xyz.service-now.com/api/now/table/core_country/1eebc0713f4",
"value": "fgddddfghh"
},
"parent": "",
"city": "rerdr BRAZ",
"latitude": "",
"sys_updated_on": "2019-10-04 16:44:21",
"type": "",
"sys_class_name": "u_branch",
"sys_id": "5644",
"u_daily_limit": "0",
"sys_updated_by": "gg",
"u_branch_name": "hff BRAS",
"stock_room": "false",
"street": "302B RUA gffg ELIAS",
"sys_created_on": "2019-07-25 16:26:19",
"contact": "",
"phone_territory": "",
"u_active": "true",
"company": "",
"lat_long_error": "",
"u_branch_id": "frg",
"state": "",
"consumer": "",
"sys_created_by": "gtft",
"longitude": "",
"u_region": "REGIO 2",
"zip": "",
"u_code": "",
"u_dmm": "false",
"sys_mod_count": "1",
"sys_tags": "",
"time_zone": "",
"full_name": "WBR - yyjh BRAS",
"fax_phone": "",
"phone": "04-352-160",
"u_ddr": "false",
"name": "dfee - Wdd BRdS",
"u_cutoff_time": "",
"coordinates_retrieved_on": "",
"u_schedule": "",
"account": "",
"primary": "false"
} ]
}
此代码中缺少一些信息。循环本身不应该耗费这么多时间,所以问题要么出在检索数据上,要么出在向 Oracle 插入数据上。首先,我建议先确定问题出在哪里,像 perf_tool 这样的分析工具会对你很有帮助。仅凭这些代码无法判断到底哪里出了问题,但我认为经过一些检查后,你会发现问题出在写入数据库的环节,所以解决方案可能是进行批量插入或调整索引。
正如我所说,您应该把多行数据合并起来,只执行一次插入。
试试这个:
# Build the full list of row tuples first, then issue one INSERT for all rows.
insert_data = []
for data in json_data['result']:
    ...  # branchFullName, branchNum, etc. variables
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_updated_on, sys_created_on)
    )

# NOTE(review): cursor.mogrify() is a psycopg2 (PostgreSQL) extension; an
# Oracle driver cursor is not expected to provide it. Confirm the driver,
# and on Oracle prefer cursor.executemany() with bind variables.
if insert_data:  # an empty VALUES list would be invalid SQL
    # mogrify() returns bytes, so join as bytes and decode once.
    # Assumes a UTF-8 client encoding; TODO confirm.
    args_str = b','.join(
        cursor.mogrify("(%s,%s,%s,%s,%s,%s)", x) for x in insert_data
    ).decode()
    cursor.execute(f"INSERT INTO {PrestageTable} VALUES " + args_str)
con.commit()
请注意,execute 调用位于循环之外。
使用 cursor.executemany() 一次插入所有行。这需要您为所有行构建一个二维参数列表(每行对应一个元组)。
# A table name cannot be supplied as a bind variable, so it is concatenated
# into the statement; every column value is bound positionally.
insert_sql = (
    "INSERT INTO " + PrestageTable + "(BRANCH_FULL_NAME, "
    "BRANCH_NUM, BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES "
    "(:1, :2, :3, :4, :5, :6)"
)

# One tuple per row, in the same order as the column list above.
params = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    # SYS_CREATED_ON is :5 and SYS_UPDATED_ON is :6.
    params.append(
        (branchFullName, branchNum, branchName, sysId, sys_created_on, sys_updated_on)
    )

# Send every row in a single batched call instead of one execute() per row.
if params:  # nothing to insert when the API returned no records
    cursor.executemany(insert_sql, params)
con.commit()
我按如下方式做了修改,现在运行快多了。代码:
Updated:
# Explicit column list so the binds do not depend on the table's column
# order; the table name is concatenated because it cannot be bound.
insert_sql = (
    f"INSERT INTO {PrestageTable} (BRANCH_FULL_NAME, BRANCH_NUM, BRANCH_NAME, "
    "SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) VALUES (:1, :2, :3, :4, :5, :6)"
)

# One tuple per row, in the same order as the column list above.
insert_data = []
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    insert_data.append(
        (branchFullName, branchNum, branchName, sysId, sys_created_on, sys_updated_on)
    )

# executemany() takes the statement and the list of rows as two separate
# arguments; the rows are bound by the driver, not concatenated into the SQL.
if insert_data:  # nothing to insert when the API returned no records
    cursor.executemany(insert_sql, insert_data)
con.commit()