Python:循环慢

Python: Slow for Loop

脚本的执行时间比预期的长。仅 1250 条记录,循环并插入表就需要 20 多分钟。请问这是否正常?

下面的脚本从 API(JSON)中提取 11 列,并将每一行加载到 Oracle 表中。

脚本:

# Download the branch records from the API and decode the JSON payload.
auth_values = (user, passwd)
response = requests.get(url, auth=auth_values)
json_data = json.loads(response.text)

# Adjacent string literals are concatenated by Python, so the statement can
# span several lines without leaving a string literal unterminated.
insert_sql = (
    "INSERT INTO " + PrestageTable + " (BRANCH_FULL_NAME, BRANCH_NUM, "
    "BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) "
    "VALUES (:1, :2, :3, :4, :5, :6)"
)

# One INSERT and one COMMIT are issued for every record in the payload.
for data in json_data['result']:
    branchFullName = data['full_name']
    branchNum = data['u_branch_id']
    branchName = data['u_branch_name']
    sysId = data['sys_id']
    sys_updated_on = data['sys_updated_on']
    sys_created_on = data['sys_created_on']
    # The bind values follow the column list above: SYS_CREATED_ON is :5 and
    # SYS_UPDATED_ON is :6, so "created" must come before "updated".
    cursor.execute(
        insert_sql,
        (branchFullName, branchNum, branchName, sysId,
         sys_created_on, sys_updated_on),
    )
    con.commit()

有没有办法使用索引,或者有什么建议。

Updated:
    # Collect one bind tuple per record; nothing is sent to the database
    # while the list is being built.
    insert_data = []
    for data in json_data['result']:
        insert_data.append((
            data['full_name'],
            data['u_branch_id'],
            data['u_branch_name'],
            data['sys_id'],
            data['sys_updated_on'],
            data['sys_created_on'],
        ))

    # The statement is built and executed once, after the loop. Positional
    # binds (:1..:6) are used instead of mogrify()/%s string building; the
    # column list is ordered to match the tuples above.
    if insert_data:  # nothing to insert when the API returned no rows
        cursor.executemany(
            f"INSERT INTO {PrestageTable} "
            "(BRANCH_FULL_NAME, BRANCH_NUM, BRANCH_NAME, SYS_ID, "
            "SYS_UPDATED_ON, SYS_CREATED_ON) "
            "VALUES (:1, :2, :3, :4, :5, :6)",
            insert_data,
        )
        con.commit()

添加了 JSON 文件。

{
    "result": [
        {
            "country": {
                "link": "https://xyz.service-now.com/api/now/table/core_country/ebc0713f4",
                "value": "9d38b7111b121100763d91eebc0713f4"
            },
            "parent": "",
            "city": "trgtt",
            "latitude": "",
            "sys_updated_on": "2019-10-04 16:44:21",
            "type": "",
            "sys_class_name": "u_branch",
            "sys_id": "565666",
            "u_daily_limit": "0",
            "sys_updated_by": "SYffffLVPAU",
            "u_branch_name": "tyfhh",
            "stock_room": "false",
            "street": "KM 66 jhhhj jhg jhhhh - PORTÃO 01 - SALA DG",
            "sys_created_on": "2019-07-25 16:26:18",
            "contact": "",
            "phone_territory": "",
            "u_active": "true",
            "company": "",
            "lat_long_error": "",
            "u_branch_id": "vfg",
            "state": "",
            "consumer": "",
            "sys_created_by": "gghhh",
            "longitude": "",
            "u_region": "REGIONAL 1",
            "zip": "",
            "u_code": "",
            "u_dmm": "false",
            "sys_mod_count": "1",
            "sys_tags": "",
            "time_zone": "",
            "full_name": "VCP - yghg",
            "fax_phone": "",
            "phone": "09-4321-3920",
            "u_ddr": "false",
            "name": "VCP - gfghj",
            "u_cutoff_time": "",
            "coordinates_retrieved_on": "",
            "u_schedule": "",
            "account": "",
            "primary": "false"
        },
        {
            "country": {
                "link": "https://xyz.service-now.com/api/now/table/core_country/1eebc0713f4",
                "value": "fgddddfghh"
            },
            "parent": "",
            "city": "rerdr BRAZ",
            "latitude": "",
            "sys_updated_on": "2019-10-04 16:44:21",
            "type": "",
            "sys_class_name": "u_branch",
            "sys_id": "5644",
            "u_daily_limit": "0",
            "sys_updated_by": "gg",
            "u_branch_name": "hff BRAS",
            "stock_room": "false",
            "street": "302B RUA gffg ELIAS",
            "sys_created_on": "2019-07-25 16:26:19",
            "contact": "",
            "phone_territory": "",
            "u_active": "true",
            "company": "",
            "lat_long_error": "",
            "u_branch_id": "frg",
            "state": "",
            "consumer": "",
            "sys_created_by": "gtft",
            "longitude": "",
            "u_region": "REGIO 2",
            "zip": "",
            "u_code": "",
            "u_dmm": "false",
            "sys_mod_count": "1",
            "sys_tags": "",
            "time_zone": "",
            "full_name": "WBR - yyjh BRAS",
            "fax_phone": "",
            "phone": "04-352-160",
            "u_ddr": "false",
            "name": "dfee - Wdd BRdS",
            "u_cutoff_time": "",
            "coordinates_retrieved_on": "",
            "u_schedule": "",
            "account": "",
            "primary": "false"
        }     ]
}

这段代码并不完整。循环本身不会耗费这么多时间,所以问题要么出在获取数据上,要么出在向 Oracle 插入数据上。首先,我建议先确定瓶颈在哪里,像 perf_tool 这样的性能分析工具会对你很有帮助。仅凭这些信息无法判断具体哪里出了问题,但我认为经过一些检查后你会发现问题出在写入数据库时,所以解决方案可能是进行批量插入或处理索引。

正如我所说,您应该一次插入多行并执行一次。

试试这个:

# Build the full list of bind tuples first, one tuple per record.
insert_data = []
for data in json_data['result']:
    insert_data.append(
        (
            data['full_name'],
            data['u_branch_id'],
            data['u_branch_name'],
            data['sys_id'],
            data['sys_updated_on'],
            data['sys_created_on'],
        )
    )

# Execute once, outside the loop, passing every row in a single call.
# The column list is written in the same order as the tuples above.
# NOTE(review): PrestageTable is interpolated into the SQL text because an
# identifier cannot be a bind variable; it is assumed to be a trusted value.
insert_sql = (
    f"INSERT INTO {PrestageTable} "
    "(BRANCH_FULL_NAME, BRANCH_NUM, BRANCH_NAME, SYS_ID, "
    "SYS_UPDATED_ON, SYS_CREATED_ON) "
    "VALUES (:1, :2, :3, :4, :5, :6)"
)
if insert_data:  # skip the round trip when there is nothing to insert
    cursor.executemany(insert_sql, insert_data)
    con.commit()

请注意 execute 在循环之外。

使用cursor.executemany()一次插入所有行。这需要您为所有行创建一个二维参数列表。

# Two-dimensional parameter list: one inner tuple per row, with the values
# in the same order as the column list of the INSERT statement below
# (SYS_CREATED_ON before SYS_UPDATED_ON).
params = [
    (
        data['full_name'],
        data['u_branch_id'],
        data['u_branch_name'],
        data['sys_id'],
        data['sys_created_on'],
        data['sys_updated_on'],
    )
    for data in json_data['result']
]

# The table name is concatenated outside the string literals; inside a
# triple-quoted string the text "+PrestageTable+" would be sent to the
# database verbatim instead of being replaced by the table name.
insert_sql = (
    "INSERT INTO " + PrestageTable + " (BRANCH_FULL_NAME, BRANCH_NUM, "
    "BRANCH_NAME, SYS_ID, SYS_CREATED_ON, SYS_UPDATED_ON) "
    "VALUES (:1, :2, :3, :4, :5, :6)"
)

if params:  # nothing to insert when the result list is empty
    cursor.executemany(insert_sql, params)
    con.commit()

我按照下面的方式做了修改,现在运行得更快了。代码:


Updated:
    # Gather the six values of every record into a list of bind tuples.
    insert_data = []
    for data in json_data['result']:
        branchFullName = data['full_name']
        branchNum = data['u_branch_id']
        branchName = data['u_branch_name']
        sysId = data['sys_id']
        sys_updated_on = data['sys_updated_on']
        sys_created_on = data['sys_created_on']
        insert_data.append(
            (branchFullName, branchNum, branchName, sysId,
             sys_created_on, sys_updated_on)
        )

    # The rows are passed as the second argument of executemany(); they
    # cannot be concatenated onto the SQL text (str + list raises TypeError).
    # Each tuple position maps to the bind placeholder with the same number.
    insert_sql = (
        f"INSERT INTO {PrestageTable} "
        "(BRANCH_FULL_NAME, BRANCH_NUM, BRANCH_NAME, SYS_ID, "
        "SYS_CREATED_ON, SYS_UPDATED_ON) "
        "VALUES (:1, :2, :3, :4, :5, :6)"
    )
    if insert_data:  # nothing to insert when the API returned no rows
        cursor.executemany(insert_sql, insert_data)
        con.commit()