使用 Python 批量更新 MySQL

Bulk update MySQL with Python

我需要更新 MySQL 中的数百万行数据。我目前使用 for 循环逐条执行查询。为了加快更新速度,我想使用 Python MySQL 连接器的 executemany(),这样就可以分批更新,每个批次只需执行一次查询。

也许这能帮上忙

# The statement and its parameter tuples are kept separate so the driver
# substitutes the values itself.
update_sql = "UPDATE Writers SET Name = %s WHERE Id = %s "
update_params = [
    ("new_value", "3"),
    ("new_value", "6"),
]

# Run the same UPDATE against every parameter tuple, then commit.
cur.executemany(update_sql, update_params)
conn.commit()

我认为 mysqldb 无法同时处理多个 UPDATE 查询。

但是您可以使用末尾带有 ON DUPLICATE KEY UPDATE 子句的 INSERT 查询。

为了易用性和可读性,我写了下面的例子。

import MySQLdb

def _quote_identifier(name):
    """Return ``name`` wrapped in backticks, doubling any embedded backtick."""
    return '`{0}`'.format('{0}'.format(name).replace('`', '``'))


def update_many(data_list=None, mysql_table=None):
    """
    Insert or update many rows of a MySQL table with one batched statement.

    A single ``INSERT ... ON DUPLICATE KEY UPDATE`` query is built from the
    keys of the first dictionary and run through ``executemany``. A row whose
    values collide with an existing unique/primary key updates that row; any
    other row is inserted.

    The connection parameters are hard-coded inside this function.

    Param:
        data_list (List):
            A list of dictionaries where the keys are the mysql table
            column names, and the values are the update values. Every
            dictionary must have exactly the same keys.
        mysql_table (String):
            The mysql table to be updated. It is interpolated into the
            query as-is (not quoted), so it must come from trusted code.

    Returns:
        True when the batch was committed (or there was nothing to write),
        False when MySQL reported an error and the batch was rolled back.

    Raises:
        ValueError: if the dictionaries do not all share the same keys.
    """
    rows = list(data_list) if data_list is not None else []
    if not rows:
        # Nothing to write: do not open a connection or send an empty query.
        return True

    # Fix the column order once and read every row in that order, instead of
    # relying on each dictionary happening to iterate in the same order.
    column_names = list(rows[0])
    expected_keys = set(column_names)

    values = []
    for data_dict in rows:
        if set(data_dict) != expected_keys:
            raise ValueError(
                "All dictionaries in data_list must have the same keys")
        values.append(tuple(data_dict[name] for name in column_names))

    # Quote the column names everywhere they appear so reserved words work in
    # the ON DUPLICATE KEY UPDATE clause as well as in the column list.
    quoted = [_quote_identifier(name) for name in column_names]
    columns = ', '.join(quoted)
    place_holders = ', '.join(['%s'] * len(quoted))
    duplicates = ', '.join('{0}=VALUES({0})'.format(q) for q in quoted)
    query = "INSERT INTO {0} ({1}) VALUES ({2})".format(
        mysql_table, columns, place_holders)
    query = "{0} ON DUPLICATE KEY UPDATE {1}".format(query, duplicates)

    # Connection and Cursor
    conn = MySQLdb.connect('localhost', 'jeff', 'atwood', 'Whosebug')
    try:
        cur = conn.cursor()
        try:
            cur.executemany(query, values)
        except MySQLdb.Error as e:
            try:
                print("MySQL Error [%d]: %s" % (e.args[0], e.args[1]))
            except IndexError:
                print("MySQL Error: %s" % str(e))

            conn.rollback()
            return False
        else:
            conn.commit()
        finally:
            cur.close()
    finally:
        # Runs on success, on the error return and on unexpected exceptions,
        # so the connection is never leaked.
        conn.close()

    return True

一行解释

# Wrap each key of data_dict in backticks and join the results with ", ".
columns = ', '.join('`{}`'.format(k) for k in data_dict)

等同于:
# Long-hand form of the one-line join: wrap each key of data_dict in
# backticks, collect the quoted names, then join them with ", ".
column_list = []
for k in data_dict:
    column_list.append('`{}`'.format(k))
columns = ", ".join(column_list)

这是一个用法示例

# Three sample rows; every dictionary uses the same column names as keys.
test_data_list = [
    {'id': 1, 'name': 'Marco', 'articles': 1},
    {'id': 2, 'name': 'Keshaw', 'articles': 8},
    {'id': 3, 'name': 'Wes', 'articles': 0},
]

update_many(data_list=test_data_list, mysql_table='writers')

查询输出

INSERT INTO writers (`articles`, `id`, `name`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE articles=VALUES(articles), id=VALUES(id), name=VALUES(name)

值输出

[[1, 1, 'Marco'], [8, 2, 'Keshaw'], [0, 3, 'Wes']]