如何使用 node-mysql 向 MySQL 数据库插入大约 100 万行?

How to insert about 1 million rows to a MySQL db using node-mysql?

我正在尝试构建一个应用程序,其中有两个给定列表 - 一个是名字,另一个是姓氏。我正在尝试创建一个包含每个名字和姓氏组合的数据库。我正在生成组合,然后尝试将它们插入到我的数据库中。但是似乎内存不足以处理这个问题,即使我已经将我的 fullNames (firstname+lastname) 数组分割成更小的数组。

// using the node-mysql driver to interact with mysql db
var mysql      = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
    host     : 'localhost',
    user     : 'root',
    password : '*******',
    database : '*******',
    multipleStatements: true,
});

var firstNames, lastNames; // arrays that will store
                           // first-names and last-names from files

function readFirstLastNames() {
    connection.connect();
    var fs = require("fs");
    firstNames = fs.readFileSync('firstnames.out').toString().split("\n");
    firstNames.sort();
    lastNames = fs.readFileSync('lastnames.out').toString().split("\n");
    lastNames.sort();
    connection.end();
}

/*
const f = firstNames; const l = lastNames;

genNames(firstNames,lastNames);

var allNames;

function genNames(fN, lN) {
    flatmap = (xs, fN) => xs.map(fN).reduce((a, b) => a.concat(b), []);
    allNames = flatmap(fN, a => lN.map(b => `${a} ${b}`));
}
*/

function insertIntoDB(x, y) {
    connection.connect();
    var fullname_part = fullNames.slice(x, y);
    connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [fullNames], (err, result) => {
        if (err) {
            console.error('error connecting: ' + err.stack);
            return;
        }
    });
    console.log("Done inserting all combinations of names.");
    connection.end();
}

readFirstLastNames();

var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, [])

var x = 1;
for (var y = 10000; y < 1000000;) {
    insertIntoDB(x, y);
    x = y;
    y = y + 10000;
}

但是当我尝试 运行 - node index.js 时,出现以下错误:

Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.

<--- Last few GCs --->

    7095 ms: Scavenge 1399.0 (1457.9) -> 1399.0 (1457.9) MB, 1.4 / 0 ms (+ 56.0 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
    7843 ms: Mark-sweep 1399.0 (1457.9) -> 1398.4 (1457.9) MB, 748.5 / 0 ms (+ 441.4 ms in 1126 steps since start of marking, biggest step 60.3 ms) [last resort gc].
    8585 ms: Mark-sweep 1398.4 (1457.9) -> 1398.4 (1457.9) MB, 741.2 / 0 ms [last resort gc].


<--- JS stacktrace --->

==== JS stack trace =========================================

Security context: 0x3fc5864b4629 <JS Object>
    2: arrayToList [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~58] [pc=0x3d90a7d8ead7] (this=0x233152605a09 <an Object with map 0x38dc0d04dcc1>,array=0x225c88bf01f1 <JS Array[881892]>,timeZone=0x2ed2ed0de679 <String[5]: local>)
    3: escape [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~33] [pc=0x3d90a7d877e8] (this=0x233152605a09 <an Object w...

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6

我想了解如何解决这个问题并解决我的问题!提前谢谢你。

编辑 1 :根据@tadman 下面的评论,我对代码执行了以下更改,以便使用 LOAD DATA INFILE.

// using the node-mysql driver to interact with mysql db
var mysql      = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
    host     : 'localhost',
    user     : 'root',
    password : '******',
    database : '******',
    multipleStatements: true,
});

connection.connect();

var firstNames, lastNames; // arrays that will store
                           // first-names and last-names from files

function readFirstLastNames() { // reads firstnames.out and lastnames.out, puts them in arrays and sorts etc.
    var fs = require("fs");
    firstNames = fs.readFileSync('firstnames.out').toString().split("\n");
    lastNames = fs.readFileSync('lastnames.out').toString().split("\n");
}


readFirstLastNames();

var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, []) // fullNames has all combinations of names.
fullNames.sort();


// Writing all combinations out to file in a comma separated syntax
var fs = require('fs');
var file = fs.createWriteStream('db_inserts.txt');
file.on('error', function(err) { /* error handling */ });
fullNames.forEach(function(v) { file.write(v.join(',') + '\n'); });
file.end();


// using node-mysql in conjunction with LOAD DATA INFILE to read the inserts into the table.
connection.query('LOAD DATA INFILE \'/Users/adas/Downloads/signzy/db_inserts.txt\' INTO TABLE names (firstname, lastname) FIELDS TERMINATED BY \',\'', (err, result) => {
    if (err) {
        console.error('error connecting: ' + err.stack);
        return;
    }
});
connection.end();
console.log("Done");

但现在我看到了一个不同的问题。它给了我一个语法错误:#1064 - 你的 SQL 语法有错误;查看与您的 MySQL 服务器版本相对应的手册,了解在第 1 行的 'FIELDS TERMINATED BY ', '' 附近使用的正确语法。

EDIT 2 :正如@tadman 所指出的,列名列表应该出现在查询的末尾。因此,将查询更改为:LOAD DATA INFILE '/Users/adas/Downloads/signzy/db_inserts.txt' INTO TABLE names FIELDS TERMINATED BY ',' (firstname, lastname); 解决了该问题。

SIDENOTE :@tadman 还指出,使用 Node,我们不需要强制使用反斜杠来转义单引号。只需用双引号将整个查询括起来,然后在需要的地方继续使用单引号。

编辑:随着线程的进行,很明显解决这个问题的方法是生成一个文件并将其加载到数据库中;我会在这里留下这个答案,以防有人真的需要做我描述的事情。


您可能 运行内存不足,因为 node-mysql 的 .query() 方法与 Node 中的大多数方法一样是异步的。因此,您实际上在做的是在 for 循环中调用 insertIntoDB(x, y);,然后立即调用另一个,而不是等待第一个查询完成后再尝试另一个查询,直到 运行 内存不足。

我看到您已经包含 async。您可以使用 async.mapSeries 之类的东西来序列化您的 INSERT(前提是您重新实现了 allNames 累加器)。

'use strict';
connection.connect();
async.mapSeries(allNames, (data, callback) => {
    connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [data.firstName, data.firstName], (err, result) => {
        if (err) {
            console.error('error: ' + err.stack);
            callback(err);
        } else {
            callback(null, result);
        }
    });
}, (err, results) => {
    // Final callback
    if (err) {
        console.log(`Error: ${err}`);
    }
});
connection.end();

此外:

  1. 将您的文件排序一次,然后将其写回磁盘,这样您就不必在每次加载时都对其进行排序。

  2. 删除对 connection.connect().end() 的额外调用:只需在批处理或整个脚本开始时连接一次。