如何使用 node-mysql 向 MySQL 数据库插入大约 100 万行?
How to insert about 1 million rows to a MySQL db using node-mysql?
我正在尝试构建一个应用程序,其中有两个给定列表 - 一个是名字,另一个是姓氏。我正在尝试创建一个包含每个名字和姓氏组合的数据库。我正在生成组合,然后尝试将它们插入到我的数据库中。但是似乎内存不足以处理这个问题,即使我已经将我的 fullNames (firstname+lastname) 数组分割成更小的数组。
// using the node-mysql driver to interact with mysql db
var mysql = require('mysql');
var async = require('async');
// One shared connection handle for the local MySQL server (credentials
// redacted). The driver's multipleStatements option is switched on.
var connection = mysql.createConnection({
  multipleStatements: true,
  database: '*******',
  password: '*******',
  user: 'root',
  host: 'localhost'
});
// Filled in by readFirstLastNames(): the first names and the last names
// read from the input files.
var firstNames;
var lastNames;
/**
 * Loads firstnames.out and lastnames.out synchronously, splits each file on
 * newlines, sorts the resulting arrays in place and stores them in the outer
 * firstNames / lastNames variables.
 *
 * The shared connection is connected before the files are read and ended
 * after they have been read.
 */
function readFirstLastNames() {
  connection.connect();
  var fs = require("fs");
  // Read one file into a sorted array of its lines.
  var loadSortedLines = function (path) {
    var lines = fs.readFileSync(path).toString().split("\n");
    lines.sort();
    return lines;
  };
  firstNames = loadSortedLines('firstnames.out');
  lastNames = loadSortedLines('lastnames.out');
  connection.end();
}
/*
const f = firstNames; const l = lastNames;
genNames(firstNames,lastNames);
var allNames;
function genNames(fN, lN) {
flatmap = (xs, fN) => xs.map(fN).reduce((a, b) => a.concat(b), []);
allNames = flatmap(fN, a => lN.map(b => `${a} ${b}`));
}
*/
/**
 * Issues one bulk INSERT into the `names` table on the shared connection.
 *
 * @param {number} x - Start index handed to fullNames.slice().
 * @param {number} y - End index (exclusive) handed to fullNames.slice().
 *
 * NOTE(review): the slice is stored in fullname_part but never used; the
 * query below is given the whole fullNames array, so every call submits
 * every pair rather than one batch.
 * NOTE(review): connect() and end() are invoked on the same shared
 * connection on every call.
 */
function insertIntoDB(x, y) {
connection.connect();
// Batch [x, y) of the generated pairs -- computed but not referenced again.
var fullname_part = fullNames.slice(x, y);
// The single `?` placeholder is bound to the full array of [first, last] pairs.
connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [fullNames], (err, result) => {
if (err) {
console.error('error connecting: ' + err.stack);
return;
}
});
// Runs synchronously right after query() is called, not inside its callback,
// so this message does not indicate that the insert has finished.
console.log("Done inserting all combinations of names.");
connection.end();
}
// Load both name lists, then build every [first, last] pair in memory.
readFirstLastNames();
var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, [])
// Walk the pairs in windows of 10000 indices.
// NOTE(review): x starts at 1, so the first window is slice(1, 10000) and
// the pair at index 0 falls outside every window.
var x = 1;
// The loop condition is y < 1000000, so the last window ends at index 990000.
for (var y = 10000; y < 1000000;) {
// Calls are issued back to back; nothing here waits for a query callback
// before starting the next call.
insertIntoDB(x, y);
x = y;
y = y + 10000;
}
但是当我尝试运行 node index.js
时,出现以下错误:
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
<--- Last few GCs --->
7095 ms: Scavenge 1399.0 (1457.9) -> 1399.0 (1457.9) MB, 1.4 / 0 ms (+ 56.0 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
7843 ms: Mark-sweep 1399.0 (1457.9) -> 1398.4 (1457.9) MB, 748.5 / 0 ms (+ 441.4 ms in 1126 steps since start of marking, biggest step 60.3 ms) [last resort gc].
8585 ms: Mark-sweep 1398.4 (1457.9) -> 1398.4 (1457.9) MB, 741.2 / 0 ms [last resort gc].
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0x3fc5864b4629 <JS Object>
2: arrayToList [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~58] [pc=0x3d90a7d8ead7] (this=0x233152605a09 <an Object with map 0x38dc0d04dcc1>,array=0x225c88bf01f1 <JS Array[881892]>,timeZone=0x2ed2ed0de679 <String[5]: local>)
3: escape [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~33] [pc=0x3d90a7d877e8] (this=0x233152605a09 <an Object w...
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6
我想了解如何解决这个问题并解决我的问题!提前谢谢你。
编辑 1:根据下面 @tadman 的评论,我对代码做了以下修改,改为使用 LOAD DATA INFILE。
// using the node-mysql driver to interact with mysql db
var mysql = require('mysql');
var async = require('async');
// One shared connection handle for the local MySQL server (credentials
// redacted), opened once here. The driver's multipleStatements option is on.
var connection = mysql.createConnection({
  multipleStatements: true,
  database: '******',
  password: '******',
  user: 'root',
  host: 'localhost'
});
connection.connect();
// Filled in by readFirstLastNames(): the first names and the last names
// read from the input files.
var firstNames;
var lastNames;
/**
 * Reads firstnames.out and lastnames.out synchronously and stores their
 * newline-separated entries in the outer firstNames / lastNames variables.
 * No sorting happens in this function.
 */
function readFirstLastNames() {
  var fs = require("fs");
  // Read one file and split it into an array of lines.
  var splitLines = function (path) {
    return fs.readFileSync(path).toString().split("\n");
  };
  firstNames = splitLines('firstnames.out');
  lastNames = splitLines('lastnames.out');
}
// Load both name lists, then build every [first, last] pair in memory.
readFirstLastNames();
var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, []) // fullNames has all combinations of names.
// No comparator is passed, so the pairs are ordered by their default string
// form (the two names joined by a comma).
fullNames.sort();
// Write every pair to db_inserts.txt, one "first,last" line per pair.
var fs = require('fs');
var file = fs.createWriteStream('db_inserts.txt');
// NOTE(review): the error handler is empty, so stream errors are silently dropped.
file.on('error', function(err) { /* error handling */ });
fullNames.forEach(function(v) { file.write(v.join(',') + '\n'); });
file.end();
// Ask the server to bulk-load the file that was just written.
// NOTE(review): the query is issued immediately after file.end(), without
// waiting for the stream's 'finish' event -- confirm the file is fully
// written before the server reads it.
// NOTE(review): the column list (firstname, lastname) appears before the
// FIELDS clause in this statement; verify the clause order against the MySQL
// LOAD DATA grammar.
connection.query('LOAD DATA INFILE \'/Users/adas/Downloads/signzy/db_inserts.txt\' INTO TABLE names (firstname, lastname) FIELDS TERMINATED BY \',\'', (err, result) => {
if (err) {
console.error('error connecting: ' + err.stack);
return;
}
});
connection.end();
// Printed as soon as the calls above return, not when the query callback fires.
console.log("Done");
但现在我看到了一个不同的问题。它给了我一个语法错误:#1064 - 你的 SQL 语法有错误;查看与您的 MySQL 服务器版本相对应的手册,了解在第 1 行的 'FIELDS TERMINATED BY ', '' 附近使用的正确语法。
EDIT 2 :正如@tadman 所指出的,列名列表应该出现在查询的末尾。因此,将查询更改为:LOAD DATA INFILE '/Users/adas/Downloads/signzy/db_inserts.txt' INTO TABLE names FIELDS TERMINATED BY ',' (firstname, lastname);
解决了该问题。
SIDENOTE :@tadman 还指出,使用 Node,我们不需要强制使用反斜杠来转义单引号。只需用双引号将整个查询括起来,然后在需要的地方继续使用单引号。
编辑:随着线程的进行,很明显解决这个问题的方法是生成一个文件并将其加载到数据库中;我会在这里留下这个答案,以防有人真的需要做我描述的事情。
您很可能是因为内存耗尽而出错:node-mysql 的 .query()
方法与 Node 中的大多数方法一样是异步的。因此,您实际上是在 for 循环中调用 insertIntoDB(x, y);
之后立即发起下一次调用,而不是等第一个查询完成后再发起下一个查询,如此反复,直到内存耗尽。
我看到您已经包含 async
。您可以使用 async.mapSeries
之类的东西来序列化您的 INSERT
(前提是您重新实现了 allNames
累加器)。
'use strict';
// Insert the names strictly one after another: async.mapSeries only starts
// the next iteration once the previous iteration's callback has been called,
// so at most one INSERT is in flight at a time.
connection.connect();
async.mapSeries(allNames, (data, callback) => {
  // Assumes each allNames entry is an object with `firstName` and `lastName`
  // properties -- the accumulator has to be re-implemented to produce that
  // shape (TODO confirm against the generator).
  connection.query(
    // One placeholder per column value, wrapped in parentheses to form a row.
    'INSERT INTO names (firstname, lastname) VALUES (?, ?)',
    [data.firstName, data.lastName],
    (err, result) => {
      if (err) {
        console.error('error: ' + err.stack);
        callback(err);
      } else {
        callback(null, result);
      }
    }
  );
}, (err, results) => {
  // Final callback: runs once after the last insert, or after the first error.
  if (err) {
    console.log(`Error: ${err}`);
  }
  // Close the connection only here. Calling end() right after starting the
  // series would queue the quit ahead of the inserts that are enqueued later.
  connection.end();
});
此外:
将您的文件排序一次,然后将其写回磁盘,这样您就不必在每次加载时都对其进行排序。
删除对 connection.connect()
和 .end()
的额外调用:只需在批处理或整个脚本开始时连接一次。
我正在尝试构建一个应用程序,其中有两个给定列表 - 一个是名字,另一个是姓氏。我正在尝试创建一个包含每个名字和姓氏组合的数据库。我正在生成组合,然后尝试将它们插入到我的数据库中。但是似乎内存不足以处理这个问题,即使我已经将我的 fullNames (firstname+lastname) 数组分割成更小的数组。
// using the node-mysql driver to interact with mysql db
var mysql = require('mysql');
var async = require('async');
// One shared connection handle for the local MySQL server (credentials
// redacted). The driver's multipleStatements option is switched on.
var connection = mysql.createConnection({
  multipleStatements: true,
  database: '*******',
  password: '*******',
  user: 'root',
  host: 'localhost'
});
// Filled in by readFirstLastNames(): the first names and the last names
// read from the input files.
var firstNames;
var lastNames;
/**
 * Loads firstnames.out and lastnames.out synchronously, splits each file on
 * newlines, sorts the resulting arrays in place and stores them in the outer
 * firstNames / lastNames variables.
 *
 * The shared connection is connected before the files are read and ended
 * after they have been read.
 */
function readFirstLastNames() {
  connection.connect();
  var fs = require("fs");
  // Read one file into a sorted array of its lines.
  var loadSortedLines = function (path) {
    var lines = fs.readFileSync(path).toString().split("\n");
    lines.sort();
    return lines;
  };
  firstNames = loadSortedLines('firstnames.out');
  lastNames = loadSortedLines('lastnames.out');
  connection.end();
}
/*
const f = firstNames; const l = lastNames;
genNames(firstNames,lastNames);
var allNames;
function genNames(fN, lN) {
flatmap = (xs, fN) => xs.map(fN).reduce((a, b) => a.concat(b), []);
allNames = flatmap(fN, a => lN.map(b => `${a} ${b}`));
}
*/
/**
 * Issues one bulk INSERT into the `names` table on the shared connection.
 *
 * @param {number} x - Start index handed to fullNames.slice().
 * @param {number} y - End index (exclusive) handed to fullNames.slice().
 *
 * NOTE(review): the slice is stored in fullname_part but never used; the
 * query below is given the whole fullNames array, so every call submits
 * every pair rather than one batch.
 * NOTE(review): connect() and end() are invoked on the same shared
 * connection on every call.
 */
function insertIntoDB(x, y) {
connection.connect();
// Batch [x, y) of the generated pairs -- computed but not referenced again.
var fullname_part = fullNames.slice(x, y);
// The single `?` placeholder is bound to the full array of [first, last] pairs.
connection.query('INSERT INTO names (firstname, lastname) VALUES ?', [fullNames], (err, result) => {
if (err) {
console.error('error connecting: ' + err.stack);
return;
}
});
// Runs synchronously right after query() is called, not inside its callback,
// so this message does not indicate that the insert has finished.
console.log("Done inserting all combinations of names.");
connection.end();
}
// Load both name lists, then build every [first, last] pair in memory.
readFirstLastNames();
var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, [])
// Walk the pairs in windows of 10000 indices.
// NOTE(review): x starts at 1, so the first window is slice(1, 10000) and
// the pair at index 0 falls outside every window.
var x = 1;
// The loop condition is y < 1000000, so the last window ends at index 990000.
for (var y = 10000; y < 1000000;) {
// Calls are issued back to back; nothing here waits for a query callback
// before starting the next call.
insertIntoDB(x, y);
x = y;
y = y + 10000;
}
但是当我尝试运行 node index.js
时,出现以下错误:
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
Done inserting all combinations of names.
<--- Last few GCs --->
7095 ms: Scavenge 1399.0 (1457.9) -> 1399.0 (1457.9) MB, 1.4 / 0 ms (+ 56.0 ms in 1 steps since last GC) [allocation failure] [incremental marking delaying mark-sweep].
7843 ms: Mark-sweep 1399.0 (1457.9) -> 1398.4 (1457.9) MB, 748.5 / 0 ms (+ 441.4 ms in 1126 steps since start of marking, biggest step 60.3 ms) [last resort gc].
8585 ms: Mark-sweep 1398.4 (1457.9) -> 1398.4 (1457.9) MB, 741.2 / 0 ms [last resort gc].
<--- JS stacktrace --->
==== JS stack trace =========================================
Security context: 0x3fc5864b4629 <JS Object>
2: arrayToList [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~58] [pc=0x3d90a7d8ead7] (this=0x233152605a09 <an Object with map 0x38dc0d04dcc1>,array=0x225c88bf01f1 <JS Array[881892]>,timeZone=0x2ed2ed0de679 <String[5]: local>)
3: escape [/Users/adas/Downloads/signzy/node_modules/sqlstring/lib/SqlString.js:~33] [pc=0x3d90a7d877e8] (this=0x233152605a09 <an Object w...
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
Abort trap: 6
我想了解如何解决这个问题并解决我的问题!提前谢谢你。
编辑 1:根据下面 @tadman 的评论,我对代码做了以下修改,改为使用 LOAD DATA INFILE。
// using the node-mysql driver to interact with mysql db
var mysql = require('mysql');
var async = require('async');
// One shared connection handle for the local MySQL server (credentials
// redacted), opened once here. The driver's multipleStatements option is on.
var connection = mysql.createConnection({
  multipleStatements: true,
  database: '******',
  password: '******',
  user: 'root',
  host: 'localhost'
});
connection.connect();
// Filled in by readFirstLastNames(): the first names and the last names
// read from the input files.
var firstNames;
var lastNames;
/**
 * Reads firstnames.out and lastnames.out synchronously and stores their
 * newline-separated entries in the outer firstNames / lastNames variables.
 * No sorting happens in this function.
 */
function readFirstLastNames() {
  var fs = require("fs");
  // Read one file and split it into an array of lines.
  var splitLines = function (path) {
    return fs.readFileSync(path).toString().split("\n");
  };
  firstNames = splitLines('firstnames.out');
  lastNames = splitLines('lastnames.out');
}
// Load both name lists, then build every [first, last] pair in memory.
readFirstLastNames();
var fullNames = firstNames.reduce((pairs, first) => { lastNames.forEach(last => pairs.push([first, last])); return pairs; }, []) // fullNames has all combinations of names.
// No comparator is passed, so the pairs are ordered by their default string
// form (the two names joined by a comma).
fullNames.sort();
// Write every pair to db_inserts.txt, one "first,last" line per pair.
var fs = require('fs');
var file = fs.createWriteStream('db_inserts.txt');
// NOTE(review): the error handler is empty, so stream errors are silently dropped.
file.on('error', function(err) { /* error handling */ });
fullNames.forEach(function(v) { file.write(v.join(',') + '\n'); });
file.end();
// Ask the server to bulk-load the file that was just written.
// NOTE(review): the query is issued immediately after file.end(), without
// waiting for the stream's 'finish' event -- confirm the file is fully
// written before the server reads it.
// NOTE(review): the column list (firstname, lastname) appears before the
// FIELDS clause in this statement; verify the clause order against the MySQL
// LOAD DATA grammar.
connection.query('LOAD DATA INFILE \'/Users/adas/Downloads/signzy/db_inserts.txt\' INTO TABLE names (firstname, lastname) FIELDS TERMINATED BY \',\'', (err, result) => {
if (err) {
console.error('error connecting: ' + err.stack);
return;
}
});
connection.end();
// Printed as soon as the calls above return, not when the query callback fires.
console.log("Done");
但现在我看到了一个不同的问题。它给了我一个语法错误:#1064 - 你的 SQL 语法有错误;查看与您的 MySQL 服务器版本相对应的手册,了解在第 1 行的 'FIELDS TERMINATED BY ', '' 附近使用的正确语法。
EDIT 2 :正如@tadman 所指出的,列名列表应该出现在查询的末尾。因此,将查询更改为:LOAD DATA INFILE '/Users/adas/Downloads/signzy/db_inserts.txt' INTO TABLE names FIELDS TERMINATED BY ',' (firstname, lastname);
解决了该问题。
SIDENOTE :@tadman 还指出,使用 Node,我们不需要强制使用反斜杠来转义单引号。只需用双引号将整个查询括起来,然后在需要的地方继续使用单引号。
编辑:随着线程的进行,很明显解决这个问题的方法是生成一个文件并将其加载到数据库中;我会在这里留下这个答案,以防有人真的需要做我描述的事情。
您很可能是因为内存耗尽而出错:node-mysql 的 .query()
方法与 Node 中的大多数方法一样是异步的。因此,您实际上是在 for 循环中调用 insertIntoDB(x, y);
之后立即发起下一次调用,而不是等第一个查询完成后再发起下一个查询,如此反复,直到内存耗尽。
我看到您已经包含 async
。您可以使用 async.mapSeries
之类的东西来序列化您的 INSERT
(前提是您重新实现了 allNames
累加器)。
'use strict';
// Insert the names strictly one after another: async.mapSeries only starts
// the next iteration once the previous iteration's callback has been called,
// so at most one INSERT is in flight at a time.
connection.connect();
async.mapSeries(allNames, (data, callback) => {
  // Assumes each allNames entry is an object with `firstName` and `lastName`
  // properties -- the accumulator has to be re-implemented to produce that
  // shape (TODO confirm against the generator).
  connection.query(
    // One placeholder per column value, wrapped in parentheses to form a row.
    'INSERT INTO names (firstname, lastname) VALUES (?, ?)',
    [data.firstName, data.lastName],
    (err, result) => {
      if (err) {
        console.error('error: ' + err.stack);
        callback(err);
      } else {
        callback(null, result);
      }
    }
  );
}, (err, results) => {
  // Final callback: runs once after the last insert, or after the first error.
  if (err) {
    console.log(`Error: ${err}`);
  }
  // Close the connection only here. Calling end() right after starting the
  // series would queue the quit ahead of the inserts that are enqueued later.
  connection.end();
});
此外:
将您的文件排序一次,然后将其写回磁盘,这样您就不必在每次加载时都对其进行排序。
删除对
connection.connect()
和.end()
的额外调用:只需在批处理或整个脚本开始时连接一次。