Parse an Excel file and create JSON with exceljs on Node.js
I have this Excel file.
I need to convert the data from the file into JSON like this:
[
  {
    "no_pemohonan": "PNL-202109200826210023105",
    "sumber_data": "HOSTS",
    "tgl_permohonan": "2021-09-20"
  },
  {
    "no_pemohonan": "PNL-202109200845131363376",
    "sumber_data": "HOSTS",
    "tgl_permohonan": "2021-09-20"
  },
  ...
]
I can produce the data with the code below, but I have to map each object field by hand, as in the following syntax:
const excel = require('exceljs');
const workbook = new excel.Workbook();
await workbook.xlsx.load(objDescExcel.buffer);
let json = workbook.model;
let worksheetsArr = json.worksheets.length;
const arrRow = [];
const arrIdPembatalan = [];

// loop over each worksheet
for (let index = 0; index < worksheetsArr; index++) {
  let worksheet = workbook.worksheets[index];
  // console.log("worksheet " + worksheet);
  const rowlast = worksheet.lastRow;

  // loop over every row
  worksheet.eachRow(async (row, rowNumber) => {
    let new_row = row.values;
    // console.log(new_row);
    let no_permohonan = new_row[2];
    let sumber_data = new_row[3];
    let tgl_permohonan = new_row[4];
    let header = {
      no_permohonan: no_permohonan,
      sumber_data: sumber_data,
      tgl_permohonan: tgl_permohonan,
    };
    arrIdPembatalan.push(header);
  });
}
I want the object keys (headers) to be set automatically instead of mapping them again as in the syntax above. I have seen a solution, but it was written for the xlsx library, while I am using exceljs.
Here is a Node.js implementation.
(async function () {
  const excel = require('exceljs');
  const workbook = new excel.Workbook();
  // use readFile for testing purposes
  // await workbook.xlsx.load(objDescExcel.buffer);
  await workbook.xlsx.readFile(process.argv[2]);

  let jsonData = [];
  workbook.worksheets.forEach(function (sheet) {
    // read the first row as the data keys
    let firstRow = sheet.getRow(1);
    if (!firstRow.cellCount) return;
    let keys = firstRow.values;

    sheet.eachRow((row, rowNumber) => {
      if (rowNumber == 1) return;
      let values = row.values;
      let obj = {};
      for (let i = 1; i < keys.length; i++) {
        obj[keys[i]] = values[i];
      }
      jsonData.push(obj);
    });
  });
  console.log(jsonData);
})();
Test result:
$ node ./test.js ~/Documents/Book1.xlsx
[
  {
    no_pemohonan: 'PNL-202109200826210023105',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  },
  {
    no_pemohonan: 'PNL-202109200845131363376',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  }
]
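Note that tgl_permohonan comes back as a JavaScript Date object rather than the "2021-09-20" string shown in the desired output. If you need the plain date string, one option (a minimal sketch; the toIsoDate helper name is my own) is to format Date values while building each object:

// Hypothetical helper: format Date cells as 'YYYY-MM-DD', pass other values through unchanged.
function toIsoDate(value) {
  return value instanceof Date ? value.toISOString().slice(0, 10) : value;
}

// inside the eachRow callback above:
// obj[keys[i]] = toIsoDate(values[i]);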
// Assumes the 'q' promise library and an application-level 'logger' are available.
var q = require('q');
var Excel = require('exceljs');

var ReadExcelCSV = function (fileType, fileName, filePath, delimeter, textQualifier) {
    var deffered = q.defer();
    var workbook = new Excel.Workbook();
    var finalFilePath = filePath + fileName;

    if (fileType == "excel") {
        console.log("File Type: Excel");
        workbook.xlsx.readFile(finalFilePath).then(function () {
            ParseExcelCSVFile(workbook).then(function (resp) {
                deffered.resolve(resp);
            }, function (error) {
                logger.info("Error in Parsing Excel/CSV");
            });
        }, function (err) {
            logger.info("Error In Read Excel: " + JSON.stringify(err));
        });
    } else {
        if (delimeter != undefined && textQualifier != undefined) {
            var options = {};
            options.delimiter = delimeter;
            options.quote = textQualifier;
            options.dateFormats = [];
            workbook.csv.readFile(finalFilePath, options).then(function () {
                ParseExcelCSVFile(workbook).then(function (resp) {
                    // fs.unlink(finalFilePath);
                    deffered.resolve(resp);
                }, function (error) {
                    logger.info("Error in Parsing Excel/CSV");
                    deffered.reject(error);
                });
            }, function (error) {
                logger.info("Error In Read CSV: " + JSON.stringify(error));
                deffered.reject(error);
            });
        } else {
            workbook.csv.readFile(finalFilePath).then(function () {
                ParseExcelCSVFile(workbook).then(function (resp) {
                    deffered.resolve(resp);
                }, function (error) {
                    logger.info("Error in Parsing Excel/CSV");
                    deffered.reject(error);
                });
            }, function (error) {
                logger.info("Error In Read CSV: " + JSON.stringify(error));
                deffered.reject(error);
            });
        }
    }
    return deffered.promise;
};

var ParseExcelCSVFile = function (workbook) {
    try {
        var deffered = q.defer();
        var objresult = [];
        var objheaders = [];
        var worksheet = workbook.getWorksheet(1);

        worksheet.eachRow(function (row, rowNumber) {
            var currentobj = {};
            row.eachCell({
                includeEmpty: true
            }, function (cell, colNumber) {
                if (rowNumber == 1) {
                    // first row holds the column headers
                    objheaders.push(cell.value);
                } else {
                    currentobj[objheaders[colNumber - 1]] = cell.value == null ? '' : cell.value;
                }
            });
            if (rowNumber != 1) {
                objresult.push(currentobj);
            }
        });
        deffered.resolve(objresult);
        return deffered.promise;
    } catch (ex) {
        logger.error("Error in ParseExcel: " + ex.stack);
    }
};
I wrote this code a long time ago, so you will see an old pattern like the deffered (q) promise module. You can easily swap it out, but it should help you achieve what you want: it can read and parse both Excel and CSV files.
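For reference, here is a minimal modernized sketch of the same header-mapping idea using native async/await instead of q; the function and variable names are my own, not part of the original code:

const Excel = require('exceljs');

// Sketch: read the first worksheet and map each data row onto the header row.
async function parseWorkbookToJson(filePath) {
  const workbook = new Excel.Workbook();
  await workbook.xlsx.readFile(filePath);

  const worksheet = workbook.getWorksheet(1);
  const headers = [];
  const result = [];

  worksheet.eachRow((row, rowNumber) => {
    const current = {};
    row.eachCell({ includeEmpty: true }, (cell, colNumber) => {
      if (rowNumber === 1) {
        headers.push(cell.value); // first row holds the keys
      } else {
        current[headers[colNumber - 1]] = cell.value == null ? '' : cell.value;
      }
    });
    if (rowNumber !== 1) result.push(current);
  });

  return result;
}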
For large files, I would explore stream processing with the following libraries:
- Use exceljs to read the .xlsx file as a stream and write it back out as a .csv stream:
// read from a stream
const readStream = fs.createReadStream('largeWorkbook.xlsx');
const workbook = new Excel.Workbook();
await workbook.xlsx.read(readStream);
// write to stream
const writeStream = fs.createWriteStream('largeWorksheet.csv');
await workbook.csv.write(writeStream, { sheetName: 'Page name' });
- Then use csvtojson to convert the CSV to JSON:
import fs from 'fs'
import csvToJson from 'csvtojson'

const readStream = fs.createReadStream('largeWorksheet.csv')
const writeStream = fs.createWriteStream('largeWorksheet.json')

readStream
  .pipe(csvToJson())
  .pipe(writeStream)
This works for large files even on hardware with little memory.
Full code snippet:
import fs from 'fs'
import Excel from 'exceljs'
import csvToJson from 'csvtojson'

const xlsxRead = fs.createReadStream('largeWorkbook.xlsx')
const csvWrite = fs.createWriteStream('largeWorksheet.csv')
const csvRead = () => fs.createReadStream('largeWorksheet.csv')
const jsonWrite = fs.createWriteStream('largeWorksheet.json'); // semicolon needed before the IIFE below

(async function process() {
  const workbook = new Excel.Workbook()
  await workbook.xlsx.read(xlsxRead)
  await workbook.csv.write(csvWrite, { sheetName: 'Worksheet Name' })

  csvRead()
    .pipe(csvToJson())
    .pipe(jsonWrite)
})() // this immediately-invoked wrapper function is just for Node.js runtimes
     // that don't support top-level await yet
     // if running via `--esm` or from a `.mjs` file, it can be omitted
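If you need the parsed rows as a JavaScript array rather than a .json file on disk, csvtojson can also resolve them directly (a small sketch reusing the file name from above; run it inside an async function or with top-level await):

import csvToJson from 'csvtojson'

// Resolves to an array of objects keyed by the CSV header row.
const rows = await csvToJson().fromFile('largeWorksheet.csv')
console.log(rows.length, rows[0])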