在 Node.js 中使用 exceljs 解析 Excel 文件并生成 JSON 格式

Parse excel file and create JSON format in exceljs ON nodejs

我有这个 excel 文件

我需要将 Excel 文件中的数据转换为 JSON,如下所示

[
   {
   "no_pemohonan": "PNL-202109200826210023105",
   "sumber_data": "HOSTS",
   "tgl_permohonan": "2021-09-20",
   },
   {
   "no_pemohonan": "PNL-202109200845131363376",
   "sumber_data": "HOSTS",
   "tgl_permohonan": "2021-09-20",
   },
   ...
]

我可以用下面的代码生成这些数据,但必须像下面这样手动逐个设置对象的属性

            const excel = require('exceljs');
            const workbook = new excel.Workbook();

            // Load the workbook from the in-memory buffer held by objDescExcel.
            await workbook.xlsx.load(objDescExcel.buffer);
            let json = workbook.model;
            let worksheetsArr = json.worksheets.length;
            const arrRow = [];
            const arrIdPembatalan = [];

            // Loop over every worksheet.
            for (let index = 0; index < worksheetsArr; index++) {
                let worksheet = workbook.worksheets[index];
                // Loop over all rows of the current worksheet.
                worksheet.eachRow((row, rowNumber) => {
                    // NOTE(review): assumes row 1 is the header row, so it is
                    // skipped here instead of being pushed as a data record.
                    if (rowNumber > 1) {
                        let new_row = row.values;
                        // Hard-coded column positions: this is the manual
                        // mapping the question wants to get rid of.
                        let no_permohonan = new_row[2];
                        let sumber_data = new_row[3];
                        let tgl_permohonan = new_row[4];

                        let header = {
                            no_permohonan: no_permohonan,
                            sumber_data: sumber_data,
                            tgl_permohonan: tgl_permohonan,
                        };
                        arrIdPembatalan.push(header);
                    }
                });
            }

我希望能根据表头自动设置键名,而不必像上面那样手动逐个设置。我看过一个解决方案,但它是基于 xlsx 库编写的,而我使用的是 exceljs。

这里是一个nodejs实现。

// Reads the workbook given as the first CLI argument and prints every data
// row of every worksheet as an object keyed by that worksheet's first row.
(async function() {
    const excel = require('exceljs');
    const workbook = new excel.Workbook();
    // readFile is used here for testing; for an in-memory upload use instead:
    // await workbook.xlsx.load(objDescExcel.buffer);
    await workbook.xlsx.readFile(process.argv[2]);
    const jsonData = [];
    workbook.worksheets.forEach((sheet) => {
        // The first row supplies the property names.
        const firstRow = sheet.getRow(1);
        if (!firstRow.cellCount) return; // no header cells: nothing to map
        const keys = firstRow.values;
        sheet.eachRow((row, rowNumber) => {
            if (rowNumber === 1) return; // the header row is not data
            const values = row.values;
            const obj = {};
            // Starts at 1: row.values is indexed by column number in exceljs.
            for (let i = 1; i < keys.length; i++) {
                // A blank header cell would otherwise create an "undefined" key.
                if (keys[i] == null) continue;
                obj[keys[i]] = values[i];
            }
            jsonData.push(obj);
        });
    });
    console.log(jsonData);
})().catch((err) => {
    // Report read/parse failures instead of leaving the promise unhandled.
    console.error(err);
    process.exitCode = 1;
});

测试结果

$ node ./test.js ~/Documents/Book1.xlsx
[
  {
    no_pemohonan: 'PNL-202109200826210023105',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  },
  {
    no_pemohonan: 'PNL-202109200845131363376',
    sumber_data: 'HOSTS',
    tgl_permohonan: 2021-09-20T00:00:00.000Z
  }
]
var Excel = require('exceljs');

/**
 * Reads an Excel or CSV file into a workbook and parses it with
 * ParseExcelCSVFile.
 *
 * @param {string} fileType - "excel" reads the file as XLSX; any other value
 *     reads it as CSV.
 * @param {string} fileName - File name, appended to filePath as-is.
 * @param {string} filePath - Directory prefix; it is concatenated directly
 *     with fileName, so it must already end with a path separator.
 * @param {string} [delimeter] - CSV field delimiter. Only used when
 *     textQualifier is also provided.
 * @param {string} [textQualifier] - CSV quote character. Only used when
 *     delimeter is also provided.
 * @returns {Promise} q promise resolved with the value produced by
 *     ParseExcelCSVFile, or rejected with the read/parse error.
 */
var ReadExcelCSV = function (fileType, fileName, filePath, delimeter, textQualifier) {
    var deffered = q.defer();
    var workbook = new Excel.Workbook();
    var finalFilePath = filePath + fileName;
    var isExcel = fileType === "excel";
    var reading;

    if (isExcel) {
        console.log("File Type: Excel");
        reading = workbook.xlsx.readFile(finalFilePath);
    } else if (delimeter != null && textQualifier != null) {
        var options = {};
        options.delimiter = delimeter;
        options.quote = textQualifier;
        options.dateFormats = [];
        reading = workbook.csv.readFile(finalFilePath, options);
    } else {
        reading = workbook.csv.readFile(finalFilePath);
    }

    reading.then(function () {
        return ParseExcelCSVFile(workbook).then(function (resp) {
            deffered.resolve(resp);
        }, function (error) {
            logger.info("Error in Parsing Excel/CSV");
            deffered.reject(error);
        });
    }, function (error) {
        logger.info("Error In Read " + (isExcel ? "Excel" : "CSV") + ": " + JSON.stringify(error));
        // Every failure path must settle the deferred; otherwise the caller
        // waits forever on a promise that never resolves or rejects.
        deffered.reject(error);
    }).then(null, function (error) {
        // Reached only when a handler above threw synchronously, e.g. when
        // ParseExcelCSVFile did not return a promise.
        deffered.reject(error);
    });

    return deffered.promise;
};

/**
 * Converts the worksheet returned by workbook.getWorksheet(1) into an array
 * of plain objects, using the cell values of row 1 as property names.
 *
 * Null/undefined cell values are stored as empty strings.
 *
 * @param {Object} workbook - Loaded exceljs workbook.
 * @returns {Promise} q promise resolved with the array of row objects, or
 *     rejected with the error raised while parsing.
 */
var ParseExcelCSVFile = function (workbook) {
    var deffered = q.defer();
    try {
        var objresult = [];
        var objheaders = [];

        var worksheet = workbook.getWorksheet(1);
        worksheet.eachRow(function (row, rowNumber) {
            if (rowNumber === 1) {
                // Header row: remember each column's name by position.
                row.eachCell({
                    includeEmpty: true
                }, function (cell, colNumber) {
                    objheaders[colNumber - 1] = cell.value;
                });
                return;
            }

            var currentobj = {};
            row.eachCell({
                includeEmpty: true
            }, function (cell, colNumber) {
                currentobj[objheaders[colNumber - 1]] = cell.value == null ? '' : cell.value;
            });
            objresult.push(currentobj);
        });
        deffered.resolve(objresult);
    } catch (ex) {
        logger.error("Error in ParseExcel: " + ex.stack);
        // Reject rather than return undefined, so callers that chain .then()
        // on the result get a normal rejection instead of a TypeError.
        deffered.reject(ex);
    }
    return deffered.promise;
};

这段代码是我很久以前写的,所以你会看到它使用了 q 的 deferred(代码中拼写为 deffered)这种旧式写法;你可以很容易地把它换成其他写法,但它应该能帮助你实现目标。它可以读取并解析 Excel 和 CSV 文件。

如果要处理大文件,我会考虑使用以下库进行流式处理:

  1. 使用 exceljs 以流的方式读取 .xlsx 文件,并以流的方式写出 .csv:
// Load the workbook from a readable file stream.
const xlsxSource = fs.createReadStream('largeWorkbook.xlsx');
const workbook = new Excel.Workbook();
await workbook.xlsx.read(xlsxSource);

// Write the worksheet named 'Page name' out as CSV through a writable stream.
const csvSink = fs.createWriteStream('largeWorksheet.csv');
const csvOptions = { sheetName: 'Page name' };
await workbook.csv.write(csvSink, csvOptions);
  2. 然后使用 csvtojson 将 CSV 转换为 JSON:
import csvToJson from 'csvtojson'

// Pipe the CSV file through the csvtojson converter into the JSON output file.
const csvSource = fs.createReadStream('largeWorksheet.csv');
const jsonSink = fs.createWriteStream('largeWorksheet.json');
const converter = csvToJson();

csvSource.pipe(converter).pipe(jsonSink);

即使在内存不足的硬件上,这也适用于大文件。

完整代码片段:

import fs from 'fs'
import Excel from 'exceljs'
import csvToJson from 'csvtojson'

const xlsxRead = fs.createReadStream('largeWorkbook.xlsx');
const csvWrite = fs.createWriteStream('largeWorksheet.csv');
// Created on demand, so the CSV is opened for reading only after it has been written.
const csvRead = () => fs.createReadStream('largeWorksheet.csv');
// The trailing semicolon matters: without it the parenthesised function below
// is parsed as a call on the value returned by fs.createWriteStream(...).
const jsonWrite = fs.createWriteStream('largeWorksheet.json');

// Converts largeWorkbook.xlsx to CSV, then pipes that CSV through csvtojson
// into largeWorksheet.json.
// The immediately-invoked wrapper is only needed on Node.js runtimes without
// top-level await; in an ES module (e.g. an `.mjs` file) it can be omitted.
// NOTE(review): workbook.xlsx.read() appears to build the whole workbook in
// memory; for very large files check exceljs's streaming reader
// (stream.xlsx.WorkbookReader) - confirm against the exceljs docs.
(async function main() {
  const workbook = new Excel.Workbook();
  await workbook.xlsx.read(xlsxRead);
  await workbook.csv.write(csvWrite, { sheetName: 'Worksheet Name' });
  csvRead()
    .pipe(csvToJson())
    .pipe(jsonWrite);
})().catch((err) => {
  // Report read/convert failures instead of leaving the promise unhandled.
  console.error(err);
  process.exitCode = 1;
});