在 Grunt 构建中验证 CSV 文件

validate CSV files in Grunt build

如何在我的 Grunt 构建中验证 CSV 文件(编码、标题、分隔符、列数)?我看过 CSVLint 但既没有让它工作,也不知道如何将它包含在 Grunt 中。

编辑:PapaParse 看起来很有前途,但也没有 Grunt 集成。

尽管 PapaParse 没有现成的 grunt 集成,但可以在 Gruntfile.js 中配置一个自定义函数任务(Function Task)来使用它的 API。

通过 npm 安装 papaparse

首先,cd 到您的项目目录,通过 npm 安装 papaparse,并将其添加到项目 package.json 的 devDependencies 部分。为此,请通过 CLI 工具运行以下命令:

$ npm i -D papaparse


Gruntfile.js

以下示例展示了如何在 Gruntfile.js 中配置名为 validateCSV 的自定义函数任务:

module.exports = function(grunt) {

    // Requirements
    var fs = require('fs');
    var Papa = require('papaparse');

    // Other project configuration tasks.
    grunt.initConfig({
        // ...
    });

    /**
     * Register a custom Function task to validate .csv files using Papa Parse.
     */
    grunt.registerTask('validateCSV', 'Lint .csv files via Papa Parse', function() {

        var glob = './csv/*.csv'; // <-- Note: Edit glob pattern as required.

        var success = true;

        // Create an Array of all .csv files using the glob pattern provided.
        var csvFiles = grunt.file.expand(glob).map(function(file) {
            return file;
        });

        // Report if no .csv files were found and return early.
        if (csvFiles.length === 0) {
            grunt.log.write('No .csv files were found');
            return;
        }

        // Loop over each .csv file in the csvFiles Array.
        csvFiles.forEach(function(csvFile) {

            // Read the contents of the .csv file.
            var csvString = fs.readFileSync(csvFile, {
                encoding: 'utf8'
            });

            // Parse the .csv contents via Papa Parse.
            var papa = Papa.parse(csvString, {
                delimiter: ',',
                newline: '',
                quoteChar: '"',
                header: true,
                skipEmptyLines: true

                // For additional config options visit:
                // http://papaparse.com/docs#config
            });

            // Basic error and success logging.
            if (papa.errors.length > 0) {
                grunt.log.error('Error(s) in file: '['red'] + csvFile['red']);

                // Report each error for a single .csv file.
                // For additional Papa Parse errors visit:
                // http://papaparse.com/docs#errors
                papa.errors.forEach(function(error) {
                    grunt.log.write('\n   type: ' + error.type);
                    grunt.log.write('\n   code: ' + error.code);
                    grunt.log.write('\n   message: ' + error.message);
                    grunt.log.write('\n   row: ' + error.row + '\n\n');
                });

                // Indicate that a .csv file failed validation.
                success = false;

            } else {
                grunt.log.ok('No errors found in file: ' + csvFile);
            }
        });

        // If errors are found in any of the .csv files this will
        // prevent subsequent defined tasks from being processed.
        if (!success) {
            grunt.fail.warn('Errors(s) were found when validating .csv files');
        }
    });

    // Register the custom Function task.
    grunt.registerTask('default', [
        'validateCSV'
        // ...
    ]);

};

注释

下面这行代码(取自上面的Gruntfile.js)是这样写的:

var glob = './csv/*.csv';

...需要根据您的项目要求进行修改。目前该 glob 模式假定所有 .csv 文件都位于名为 csv 的文件夹中。

可能还需要根据您的要求设置config选项。

自定义函数任务还包括一些将记录到 CLI 中的基本错误和成功报告。


运行 任务

要运行该 grunt 任务,只需通过您的 CLI 工具执行以下命令:

$ grunt validateCSV


编辑:更新答案 (基于以下评论...)

Would it also be possible to "configure" the task from within the grunt.initConfig()? For example linting different CSV directories?

为此,您可以创建一个单独的 JavaScript 模块,由它导出一个已注册的 MultiTask。

我们将其命名为 papaparse.js,并将其保存到名为 custom-grunt-tasks 的目录中,该目录与 Gruntfile.js 位于同一顶级目录中。

注意:此 .js 文件和目录的名称可以随意选择,但您需要相应地更新 Gruntfile.js 中的引用。

papaparse.js

module.exports = function(grunt) {

    'use strict';

    // Requirements
    var fs = require('fs');
    var Papa = require('papaparse');

    grunt.registerMultiTask('papaparse', 'Misc Tasks', function() {

        // Default options. These are used when no options are
        // provided via the  initConfig({...}) papaparse task.
        var options = this.options({
            quotes: false,
            delimiter: ',',
            newline: '',
            quoteChar: '"',
            header: true,
            skipEmptyLines: true
        });

        // Loop over each path provided via the src array.
        this.data.src.forEach(function(dir) {

            // Append a forward slash If a directory path
            // provided does not end in with one.
            if (dir.slice(-1) !== '/') {
                dir += '/';
            }

            // Generate the globbin pattern.
            var glob = [dir, '*.csv'].join('');

            // Create an Array of all .csv files using the glob pattern.
            var csvFiles = grunt.file.expand(glob).map(function(file) {
                return file;
            });

            // Report if no .csv files were found and return early.
            if (csvFiles.length === 0) {
                grunt.log.write(
                    '>> No .csv files found using the globbing '['yellow'] +
                    'pattern: '['yellow'] + glob['yellow']
                );
                return;
            }

            // Loop over each .csv file in the csvFiles Array.
            csvFiles.forEach(function(csvFile) {

                var success = true;

                // Read the contents of the .csv file.
                var csvString = fs.readFileSync(csvFile, {
                    encoding: 'utf8'
                });

                // Parse the .csv contents via Papa Parse.
                var papa = Papa.parse(csvString, options);

                // Basic error and success logging.
                if (papa.errors.length > 0) {
                    grunt.log.error('Error(s) in file: '['red'] + csvFile['red']);

                    // Report each error for a single .csv file.
                    // For additional Papa Parse errors visit:
                    // http://papaparse.com/docs#errors
                    papa.errors.forEach(function(error) {
                        grunt.log.write('\n   type: ' + error.type);
                        grunt.log.write('\n   code: ' + error.code);
                        grunt.log.write('\n   message: ' + error.message);
                        grunt.log.write('\n   row: ' + error.row + '\n\n');
                    });

                    // Indicate that a .csv file failed validation.
                    success = false;

                } else {
                    grunt.log.ok('No errors found in file: ' + csvFile);
                }

                // If errors are found in any of the .csv files this will prevent
                // subsequent files and defined tasks from being processed.
                if (!success) {
                    grunt.fail.warn('Errors(s) found when validating .csv files');
                }
            });

        });
    });
};

Gruntfile.js

您的 Gruntfile.js 可以这样配置:

module.exports = function(grunt) {

    grunt.initConfig({
        // ...
        papaparse: {
            setOne: {
                src: ['./csv/', './csv2']
            },
            setTwo: {
                src: ['./csv3/'],
                options: {
                    skipEmptyLines: false
                }
            }
        }

    });

    // Load the custom multiTask named `papaparse` - which is defined in
    // `papaparse.js` stored in the directory named `custom-grunt-tasks`.
    grunt.loadTasks('./custom-grunt-tasks');

    // Register and add papaparse to the default Task.
    grunt.registerTask('default', [
        'papaparse' // <-- This runs Targets named setOne and setTwo
        // ...
    ]);

    // `papaparse.js` allows for multiple targets to be defined, so
    // you can use the colon notation to just run one Target.
    // The following only runs the setTwo Target.
    grunt.registerTask('processOneTarget', [
        'papaparse:setTwo'
        // ...
    ]);

};

运行 任务

papaparse 任务已添加到 default 任务的 taskList 数组中,因此可以通过 CLI 工具输入以下命令来执行它:

$ grunt

备注

  1. 通过 CLI 输入 $ grunt 运行上面的示例,将处理名为 csv、csv2 和 csv3 的目录中的所有 .csv 文件。

  2. 通过 CLI 运行 $ grunt processOneTarget,将仅处理名为 csv3 的目录中的 .csv 文件。

  3. 由于 papaparse.js 使用 MultiTask,您会注意到 Gruntfile.js 中定义的 papaparse 任务包含两个目标(Target),即 setOne 和 setTwo。

  4. setOne 目标的 src 数组定义了两个应该处理的目录的路径,即目录 ./csv/ 和 ./csv2。在这些路径中找到的所有 .csv 文件将使用 papaparse.js 中定义的默认 papaparse 选项进行处理,因为该目标未定义任何自定义 options。

  5. setTwo 目标的 src 数组定义了一个目录的路径(即 ./csv3/)。在此路径中找到的所有 .csv 文件将使用 papaparse.js 中定义的默认 papaparse 选项进行处理,但 skipEmptyLines 选项除外,因为它被设置为 false。

  6. 您可能会发现,只需在Gruntfile.js中定义一个Target,在src数组中定义多个路径,无需任何自定义选项即可满足您的要求。例如:

// ...
    // Excerpt of a Gruntfile: the `papaparse` task is configured with a
    // single target, `myTask`, whose `src` array lists three directory
    // paths. The target defines no `options` of its own.
    grunt.initConfig({
        // ...
        papaparse: {
            myTask: {
                src: ['./csv/', './csv2', './csv3']
            }
        }
        // ...
    });
// ...

希望对您有所帮助!