Node.js 以流的方式处理文件而不保存到内存

Node.js: streaming a file without saving it to memory

我正在构建一个需要接受文件上传的 API。用户可以将文件 POST 到某个端点,该文件会先被送去做病毒扫描,如果是干净的,再被发送到存储(可能是 S3)。到目前为止,我已经实现了这一点,但存在一个问题:文件会被临时保存在应用程序的文件系统中。我需要设计一个不在内存中存储内容的应用程序。这是我目前可以工作的代码:

app.js

const express = require('express');
const bb = require('express-busboy');

const app = express();

// Hand the Express app to express-busboy so that incoming uploads are parsed;
// file uploads are enabled and the upload path is set to ./tmp.
const uploadOptions = {
    upload: true,
    path: './tmp'
};
bb.extend(app, uploadOptions);

Routes.js

const express = require('express');
const router = express.Router();
const fileManagementService = require('./file-management-service')();

// POST /:fileId
// Passes the parsed upload(s) to the file-management service and replies with
// 201 plus the service's result; any failure is forwarded to next().
async function handleFileUpload(req, res, next) {
    try {
        const fileId = req.params.fileId;
        const uploadedFiles = req.files;
        const result = await fileManagementService.postFile(uploadedFiles, fileId);

        res.status(201).json(result);
    } catch (error) {
        next(error);
    }
}

router.route('/:fileId').post(handleFileUpload);

file-management-service.js

const fs = require('fs');

/**
 * Builds the file-management service.
 *
 * @returns {Readonly<{postFile: Function}>} Frozen object exposing `postFile`.
 */
function createUploader() {
    /**
     * POST /:fileId
     *
     * Streams the uploaded temp file to the virus scanner and, when the scan
     * status is 'OK', hands the file to storage. The temp file is removed
     * whether scanning/uploading succeeds or fails.
     *
     * @param {{file: {file: string}}} data - Parsed uploads; `data.file.file`
     *     is the path of the temp file on disk.
     * @param {string} fileId - Identifier echoed back in the result.
     * @returns {Promise<{fileId: string, scanned: object, upload: *}>} The scan
     *     output plus the upload result ('NOT UPLOADED' when the scan failed).
     * @throws {TypeError} When no uploaded file path is present in `data`.
     */
    async function postFile(data, fileId) {
        const file = data && data.file && data.file.file;
        if (!file) {
            throw new TypeError('postFile: expected an uploaded file at data.file.file');
        }

        const fileStream = fs.createReadStream(file);
        try {
            const scanOutput = await scanFile(fileStream); // Function scans file for viruses
            const isClean = scanOutput.status === 'OK';
            let upload = 'NOT UPLOADED';
            if (isClean) {
                upload = await postS3Object({file}); // Some function that sends the file to S3 or other storage
            }
            return {
                fileId,
                scanned: scanOutput,
                upload
            };
        } finally {
            // The stream is being discarded: a late open/read failure (e.g. the
            // file vanishing underneath it) must not become an unhandled 'error'.
            fileStream.on('error', () => {});
            // Release the descriptor if the scanner bailed out before draining it.
            fileStream.destroy();
            // Always remove the temp file, including when scan or upload threw.
            await fs.promises.unlink(file);
        }
    }

    return Object.freeze({
        postFile
    });
}

module.exports = createUploader;

如前所述,上述代码按预期工作:文件先被送去扫描,然后发送到 S3 存储桶,最后再把响应返回给发起请求的客户端。然而,我的 express-busboy 实现会把文件存储在 ./tmp 文件夹中,然后我在将其发送给 AV(杀毒扫描)之前用 fs.createReadStream(filePath); 把它转换为可读流,在把文件发送到 S3 时又要再做一次。

此 API 托管在 kubernetes 集群中,我需要避免产生状态。如何在不实际保存文件的情况下实现上述目标?我猜 busboy 本来就是以某种流的形式接收这个文件的,所以——恕我问得外行——能不能让它一直保持为流,直接通过管道传给这些函数来达到同样的结果?

您可以在更底层直接使用 busboy,并访问它转换后的读取流。下面是来自 busboy 文档的示例,可以根据您的情况进行调整:

// Minimal upload server adapted from the busboy documentation: every POST is
// parsed by a fresh Busboy instance and each uploaded file stream is piped to
// a file in the OS temp directory; any other method gets a 404.
http.createServer(function(req, res) {
  if (req.method === 'POST') {
    const busboy = new Busboy({ headers: req.headers });
    busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
      // `file` is a readable stream with the uploaded file's data.
      // os.tmpdir() is the supported spelling; the old os.tmpDir() alias has
      // been removed from current Node.js releases.
      const saveTo = path.join(os.tmpdir(), path.basename(fieldname));
      file.pipe(fs.createWriteStream(saveTo));
    });
    // The whole upload has been parsed: send the final response.
    busboy.on('finish', function() {
      res.writeHead(200, { 'Connection': 'close' });
      res.end("That's all folks!");
    });
    // Feed the request body to busboy so it can start emitting 'file' events.
    return req.pipe(busboy);
  }
  res.writeHead(404);
  res.end();
}).listen(8000, function() {
  console.log('Listening for requests');
});

关键部分如下,我加了注释:

    // create a new busboy instance on each incoming request that has files with it
    var busboy = new Busboy({ headers: req.headers });

    // register for the file event
    busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
      // at this point the file argument is a readstream for the data of an uploaded file
      // you can do whatever you want with this readstream such as
      // feed it directly to your anti-virus 

      // this example code saves it to a tempfile
      // you would replace this with code that sends the stream to your anti-virus
      var saveTo = path.join(os.tmpDir(), path.basename(fieldname));
      file.pipe(fs.createWriteStream(saveTo));
    });

    // this recognizes the end of the upload stream and sends 
    // whatever you want the final http response to be
    busboy.on('finish', function() {
      res.writeHead(200, { 'Connection': 'close' });
      res.end("That's all folks!");
    });

    // this gets busboy started, feeding the incoming request to busboy
    // so it can start reading it and parsing it and will eventually trigger
    // one or more "file" events
    return req.pipe(busboy);

当您识别出需要执行这种自定义 busboy 处理的传入请求时,创建一个 Busboy 实例,把请求头(headers)传给它,并注册 file 事件。该 file 事件会提供一个新的 file 读取流,也就是以读取流形式呈现的、已转换好的文件内容。然后,您可以将该流直接通过管道传输给杀毒扫描程序,而无需经过文件系统。