使用 PDFKit 在 S3 中即时存储 PDF

Using PDFKit to store a PDF in S3 on the fly

我正在尝试在 Node.js 中用一些图像(data URI 格式)创建 PDF,并将该 PDF 存储到我的 S3 中。函数的返回值需要提供该文件的 S3 URL。

我在这里使用 parse-server 作为服务器,使用 node-canvas 创建图像的 canvas,然后使用 PDFKit 从 canvas 元素创建 pdf(jsPdf 没有成功)。现在我希望使用 AWS-SDK 将此 pdf 发送到我的 s3,最后返回文件的 URL。下面是我的代码,在生成 canvas 这一步之前都能正常工作。我不知道 pdf 是否真的被创建出来了,更不用说发送到 s3 了。哦!整个项目运行在 Heroku 上。

// Cloud function 'getBulkMeta' (first attempt).
// Reads base64 PNG labels from req.params.labels, draws them onto one canvas
// per page, feeds the canvases into a PDFKit document and tries to upload the
// result to S3 with putObject.
// req.params.size is indexed as [0] (page size, compared against 'A4') and
// [1] (labels per page); req.params.name is used as the S3 object key.
// NOTE(review): the async arrow function has no `return` of its own, so the
// cloud function resolves with undefined regardless of the upload outcome.
Parse.Cloud.define('getBulkMeta',async (req)=>{
    const PDFDocument = require('pdfkit'),
        {Canvas,loadImage} = require('canvas');

        try {       
            // The first label is loaded only to read the natural size shared by all cells.
            let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
            let labels = req.params.labels,
                allCanvas = [],
                rowH = baseImg.naturalHeight,
                rowW = baseImg.naturalWidth,
                perpage = req.params.size[1],
                pages = Math.ceil(labels.length/perpage),
                imgInd = 0,
                g = 10;
                // NOTE(review): the `let` list ended at the semicolon above, so this
                // assigns to an undeclared `size` (implicit global in sloppy mode).
                size = req.params.size[0];

            // Build one canvas per output page.
            for(var p=0;p<pages;p++){
                // A4 pages use a canvas twice the label size in each direction (2x2 grid).
                let canvas = new Canvas(rowW*((size=='A4')?2:1),rowH*((size=='A4')?2:1)),
                    ctx = canvas.getContext("2d");

                // Paint a white background over the whole canvas.
                ctx.beginPath();
                ctx.rect(0,0,canvas.width,canvas.height)
                ctx.fillStyle = "#fff";
                ctx.fill();

                if(perpage == 1){
                    // Single label per page, inset by the gutter g on every side.
                    let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                    ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
                } else {
                    // Fill up to a 2x2 grid; stop early when the page is full or
                    // the labels run out.
                    var thisImgInd = 0;
                    for (var r=0;r<2;r++){
                        for(var c=0;c<2;c++){
                            let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                            ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
                            thisImgInd++
                            if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                        }
                        // Same condition repeated to leave the outer loop as well.
                        if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                    }
                }
                allCanvas.push(canvas)
            }
        var thisPDF = new PDFDocument({layout: 'landscape',size:size});
        var bcoded;
        // NOTE(review): pipe() is handed an arrow function here rather than a
        // writable stream, so `bcoded` is presumably never assigned — confirm
        // against the PDFKit / Node stream documentation.
        thisPDF.pipe(()=>{bcoded = new Buffer.from(thisPDF).toString('base64')});
        // One PDF page per canvas; addPage() is skipped for the first canvas
        // (index 0).
        allCanvas.forEach((c,i)=>{
            if(i){thisPDF.addPage();}
            // NOTE(review): page.width is passed twice (no page.height) — verify
            // against PDFKit's image() signature.
            thisPDF.image(c.toDataURL(),0,0,thisPDF.page.width,thisPDF.page.width);
        })
        thisPDF.end();
        const S3_BUCKET = process.env.S3_BUCKET;
        // `aws` is not defined inside this function; presumably required elsewhere in the file.
        aws.config.region = process.env.AWS_REGION;
        aws.config.signatureVersion  = 'v4';

        let s3 = new aws.S3();
        let fileName = req.params.name;
        let s3Params = {
            Bucket: S3_BUCKET,
            Body: bcoded,
            Key: fileName,
            ContentType : 'application/pdf',
            ACL: 'public-read'
        };
        // NOTE(review): both the `throw` and the `return` below belong to this
        // callback, not to the cloud function — the value returned here never
        // reaches the caller, and the throw is not seen by the outer try/catch
        // if the callback runs after the function body has finished.
        s3.putObject(s3Params, (err, data) => {
            if(err){
                console.log('\n\n\n\n\n\n\n'+err+'\n\n\n\n\n\n\n');
                throw 'Error: '+ (err);
            }
            let returnData = {
                signedRequest: data,
                url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}`
            };
            return (returnData);
        })
    } catch (e) {throw e;}
})

更新。我已经使用以下代码将 pdf 文件保存在 s3 中:

    // Cloud function 'getBulkMeta' (updated attempt).
    // Same canvas generation as before; the PDF bytes are now collected from the
    // PDFKit document's 'data' events and uploaded to S3 from its 'end' handler.
    // NOTE(review): the function returns `returnData` immediately after
    // thisPDF.end(), without waiting for the upload callback, so the caller
    // receives the initial value 'Hi'.
    Parse.Cloud.define('getBulkMeta',async (req)=>{
    const PDFDocument = require('pdfkit'),
        {Canvas,loadImage} = require('canvas');

        try {       
            // The first label is loaded only to read the natural size shared by all cells.
            let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
            let labels = req.params.labels,
                allCanvas = [],
                rowH = baseImg.naturalHeight,
                rowW = baseImg.naturalWidth,
                perpage = req.params.size[1],
                pages = Math.ceil(labels.length/perpage),
                imgInd = 0,
                g = 10;
                // NOTE(review): the `let` list ended at the semicolon above, so this
                // assigns to an undeclared `size` (implicit global in sloppy mode).
                size = req.params.size[0];

            // Build one canvas per output page.
            for(var p=0;p<pages;p++){
                // Canvas is created without dimensions; they are set just below.
                let canvas = new Canvas(),
                    ctx = canvas.getContext("2d");

                // A4 pages use a canvas twice the label size in each direction (2x2 grid).
                canvas.height = rowH*((size=='A4')?2:1);
                canvas.width = rowW*((size=='A4')?2:1);

                // Paint a white background over the whole canvas.
                ctx.beginPath();
                ctx.rect(0,0,canvas.width,canvas.height)
                ctx.fillStyle = "#fff";
                ctx.fill();

                if(perpage == 1){
                    // Single label per page, inset by the gutter g on every side.
                    let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                    ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
                } else {
                    // Fill up to a 2x2 grid; stop early when the page is full or
                    // the labels run out.
                    var thisImgInd = 0;
                    for (var r=0;r<2;r++){
                        for(var c=0;c<2;c++){
                            let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
                            ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
                            thisImgInd++
                            if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                        }
                        // Same condition repeated to leave the outer loop as well.
                        if(thisImgInd>=perpage||imgInd>=labels.length){break;}
                    }
                }
                allCanvas.push(canvas)
            }
        var thisPDF = new PDFDocument({layout: 'landscape',size:size});
        let buffers = [],pdfData,returnData='Hi';
        // Collect every chunk the document emits.
        thisPDF.on('data', buffers.push.bind(buffers));
        // Once the document has ended, join the chunks and upload them.
        thisPDF.on('end',() => {
            pdfData = Buffer.concat(buffers);
            const S3_BUCKET = process.env.S3_BUCKET;
            // `aws` is not defined inside this function; presumably required elsewhere in the file.
            aws.config.region = process.env.AWS_REGION;
            aws.config.signatureVersion  = 'v4';

            let s3 = new aws.S3();
            let fileName = req.params.name;
            let s3Params = {
                Bucket: S3_BUCKET,
                Body: pdfData,
                // Timestamp prefix on the stored key.
                Key: (+new Date())+'-'+fileName,
                ContentType : 'application/pdf',
                ACL: 'public-read'
            };
            s3.putObject(s3Params,(err, data) => {
                // NOTE(review): this is a comma expression (`delete pdfData`, then
                // `thisPDF`); `delete` has no effect on declared variables. The
                // null assignments on the next line are what drop the references.
                delete pdfData,thisPDF;
                pdfData = null;thisPDF = null;
                // NOTE(review): thrown inside the callback, so the outer try/catch
                // does not see it if the callback runs later.
                if(err){ throw 'Error: '+ (err); }
                // NOTE(review): the URL uses the bare fileName, not the
                // timestamp-prefixed Key the object was stored under.
                returnData = { signedRequest: data, url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}` };
            })
        })
        // One PDF page per canvas; addPage() is skipped for the first canvas
        // (index 0).
        allCanvas.forEach((c,i)=>{
            if(i){thisPDF.addPage();}
            thisPDF.image(c.toDataURL(),0,0,{fit:[thisPDF.page.width,thisPDF.page.height]});
        })
        thisPDF.end();
        // Returned without awaiting the upload; see the note at the top.
        return returnData;
    } catch (e) {throw e;}
})

但是,returnData 总是给出 "Hi" 作为输出,而且函数似乎也没有关闭 - Heroku 每次都会抛出内存超出错误。

由于创建 PDF 并将其发送到 S3 都是异步操作,您的云函数会在这些操作实际完成之前就返回。这就是为什么您的 returnData 变量中总是 "Hi" 的原因。您需要创建一个 Promise,并等待(await)它在这两个操作都结束后完成。它应该是这样的:

// Build the PDF, upload it to S3, and suspend the cloud function until the
// upload has finished, so the caller gets the real result instead of a
// placeholder.
//
// Expects these names from the enclosing cloud function / module:
//   PDFDocument, aws, req, size, allCanvas, and a declared `returnData`.
//
// Resolves with { signedRequest, url }; rejects with the PDFKit stream error
// or the S3 error, which the surrounding async function then throws.
returnData = await new Promise((resolve, reject) => {
  const thisPDF = new PDFDocument({ layout: 'landscape', size: size });
  const buffers = [];

  thisPDF.on('data', (chunk) => buffers.push(chunk));
  thisPDF.on('error', reject);
  thisPDF.on('end', () => {
    const pdfData = Buffer.concat(buffers);
    const S3_BUCKET = process.env.S3_BUCKET;
    aws.config.region = process.env.AWS_REGION;
    aws.config.signatureVersion = 'v4';

    const s3 = new aws.S3();
    // Compute the key once so the uploaded object and the returned URL
    // always refer to the same (timestamp-prefixed) name.
    const key = `${Date.now()}-${req.params.name}`;
    const s3Params = {
      Bucket: S3_BUCKET,
      Body: pdfData,
      Key: key,
      ContentType: 'application/pdf',
      ACL: 'public-read',
    };

    s3.putObject(s3Params, (err, data) => {
      if (err) {
        // Return so a failed upload is never followed by resolve().
        reject(err);
        return;
      }
      resolve({
        signedRequest: data,
        url: `https://${S3_BUCKET}.s3.amazonaws.com/${key}`,
      });
    });
  });

  // The pages must be written and the document ended inside the promise;
  // without end() the 'end' event never fires and the await never completes.
  allCanvas.forEach((c, i) => {
    if (i) { thisPDF.addPage(); }
    thisPDF.image(c.toDataURL(), 0, 0, { fit: [thisPDF.page.width, thisPDF.page.height] });
  });
  thisPDF.end();
});

顺便说一句,您可以使用 Parse S3 Adapter 并将 PDF 保存为常规 Parse 文件,而不是使用 AWS SDK。