将文件从 GCP 存储桶发送到第 3 方 pdf 转换器
Send file from GCP bucket to 3rd party pdf converter
我正在尝试改编 Qwiklabs 教程以使用 pdfCrowd,而不是 LibreOffice。
该服务的工作原理是从 'uploads' 的一个 GCP 存储桶下载一个文件,对其进行处理,然后将其上传到另一个 'processed' 个文件的存储桶,然后从 'uploads'桶.
这是下载、上传、发送处理、删除的功能。这是来自 Qwiklabs tut 的代码,效果很好。
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
await downloadFile(file.bucket, file.name);
const pdfFileName = await convertFile(file.name);
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
})
原来的convertFile函数是:
async function convertFile(fileName) {
const cmd = 'libreoffice --headless --convert-to pdf --outdir /tmp ' +
`"/tmp/${fileName}"`;
console.log(cmd);
const { stdout, stderr } = await exec(cmd);
if (stderr) {
throw stderr;
}
console.log(stdout);
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
当我更改 convertFile 函数时出现问题。 LibreOffice 接受 file.name,但 pdfCrowd 需要文件路径。
所以我将 pdfCrowd 的功能更改为:
async function convertFile(fileName) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return console.error("Pdfcrowd Error: " + err);
console.log("Success: the file was created " + fileName);
});
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
现在 pdf 转换 returns 成功,但在通知说没有文件或目录在我传递给 convertFileToFile 的 'out' 文件路径中指定后。 _newPdfPath 指定的文件不存在。
Error: Error: ENOENT: no such file or directory, stat '/tmp/mynew.pdf'
Success: the file was created /tmp/hello (31).pdf
pdfCrowd 函数应该是在 tmp 目录中创建文件,但是 async 是否正在等待在 tmp 目录中创建文件?
我的完整代码是:
const {promisify} = require('util');
const {Storage} = require('@google-cloud/storage');
const exec = promisify(require('child_process').exec);
const storage = new Storage();
const express = require('express');
const bodyParser = require('body-parser');
const app = express();
const pdfcrowd = require("pdfcrowd");
app.use(bodyParser.json());
const port = process.env.PORT || 8080;
app.listen(port, () => {
console.log('Listening on port', port);
});
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
// console.log("FILE=========", file, req.body.message.data)
await downloadFile(file.bucket, file.name);
const pdfFileName = await convertFile(file.name);
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
})
function decodeBase64Json(data) {
return JSON.parse(Buffer.from(data, 'base64').toString());
}
async function downloadFile(bucketName, fileName) {
const options = {destination: `/tmp/${fileName}`};
await storage.bucket(bucketName).file(fileName).download(options);
}
async function convertFile(fileName) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return console.error("Pdfcrowd Error: " + err);
console.log("Success: the file was created " + fileName);
});
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
async function deleteFile(bucketName, fileName) {
await storage.bucket(bucketName).file(fileName).delete();
}
async function uploadFile(bucketName, fileName) {
await storage.bucket(bucketName).upload(`/tmp/${fileName}`);
}
问题是您的 convertFile 函数在调用 convertFileToFile 回调之前完成。
我会将成功和错误的回调传递给 convertFile,例如
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
// console.log("FILE=========", file, req.body.message.data)
await downloadFile(file.bucket, file.name);
let on_pdf_done = async function(pdfFileName) {
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
};
let on_pdf_fail = function() {
res.set('Content-Type', 'text/plain');
res.send('\n\nERROR\n\n');
};
convertFile(file.name, on_pdf_done, on_pdf_fail);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
})
function convertFile(fileName, success_callback, fail_callback) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return fail_callback();
success_callback(fileName.replace(/\.\w+$/, '.pdf'));
});
}
我正在尝试改编 Qwiklabs 教程以使用 pdfCrowd,而不是 LibreOffice。
该服务的工作原理是从 'uploads' 的一个 GCP 存储桶下载一个文件,对其进行处理,然后将其上传到另一个 'processed' 个文件的存储桶,然后从 'uploads'桶.
这是下载、上传、发送处理、删除的功能。这是来自 Qwiklabs tut 的代码,效果很好。
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
await downloadFile(file.bucket, file.name);
const pdfFileName = await convertFile(file.name);
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
})
原来的convertFile函数是:
async function convertFile(fileName) {
const cmd = 'libreoffice --headless --convert-to pdf --outdir /tmp ' +
`"/tmp/${fileName}"`;
console.log(cmd);
const { stdout, stderr } = await exec(cmd);
if (stderr) {
throw stderr;
}
console.log(stdout);
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
当我更改 convertFile 函数时出现问题。 LibreOffice 接受 file.name,但 pdfCrowd 需要文件路径。
所以我将 pdfCrowd 的功能更改为:
async function convertFile(fileName) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return console.error("Pdfcrowd Error: " + err);
console.log("Success: the file was created " + fileName);
});
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
现在 pdf 转换 returns 成功,但在通知说没有文件或目录在我传递给 convertFileToFile 的 'out' 文件路径中指定后。 _newPdfPath 指定的文件不存在。
Error: Error: ENOENT: no such file or directory, stat '/tmp/mynew.pdf'
Success: the file was created /tmp/hello (31).pdf
pdfCrowd 函数应该是在 tmp 目录中创建文件,但是 async 是否正在等待在 tmp 目录中创建文件?
我的完整代码是:
const {promisify} = require('util');
const {Storage} = require('@google-cloud/storage');
const exec = promisify(require('child_process').exec);
const storage = new Storage();
const express = require('express');
const bodyParser = require('body-parser');
const app = express();
const pdfcrowd = require("pdfcrowd");
app.use(bodyParser.json());
const port = process.env.PORT || 8080;
app.listen(port, () => {
console.log('Listening on port', port);
});
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
// console.log("FILE=========", file, req.body.message.data)
await downloadFile(file.bucket, file.name);
const pdfFileName = await convertFile(file.name);
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
})
function decodeBase64Json(data) {
return JSON.parse(Buffer.from(data, 'base64').toString());
}
async function downloadFile(bucketName, fileName) {
const options = {destination: `/tmp/${fileName}`};
await storage.bucket(bucketName).file(fileName).download(options);
}
async function convertFile(fileName) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return console.error("Pdfcrowd Error: " + err);
console.log("Success: the file was created " + fileName);
});
pdfFileName = fileName.replace(/\.\w+$/, '.pdf');
return pdfFileName;
}
async function deleteFile(bucketName, fileName) {
await storage.bucket(bucketName).file(fileName).delete();
}
async function uploadFile(bucketName, fileName) {
await storage.bucket(bucketName).upload(`/tmp/${fileName}`);
}
问题是您的 convertFile 函数在调用 convertFileToFile 回调之前完成。
我会将成功和错误的回调传递给 convertFile,例如
app.post('/', async (req, res) => {
try {
const file = decodeBase64Json(req.body.message.data);
// console.log("FILE=========", file, req.body.message.data)
await downloadFile(file.bucket, file.name);
let on_pdf_done = async function(pdfFileName) {
await uploadFile(process.env.PDF_BUCKET, pdfFileName);
await deleteFile(file.bucket, file.name);
res.set('Content-Type', 'text/plain');
res.send('\n\nOK\n\n');
};
let on_pdf_fail = function() {
res.set('Content-Type', 'text/plain');
res.send('\n\nERROR\n\n');
};
convertFile(file.name, on_pdf_done, on_pdf_fail);
}
catch (ex) {
console.log(`Error: ${ex}`);
}
})
function convertFile(fileName, success_callback, fail_callback) {
// create the API client instance
const _newPdfPath = `/tmp/${fileName.replace(/\.\w+$/, '.pdf')}`
const client = new pdfcrowd.HtmlToPdfClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");
// run the conversion and write the result to a file
client.convertFileToFile(`/tmp/${fileName}`, _newPdfPath, function (err, fileName) {
if (err)
return fail_callback();
success_callback(fileName.replace(/\.\w+$/, '.pdf'));
});
}