从外部源以 blob 形式发送到 AWS Textract 的图像文件出现 "InvalidParameterType" 错误
"InvalidParameterType" error for image files sent as blob to AWS Textract from external source
背景
我正在尝试让 AWS Textract 处理由 Google 脚本中的函数提供的图像,这些图像会被发送到一个 Lambda resolver。我参照的是以下文档:https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property
我的Google脚本代码:
/**
 * Looks up a Drive file by id, logs its MIME type, and PUTs a JSON payload
 * containing the file's Blob to the resolver endpoint.
 *
 * This is the failing variant from the question: the Lambda that receives this
 * payload reports "InvalidParameterType" for params.Document.Bytes.
 *
 * @param {string} id - Identifier handed to DriveApp.getFileById.
 */
function googleFunction(id) {
let file = DriveApp.getFileById(id);
console.log("File is a " + file.getMimeType());
// The Blob object itself (not its byte content) goes into the payload.
// NOTE(review): presumably JSON.stringify cannot carry a Blob's binary data,
// so the receiver never sees the image bytes — confirm.
let blob = file.getBlob();
let params = {
doc: blob,
};
var options = {
method: "PUT",
// NOTE(review): UrlFetchApp documents this option as `contentType`; a
// top-level "Content-Type" key is likely ignored — confirm.
"Content-Type": "application/json",
payload: JSON.stringify(params),
};
// The response is stored but never inspected or returned.
let response = UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
我的 Lambda resolver
代码:
"use strict";
const AWS = require("aws-sdk");
/**
 * Lambda handler: parses the JSON request body and forwards its `doc` field to
 * Textract's analyzeDocument, logging either the error or the result.
 *
 * This is the failing variant from the question: `doc` is passed to Textract
 * exactly as JSON.parse produced it, with no conversion to a Buffer.
 *
 * @param {object} event - Invocation event; `event.body` is parsed as JSON.
 */
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
// Passed through unconverted; the SDK rejects it with InvalidParameterType.
Bytes: doc,
// NOTE(review): the analyzeDocument reference shows FeatureTypes as a
// top-level parameter next to Document, not nested inside it — confirm.
FeatureTypes: ["TABLES"],
}
};
// NOTE(review): the handler is async but this callback-style call is not
// awaited, so the handler's promise can settle before the callback fires.
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
// An error occurred: log it together with its stack trace.
console.log(err, err.stack);
}
else {
// Successful response.
// NOTE(review): `JSON.stringfy` looks like a typo for `JSON.stringify`.
console.log("data:" + JSON.stringfy(data));
}
});
};
问题
文件已成功从 Google 脚本发送到 Lambda,但返回以下错误:
"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"
疑问
- 有没有办法验证 doc 变量的格式,以确保它符合 AWS Textract 的要求?
- 谁能看出返回错误的可能原因?
注释
- 当同一文件上传到 S3 存储并在配置中提供时,Textract 工作正常:
S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
- 我已确认文件是 JPEG
通过 2 处更改使其正常工作:
- 在 Google 端代码中添加 getBytes()
- 在 AWS 端代码中添加 Buffer.from()
我的Google脚本代码:
/**
 * Sends the raw bytes of a Drive file to the Lambda resolver as a JSON body.
 *
 * The payload has the shape `{ "doc": [byte, byte, ...] }`, which the receiving
 * side can turn back into binary with `Buffer.from(doc)`.
 *
 * @param {string} id - Identifier handed to DriveApp.getFileById.
 * @returns {*} Whatever UrlFetchApp.fetch returns for the PUT request.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log("File is a " + file.getMimeType());

  // getBytes() yields a plain array of byte values, which JSON.stringify can
  // serialize; the Blob object itself cannot carry its content through JSON.
  const bytes = file.getBlob().getBytes();

  const options = {
    method: "PUT",
    // UrlFetchApp takes the content type from the `contentType` option; a
    // top-level "Content-Type" key is not one of its recognised parameters.
    contentType: "application/json",
    payload: JSON.stringify({ doc: bytes }),
  };

  return UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
我的 Lambda resolver
代码:
"use strict";
const AWS = require("aws-sdk");
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
Bytes: Buffer.from(doc),
FeatureTypes: ["TABLES"],
}
};
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
console.log(err, err.stack);
}
// an error occurred
else {
console.log("data:" + JSON.stringfy(data));
} // successful response
});
};
背景
我正在尝试让 AWS Textract 处理由 Google 脚本中的函数提供的图像,这些图像会被发送到一个 Lambda resolver。我参照的是以下文档:https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property
我的 Google 脚本代码:
/**
 * Looks up a Drive file by id, logs its MIME type, and PUTs a JSON payload
 * containing the file's Blob to the resolver endpoint.
 *
 * This is the failing variant from the question: the Lambda that receives this
 * payload reports "InvalidParameterType" for params.Document.Bytes.
 *
 * @param {string} id - Identifier handed to DriveApp.getFileById.
 */
function googleFunction(id) {
let file = DriveApp.getFileById(id);
console.log("File is a " + file.getMimeType());
// The Blob object itself (not its byte content) goes into the payload.
// NOTE(review): presumably JSON.stringify cannot carry a Blob's binary data,
// so the receiver never sees the image bytes — confirm.
let blob = file.getBlob();
let params = {
doc: blob,
};
var options = {
method: "PUT",
// NOTE(review): UrlFetchApp documents this option as `contentType`; a
// top-level "Content-Type" key is likely ignored — confirm.
"Content-Type": "application/json",
payload: JSON.stringify(params),
};
// The response is stored but never inspected or returned.
let response = UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
我的 Lambda resolver
代码:
"use strict";
const AWS = require("aws-sdk");
/**
 * Lambda handler: parses the JSON request body and forwards its `doc` field to
 * Textract's analyzeDocument, logging either the error or the result.
 *
 * This is the failing variant from the question: `doc` is passed to Textract
 * exactly as JSON.parse produced it, with no conversion to a Buffer.
 *
 * @param {object} event - Invocation event; `event.body` is parsed as JSON.
 */
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
// Passed through unconverted; the SDK rejects it with InvalidParameterType.
Bytes: doc,
// NOTE(review): the analyzeDocument reference shows FeatureTypes as a
// top-level parameter next to Document, not nested inside it — confirm.
FeatureTypes: ["TABLES"],
}
};
// NOTE(review): the handler is async but this callback-style call is not
// awaited, so the handler's promise can settle before the callback fires.
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
// An error occurred: log it together with its stack trace.
console.log(err, err.stack);
}
else {
// Successful response.
// NOTE(review): `JSON.stringfy` looks like a typo for `JSON.stringify`.
console.log("data:" + JSON.stringfy(data));
}
});
};
问题
文件已成功从 Google 脚本发送到 Lambda,但返回以下错误:
"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"
疑问
- 有没有办法验证 doc 变量的格式,以确保它符合 AWS Textract 的要求?
- 谁能看出返回错误的可能原因?
注释
- 当同一文件上传到 S3 存储并在配置中提供时,Textract 工作正常:
S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
- 我已确认文件是 JPEG
通过 2 处更改使其正常工作:
- 在 Google 端代码中添加 getBytes()
- 在 AWS 端代码中添加 Buffer.from()
我的Google脚本代码:
/**
 * Sends the raw bytes of a Drive file to the Lambda resolver as a JSON body.
 *
 * The payload has the shape `{ "doc": [byte, byte, ...] }`, which the receiving
 * side can turn back into binary with `Buffer.from(doc)`.
 *
 * @param {string} id - Identifier handed to DriveApp.getFileById.
 * @returns {*} Whatever UrlFetchApp.fetch returns for the PUT request.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log("File is a " + file.getMimeType());

  // getBytes() yields a plain array of byte values, which JSON.stringify can
  // serialize; the Blob object itself cannot carry its content through JSON.
  const bytes = file.getBlob().getBytes();

  const options = {
    method: "PUT",
    // UrlFetchApp takes the content type from the `contentType` option; a
    // top-level "Content-Type" key is not one of its recognised parameters.
    contentType: "application/json",
    payload: JSON.stringify({ doc: bytes }),
  };

  return UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}
我的 Lambda resolver
代码:
"use strict";
const AWS = require("aws-sdk");
exports.handler = async (event) => {
let params = JSON.parse(event.body);
console.log("Parse as document...");
let textract = new AWS.Textract();
let doc = params["doc"];
let config = {
Document: {
Bytes: Buffer.from(doc),
FeatureTypes: ["TABLES"],
}
};
textract.analyzeDocument(config, function (err, data) {
console.log("analyzing...");
if (err) {
console.log(err, err.stack);
}
// an error occurred
else {
console.log("data:" + JSON.stringfy(data));
} // successful response
});
};