"InvalidParameterType" 从外部源作为 blob 发送到 AWS Textract 的图像文件出错

Question

目前

我正在尝试让 AWS Textract 处理由 Google 脚本中的函数提供的图像，这些图像会被发送到一个 Lambda resolver。我参照的是以下文档：https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Textract.html#analyzeDocument-property

我的Google脚本代码：

/**
 * Looks up a Drive file by ID, logs its MIME type, and PUTs a JSON body of
 * the form `{"doc": <blob>}` to the resolver endpoint.
 *
 * The Blob object returned by `getBlob()` is placed in the payload as-is and
 * serialised with `JSON.stringify`.
 *
 * @param {string} id - ID of the Drive file to send.
 */
function googleFunction(id) {
  const driveFile = DriveApp.getFileById(id);
  const mimeType = driveFile.getMimeType();
  console.log("File is a " + mimeType);

  // Serialise the request body up front so the fetch call stays compact.
  const body = JSON.stringify({ doc: driveFile.getBlob() });

  UrlFetchApp.fetch("https://api-path/prod/resolver", {
    method: "PUT",
    "Content-Type": "application/json",
    payload: body,
  });
}

我的 Lambda resolver 代码：

"use strict";

const AWS = require("aws-sdk");

/**
 * Lambda entry point: parses `event.body` as JSON, takes its `doc` field and
 * submits it to Textract's `analyzeDocument`, logging either the error or the
 * result.
 *
 * This is the version that produces the reported `InvalidParameterType`
 * error.
 *
 * @param {object} event - Invocation event; `event.body` must be a JSON string
 *   containing a `doc` property.
 */
exports.handler = async (event) => {
  let params = JSON.parse(event.body);
  console.log("Parse as document...");
  let textract = new AWS.Textract();
  // `doc` is whatever JSON.parse produced for the "doc" key; it is not
  // converted before being handed to the SDK.
  let doc = params["doc"];
  let config = {
    Document: {
      // The reported error says this value must be a string, Buffer, Stream,
      // Blob, or typed array object.
      Bytes: doc,
      // NOTE(review): FeatureTypes sits inside Document here; the linked SDK
      // documentation appears to list it next to Document instead. Confirm.
      FeatureTypes: ["TABLES"],
    }
  };
  // NOTE(review): the async handler does not await this callback-style call;
  // confirm the callback gets a chance to run before the invocation ends.
  textract.analyzeDocument(config, function (err, data) {
    console.log("analyzing...");
    if (err) {
      console.log(err, err.stack);
    }
    // an error occurred
    else {
      // NOTE(review): `JSON.stringfy` looks like a typo for `JSON.stringify`.
      console.log("data:" + JSON.stringfy(data));
    } // successful response
  });
};

问题

文件已成功从 Google 脚本发送到 Lambda，但返回以下错误：

"errorType": "InvalidParameterType",
"errorMessage": "Expected params.Document.Bytes to be a string, Buffer, Stream, Blob, or typed array object"

疑问

有没有办法验证 doc 变量的格式，以确保它符合 AWS Textract 的要求？
谁能看出返回错误的可能原因？

补充说明

当同一文件上传到 S3 存储并在配置中提供时，Textract 工作正常： S3Object: { Bucket: 'bucket_name', Name: 'file_name' }
我已确认文件是 JPEG

Answer 1

通过 2 处更改使其正常工作：

在 Google 端代码中添加 getBytes()
在 AWS 端代码中添加 Buffer.from()

我的Google脚本代码：

/**
 * Sends the raw bytes of a Drive file to the Lambda resolver endpoint.
 *
 * The request is a JSON PUT whose body has the shape `{"doc": [<byte>, ...]}`.
 *
 * @param {string} id - ID of the Drive file to send.
 */
function googleFunction(id) {
  const file = DriveApp.getFileById(id);
  console.log("File is a " + file.getMimeType());

  // Send the file's bytes rather than the Blob object itself, so the JSON
  // payload carries the actual file content.
  const bytes = file.getBlob().getBytes();

  const options = {
    method: "PUT",
    // UrlFetchApp takes the body's MIME type from the `contentType` option;
    // a top-level "Content-Type" key is not one of its parameters.
    contentType: "application/json",
    payload: JSON.stringify({ doc: bytes }),
  };

  UrlFetchApp.fetch("https://api-path/prod/resolver", options);
}

我的 Lambda resolver 代码：

"use strict";

const AWS = require("aws-sdk");

exports.handler = async (event) => {
  let params = JSON.parse(event.body);
  console.log("Parse as document...");
  let textract = new AWS.Textract();
  let doc = params["doc"];
  let config = {
    Document: {
      Bytes: Buffer.from(doc),
      FeatureTypes: ["TABLES"],
    }
  };
  textract.analyzeDocument(config, function (err, data) {
    console.log("analyzing...");
    if (err) {
      console.log(err, err.stack);
    }
    // an error occurred
    else {
      console.log("data:" + JSON.stringfy(data));
    } // successful response
  });
};

"InvalidParameterType" 从外部源作为 blob 发送到 AWS Textract 的图像文件出错

"InvalidParameterType" error for image files sent as blob to AWS Textract from external source

amazon-web-services

google-apps-script

aws-lambda

aws-sdk-js

amazon-textract