如果我有来自 AWS textract 方法(StartDocumentAnalysis)的 JobId 如何找到提取的文本?
If I have the JobId returned by the AWS Textract method StartDocumentAnalysis, how do I retrieve the extracted text?
我这里有这个代码-
const _ = require("lodash");
const aws = require("aws-sdk");
const config = require("./config");
// Apply the access key, secret key and region read from ./config to the aws-sdk configuration.
aws.config.update({
accessKeyId: config.awsAccesskeyID,
secretAccessKey: config.awsSecretAccessKey,
region: config.awsRegion
});
// Textract client shared by the code below; created after the configuration update above.
const textract = new aws.Textract();
/**
 * Builds the text of a block by joining the text of its CHILD blocks with spaces.
 *
 * WORD children contribute their `Text`; SELECTION_ELEMENT children contribute
 * an "X" when their `SelectionStatus` is "SELECTED" and nothing otherwise.
 *
 * @param {object} [result] - Block whose `Relationships` are walked. May be
 *   undefined (e.g. a key without a value block), in which case "" is returned.
 * @param {object} blocksMap - Lookup of block id -> block.
 * @returns {string} The trimmed, space-separated text ("" when there is none).
 */
const getText = (result, blocksMap) => {
  if (result == null || !Array.isArray(result.Relationships)) {
    return "";
  }

  const parts = [];
  result.Relationships.forEach(relationship => {
    if (relationship.Type !== "CHILD" || !Array.isArray(relationship.Ids)) {
      return;
    }
    relationship.Ids.forEach(childId => {
      const child = blocksMap[childId];
      // A referenced child may be absent from the map (for example when only
      // part of the result pages were loaded); skip it instead of throwing.
      if (!child) {
        return;
      }
      if (child.BlockType === "WORD") {
        parts.push(child.Text);
      } else if (
        child.BlockType === "SELECTION_ELEMENT" &&
        child.SelectionStatus === "SELECTED"
      ) {
        parts.push("X");
      }
    });
  });

  return parts.join(" ").trim();
};
/**
 * Finds the value block that a key block points to.
 *
 * Walks the key block's VALUE relationships in order and returns the first
 * referenced block that exists in `valueMap`. Every id of a relationship is
 * checked; the search does not give up after the first id.
 *
 * @param {object} keyBlock - Key block whose `Relationships` are inspected.
 * @param {object} valueMap - Lookup of block id -> value block.
 * @returns {object|undefined} The matching value block, or undefined if the
 *   key has no VALUE relationship that resolves to a known value block.
 */
const findValueBlock = (keyBlock, valueMap) => {
  if (
    keyBlock == null ||
    valueMap == null ||
    !Array.isArray(keyBlock.Relationships)
  ) {
    return undefined;
  }

  const isKnownValue = id => Object.prototype.hasOwnProperty.call(valueMap, id);

  for (const relationship of keyBlock.Relationships) {
    if (relationship.Type !== "VALUE" || !Array.isArray(relationship.Ids)) {
      continue;
    }
    const matchId = relationship.Ids.find(isKnownValue);
    if (matchId !== undefined) {
      return valueMap[matchId];
    }
  }

  return undefined;
};
/**
 * Builds a plain object mapping each form key's text to its value's text.
 *
 * For every key block, the paired value block is looked up with
 * `findValueBlock`, and both sides are converted to text with `getText`.
 * Keys that produce the same text overwrite each other; the last one wins.
 *
 * @param {object} keyMap - Lookup of block id -> key block.
 * @param {object} valueMap - Lookup of block id -> value block.
 * @param {object} blockMap - Lookup of block id -> block (all blocks).
 * @returns {Object<string, string>} Key text -> value text.
 */
const getKeyValueRelationship = (keyMap, valueMap, blockMap) => {
  const keyValues = {};

  Object.values(keyMap || {}).forEach(keyBlock => {
    const valueBlock = findValueBlock(keyBlock, valueMap);
    const key = getText(keyBlock, blockMap);
    const value = getText(valueBlock, blockMap);
    keyValues[key] = value;
  });

  return keyValues;
};
/**
 * Indexes a list of blocks by id and separates KEY_VALUE_SET blocks into
 * key blocks and value blocks.
 *
 * A KEY_VALUE_SET block whose `EntityTypes` contains "KEY" goes into `keyMap`;
 * any other KEY_VALUE_SET block goes into `valueMap`. Every block, whatever its
 * type, is stored in `blockMap`.
 *
 * @param {Array<object>} blocks - Blocks to index. A missing list is treated
 *   as empty.
 * @returns {{keyMap: object, valueMap: object, blockMap: object}} Lookups of
 *   block id -> block.
 */
const getKeyValueMap = blocks => {
  const keyMap = {};
  const valueMap = {};
  const blockMap = {};

  (blocks || []).forEach(block => {
    const blockId = block.Id;
    blockMap[blockId] = block;

    if (block.BlockType !== "KEY_VALUE_SET") {
      return;
    }
    const isKey =
      Array.isArray(block.EntityTypes) && block.EntityTypes.includes("KEY");
    if (isKey) {
      keyMap[blockId] = block;
    } else {
      valueMap[blockId] = block;
    }
  });

  return { keyMap, valueMap, blockMap };
};
module.exports = async buffer => {
const params = {
DocumentLocation: {
S3Object:{
Bucket : "<data>",
Name : "<data>"
}
},
FeatureTypes: ["FORMS"]
};
const request = textract.startDocumentAnalysis(params);
const params1 = {
JobId : request.JobId
};
const request1 = textract.getDocumentAnalysis(params1);
console.log("A");
console.log(request1);
console.log("A2");
console.log(request1.Blocks);
if(request1){
console.log("B");
console.log("C");
console.log("D");
const { keyMap, valueMap, blockMap } = getKeyValueMap(data1.Blocks);
const keyValues = getKeyValueRelationship(keyMap, valueMap, blockMap);
return keyValues;
}
return undefined;
};
现在我已经能够拿到 JobId,但这是一个异步操作。如何在不使用 AWS SNS 的情况下,把这个 JobId 传给 getDocumentAnalysis 方法?(作为试验,我也可以在代码中加入延迟来等待作业完成。)我需要的输出是 JSON 格式的提取词列表,但我该如何得到它?
找到解决方案:
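使用 await 和 Promise 重构代码:先等待(await)每个异步调用完成,再使用它返回的数据。例如 `const { JobId } = await textract.startDocumentAnalysis(params).promise();`,然后用这个 JobId 调用 getDocumentAnalysis,直到 JobStatus 不再是 IN_PROGRESS。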
我这里有这个代码-
const _ = require("lodash");
const aws = require("aws-sdk");
const config = require("./config");
// Apply the access key, secret key and region read from ./config to the aws-sdk configuration.
aws.config.update({
accessKeyId: config.awsAccesskeyID,
secretAccessKey: config.awsSecretAccessKey,
region: config.awsRegion
});
// Textract client shared by the code below; created after the configuration update above.
const textract = new aws.Textract();
/**
 * Builds the text of a block by joining the text of its CHILD blocks with spaces.
 *
 * WORD children contribute their `Text`; SELECTION_ELEMENT children contribute
 * an "X" when their `SelectionStatus` is "SELECTED" and nothing otherwise.
 *
 * @param {object} [result] - Block whose `Relationships` are walked. May be
 *   undefined (e.g. a key without a value block), in which case "" is returned.
 * @param {object} blocksMap - Lookup of block id -> block.
 * @returns {string} The trimmed, space-separated text ("" when there is none).
 */
const getText = (result, blocksMap) => {
  if (result == null || !Array.isArray(result.Relationships)) {
    return "";
  }

  const parts = [];
  result.Relationships.forEach(relationship => {
    if (relationship.Type !== "CHILD" || !Array.isArray(relationship.Ids)) {
      return;
    }
    relationship.Ids.forEach(childId => {
      const child = blocksMap[childId];
      // A referenced child may be absent from the map (for example when only
      // part of the result pages were loaded); skip it instead of throwing.
      if (!child) {
        return;
      }
      if (child.BlockType === "WORD") {
        parts.push(child.Text);
      } else if (
        child.BlockType === "SELECTION_ELEMENT" &&
        child.SelectionStatus === "SELECTED"
      ) {
        parts.push("X");
      }
    });
  });

  return parts.join(" ").trim();
};
/**
 * Finds the value block that a key block points to.
 *
 * Walks the key block's VALUE relationships in order and returns the first
 * referenced block that exists in `valueMap`. Every id of a relationship is
 * checked; the search does not give up after the first id.
 *
 * @param {object} keyBlock - Key block whose `Relationships` are inspected.
 * @param {object} valueMap - Lookup of block id -> value block.
 * @returns {object|undefined} The matching value block, or undefined if the
 *   key has no VALUE relationship that resolves to a known value block.
 */
const findValueBlock = (keyBlock, valueMap) => {
  if (
    keyBlock == null ||
    valueMap == null ||
    !Array.isArray(keyBlock.Relationships)
  ) {
    return undefined;
  }

  const isKnownValue = id => Object.prototype.hasOwnProperty.call(valueMap, id);

  for (const relationship of keyBlock.Relationships) {
    if (relationship.Type !== "VALUE" || !Array.isArray(relationship.Ids)) {
      continue;
    }
    const matchId = relationship.Ids.find(isKnownValue);
    if (matchId !== undefined) {
      return valueMap[matchId];
    }
  }

  return undefined;
};
/**
 * Builds a plain object mapping each form key's text to its value's text.
 *
 * For every key block, the paired value block is looked up with
 * `findValueBlock`, and both sides are converted to text with `getText`.
 * Keys that produce the same text overwrite each other; the last one wins.
 *
 * @param {object} keyMap - Lookup of block id -> key block.
 * @param {object} valueMap - Lookup of block id -> value block.
 * @param {object} blockMap - Lookup of block id -> block (all blocks).
 * @returns {Object<string, string>} Key text -> value text.
 */
const getKeyValueRelationship = (keyMap, valueMap, blockMap) => {
  const keyValues = {};

  Object.values(keyMap || {}).forEach(keyBlock => {
    const valueBlock = findValueBlock(keyBlock, valueMap);
    const key = getText(keyBlock, blockMap);
    const value = getText(valueBlock, blockMap);
    keyValues[key] = value;
  });

  return keyValues;
};
/**
 * Indexes a list of blocks by id and separates KEY_VALUE_SET blocks into
 * key blocks and value blocks.
 *
 * A KEY_VALUE_SET block whose `EntityTypes` contains "KEY" goes into `keyMap`;
 * any other KEY_VALUE_SET block goes into `valueMap`. Every block, whatever its
 * type, is stored in `blockMap`.
 *
 * @param {Array<object>} blocks - Blocks to index. A missing list is treated
 *   as empty.
 * @returns {{keyMap: object, valueMap: object, blockMap: object}} Lookups of
 *   block id -> block.
 */
const getKeyValueMap = blocks => {
  const keyMap = {};
  const valueMap = {};
  const blockMap = {};

  (blocks || []).forEach(block => {
    const blockId = block.Id;
    blockMap[blockId] = block;

    if (block.BlockType !== "KEY_VALUE_SET") {
      return;
    }
    const isKey =
      Array.isArray(block.EntityTypes) && block.EntityTypes.includes("KEY");
    if (isKey) {
      keyMap[blockId] = block;
    } else {
      valueMap[blockId] = block;
    }
  });

  return { keyMap, valueMap, blockMap };
};
module.exports = async buffer => {
const params = {
DocumentLocation: {
S3Object:{
Bucket : "<data>",
Name : "<data>"
}
},
FeatureTypes: ["FORMS"]
};
const request = textract.startDocumentAnalysis(params);
const params1 = {
JobId : request.JobId
};
const request1 = textract.getDocumentAnalysis(params1);
console.log("A");
console.log(request1);
console.log("A2");
console.log(request1.Blocks);
if(request1){
console.log("B");
console.log("C");
console.log("D");
const { keyMap, valueMap, blockMap } = getKeyValueMap(data1.Blocks);
const keyValues = getKeyValueRelationship(keyMap, valueMap, blockMap);
return keyValues;
}
return undefined;
};
现在我已经能够拿到 JobId,但这是一个异步操作。如何在不使用 AWS SNS 的情况下,把这个 JobId 传给 getDocumentAnalysis 方法?(作为试验,我也可以在代码中加入延迟来等待作业完成。)我需要的输出是 JSON 格式的提取词列表,但我该如何得到它?
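找到解决方案: 使用 await 和 Promise 重构代码:先等待(await)每个异步调用完成,再使用它返回的数据。例如 `const { JobId } = await textract.startDocumentAnalysis(params).promise();`,然后用这个 JobId 调用 getDocumentAnalysis,直到 JobStatus 不再是 IN_PROGRESS。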