Google Apps Script:在 Google 文档中查找关键文本旁边的文本
Google Apps Script: find the text next to a key text in a Google Document
我在 Google 云端硬盘中保存了一个 PDF 文件。我想在该文件中找到文本“USD”,然后取出紧邻该文本的值(例如 167.1764),并将其插入我的 Google 电子表格。
下面是我的 PDF 文件的预览。
这是我的 PDF 文件的链接。
下面是我尝试过的代码,但它未能找到该文本并取得其旁边的值。
下面是我的代码。
/**
 * Converts '08-Sep-2021.pdf' from a Drive folder into a Google Doc using OCR,
 * logs the recognised text, then logs the numeric value that follows the
 * "USD" label in that text.
 *
 * Requires the Advanced Drive API service (`Drive.Files.insert`) to be enabled.
 *
 * @throws {Error} If the PDF is not present in the folder.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('folderid');
  const candidates = folder.getFilesByName(fileName);
  // Fail early with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error(`File not found in folder: ${fileName}`);
  }
  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };
  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
  // Extract Text from PDF file
  const doc = DocumentApp.openById(file.id);
  const text = doc.getBody().getText();
  Logger.log(text);
  // NOTE: the converted Doc is intentionally left in Drive here; call
  // DriveApp.getFileById(file.id).setTrashed(true) once it is no longer needed.

  // The previous findText() loop never advanced its search position, so it
  // never terminated once "USD" was found. A single regex over the body text
  // is enough: take the first number that follows the "USD" label.
  // NOTE(review): assumes the OCR output places the value right after "USD",
  // separated only by whitespace or a line break - confirm against the
  // converted document.
  const usdMatch = /USD\s+(\d+(?:[.,]\d+)*)/.exec(text);
  if (usdMatch === null) {
    Logger.log('USD value not found');
    return;
  }
  Logger.log(usdMatch[1]);
}
借助正则表达式(RegEx)可以提取该值。我并不擅长编写这类模式,也许其他人可以进一步优化,从而不再需要 split。(这里是一个链接)。
代码:
/**
 * Converts '08-Sep-2021.pdf' from a Drive folder into a temporary Google Doc,
 * logs the converted text, prints the two values found on the lines after
 * the "USD" label (named buying and selling here), and finally trashes the
 * temporary Doc.
 *
 * Requires the Advanced Drive API service (`Drive.Files.insert`) to be enabled.
 *
 * @throws {Error} If the PDF is not present in the folder, or if the
 *     converted text does not contain the expected "USD" lines.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
  const candidates = folder.getFilesByName(fileName);
  // Fail early with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error(`File not found in folder: ${fileName}`);
  }
  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType(),
  };
  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {convert: true});
  try {
    // Extract Text from PDF file
    const text = DocumentApp.openById(file.id).getBody().getText();
    Logger.log(text);
    // First capture: the line directly after "USD".
    const buyingMatch = /USD\n(.*?)$/m.exec(text);
    // Second capture: the line after that one.
    const sellingMatch = /USD\n\s*\S*\n(.*?)$/m.exec(text);
    if (buyingMatch === null || sellingMatch === null) {
      throw new Error('Could not find the USD buying/selling values in the converted text');
    }
    console.log(buyingMatch[1].trim());
    console.log(sellingMatch[1].trim());
  } finally {
    // Remove the converted file, even when extraction fails, so that failed
    // runs do not leave temporary Docs behind in Drive.
    DriveApp.getFileById(file.id).setTrashed(true);
  }
}
我在 Google 云端硬盘中保存了一个 PDF 文件。我想在该文件中找到文本“USD”,然后取出紧邻该文本的值(例如 167.1764),并将其插入我的 Google 电子表格。
下面是我的 PDF 文件的预览。
这是我的 PDF 文件的链接。
下面是我尝试过的代码,但它未能找到该文本并取得其旁边的值。
下面是我的代码。
/**
 * Converts '08-Sep-2021.pdf' from a Drive folder into a Google Doc using OCR,
 * logs the recognised text, then logs the numeric value that follows the
 * "USD" label in that text.
 *
 * Requires the Advanced Drive API service (`Drive.Files.insert`) to be enabled.
 *
 * @throws {Error} If the PDF is not present in the folder.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('folderid');
  const candidates = folder.getFilesByName(fileName);
  // Fail early with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error(`File not found in folder: ${fileName}`);
  }
  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };
  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});
  // Extract Text from PDF file
  const doc = DocumentApp.openById(file.id);
  const text = doc.getBody().getText();
  Logger.log(text);
  // NOTE: the converted Doc is intentionally left in Drive here; call
  // DriveApp.getFileById(file.id).setTrashed(true) once it is no longer needed.

  // The previous findText() loop never advanced its search position, so it
  // never terminated once "USD" was found. A single regex over the body text
  // is enough: take the first number that follows the "USD" label.
  // NOTE(review): assumes the OCR output places the value right after "USD",
  // separated only by whitespace or a line break - confirm against the
  // converted document.
  const usdMatch = /USD\s+(\d+(?:[.,]\d+)*)/.exec(text);
  if (usdMatch === null) {
    Logger.log('USD value not found');
    return;
  }
  Logger.log(usdMatch[1]);
}
借助正则表达式(RegEx)可以提取该值。我并不擅长编写这类模式,也许其他人可以进一步优化,从而不再需要 split。(这里是一个链接)。
代码:
/**
 * Converts '08-Sep-2021.pdf' from a Drive folder into a temporary Google Doc,
 * logs the converted text, prints the two values found on the lines after
 * the "USD" label (named buying and selling here), and finally trashes the
 * temporary Doc.
 *
 * Requires the Advanced Drive API service (`Drive.Files.insert`) to be enabled.
 *
 * @throws {Error} If the PDF is not present in the folder, or if the
 *     converted text does not contain the expected "USD" lines.
 */
function extractTextFromPDF() {
  const fileName = '08-Sep-2021.pdf';
  const folder = DriveApp.getFolderById('1QVo_pxxx387WPH9Yx');
  const candidates = folder.getFilesByName(fileName);
  // Fail early with a clear message instead of dereferencing an undefined blob.
  if (!candidates.hasNext()) {
    throw new Error(`File not found in folder: ${fileName}`);
  }
  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType(),
  };
  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {convert: true});
  try {
    // Extract Text from PDF file
    const text = DocumentApp.openById(file.id).getBody().getText();
    Logger.log(text);
    // First capture: the line directly after "USD".
    const buyingMatch = /USD\n(.*?)$/m.exec(text);
    // Second capture: the line after that one.
    const sellingMatch = /USD\n\s*\S*\n(.*?)$/m.exec(text);
    if (buyingMatch === null || sellingMatch === null) {
      throw new Error('Could not find the USD buying/selling values in the converted text');
    }
    console.log(buyingMatch[1].trim());
    console.log(sellingMatch[1].trim());
  } finally {
    // Remove the converted file, even when extraction fails, so that failed
    // runs do not leave temporary Docs behind in Drive.
    DriveApp.getFileById(file.id).setTrashed(true);
  }
}