Google Apps Script:在 Google 文档中查找关键文本旁边的文本

Google App Script find text from Google Document next to key text

我在 Google 云端硬盘中保存了一个 PDF 文件,我想在该文件中找到一个文本,即 USD,然后取出该文本旁边的值,即:167.1764,并将其插入我的 Google 电子表格。

下面是我的 PDF 文件的预览,并附上该 PDF 文件的链接(Link)。

下面是我尝试但未能找到文本并达到旁边那个值的代码。

下面是我的代码。

/**
 * Attempt to read a value next to the text "USD" from a PDF stored in Drive.
 *
 * Looks up '08-Sep-2021.pdf' in a Drive folder, inserts its blob through the
 * Advanced Drive Service with OCR enabled, opens the resulting file as a
 * Google Document, logs the body text, and then searches the body for "USD".
 *
 * NOTE(review): as written this does not reach the value next to "USD";
 * see the inline notes below on the search loop.
 */
function extractTextFromPDF() {

  var drive = DriveApp;
  var folders = drive.getFolderById('folderid');
  var newfile = folders.getFilesByName('08-Sep-2021.pdf');
  // file1 is only assigned when a matching file exists; otherwise it stays
  // undefined and the blob.getName() call below will fail.
  if(newfile.hasNext()){
    var file1 = newfile.next().getBlob();
  }
  
  var blob = file1;
  var resource = {
    title: blob.getName(),
    mimeType: blob.getContentType()
  };

  // Enable the Advanced Drive API Service
  var file = Drive.Files.insert(resource, blob, {ocr: true, ocrLanguage: "en"});

  // Extract Text from PDF file
  var doc = DocumentApp.openById(file.id);
  var text = doc.getBody().getText();
  Logger.log(text);
  // Cleanup of the inserted file is currently disabled.
  //DriveApp.getFileById(file.id).setTrashed(true);
  var body = doc.getBody();
  // NOTE(review): findText presumably treats its argument as a regular
  // expression, in which case the parentheses form a group rather than
  // matching literal "(" and ")" - confirm against the DocumentApp docs.
  var foundElement = body.findText("(USD)");

// NOTE(review): foundElement is never reassigned inside this loop, so once it
// is non-null the condition can never become false and the loop cannot end.
while (foundElement != null) {
    // Get the text object from the element
    var foundText = foundElement.getElement().asText();

    // Where in the element is the found text?
    // start/end are computed here but not used afterwards.
    var start = foundElement.getStartOffset();
    var end = foundElement.getEndOffsetInclusive();
}
    // Goal: log the value next to USD, i.e. 167.1144.
    // As written, foundText is the result of asText() on the matched element
    // (undefined if nothing matched), not the adjacent value.
    Logger.log(foundText);
  
  
}

借助正则表达式(RegEx)可以提取这个值。我并不擅长编写这类模式,也许其他人可以进一步优化,从而不再需要 split。(这里是一个链接。)

代码:

/**
 * Extracts the two values listed after "USD" from a PDF stored in Drive.
 *
 * The PDF is converted to a temporary Google Document through the Advanced
 * Drive Service, the document text is logged and parsed, both values are
 * logged, and the temporary document is always moved to the trash afterwards.
 *
 * @returns {{buying: string, selling: string}} The trimmed text of the first
 *     and second line following the "USD" line in the converted document.
 * @throws {Error} If the PDF is not found in the folder, or if the converted
 *     text does not contain the expected "USD" lines.
 */
function extractTextFromPDF() {
  const folderId = '1QVo_pxxx387WPH9Yx';
  const fileName = '08-Sep-2021.pdf';

  const folder = DriveApp.getFolderById(folderId);
  const candidates = folder.getFilesByName(fileName);
  // Fail early with a readable message instead of a TypeError on an
  // undefined blob further down.
  if (!candidates.hasNext()) {
    throw new Error(`File "${fileName}" was not found in folder "${folderId}".`);
  }

  const blob = candidates.next().getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType(),
  };

  // Enable the Advanced Drive API Service
  const file = Drive.Files.insert(resource, blob, {convert: true});

  try {
    // Extract Text from PDF file
    const doc = DocumentApp.openById(file.id);
    const text = doc.getBody().getText();
    Logger.log(text);

    const rates = parseUsdRates_(text);
    console.log(rates.buying);
    console.log(rates.selling);
    return rates;
  } finally {
    // Remove the converted file, even when reading or parsing failed, so
    // temporary documents do not pile up in Drive.
    DriveApp.getFileById(file.id).setTrashed(true);
  }
}

/**
 * Pulls the two lines that follow a "USD" line out of the converted text.
 * The trailing underscore marks the helper as private to this script.
 *
 * @param {string} text Plain text of the converted document.
 * @returns {{buying: string, selling: string}} Trimmed first and second line
 *     after "USD".
 * @throws {Error} If either value cannot be located.
 */
function parseUsdRates_(text) {
  // "m" lets "$" stop at the end of a line; a single match is needed, so no
  // "g" flag (which would only add lastIndex state).
  const buyingMatch = /USD\n(.*?)$/m.exec(text);
  const sellingMatch = /USD\n\s*\S*\n(.*?)$/m.exec(text);
  if (buyingMatch === null || sellingMatch === null) {
    throw new Error('Could not find the USD values in the converted document text.');
  }
  return {
    buying: buyingMatch[1].trim(),
    selling: sellingMatch[1].trim(),
  };
}