文档 AI:google.api_core.exceptions.InvalidArgument:400 请求包含无效参数

Document AI: google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument

我在 python 中尝试从 google 云实现文档 OCR 时遇到此错误,如此处所述:https://cloud.google.com/document-ai/docs/ocr

当我运行

   result = client.process_document(request=request)

我收到这个错误

Traceback (most recent call last):
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 73, in error_remapped_callable
    return callable_(*args, **kwargs)
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py", line 923, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py", line 826, in _end_unary_response_blocking
    raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
    status = StatusCode.INVALID_ARGUMENT
    details = "Request contains an invalid argument."
    debug_error_string = "{"created":"@1614769280.332675000","description":"Error received from peer ipv4:142.250.180.138:443","file":"src/core/lib/surface/call.cc","file_line":1068,"grpc_message":"Request contains an invalid argument.","grpc_status":3}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/cloud/documentai_v1beta3/services/document_processor_service/client.py", line 327, in process_document
    response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/gapic_v1/method.py", line 145, in __call__
    return wrapped_func(*args, **kwargs)
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py", line 281, in retry_wrapped_func
    return retry_target(
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py", line 184, in retry_target
    return target()
  File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 75, in error_remapped_callable
    six.raise_from(exceptions.from_grpc_error(exc), exc)
  File "<string>", line 3, in raise_from
google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument.



  

我的完整代码:

import os
# Import the base64 encoding library.


  
project_id= 'your-project-id'
location = 'eu' # Format is 'us' or 'eu'
processor_id = 'your-processor-id' # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"

def process_document_sample(
        project_id: str, location: str, processor_id: str, file_path: str
):
    from google.cloud import documentai_v1beta3 as documentai

    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient()

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Recognizes text entities in the PDF document
    result = client.process_document(request=request)

    document = result.document

    print("Document processing complete.")

    # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

    document_pages = document.pages

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")

client = documentai.DocumentProcessorServiceClient()默认指向美国终点。

in: client = documentai.DocumentProcessorServiceClient()
in: print(client.DEFAULT_ENDPOINT)
out: us-documentai.googleapis.com

您需要将 api_endpoint 覆盖为 EU 才能正常工作。

from google.api_core.client_options import ClientOptions
    # Set endpoint to EU 
    options = ClientOptions(api_endpoint="eu-documentai.googleapis.com:443")
    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient(client_options=options)

完整代码如下:

import os

# TODO(developer): Uncomment these variables before running the sample.
project_id= 'your-project-id'
location = 'eu' # Format is 'us' or 'eu'
processor_id = 'your-processor-id' # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"

def process_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str
):
    from google.cloud import documentai_v1beta3 as documentai
    from google.api_core.client_options import ClientOptions

    # Set endpoint to EU
    options = ClientOptions(api_endpoint="eu-documentai.googleapis.com:443")
    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient(client_options=options)

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processor/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as image:
        image_content = image.read()

    # Read the file into memory
    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Recognizes text entities in the PDF document
    result = client.process_document(request=request)

    document = result.document

    print("Document processing complete.")

    # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document

    document_pages = document.pages

    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document_pages:
        paragraphs = page.paragraphs
        for paragraph in paragraphs:
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")

这是输出的一个片段: