Google Automl 400 错误位置 ID 和 Field:Name 无效

Question

我目前正在编写一个函数，该函数将监听 GCP pubsub 以获取将触发训练函数的消息。消息本身包含导入数据集的操作ID，该函数将接收此操作ID并继续循环直到导入完成之后，它将训练模型并循环直到它完全由于我需要标记开始时间和数据库的结束时间。我的问题是在我的代码中我已经指定了位置 ID 和项目 ID，它给我这个字段名称和名称无效的 400 错误。我使用 GCP 网站提供的示例代码尝试相同的火车模型设置。https://cloud.google.com/vision/automl/docs/train?hl=zh-TW 它工作正常。这是我的代码。感谢大家的帮助。

后端函数

from datetime import datetime
import time
from google.cloud import pubsub_v1, automl, datastore
from app import celeryConfig
from config import Config
import requests


def train_model():
    project_id = 'XXXX'
    topic_name = "XXXX"
    timeout = 20
    subscription_name = "XXXX"
    subscriber = pubsub_v1.SubscriberClient.from_service_account_json("./gerald-automl-test-ccf53bf513b7.json")
    subscription_path = subscriber.subscription_path(project_id, subscription_name)

    def callback(message):
        client = automl.AutoMlClient.from_service_account_json("XXXX.json")
        project_location = client.location_path('XXXXX', "us-central1")
        flow_control = pubsub_v1.types.FlowControl(max_messages=5)
        print("Received message: {}".format(message))
        pub_sub_message = str(message.data, encoding="utf-8")  # containing model_id,opreation_id & dataset_id
        message.ack()
        extract_from_data = [e.strip().replace("'", "") for e in pub_sub_message.strip("[]").split(",")]
        dataset_id = extract_from_data[0]
        model_name = extract_from_data[1]
        opreation_id = extract_from_data[2]
        print(opreation_id)


        while True:
            response = client.transport._operations_client.get_operation(
                operation_full_id
            )

            data_of_response = str(response)

            if response.done != True:
                print("not complete importing data")
                response = client.transport._operations_client.get_operation(
                    operation_full_id
                )
                print(str(response))
                time.sleep(10)

            elif response.done == True:
                print("start train model")
                dataset_id = dataset_id
                display_name = model_name
                location = 'XXXX'
                project_id='XXXX'

                client = automl.AutoMlClient.from_service_account_json("XXXXXX")

                # A resource that represents Google Cloud Platform location.
                parent = client.location_path('XXXX', 'XXXXX')

                metadata = automl.types.ImageClassificationModelMetadata(
                    train_budget_milli_node_hours=1000,
                    model_type='mobile-high-accuracy-1'
                )
                # Leave model unset to use the default base model provided by Google
                model = automl.types.Model(
                    display_name=display_name,
                    dataset_id=dataset_id,
                    image_classification_model_metadata=metadata,
                )

                datastore_client = datastore.Client.from_service_account_json(
                    "./XXXX.json")

                response = client.create_model(parent, model)

                opreation_full = str(response.operation.name).split('/')
                opreation_id_train_model = opreation_full[5]

                kind = 'job'
                data_key = datastore_client.key(kind)
                new_post = datastore.Entity(key=data_key)
                new_post['action_date'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
                new_post['job_type'] = "train_model"
                new_post['status'] = 'training'
                datastore_client.put(new_post)
                create_model_opreation = client.transport._operations_client.get_operation(
                    opreation_id_train_model)

                if create_model_opreation.done == True:
                    print('complete training model')
                    kind = 'job'
                    data_key = datastore_client.key(kind)
                    new_post = datastore.Entity(key=data_key)
                    new_post['job_type'] = "train_model"
                    new_post['status'] = ['complete_training']
                    new_post['finish_date'] = datetime.today().strftime('%Y-%m-%d %H:%M')
                    new_post['description'] = 'from train model ' + dataset_id
                    datastore_client.put(new_post)
                    break
                else:
                    print("not complete traning mode")
                    response = client.transport._operations_client.get_operation(
                        opreation_id
                    )
                    print(str(response))
                    time.sleep(10)



    streaming_pull_future = subscriber.subscribe(
        subscription_path, callback=callback
    )
    print("Listening for messages on {}..\n".format(subscription_path))

    # Wrap subscriber in a 'with' block to automatically call close() when done.
    with subscriber:
        try:
            # When `timeout` is not set, result() will block indefinitely,
            # unless an exception is encountered first.
            streaming_pull_future.result(timeout=timeout)
        except:  # noqa
            streaming_pull_future.cancel()

Answer 1

你能添加你得到的错误的堆栈跟踪吗？这将有助于缩小可能性。根据我的理解，错误来自其中一行：

response = client.create_model(parent, model)
opreation_full = str(response.operation.name).split('/')

我还意识到 google 文档中的库尚未更新，现在 python Automl Client Library 中的模块名称是 automl_v1。你可以先试试看是不是版本问题：

from google.cloud import automl_v1

Google Automl 400 错误位置 ID 和 Field:Name 无效

Google Automl 400 error location ID and Field:Name is invalid

python

google-cloud-platform

google-cloud-pubsub

google-cloud-automl