GoogleAPICallError: None Unexpected state: Long-running operation had neither response nor error set
GoogleAPICallError: None Unexpected state: Long-running operation had neither response nor error set
我是 Google Cloud Platform 的新手,我正在尝试创建一个特征存储来填充来自 Google Cloud Storage 的 csv 文件中的值。目的是通过 Python 中的本地笔记本执行此操作。
我基本上遵循代码 here,进行适当的更改,因为我正在使用信用卡 public 数据集。
当我 运行 代码时引发的错误如下:
GoogleAPICallError: None Unexpected state: Long-running operation had neither response nor error set.
它发生在从 csv 文件中提取数据的过程中。
这是我正在处理的代码:
import os
from datetime import datetime
from google.cloud import bigquery
from google.cloud import aiplatform
from google.cloud.aiplatform_v1.types import feature as feature_pb2
from google.cloud.aiplatform_v1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1.types import \
featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1.types import FeatureSelector, IdMatcher
# Point Application Default Credentials at a service-account key file; all
# Google Cloud clients constructed below authenticate via this variable.
credential_path = r"C:\Users\...\.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path
## Constants
PROJECT_ID = "my-project-ID"
REGION = "us-central1"
# Regional Vertex AI endpoint; must match REGION.
API_ENDPOINT = "us-central1-aiplatform.googleapis.com"
INPUT_CSV_FILE = "my-input-file.csv"
FEATURESTORE_ID = "fraud_detection"
## Output dataset
DESTINATION_DATA_SET = "fraud_predictions"
# Suffix the dataset name with a run timestamp so every run writes to a
# fresh BigQuery dataset (see the note below: BatchReadFeatureValues
# cannot overwrite an existing table).
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DESTINATION_DATA_SET = "{prefix}_{timestamp}".format(
    prefix=DESTINATION_DATA_SET, timestamp=TIMESTAMP
)
## Output table. Make sure that the table does NOT already exist;
## the BatchReadFeatureValues API cannot overwrite an existing table
DESTINATION_TABLE_NAME = "training_data"
DESTINATION_PATTERN = "bq://{project}.{dataset}.{table}"
DESTINATION_TABLE_URI = DESTINATION_PATTERN.format(
    project=PROJECT_ID, dataset=DESTINATION_DATA_SET,
    table=DESTINATION_TABLE_NAME
)
## Create dataset
client = bigquery.Client(project=PROJECT_ID)
dataset_id = "{}.{}".format(client.project, DESTINATION_DATA_SET)
dataset = bigquery.Dataset(dataset_id)
dataset.location = REGION
dataset = client.create_dataset(dataset)
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
## Create client for CRUD and data_client for reading feature values.
# NOTE(review): `client` is rebound here from the BigQuery client to the
# Featurestore client; distinct names would avoid confusion.
client = aiplatform.gapic.FeaturestoreServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})
data_client = aiplatform.gapic.FeaturestoreOnlineServingServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})
# "projects/{project}/locations/{region}" — parent path for create calls.
BASE_RESOURCE_PATH = client.common_location_path(PROJECT_ID, REGION)
## Create featurestore (only the first time)
# create_featurestore returns a long-running operation (LRO); the store is
# not usable until the LRO completes.
create_lro = client.create_featurestore(
    featurestore_service_pb2.CreateFeaturestoreRequest(
        parent=BASE_RESOURCE_PATH,
        featurestore_id=FEATURESTORE_ID,
        featurestore=featurestore_pb2.Featurestore(
            online_serving_config=featurestore_pb2.Featurestore.OnlineServingConfig(
                # A single online-serving node keeps cost minimal.
                fixed_node_count=1
            ),
        ),
    )
)
## Wait for LRO to finish and get the LRO result.
print(create_lro.result())
# Sanity check: fetch the featurestore that was just created.
client.get_featurestore(
    name=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
)
## Create credit card entity type (only the first time)
cc_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        entity_type_id="creditcards",
        entity_type=entity_type_pb2.EntityType(
            description="Credit card entity",
        ),
    )
)
## Create fraud entity type (only the first time)
fraud_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        entity_type_id="frauds",
        entity_type=entity_type_pb2.EntityType(
            description="Fraud entity",
        ),
    )
)
# Block until both entity types actually exist. The original code never
# awaited these long-running operations, so the batch_create_features calls
# further down could race entity-type creation and fail.
cc_entity_type_lro.result()
fraud_entity_type_lro.result()
## Create features for credit card type (only the first time)
# Every credit-card feature shares the same spec (DOUBLE, empty
# description), so build the 29 requests — v1..v28 plus "amount" — in a
# comprehension instead of 29 copy-pasted CreateFeatureRequest literals.
cc_feature_ids = ["v{}".format(i) for i in range(1, 29)] + ["amount"]
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    requests=[
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE, description="",
            ),
            feature_id=feature_id,
        )
        for feature_id in cc_feature_ids
    ],
).result()
## Create features for fraud type (only the first time)
# Single feature "class" (per the surrounding text, the fraud label takes
# values 0 or 1), stored as DOUBLE to match the other features.
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    requests=[
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE, description="",
            ),
            feature_id="class",
        ),
    ],
).result()
## Import features values for credit cards
# The feature ids must match the features created above, so generate them
# with the same v1..v28 + "amount" loop instead of 29 copy-pasted
# FeatureSpec literals.
import_cc_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    csv_source=aiplatform.gapic.CsvSource(gcs_source=aiplatform.gapic.GcsSource(
        uris=["gs://fraud-detection-19102021/dataset/cc_details_train.csv"])),
    # Column holding the entity id; its values may be arbitrary.
    entity_id_field="cc_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
        for feature_id in ["v{}".format(i) for i in range(1, 29)] + ["amount"]
    ],
    # The CSV's "time" column must contain RFC 3339 timestamps
    # (e.g. 2021-04-15T08:28:14Z); otherwise the ingestion job finishes
    # "successfully" but imports zero feature values.
    feature_time_field='time',
    worker_count=1,
)
## Start to import
ingestion_lro = client.import_feature_values(import_cc_request)
## Polls for the LRO status and prints when the LRO has completed
ingestion_lro.result()
## Import features values for frauds
import_fraud_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    csv_source=aiplatform.gapic.CsvSource(gcs_source=aiplatform.gapic.GcsSource(
        uris=["gs://fraud-detection-19102021/dataset/data_fraud_train.csv"])),
    # Column holding the entity id; its values may be arbitrary.
    entity_id_field="fraud_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id="class"),
    ],
    # The CSV's "time" column must contain RFC 3339 timestamps
    # (e.g. 2021-04-15T08:28:14Z); otherwise the ingestion job finishes
    # "successfully" but imports zero feature values.
    feature_time_field='time',
    worker_count=1,
)
## Start to import
ingestion_lro = client.import_feature_values(import_fraud_request)
## Polls for the LRO status and prints when the LRO has completed
ingestion_lro.result()
当我从 Google Cloud Console 的 Feature
部分检查 Ingestion Jobs
时,我看到作业已完成,但没有值添加到我的功能中。
任何建议都非常宝贵。
谢谢大家
编辑 1
在下图中,有一个我用作输入的 csv 文件第一行的示例 (cc_details_train.csv
)。所有看不见的特征都是相似的,特征 class
可以取 0 或 1 值。
注入作业持续大约 5 分钟以导入(理想情况下)3000 行,但它结束时没有错误并且没有导入任何值。
使用 CSV 导入值/使用 ImportFeatureValuesRequest 时的 VERTEX AI 推荐
使用此功能时,您可能最终无法导入任何数据。您必须注意您使用的时间字段,因为它必须符合 google 时间格式。
- feature_time_field,必须遵循google设定的时间约束规则,即RFC3339,即:'2021-04-15T08:28:14Z'。您可以查看字段 here and details about timestamp format can be found here.
的详细信息
- 其他列、字段必须匹配设计值。一个例外是字段 entity_id_field,因为它可以是任何值。
注意:我的测试发现,如果我没有将时间字段正确设置为 google 推荐的日期格式,它根本不会上传任何特征值。
test.csv
cc_id,time,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v23,v24,v25,v26,v27,v28,amount
100,2021-04-15T08:28:14Z,-1.359807,-0.072781,2.534897,1.872351,2.596267,0.465238,0.923123,0.347986,0.987354,1.234657,2.128645,1.958237,0.876123,-1.712984,-0.876436,1.74699,-1.645877,-0.936121,1.456327,0.087623,1.900872,2.876234,1.874123,0.923451,0.123432,0.000012,1.212121,0.010203,1000
输出:
imported_entity_count: 1
imported_feature_value_count: 29
关于优化和使用功能
您可以查看官方文档 here 以查看建议处理的记录的最小和最大数量。作为一条建议,您应该只使用运行时实际用到的特征,并遵循其推荐值。
查看您正在运行的摄取作业
如果您使用 Vertex UI 或代码来生成摄取作业,您可以在 UI 中进入以下路径来跟踪其运行情况:
VertexAI > Features > View Ingested Jobs
我是 Google Cloud Platform 的新手,我正在尝试创建一个特征存储来填充来自 Google Cloud Storage 的 csv 文件中的值。目的是通过 Python 中的本地笔记本执行此操作。 我基本上遵循代码 here,进行适当的更改,因为我正在使用信用卡 public 数据集。 当我 运行 代码时引发的错误如下:
GoogleAPICallError: None Unexpected state: Long-running operation had neither response nor error set.
它发生在从 csv 文件中提取数据的过程中。
这是我正在处理的代码:
import os
from datetime import datetime
from google.cloud import bigquery
from google.cloud import aiplatform
from google.cloud.aiplatform_v1.types import feature as feature_pb2
from google.cloud.aiplatform_v1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1.types import \
featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1.types import FeatureSelector, IdMatcher
# Point Application Default Credentials at a service-account key file; all
# Google Cloud clients constructed below authenticate via this variable.
credential_path = r"C:\Users\...\.json"
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credential_path
## Constants
PROJECT_ID = "my-project-ID"
REGION = "us-central1"
# Regional Vertex AI endpoint; must match REGION.
API_ENDPOINT = "us-central1-aiplatform.googleapis.com"
INPUT_CSV_FILE = "my-input-file.csv"
FEATURESTORE_ID = "fraud_detection"
## Output dataset
DESTINATION_DATA_SET = "fraud_predictions"
# Suffix the dataset name with a run timestamp so every run writes to a
# fresh BigQuery dataset (see the note below: BatchReadFeatureValues
# cannot overwrite an existing table).
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DESTINATION_DATA_SET = "{prefix}_{timestamp}".format(
    prefix=DESTINATION_DATA_SET, timestamp=TIMESTAMP
)
## Output table. Make sure that the table does NOT already exist;
## the BatchReadFeatureValues API cannot overwrite an existing table
DESTINATION_TABLE_NAME = "training_data"
DESTINATION_PATTERN = "bq://{project}.{dataset}.{table}"
DESTINATION_TABLE_URI = DESTINATION_PATTERN.format(
    project=PROJECT_ID, dataset=DESTINATION_DATA_SET,
    table=DESTINATION_TABLE_NAME
)
## Create dataset
client = bigquery.Client(project=PROJECT_ID)
dataset_id = "{}.{}".format(client.project, DESTINATION_DATA_SET)
dataset = bigquery.Dataset(dataset_id)
dataset.location = REGION
dataset = client.create_dataset(dataset)
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
## Create client for CRUD and data_client for reading feature values.
# NOTE(review): `client` is rebound here from the BigQuery client to the
# Featurestore client; distinct names would avoid confusion.
client = aiplatform.gapic.FeaturestoreServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})
data_client = aiplatform.gapic.FeaturestoreOnlineServingServiceClient(
    client_options={"api_endpoint": API_ENDPOINT})
# "projects/{project}/locations/{region}" — parent path for create calls.
BASE_RESOURCE_PATH = client.common_location_path(PROJECT_ID, REGION)
## Create featurestore (only the first time)
# create_featurestore returns a long-running operation (LRO); the store is
# not usable until the LRO completes.
create_lro = client.create_featurestore(
    featurestore_service_pb2.CreateFeaturestoreRequest(
        parent=BASE_RESOURCE_PATH,
        featurestore_id=FEATURESTORE_ID,
        featurestore=featurestore_pb2.Featurestore(
            online_serving_config=featurestore_pb2.Featurestore.OnlineServingConfig(
                # A single online-serving node keeps cost minimal.
                fixed_node_count=1
            ),
        ),
    )
)
## Wait for LRO to finish and get the LRO result.
print(create_lro.result())
# Sanity check: fetch the featurestore that was just created.
client.get_featurestore(
    name=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID)
)
## Create credit card entity type (only the first time)
cc_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        entity_type_id="creditcards",
        entity_type=entity_type_pb2.EntityType(
            description="Credit card entity",
        ),
    )
)
## Create fraud entity type (only the first time)
fraud_entity_type_lro = client.create_entity_type(
    featurestore_service_pb2.CreateEntityTypeRequest(
        parent=client.featurestore_path(PROJECT_ID, REGION, FEATURESTORE_ID),
        entity_type_id="frauds",
        entity_type=entity_type_pb2.EntityType(
            description="Fraud entity",
        ),
    )
)
# Block until both entity types actually exist. The original code never
# awaited these long-running operations, so the batch_create_features calls
# further down could race entity-type creation and fail.
cc_entity_type_lro.result()
fraud_entity_type_lro.result()
## Create features for credit card type (only the first time)
# Every credit-card feature shares the same spec (DOUBLE, empty
# description), so build the 29 requests — v1..v28 plus "amount" — in a
# comprehension instead of 29 copy-pasted CreateFeatureRequest literals.
cc_feature_ids = ["v{}".format(i) for i in range(1, 29)] + ["amount"]
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    requests=[
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE, description="",
            ),
            feature_id=feature_id,
        )
        for feature_id in cc_feature_ids
    ],
).result()
## Create features for fraud type (only the first time)
# Single feature "class" (per the surrounding text, the fraud label takes
# values 0 or 1), stored as DOUBLE to match the other features.
client.batch_create_features(
    parent=client.entity_type_path(PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    requests=[
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE, description="",
            ),
            feature_id="class",
        ),
    ],
).result()
## Import features values for credit cards
# The feature ids must match the features created above, so generate them
# with the same v1..v28 + "amount" loop instead of 29 copy-pasted
# FeatureSpec literals.
import_cc_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "creditcards"),
    csv_source=aiplatform.gapic.CsvSource(gcs_source=aiplatform.gapic.GcsSource(
        uris=["gs://fraud-detection-19102021/dataset/cc_details_train.csv"])),
    # Column holding the entity id; its values may be arbitrary.
    entity_id_field="cc_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
        for feature_id in ["v{}".format(i) for i in range(1, 29)] + ["amount"]
    ],
    # The CSV's "time" column must contain RFC 3339 timestamps
    # (e.g. 2021-04-15T08:28:14Z); otherwise the ingestion job finishes
    # "successfully" but imports zero feature values.
    feature_time_field='time',
    worker_count=1,
)
## Start to import
ingestion_lro = client.import_feature_values(import_cc_request)
## Polls for the LRO status and prints when the LRO has completed
ingestion_lro.result()
## Import features values for frauds
import_fraud_request = aiplatform.gapic.ImportFeatureValuesRequest(
    entity_type=client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "frauds"),
    csv_source=aiplatform.gapic.CsvSource(gcs_source=aiplatform.gapic.GcsSource(
        uris=["gs://fraud-detection-19102021/dataset/data_fraud_train.csv"])),
    # Column holding the entity id; its values may be arbitrary.
    entity_id_field="fraud_id",
    feature_specs=[
        aiplatform.gapic.ImportFeatureValuesRequest.FeatureSpec(id="class"),
    ],
    # The CSV's "time" column must contain RFC 3339 timestamps
    # (e.g. 2021-04-15T08:28:14Z); otherwise the ingestion job finishes
    # "successfully" but imports zero feature values.
    feature_time_field='time',
    worker_count=1,
)
## Start to import
ingestion_lro = client.import_feature_values(import_fraud_request)
## Polls for the LRO status and prints when the LRO has completed
ingestion_lro.result()
当我从 Google Cloud Console 的 Feature
部分检查 Ingestion Jobs
时,我看到作业已完成,但没有值添加到我的功能中。
任何建议都非常宝贵。
谢谢大家
编辑 1
在下图中,有一个我用作输入的 csv 文件第一行的示例 (cc_details_train.csv
)。所有看不见的特征都是相似的,特征 class
可以取 0 或 1 值。
注入作业持续大约 5 分钟以导入(理想情况下)3000 行,但它结束时没有错误并且没有导入任何值。
使用 CSV 导入值/使用 ImportFeatureValuesRequest 时的 VERTEX AI 推荐
使用此功能时,您可能最终无法导入任何数据。您必须注意您使用的时间字段,因为它必须符合 google 时间格式。
- feature_time_field,必须遵循google设定的时间约束规则,即RFC3339,即:'2021-04-15T08:28:14Z'。您可以查看字段 here and details about timestamp format can be found here. 的详细信息
- 其他列、字段必须匹配设计值。一个例外是字段 entity_id_field,因为它可以是任何值。
注意:我的测试发现,如果我没有将时间字段正确设置为 google 推荐的日期格式,它根本不会上传任何特征值。
test.csv
cc_id,time,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v23,v24,v25,v26,v27,v28,amount
100,2021-04-15T08:28:14Z,-1.359807,-0.072781,2.534897,1.872351,2.596267,0.465238,0.923123,0.347986,0.987354,1.234657,2.128645,1.958237,0.876123,-1.712984,-0.876436,1.74699,-1.645877,-0.936121,1.456327,0.087623,1.900872,2.876234,1.874123,0.923451,0.123432,0.000012,1.212121,0.010203,1000
输出:
imported_entity_count: 1
imported_feature_value_count: 29
关于优化和使用功能
您可以查看官方文档 here 以查看建议处理的记录的最小和最大数量。作为一条建议,您应该只使用运行时实际用到的特征,并遵循其推荐值。
查看您正在运行的摄取作业
如果您使用 Vertex UI 或代码来生成摄取作业,您可以在 UI 中进入以下路径来跟踪其运行情况:
VertexAI > Features > View Ingested Jobs