Azure ML Inference Schema - "List index out of range" error

I deployed an ML model on Azure ML Studio, and I am updating it with an inference schema to make it compatible with Power BI, as described here.

When sending data to the model through the REST API (before adding this inference schema), everything worked fine and I got results back. However, once I add the schema following the instructions linked above and adapt it to my data, the same data sent through the REST API only returns the error "list index out of range". The deployment goes smoothly and is marked "Healthy", with no error messages.

Any help would be greatly appreciated. Thanks.

Edit:

Entry script:

 import numpy as np
 import pandas as pd
 import joblib
 from azureml.core.model import Model
    
 from inference_schema.schema_decorators import input_schema, output_schema
 from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
 from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
 from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
    
 def init():
     global model
     #Model name is the name of the model registered under the workspace
     model_path = Model.get_model_path(model_name = 'databricksmodelpowerbi2')
     model = joblib.load(model_path)
    
 #Provide 3 sample inputs for schema generation for 2 rows of data
 numpy_sample_input = NumpyParameterType(np.array([[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]], dtype = 'float64'))
 pandas_sample_input = PandasParameterType(pd.DataFrame({'1': [2400.0, 368.55], '2': [78.26086956521739, 96.88311688311687], '3': [11100.0, 709681.1600000012], '4': [3.612565445026178, 73.88059701492537], '5': [3.0, 44.0], '6': [0.0, 0.0]}))
 standard_sample_input = StandardPythonParameterType(0.0)
    
 # This is a nested input sample, any item wrapped by `ParameterType` will be described by schema
 sample_input = StandardPythonParameterType({'input1': numpy_sample_input, 
                                             'input2': pandas_sample_input, 
                                             'input3': standard_sample_input})
    
 sample_global_parameters = StandardPythonParameterType(1.0) #this is optional
 sample_output = StandardPythonParameterType([1.0, 1.0])
    
 @input_schema('inputs', sample_input)
 @input_schema('global_parameters', sample_global_parameters) #this is optional
 @output_schema(sample_output)
    
 def run(inputs, global_parameters):
     try:
         data = inputs['input1']
         # data will be converted to the target format
         assert isinstance(data, np.ndarray)
         result = model.predict(data)
         return result.tolist()
     except Exception as e:
         error = str(e)
         return error

Prediction script:

 import requests
 import json
 from ast import literal_eval
    
 # URL for the web service
 scoring_uri = ''
 ## If the service is authenticated, set the key or token
 #key = '<your key or token>'
    
 # Two sets of data to score, so we get two results back
 data = {"data": [[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]]}
 # Convert to JSON string
 input_data = json.dumps(data)
    
 # Set the content type
 headers = {'Content-Type': 'application/json'}
 ## If authentication is enabled, set the authorization header
 #headers['Authorization'] = f'Bearer {key}'
    
 # Make the request and display the response
 resp = requests.post(scoring_uri, input_data, headers=headers)
 print(resp.text)
    
 result = literal_eval(resp.text)
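
Note that the prediction script above sends a flat "data" key, while the entry script's schema registers its input under the name 'inputs' with nested keys; that mismatch is a plausible source of the "list index out of range" error. A minimal sketch of a payload that would match the entry script's schema (values copied from its sample inputs; the layout for 'input2' assumes PandasParameterType's default orient='records'):

 import json
    
 # Hypothetical nested payload matching @input_schema('inputs', sample_input) above
 payload = {
     "inputs": {
         # NumpyParameterType field actually used by run()
         "input1": [[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
                    [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]],
         # PandasParameterType defaults to orient='records': one JSON object per row
         "input2": [{"1": 2400.0, "2": 78.26086956521739, "3": 11100.0,
                     "4": 3.612565445026178, "5": 3.0, "6": 0.0},
                    {"1": 368.55, "2": 96.88311688311687, "3": 709681.1600000012,
                     "4": 73.88059701492537, "5": 44.0, "6": 0.0}],
         "input3": 0.0
     },
     "global_parameters": 1.0
 }
 input_data = json.dumps(payload)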

I'm not sure if you've figured this out already, but I ran into a similar problem where I couldn't get Power BI to see my ML model. In the end, I created a service specifically for Power BI (pandas DataFrame type) using the following schema:

import json
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestClassifier

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

import pickle
import azureml.train.automl


# Called when the service is loaded
def init():
    # AZUREML_MODEL_DIR is an environment variable created during deployment. Join this path with the filename of the model file.
    # It holds the path to the directory that contains the deployed model (./azureml-models/$MODEL_NAME/$VERSION).
    # If there are multiple models, this value is the path to the directory containing all deployed models (./azureml-models).
    global model

    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')

    # Get the path to the deployed model file and load it
    # Deserialize the model file back into a sklearn model
    model = joblib.load(model_path)


input_sample = PandasParameterType(pd.DataFrame({
    'input1': [0.0, 20.0],
    'input2': [0.0, 20.0],
    'input3': [0.0, 20.0]
    }))


output_sample = PandasParameterType(pd.DataFrame([0.8, 0.2]))

# Called when a request is received
@input_schema('data', input_sample)
@output_schema(output_sample)
def run(data):
    try:
        result = model.predict(data)
        # You can return any data type, as long as it is JSON serializable.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
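
For context, a minimal sketch of how such a service would be called over REST, assuming the column names from the sample above (the scoring URI is a placeholder, and the records layout follows PandasParameterType's default orient='records'):

import json
import requests

scoring_uri = '<your scoring uri>'  # placeholder: the deployed service's endpoint
headers = {'Content-Type': 'application/json'}

# One JSON object per row, matching the 'data' parameter of run()
payload = {"data": [
    {"input1": 0.0, "input2": 0.0, "input3": 0.0},
    {"input1": 20.0, "input2": 20.0, "input3": 20.0}
]}

resp = requests.post(scoring_uri, json.dumps(payload), headers=headers)
print(resp.text)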

The Microsoft documentation says: "In order to generate conforming swagger for automated web service consumption, scoring script run() function must have API shape of:

A first parameter of type "StandardPythonParameterType", named Inputs and nested.

An optional second parameter of type "StandardPythonParameterType", named GlobalParameters.

Return a dictionary of type "StandardPythonParameterType" named Results and nested."

I've tested it, and it is case sensitive, so it would look like this:

import numpy as np
import pandas as pd
import joblib

from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

def init():
    global model
    # Model name is the name of the model registered under the workspace
    model_path = Model.get_model_path(model_name = 'databricksmodelpowerbi2')
    model = joblib.load(model_path)

# Provide 3 sample inputs for schema generation for 2 rows of data
numpy_sample_input = NumpyParameterType(np.array([[2400.0, 78.26086956521739, 11100.0, 
3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 
73.88059701492537, 44.0, 0.0]], dtype = 'float64'))

pandas_sample_input = PandasParameterType(pd.DataFrame({'value': [2400.0, 368.55], 
'delayed_percent': [78.26086956521739, 96.88311688311687], 'total_value_delayed': 
[11100.0, 709681.1600000012], 'num_invoices_per30_dealing_days': [3.612565445026178, 
73.88059701492537], 'delayed_streak': [3.0, 44.0], 'prompt_streak': [0.0, 0.0]}))

standard_sample_input = StandardPythonParameterType(0.0)

# This is a nested input sample; any item wrapped by `ParameterType` will be described by schema
sample_input = StandardPythonParameterType({'input1': numpy_sample_input, 
                                         'input2': pandas_sample_input, 
                                         'input3': standard_sample_input})

sample_global_parameters = StandardPythonParameterType(1.0) #this is optional

numpy_sample_output = NumpyParameterType(np.array([1.0, 2.0]))

# 'Results' is case sensitive
sample_output = StandardPythonParameterType({'Results': numpy_sample_output})

# 'Inputs' is case sensitive
@input_schema('Inputs', sample_input)
@input_schema('global_parameters', sample_global_parameters) #this is optional
@output_schema(sample_output)
def run(Inputs, global_parameters):
    try:
        # 'Inputs' must match the (case-sensitive) name registered in @input_schema
        data = Inputs['input1']
        # data will be converted to the target format
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        # Return a dictionary keyed by 'Results' to match the output schema above
        return {'Results': result.tolist()}
    except Exception as e:
        error = str(e)
        return error
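
With this shape, the request body itself presumably also needs the case-sensitive 'Inputs' key. A minimal sketch of a matching payload, with values copied from the sample inputs above:

import json

# Hypothetical request body for the schema above; note the capital 'Inputs'
payload = {
    "Inputs": {
        "input1": [[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
                   [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]],
        # PandasParameterType defaults to orient='records': one JSON object per row
        "input2": [{"value": 2400.0, "delayed_percent": 78.26086956521739,
                    "total_value_delayed": 11100.0,
                    "num_invoices_per30_dealing_days": 3.612565445026178,
                    "delayed_streak": 3.0, "prompt_streak": 0.0},
                   {"value": 368.55, "delayed_percent": 96.88311688311687,
                    "total_value_delayed": 709681.1600000012,
                    "num_invoices_per30_dealing_days": 73.88059701492537,
                    "delayed_streak": 44.0, "prompt_streak": 0.0}],
        "input3": 0.0
    },
    "global_parameters": 1.0
}
input_data = json.dumps(payload)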
