Azure ML 推理架构 - "List index out of range" 错误
Azure ML Inference Schema - "List index out of range" error
我在 Azure ML Studio 上部署了一个 ML 模型,我正在使用推理架构更新它以允许与 Power BI 兼容,如 here 所述。
通过 REST API 向模型发送数据时(在添加此推理架构之前),一切正常,我能得到返回结果。但是,按照上面链接的说明添加架构并针对我的数据做了调整之后,通过 REST API 发送相同的数据只会返回错误“列表索引超出范围”(List index out of range)。部署过程很顺利,状态显示为“健康”(Healthy),没有任何错误消息。
如有任何帮助,我们将不胜感激。谢谢
编辑:
入口脚本:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
def init():
    """Load the registered model into the module-level ``model``.

    Looks up the on-disk path of the model registered in the workspace as
    'databricksmodelpowerbi2' and deserializes it with joblib.
    """
    global model
    # The name must match the model's registration name in the workspace.
    registered_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(registered_path)
# Three sample inputs used for schema generation; the array and the frame
# each carry two rows of data.
numpy_sample_input = NumpyParameterType(
    np.array(
        [
            [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
            [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
        ],
        dtype='float64',
    )
)
pandas_sample_input = PandasParameterType(
    pd.DataFrame(
        {
            '1': [2400.0, 368.55],
            '2': [78.26086956521739, 96.88311688311687],
            '3': [11100.0, 709681.1600000012],
            '4': [3.612565445026178, 73.88059701492537],
            '5': [3.0, 44.0],
            '6': [0.0, 0.0],
        }
    )
)
standard_sample_input = StandardPythonParameterType(0.0)

# Nested input sample: any item wrapped by a `ParameterType` is described
# by the generated schema.
sample_input = StandardPythonParameterType(
    {
        'input1': numpy_sample_input,
        'input2': pandas_sample_input,
        'input3': standard_sample_input,
    }
)
sample_global_parameters = StandardPythonParameterType(1.0)  # optional
sample_output = StandardPythonParameterType([1.0, 1.0])
@input_schema('inputs', sample_input)
@input_schema('global_parameters', sample_global_parameters)  # optional
@output_schema(sample_output)
def run(inputs, global_parameters):
    """Score the 'input1' entry of ``inputs`` with the loaded model.

    Returns the predictions converted to a plain list. If anything in the
    body raises, the exception's message is returned as a string instead.
    ``global_parameters`` is accepted but not used.
    """
    try:
        features = inputs['input1']
        # The value is expected to have been converted to an ndarray already.
        assert isinstance(features, np.ndarray)
        return model.predict(features).tolist()
    except Exception as exc:
        return str(exc)
预测脚本:
import requests
import json
from ast import literal_eval
# Client script: POST two rows to the scoring endpoint and decode the reply.

# URL for the web service (must be filled in before running).
scoring_uri = ''

# If the service is authenticated, set the key or token.
# key = '<your key or token>'

# Two sets of data to score, so we get two results back.
# NOTE(review): the entry script shown earlier declares its schema under a
# top-level 'inputs' name, while this payload is keyed 'data' -- confirm the
# request shape the deployed service expects.
data = {
    "data": [
        [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
        [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
    ]
}

# Convert to JSON string.
input_data = json.dumps(data)

# Set the content type.
headers = {'Content-Type': 'application/json'}

# If authentication is enabled, set the authorization header.
# headers['Authorization'] = f'Bearer {key}'

# Make the request and display the raw response.
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.text)

# The reply is expected to be JSON, so decode it with the JSON parser; a
# Python-literal parser rejects JSON-only tokens such as true/false/null.
result = json.loads(resp.text)
我不确定你是否已经弄明白了,但我遇到了类似的问题,我无法让 Power BI 查看我的 ML 模型。最后,我使用以下架构专门为 Power BI(pandas df 类型)创建了一个服务:
import json
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
import pickle
import azureml.train.automl
# Called when the service is loaded
def init():
    """Load 'model.pkl' from the deployment's model directory.

    AZUREML_MODEL_DIR is an environment variable created during deployment.
    It holds the path to the directory that contains the deployed model
    (./azureml-models/$MODEL_NAME/$VERSION). If there are multiple models,
    it is the path to the directory containing all deployed models
    (./azureml-models).
    """
    global model
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    # Deserialize the model file back into a sklearn model.
    model = joblib.load(os.path.join(model_dir, 'model.pkl'))
# Sample request frame used to generate the input schema (two rows).
input_sample = PandasParameterType(pd.DataFrame({
    'input1': [0.0, 20.0],
    'input2': [0.0, 20.0],
    # This entry was a second 'input2' key, which a dict literal silently
    # collapses into a single column. Presumably a third column was
    # intended -- confirm the column names against the model's features.
    'input3': [0.0, 20.0],
}))
# Sample response frame used to generate the output schema.
output_sample = PandasParameterType(pd.DataFrame([0.8, 0.2]))
# Called when a request is received
@input_schema('data', input_sample)
@output_schema(output_sample)
def run(data):
    """Predict on ``data`` with the loaded model and return a list.

    If prediction raises, the exception's message is returned as a string
    instead of the predictions.
    """
    try:
        # Any data type may be returned, as long as it is JSON serializable.
        return model.predict(data).tolist()
    except Exception as exc:
        return str(exc)
Microsoft 文档(documentation)指出:“为了生成符合标准的 swagger 以供 Web 服务自动使用,评分脚本的 run() 函数必须具有以下 API 形状:
A first parameter of type "StandardPythonParameterType", named
Inputs and nested.
An optional second parameter of type "StandardPythonParameterType",
named GlobalParameters.
Return a dictionary of type "StandardPythonParameterType" named
Results and nested."
我已经测试过了,它区分大小写
所以它会是这样的:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
def init():
    """Load the registered model into the module-level ``model``.

    Looks up the on-disk path of the model registered in the workspace as
    'databricksmodelpowerbi2' and deserializes it with joblib.
    """
    global model
    # The name must match the model's registration name in the workspace.
    registered_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(registered_path)
# Three sample inputs used for schema generation; the array and the frame
# each carry two rows of data.
numpy_sample_input = NumpyParameterType(
    np.array(
        [
            [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
            [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
        ],
        dtype='float64',
    )
)
pandas_sample_input = PandasParameterType(
    pd.DataFrame(
        {
            'value': [2400.0, 368.55],
            'delayed_percent': [78.26086956521739, 96.88311688311687],
            'total_value_delayed': [11100.0, 709681.1600000012],
            'num_invoices_per30_dealing_days': [3.612565445026178, 73.88059701492537],
            'delayed_streak': [3.0, 44.0],
            'prompt_streak': [0.0, 0.0],
        }
    )
)
standard_sample_input = StandardPythonParameterType(0.0)

# Nested input sample: any item wrapped by a `ParameterType` is described
# by the generated schema.
sample_input = StandardPythonParameterType(
    {
        'input1': numpy_sample_input,
        'input2': pandas_sample_input,
        'input3': standard_sample_input,
    }
)
sample_global_parameters = StandardPythonParameterType(1.0)  # optional
numpy_sample_output = NumpyParameterType(np.array([1.0, 2.0]))
# 'Results' is case sensitive
sample_output = StandardPythonParameterType({'Results': numpy_sample_output})
# 'Inputs' is case sensitive
@input_schema('Inputs', sample_input)
# NOTE(review): the documentation quoted with this script names the optional
# second parameter 'GlobalParameters'; confirm whether 'global_parameters'
# is accepted by the consumer.
@input_schema('global_parameters', sample_global_parameters)  # optional
@output_schema(sample_output)
def run(Inputs, global_parameters):
    """Score the 'input1' entry of ``Inputs`` with the loaded model.

    Args:
        Inputs: Nested request object; only its 'input1' entry is read.
        global_parameters: Accepted but not used.

    Returns:
        The predictions converted to a plain list, or the exception's
        message as a string if anything in the body raises.
    """
    try:
        # The parameter is spelled 'Inputs'; reading a lowercase 'inputs'
        # here raised NameError on every call.
        data = Inputs['input1']
        # The value is expected to have been converted to an ndarray already.
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        # NOTE(review): the declared output sample is a dict keyed 'Results'
        # while a bare list is returned -- confirm which shape is expected.
        return result.tolist()
    except Exception as e:
        return str(e)
`
我在 Azure ML Studio 上部署了一个 ML 模型,我正在使用推理架构更新它以允许与 Power BI 兼容,如 here 所述。
通过 REST API 向模型发送数据时(在添加此推理架构之前),一切正常,我能得到返回结果。但是,按照上面链接的说明添加架构并针对我的数据做了调整之后,通过 REST API 发送相同的数据只会返回错误“列表索引超出范围”(List index out of range)。部署过程很顺利,状态显示为“健康”(Healthy),没有任何错误消息。
如有任何帮助,我们将不胜感激。谢谢
编辑:
入口脚本:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
def init():
    """Load the registered model into the module-level ``model``.

    Looks up the on-disk path of the model registered in the workspace as
    'databricksmodelpowerbi2' and deserializes it with joblib.
    """
    global model
    # The name must match the model's registration name in the workspace.
    registered_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(registered_path)
# Three sample inputs used for schema generation; the array and the frame
# each carry two rows of data.
numpy_sample_input = NumpyParameterType(
    np.array(
        [
            [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
            [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
        ],
        dtype='float64',
    )
)
pandas_sample_input = PandasParameterType(
    pd.DataFrame(
        {
            '1': [2400.0, 368.55],
            '2': [78.26086956521739, 96.88311688311687],
            '3': [11100.0, 709681.1600000012],
            '4': [3.612565445026178, 73.88059701492537],
            '5': [3.0, 44.0],
            '6': [0.0, 0.0],
        }
    )
)
standard_sample_input = StandardPythonParameterType(0.0)

# Nested input sample: any item wrapped by a `ParameterType` is described
# by the generated schema.
sample_input = StandardPythonParameterType(
    {
        'input1': numpy_sample_input,
        'input2': pandas_sample_input,
        'input3': standard_sample_input,
    }
)
sample_global_parameters = StandardPythonParameterType(1.0)  # optional
sample_output = StandardPythonParameterType([1.0, 1.0])
@input_schema('inputs', sample_input)
@input_schema('global_parameters', sample_global_parameters)  # optional
@output_schema(sample_output)
def run(inputs, global_parameters):
    """Score the 'input1' entry of ``inputs`` with the loaded model.

    Returns the predictions converted to a plain list. If anything in the
    body raises, the exception's message is returned as a string instead.
    ``global_parameters`` is accepted but not used.
    """
    try:
        features = inputs['input1']
        # The value is expected to have been converted to an ndarray already.
        assert isinstance(features, np.ndarray)
        return model.predict(features).tolist()
    except Exception as exc:
        return str(exc)
预测脚本:
import requests
import json
from ast import literal_eval
# Client script: POST two rows to the scoring endpoint and decode the reply.

# URL for the web service (must be filled in before running).
scoring_uri = ''

# If the service is authenticated, set the key or token.
# key = '<your key or token>'

# Two sets of data to score, so we get two results back.
# NOTE(review): the entry script shown earlier declares its schema under a
# top-level 'inputs' name, while this payload is keyed 'data' -- confirm the
# request shape the deployed service expects.
data = {
    "data": [
        [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
        [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
    ]
}

# Convert to JSON string.
input_data = json.dumps(data)

# Set the content type.
headers = {'Content-Type': 'application/json'}

# If authentication is enabled, set the authorization header.
# headers['Authorization'] = f'Bearer {key}'

# Make the request and display the raw response.
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.text)

# The reply is expected to be JSON, so decode it with the JSON parser; a
# Python-literal parser rejects JSON-only tokens such as true/false/null.
result = json.loads(resp.text)
我不确定你是否已经弄明白了,但我遇到了类似的问题,我无法让 Power BI 查看我的 ML 模型。最后,我使用以下架构专门为 Power BI(pandas df 类型)创建了一个服务:
import json
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
import pickle
import azureml.train.automl
# Called when the service is loaded
def init():
    """Load 'model.pkl' from the deployment's model directory.

    AZUREML_MODEL_DIR is an environment variable created during deployment.
    It holds the path to the directory that contains the deployed model
    (./azureml-models/$MODEL_NAME/$VERSION). If there are multiple models,
    it is the path to the directory containing all deployed models
    (./azureml-models).
    """
    global model
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    # Deserialize the model file back into a sklearn model.
    model = joblib.load(os.path.join(model_dir, 'model.pkl'))
# Sample request frame used to generate the input schema (two rows).
input_sample = PandasParameterType(pd.DataFrame({
    'input1': [0.0, 20.0],
    'input2': [0.0, 20.0],
    # This entry was a second 'input2' key, which a dict literal silently
    # collapses into a single column. Presumably a third column was
    # intended -- confirm the column names against the model's features.
    'input3': [0.0, 20.0],
}))
# Sample response frame used to generate the output schema.
output_sample = PandasParameterType(pd.DataFrame([0.8, 0.2]))
# Called when a request is received
@input_schema('data', input_sample)
@output_schema(output_sample)
def run(data):
    """Predict on ``data`` with the loaded model and return a list.

    If prediction raises, the exception's message is returned as a string
    instead of the predictions.
    """
    try:
        # Any data type may be returned, as long as it is JSON serializable.
        return model.predict(data).tolist()
    except Exception as exc:
        return str(exc)
Microsoft 文档(documentation)指出:“为了生成符合标准的 swagger 以供 Web 服务自动使用,评分脚本的 run() 函数必须具有以下 API 形状:
A first parameter of type "StandardPythonParameterType", named Inputs and nested.
An optional second parameter of type "StandardPythonParameterType", named GlobalParameters.
Return a dictionary of type "StandardPythonParameterType" named Results and nested."
我已经测试过了,它区分大小写 所以它会是这样的:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
def init():
    """Load the registered model into the module-level ``model``.

    Looks up the on-disk path of the model registered in the workspace as
    'databricksmodelpowerbi2' and deserializes it with joblib.
    """
    global model
    # The name must match the model's registration name in the workspace.
    registered_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(registered_path)
# Three sample inputs used for schema generation; the array and the frame
# each carry two rows of data.
numpy_sample_input = NumpyParameterType(
    np.array(
        [
            [2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
            [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0],
        ],
        dtype='float64',
    )
)
pandas_sample_input = PandasParameterType(
    pd.DataFrame(
        {
            'value': [2400.0, 368.55],
            'delayed_percent': [78.26086956521739, 96.88311688311687],
            'total_value_delayed': [11100.0, 709681.1600000012],
            'num_invoices_per30_dealing_days': [3.612565445026178, 73.88059701492537],
            'delayed_streak': [3.0, 44.0],
            'prompt_streak': [0.0, 0.0],
        }
    )
)
standard_sample_input = StandardPythonParameterType(0.0)

# Nested input sample: any item wrapped by a `ParameterType` is described
# by the generated schema.
sample_input = StandardPythonParameterType(
    {
        'input1': numpy_sample_input,
        'input2': pandas_sample_input,
        'input3': standard_sample_input,
    }
)
sample_global_parameters = StandardPythonParameterType(1.0)  # optional
numpy_sample_output = NumpyParameterType(np.array([1.0, 2.0]))
# 'Results' is case sensitive
sample_output = StandardPythonParameterType({'Results': numpy_sample_output})
# 'Inputs' is case sensitive
@input_schema('Inputs', sample_input)
# NOTE(review): the documentation quoted with this script names the optional
# second parameter 'GlobalParameters'; confirm whether 'global_parameters'
# is accepted by the consumer.
@input_schema('global_parameters', sample_global_parameters)  # optional
@output_schema(sample_output)
def run(Inputs, global_parameters):
    """Score the 'input1' entry of ``Inputs`` with the loaded model.

    Args:
        Inputs: Nested request object; only its 'input1' entry is read.
        global_parameters: Accepted but not used.

    Returns:
        The predictions converted to a plain list, or the exception's
        message as a string if anything in the body raises.
    """
    try:
        # The parameter is spelled 'Inputs'; reading a lowercase 'inputs'
        # here raised NameError on every call.
        data = Inputs['input1']
        # The value is expected to have been converted to an ndarray already.
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        # NOTE(review): the declared output sample is a dict keyed 'Results'
        # while a bare list is returned -- confirm which shape is expected.
        return result.tolist()
    except Exception as e:
        return str(e)
`