无法创建 PMML,因为未指定输入特征的数量

PMML can not be created because the number of input features is not specified

我无法将以下管道转换为 pmml,因为 "the number of input features is not specified"。


import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn2pmml import sklearn2pmml
from sklearn2pmml.pipeline import PMMLPipeline

if __name__ == '__main__':

    # Toy dataset: one numeric feature, one categorical feature and a target 'y'.
    data_dict = {
        'age': [1, 2, 3],
        'day_of_week': ['monday', 'tuesday', 'wednesday'],
        'y': [5, 6, 7],
    }

    data = pd.DataFrame(data_dict, columns=list(data_dict))

    # Numeric columns are standardized.
    numeric_features = ['age']
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())])

    # Categorical columns are one-hot encoded; unseen categories are ignored.
    categorical_features = ['day_of_week']
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore', categories='auto'))])

    # Route each group of columns to its own transformer.
    preprocessor = ColumnTransformer(
        transformers=[
            ('numerical', numeric_transformer, numeric_features),
            ('categorical', categorical_transformer, categorical_features)])

    pipeline = Pipeline(
        steps=[
            ('preprocessor', preprocessor),
            ('classifier', RandomForestRegressor(n_estimators=60))])

    # Split features from the target column.
    X = data.drop(labels=['y'], axis=1)
    y = data['y']

    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=30)

    trained_model = pipeline.fit(X=X_train, y=y_train)

    # NOTE: the fitted sklearn Pipeline is wrapped as the *single step* of a
    # PMMLPipeline here. This is the construct the question is about: the
    # converter reports that the first step (a sklearn.pipeline.Pipeline) does
    # not specify the number of input features (see the error output below).
    pmml_pipeline = PMMLPipeline([
        ("pipeline", pipeline)
    ])

    sklearn2pmml(pipeline=pmml_pipeline, pmml='RandomForestRegressor2.pmml', with_repr=True)

我从 sklearn2pmml 得到的 Java 错误信息是:

Standard output is empty
Standard error:
May 25, 2020 9:37:56 AM org.jpmml.sklearn.Main run
INFO: Parsing PKL..
May 25, 2020 9:38:07 AM org.jpmml.sklearn.Main run
INFO: Parsed PKL in 11453 ms.
May 25, 2020 9:38:07 AM org.jpmml.sklearn.Main run
INFO: Converting..
May 25, 2020 9:38:07 AM sklearn2pmml.pipeline.PMMLPipeline initTargetFields
WARNING: Attribute 'sklearn2pmml.pipeline.PMMLPipeline.target_fields' is not set. Assuming y as the name of the target field
May 25, 2020 9:38:07 AM org.jpmml.sklearn.Main run
SEVERE: Failed to convert
java.lang.IllegalArgumentException: The transformer object of the first step (Python class sklearn.pipeline.Pipeline) does not specify the number of input features
    at sklearn2pmml.pipeline.PMMLPipeline.initActiveFields(PMMLPipeline.java:522)
    at sklearn2pmml.pipeline.PMMLPipeline.encodePMML(PMMLPipeline.java:214)
    at org.jpmml.sklearn.Main.run(Main.java:228)
    at org.jpmml.sklearn.Main.main(Main.java:148)

Exception in thread "main" java.lang.IllegalArgumentException: The transformer object of the first step (Python class sklearn.pipeline.Pipeline) does not specify the number of input features
    at sklearn2pmml.pipeline.PMMLPipeline.initActiveFields(PMMLPipeline.java:522)
    at sklearn2pmml.pipeline.PMMLPipeline.encodePMML(PMMLPipeline.java:214)
    at org.jpmml.sklearn.Main.run(Main.java:228)
    at org.jpmml.sklearn.Main.main(Main.java:148)

Process finished with exit code 1

基于 sklearn.pipeline.Pipeline 创建单步 sklearn2pmml.pipeline.PMMLPipeline 有什么意义?应当直接用 PMMLPipeline 代替 Pipeline 来组织各个步骤,然后对它进行拟合并导出:


# Build the PMMLPipeline directly from the individual steps instead of
# wrapping an existing sklearn Pipeline inside a one-step PMMLPipeline.
# `preprocessor`, `X_train` and `y_train` come from the question's script.
pipeline = PMMLPipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestRegressor(n_estimators=60))])
# Fit the PMMLPipeline itself, then convert it.
pipeline.fit(X=X_train, y=y_train)
sklearn2pmml(pipeline, "RandomForestRegressor2.pmml")