是否可以将转换代码添加到 pmml?
Is it possible to add transformation code to pmml?
我运行一个线性回归代码并生成了一个pmml。在我的 pmml 代码中有数据类型和回归 table 信息。我有这样的 pmml 代码
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<PMML version="4.2"
xmlns="http://www.dmg.org/PMML-4_2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Header>
<Application name="SAS(r)" version="9.4"/>
<Timestamp>2016-05-27 09:58:01</Timestamp>
</Header>
<DataDictionary numberOfFields="10">
<DataField name="Invoice" optype="continuous" dataType="double"/>
<DataField name="ZINT_Cylinders" optype="continuous" dataType="double"/>
<DataField name="ZINT_EngineSize" optype="continuous" dataType="double"/>
</DataDictionary>
<TransformationDictionary/>
<RegressionModel functionName="regression" targetFieldName="Invoice" normalizationMethod="none">
<MiningSchema>
<MiningField name="ZINT_Cylinders" usageType="active" optype="continuous"/>
<MiningField name="ZINT_EngineSize" usageType="active" optype="continuous"/>
</MiningSchema>
<Output>
<OutputField name="P_Invoice" displayName="Predicted: Invoice" optype="continuous" dataType="double" targetField="Invoice" feature="predictedValue"/>
</Output>
<Targets>
<Target field="Invoice" optype="continuous">
<TargetValue defaultValue="30014.700935"/>
</Target>
</Targets>
<LocalTransformations/>
<RegressionTable intercept="-4919.70174">
<NumericPredictor name="ZINT_Cylinders" coefficient="-0.007378626"/>
<NumericPredictor name="ZINT_EngineSize" coefficient="-0.147331595"/>
</RegressionTable>
</RegressionModel>
</PMML>
在回归步骤之前,我想转换我的源数据。例如,我想将这些 case when structure
添加到我的 pmml 中。可以这样做吗?
SELECT Invoice,
CASE
WHEN EngineSize < 2.9 THEN 20304.5142857143
WHEN EngineSize < 4.1 THEN 30378.8789808917
WHEN EngineSize >= 4.1 THEN 47119.9791666667
ELSE 30014.7009345794
END AS ZINT_EngineSize,
CASE
WHEN Cylinders < 4.5 THEN 18349.4452554745
WHEN Cylinders < 7 THEN 29472.3819095477
WHEN Cylinders >= 7 THEN 48558.847826087
ELSE 30014.7009345794
END AS ZINT_Cylinders
FROM MYSOURCE.MYTABLE;
您可以使用 Discretize
transformation 的作业:
<DerivedField name="ZINT_EngineSize" dataType="double" optype="continuous">
<Discretize field="EngineSize">
<DiscretizeBin binValue="20304.5142857143">
<!-- EngineSize < 2.9 -->
<Interval closure="openOpen" rightMargin="2.9"/>
</DiscretizeBin>
<DiscretizeBin binValue="30378.8789808917">
<!-- EngineSize >= 2.9 && EngineSize < 4.1 -->
<Interval closure="closedOpen" leftMargin="2.9" rightMargin="4.1"/>
</DiscretizeBin>
<DiscretizeBin binValue="47119.9791666667">
<!-- EngineSize >= 4.1 -->
<Interval closure="closedOpen" leftMargin="4.1"/>
</DiscretizeBin>
</Discretize>
</DerivedField>
您可以为 SAS 脚本构建解析器,并使用 JPMML-Model library.
自动生成和嵌入 Discretize
元素
我运行一个线性回归代码并生成了一个pmml。在我的 pmml 代码中有数据类型和回归 table 信息。我有这样的 pmml 代码
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<PMML version="4.2"
xmlns="http://www.dmg.org/PMML-4_2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Header>
<Application name="SAS(r)" version="9.4"/>
<Timestamp>2016-05-27 09:58:01</Timestamp>
</Header>
<DataDictionary numberOfFields="10">
<DataField name="Invoice" optype="continuous" dataType="double"/>
<DataField name="ZINT_Cylinders" optype="continuous" dataType="double"/>
<DataField name="ZINT_EngineSize" optype="continuous" dataType="double"/>
</DataDictionary>
<TransformationDictionary/>
<RegressionModel functionName="regression" targetFieldName="Invoice" normalizationMethod="none">
<MiningSchema>
<MiningField name="ZINT_Cylinders" usageType="active" optype="continuous"/>
<MiningField name="ZINT_EngineSize" usageType="active" optype="continuous"/>
</MiningSchema>
<Output>
<OutputField name="P_Invoice" displayName="Predicted: Invoice" optype="continuous" dataType="double" targetField="Invoice" feature="predictedValue"/>
</Output>
<Targets>
<Target field="Invoice" optype="continuous">
<TargetValue defaultValue="30014.700935"/>
</Target>
</Targets>
<LocalTransformations/>
<RegressionTable intercept="-4919.70174">
<NumericPredictor name="ZINT_Cylinders" coefficient="-0.007378626"/>
<NumericPredictor name="ZINT_EngineSize" coefficient="-0.147331595"/>
</RegressionTable>
</RegressionModel>
</PMML>
在回归步骤之前,我想转换我的源数据。例如,我想将这些 case when structure
添加到我的 pmml 中。可以这样做吗?
SELECT Invoice,
CASE
WHEN EngineSize < 2.9 THEN 20304.5142857143
WHEN EngineSize < 4.1 THEN 30378.8789808917
WHEN EngineSize >= 4.1 THEN 47119.9791666667
ELSE 30014.7009345794
END AS ZINT_EngineSize,
CASE
WHEN Cylinders < 4.5 THEN 18349.4452554745
WHEN Cylinders < 7 THEN 29472.3819095477
WHEN Cylinders >= 7 THEN 48558.847826087
ELSE 30014.7009345794
END AS ZINT_Cylinders
FROM MYSOURCE.MYTABLE;
您可以使用 Discretize
transformation 的作业:
<DerivedField name="ZINT_EngineSize" dataType="double" optype="continuous">
<Discretize field="EngineSize">
<DiscretizeBin binValue="20304.5142857143">
<!-- EngineSize < 2.9 -->
<Interval closure="openOpen" rightMargin="2.9"/>
</DiscretizeBin>
<DiscretizeBin binValue="30378.8789808917">
<!-- EngineSize >= 2.9 && EngineSize < 4.1 -->
<Interval closure="closedOpen" leftMargin="2.9" rightMargin="4.1"/>
</DiscretizeBin>
<DiscretizeBin binValue="47119.9791666667">
<!-- EngineSize >= 4.1 -->
<Interval closure="closedOpen" leftMargin="4.1"/>
</DiscretizeBin>
</Discretize>
</DerivedField>
您可以为 SAS 脚本构建解析器,并使用 JPMML-Model library.
自动生成和嵌入Discretize
元素