如何在使用 mlr 的情况下,用 pmml 包构建 XML 文件?
How to build XML file with package PMML using mlr?
我想使用 pmml 包,把用 mlr 包构建的逻辑回归模型直接转换成 XML 文件。问题是,mlr 包装器构建的 model$learner.model 的列表中不包含模型 link,而普通的 stats::glm 函数的结果中是包含的。下面是一个例子:
library(dplyr)
library(titanic)
library(pmml)
library(ParamHelpers)
library(mlr)
# Assemble the modelling frame: response plus three predictors.
Titanic_data <- select(titanic_train, Survived, Pclass, Sex, Age)

# Encode the categorical columns as factors.
Titanic_data[["Survived"]] <- as.factor(Titanic_data[["Survived"]])
Titanic_data[["Pclass"]] <- as.factor(Titanic_data[["Pclass"]])
Titanic_data[["Sex"]] <- as.factor(Titanic_data[["Sex"]])

# Drop rows that contain missing values.
Titanic_data <- na.omit(Titanic_data)

# Reference fit: binomial GLM straight from stats::glm.
model_glm <- glm(Survived ~ ., data = Titanic_data, family = "binomial")

# Same data trained through mlr's "classif.logreg" learner.
task <- makeClassifTask(
  data = Titanic_data,
  target = "Survived",
  positive = "1"
)
lrn <- makeLearner("classif.logreg", predict.type = "prob")
model <- train(lrn, task)

# Compare the structure of the two fitted objects.
str(model$learner.model) # list of 29
str(model_glm) # list of 30
如您所见,这两个模型的结构都是由若干元素组成的列表,除了包装器生成的模型中缺少 model 这一元素之外,其余元素完全相同。因此,我在使用 pmml 时收到一条错误消息:
# Export the glm object fitted through mlr; this is the call that fails.
pmml(model$learner.model)
# Error in pmml.glm(model$learner.model) : object 'model.link' not found
而直接用 stats::glm 构建的模型可以正常运行:
# Export the model fitted directly with stats::glm; this call succeeds.
pmml(model_glm)
<PMML version="4.4" xmlns="http://www.dmg.org/PMML-4_4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_4 http://www.dmg.org/pmml/v4-4/pmml-4-4.xsd">
<Header copyright="Copyright (c) 2020 TBeige" description="Generalized Linear Regression Model">
<Extension name="user" value="TBeige" extender="SoftwareAG PMML Generator"/>
<Application name="SoftwareAG PMML Generator" version="2.3.1"/>
<Timestamp>2020-05-12 09:50:15</Timestamp>
</Header>
<DataDictionary numberOfFields="4">
<DataField name="Survived" optype="categorical" dataType="string">
<Value value="0"/>
<Value value="1"/>
</DataField>
<DataField name="Pclass" optype="categorical" dataType="string">
<Value value="1"/>
<Value value="2"/>
<Value value="3"/>
</DataField>
<DataField name="Sex" optype="categorical" dataType="string">
<Value value="female"/>
<Value value="male"/>
</DataField>
<DataField name="Age" optype="continuous" dataType="double"/>
</DataDictionary>
<GeneralRegressionModel modelName="General_Regression_Model" modelType="generalizedLinear" functionName="classification" algorithmName="glm" distribution="binomial" linkFunction="logit">
<MiningSchema>
<MiningField name="Survived" usageType="predicted" invalidValueTreatment="returnInvalid"/>
<MiningField name="Pclass" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Sex" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Age" usageType="active" invalidValueTreatment="returnInvalid"/>
</MiningSchema>
<Output>
<OutputField name="Probability_1" targetField="Survived" feature="probability" value="1" optype="continuous" dataType="double"/>
<OutputField name="Predicted_Survived" feature="predictedValue" optype="categorical" dataType="string"/>
</Output>
<ParameterList>
<Parameter name="p0" label="(Intercept)"/>
<Parameter name="p1" label="Pclass2"/>
<Parameter name="p2" label="Pclass3"/>
<Parameter name="p3" label="Sexmale"/>
<Parameter name="p4" label="Age"/>
</ParameterList>
<FactorList>
<Predictor name="Pclass"/>
<Predictor name="Sex"/>
</FactorList>
<CovariateList>
<Predictor name="Age"/>
</CovariateList>
<PPMatrix>
<PPCell value="2" predictorName="Pclass" parameterName="p1"/>
<PPCell value="3" predictorName="Pclass" parameterName="p2"/>
<PPCell value="male" predictorName="Sex" parameterName="p3"/>
<PPCell value="1" predictorName="Age" parameterName="p4"/>
</PPMatrix>
<ParamMatrix>
<PCell targetCategory="1" parameterName="p0" df="1" beta="3.77701265255885"/>
<PCell targetCategory="1" parameterName="p1" df="1" beta="-1.30979926778885"/>
<PCell targetCategory="1" parameterName="p2" df="1" beta="-2.58062531749203"/>
<PCell targetCategory="1" parameterName="p3" df="1" beta="-2.52278091988034"/>
<PCell targetCategory="1" parameterName="p4" df="1" beta="-0.0369852655754339"/>
</ParamMatrix>
</GeneralRegressionModel>
</PMML>
有人知道如何在使用 mlr 的同时,用 pmml 创建 XML 文件吗?
问题似乎出在 pmml 包内部。下面这段代码来自 pmml::pmml.glm:
if (model$call[[1]] == "glm") {
model.type <- model$family$family
model.link <- model$family$link
}
else {
model.type <- "unknown"
}
在 mlr 模型中,调用的第一个元素是 stats::glm 而不是 glm,因此上面的条件不成立,model.link 从未被赋值:
# Inspect the head of the call stored on the glm object fitted through mlr.
model$learner.model$call[[1]]
# stats::glm
所以你可以用下面的办法绕过(hack)这个问题:
# Overwrite the call head so that pmml.glm's `== "glm"` check passes.
model$learner.model$call[[1]] <- "glm"
然后
# Export the underlying glm object (not the mlr wrapper) to PMML.
pmml(model$learner.model)
有效。
老实说,pmml 包中的这段代码看起来很奇怪。
我想使用 pmml 包,把用 mlr 包构建的逻辑回归模型直接转换成 XML 文件。问题是,mlr 包装器构建的 model$learner.model 的列表中不包含模型 link,而普通的 stats::glm 函数的结果中是包含的。下面是一个例子:
library(dplyr)
library(titanic)
library(pmml)
library(ParamHelpers)
library(mlr)
# Assemble the modelling frame: response plus three predictors.
Titanic_data <- select(titanic_train, Survived, Pclass, Sex, Age)

# Encode the categorical columns as factors.
Titanic_data[["Survived"]] <- as.factor(Titanic_data[["Survived"]])
Titanic_data[["Pclass"]] <- as.factor(Titanic_data[["Pclass"]])
Titanic_data[["Sex"]] <- as.factor(Titanic_data[["Sex"]])

# Drop rows that contain missing values.
Titanic_data <- na.omit(Titanic_data)

# Reference fit: binomial GLM straight from stats::glm.
model_glm <- glm(Survived ~ ., data = Titanic_data, family = "binomial")

# Same data trained through mlr's "classif.logreg" learner.
task <- makeClassifTask(
  data = Titanic_data,
  target = "Survived",
  positive = "1"
)
lrn <- makeLearner("classif.logreg", predict.type = "prob")
model <- train(lrn, task)

# Compare the structure of the two fitted objects.
str(model$learner.model) # list of 29
str(model_glm) # list of 30
如您所见,这两个模型的结构都是由若干元素组成的列表,除了包装器生成的模型中缺少 model 这一元素之外,其余元素完全相同。因此,我在使用 pmml 时收到一条错误消息:
pmml(model$learner.model)
# Error in pmml.glm(model$learner.model) : object 'model.link' not found
而直接用 stats::glm 构建的模型可以正常运行:
# Export the model fitted directly with stats::glm; this call succeeds.
pmml(model_glm)
<PMML version="4.4" xmlns="http://www.dmg.org/PMML-4_4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_4 http://www.dmg.org/pmml/v4-4/pmml-4-4.xsd">
<Header copyright="Copyright (c) 2020 TBeige" description="Generalized Linear Regression Model">
<Extension name="user" value="TBeige" extender="SoftwareAG PMML Generator"/>
<Application name="SoftwareAG PMML Generator" version="2.3.1"/>
<Timestamp>2020-05-12 09:50:15</Timestamp>
</Header>
<DataDictionary numberOfFields="4">
<DataField name="Survived" optype="categorical" dataType="string">
<Value value="0"/>
<Value value="1"/>
</DataField>
<DataField name="Pclass" optype="categorical" dataType="string">
<Value value="1"/>
<Value value="2"/>
<Value value="3"/>
</DataField>
<DataField name="Sex" optype="categorical" dataType="string">
<Value value="female"/>
<Value value="male"/>
</DataField>
<DataField name="Age" optype="continuous" dataType="double"/>
</DataDictionary>
<GeneralRegressionModel modelName="General_Regression_Model" modelType="generalizedLinear" functionName="classification" algorithmName="glm" distribution="binomial" linkFunction="logit">
<MiningSchema>
<MiningField name="Survived" usageType="predicted" invalidValueTreatment="returnInvalid"/>
<MiningField name="Pclass" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Sex" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Age" usageType="active" invalidValueTreatment="returnInvalid"/>
</MiningSchema>
<Output>
<OutputField name="Probability_1" targetField="Survived" feature="probability" value="1" optype="continuous" dataType="double"/>
<OutputField name="Predicted_Survived" feature="predictedValue" optype="categorical" dataType="string"/>
</Output>
<ParameterList>
<Parameter name="p0" label="(Intercept)"/>
<Parameter name="p1" label="Pclass2"/>
<Parameter name="p2" label="Pclass3"/>
<Parameter name="p3" label="Sexmale"/>
<Parameter name="p4" label="Age"/>
</ParameterList>
<FactorList>
<Predictor name="Pclass"/>
<Predictor name="Sex"/>
</FactorList>
<CovariateList>
<Predictor name="Age"/>
</CovariateList>
<PPMatrix>
<PPCell value="2" predictorName="Pclass" parameterName="p1"/>
<PPCell value="3" predictorName="Pclass" parameterName="p2"/>
<PPCell value="male" predictorName="Sex" parameterName="p3"/>
<PPCell value="1" predictorName="Age" parameterName="p4"/>
</PPMatrix>
<ParamMatrix>
<PCell targetCategory="1" parameterName="p0" df="1" beta="3.77701265255885"/>
<PCell targetCategory="1" parameterName="p1" df="1" beta="-1.30979926778885"/>
<PCell targetCategory="1" parameterName="p2" df="1" beta="-2.58062531749203"/>
<PCell targetCategory="1" parameterName="p3" df="1" beta="-2.52278091988034"/>
<PCell targetCategory="1" parameterName="p4" df="1" beta="-0.0369852655754339"/>
</ParamMatrix>
</GeneralRegressionModel>
</PMML>
有人知道如何在使用 mlr 的同时,用 pmml 创建 XML 文件吗?
问题似乎出在 pmml 包内部。下面这段代码来自 pmml::pmml.glm:
if (model$call[[1]] == "glm") {
model.type <- model$family$family
model.link <- model$family$link
}
else {
model.type <- "unknown"
}
在 mlr 模型中,调用的第一个元素是 stats::glm 而不是 glm,因此上面的条件不成立,model.link 从未被赋值:
# Inspect the head of the call stored on the glm object fitted through mlr.
model$learner.model$call[[1]]
# stats::glm
所以你可以用下面的办法绕过(hack)这个问题:
# Overwrite the call head so that pmml.glm's `== "glm"` check passes.
model$learner.model$call[[1]] <- "glm"
然后
# Export the underlying glm object (not the mlr wrapper) to PMML.
pmml(model$learner.model)
有效。
老实说,pmml 包中的这段代码看起来很奇怪。