如何在预测阶段用mlr3包解码JSON数据?
How to decode JSON data with mlr3 package in the prediction phase?
我已经用 mlr3 包开发了一个 graphlearner,我想在 R plumber 服务中发布它。但是,当我收到待预测的数据(JSON 格式)时,graphlearner 无法识别这些数据,因为 jsonlite 的 fromJSON 函数无法推断出正确的列类型(即训练该图时所用的类型)。你有解决办法吗?mlr3 中是否有一种机制可以在预测阶段处理 JSON 数据?
学习步骤
# Learning step: build an imputation + ranger pipeline, train it on the
# Boston housing task and save the trained learner to disk.
library(mlr3)
# po(), %>>%, gunion(), selector_type() and GraphLearner come from
# mlr3pipelines; the "regr.ranger" learner is registered by mlr3learners.
library(mlr3pipelines)
library(mlr3learners)

imp_missind = po("missind")
imp_fct = po("imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor")))
imp_num = po("imputehist",
  param_vals = list(affect_columns = selector_type("numeric")))
learner = lrn("regr.ranger")

# Two copies of the data: one goes through "missind", the other through
# numeric imputation followed by factor imputation; both are joined again
# by "featureunion" before reaching the learner.
graph = po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

t1 = tsk("boston_housing")
g1 = GraphLearner$new(graph)
g1$train(t1)

# Saved here so the prediction step can restore it with readRDS().
saveRDS(g1, "my-model")
预测步骤:可以正常工作(用任务中的数据模拟待预测数据,并删除目标列)
# Restore the trained learner written by the learning step.
model = readRDS("my-model")
# One observation taken from the task, with the first column dropped.
data = t1$data()[1, -1]
model$predict_newdata(newdata = data)
预测步骤:无法工作(模拟以 JSON 数据进行预测)
# toJSON() / fromJSON() are provided by jsonlite.
library(jsonlite)

model = readRDS("my-model")
data = t1$data()[1, -1]
# Round-trip the observation through JSON to mimic a request payload.
json = fromJSON(toJSON(data, na = "string"))
# This call is expected to fail: after the round trip the column types no
# longer match the ones the learner was trained with.
model$predict_newdata(newdata = json)
出现的错误:
错误:无法 rbind 任务:列的类型不匹配:cmedv (numeric != integer)
更新 可重现的例子
library(mlr3learners)
library(mlr3)
library(mlr3pipelines)
library(jsonlite)

# Preprocessing PipeOps and the learner.
imp_missind = po("missind")
imp_fct = po("imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor")))
imp_num = po("imputehist",
  param_vals = list(affect_columns = selector_type("numeric")))
learner = lrn("regr.ranger")

# Assemble the pipeline.
graph = po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

task = tsk("boston_housing")
graphlearner = GraphLearner$new(graph)

# Train the model.
graphlearner$train(task)

# Create the data to predict on (just one observation).
data = task$data()
data[1, chas := NA]
data = data[1, -1]

# Look at the column types.
str(data)

# Predicting on the original data works fine.
predict(graphlearner, data)

# Simulate the case where JSON data is received.
json_data = toJSON(data, na = "string")
print(json_data)

# Get R data back from the JSON-formatted data.
data_from_json = fromJSON(json_data)

# Look at the column types again: some differ
# (numeric != integer, factor != character).
str(data_from_json)

# Trying to predict does not work; the error is: cmedv (numeric != integer).
predict(graphlearner, data_from_json)
我想我们可能会在某个时候修复这个问题,但在修复可用之前,我建议按照任务中保存的架构(task$feature_types)来修复数据的列类型,以此绕过该问题:
library(mlr3misc)
#' Coerce the columns of `data` to the types listed in `feature_types`.
#'
#' @param data A data.frame-like object (e.g. the result of
#'   `jsonlite::fromJSON()`) whose columns should be converted.
#' @param feature_types A table with the columns `id` (feature name) and
#'   `type` (target type name), such as `task$feature_types`.
#' @return The result of `imap_dtc()` over the columns of `data`, with every
#'   column whose name has a supported type in `feature_types` converted to
#'   that type. Columns that are not listed in `feature_types`, or whose
#'   listed type has no converter below, are returned unchanged.
repair_schema = function(data, feature_types) {
  # Converters keyed by the type names used in `feature_types`.
  coercers = list(
    character = as.character,
    factor = as.factor,
    ordered = as.ordered,
    numeric = as.numeric,
    integer = as.integer,
    logical = as.logical
  )
  imap_dtc(data, function(v, k) {
    ft_type = feature_types[["type"]][feature_types[["id"]] == k]
    # A column missing from the schema (for example a target column sent
    # along with the features) has no type to repair: pass it through
    # instead of failing on a zero-length condition.
    if (length(ft_type) != 1L || is.na(ft_type)) {
      return(v)
    }
    fn = coercers[[ft_type]]
    # Types without a converter are left as they are rather than calling NULL.
    if (is.null(fn)) {
      return(v)
    }
    # typeof() reports "double" / "integer" and never "numeric" / "factor",
    # so it cannot be compared with the schema type; the converters leave a
    # column that already has the right type as it is, so they are always
    # applied.
    fn(v)
  })
}
# Convert the JSON-decoded columns to the task's feature types, then predict.
data_from_json2 = repair_schema(
  data_from_json,
  task$feature_types
)
predict(graphlearner, data_from_json2)
这种方法还可以为您提供更大的灵活性,因为您可能会遇到一系列无法总是预料到的编码问题。
我已经用 mlr3 包开发了一个 graphlearner,我想在 R plumber 服务中发布它。但是,当我收到待预测的数据(JSON 格式)时,graphlearner 无法识别这些数据,因为 jsonlite 的 fromJSON 函数无法推断出正确的列类型(即训练该图时所用的类型)。你有解决办法吗?mlr3 中是否有一种机制可以在预测阶段处理 JSON 数据?
学习步骤
# Learning step: build an imputation + ranger pipeline, train it on the
# Boston housing task and save the trained learner to disk.
library(mlr3)
# po(), %>>%, gunion(), selector_type() and GraphLearner come from
# mlr3pipelines; the "regr.ranger" learner is registered by mlr3learners.
library(mlr3pipelines)
library(mlr3learners)

imp_missind = po("missind")
imp_fct = po("imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor")))
imp_num = po("imputehist",
  param_vals = list(affect_columns = selector_type("numeric")))
learner = lrn("regr.ranger")

# Two copies of the data: one goes through "missind", the other through
# numeric imputation followed by factor imputation; both are joined again
# by "featureunion" before reaching the learner.
graph = po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

t1 = tsk("boston_housing")
g1 = GraphLearner$new(graph)
g1$train(t1)

# Saved here so the prediction step can restore it with readRDS().
saveRDS(g1, "my-model")
预测步骤:可以正常工作(用任务中的数据模拟待预测数据,并删除目标列)
# Restore the trained learner written by the learning step.
model = readRDS("my-model")
# One observation taken from the task, with the first column dropped.
data = t1$data()[1, -1]
model$predict_newdata(newdata = data)
预测步骤:无法工作(模拟以 JSON 数据进行预测)
# toJSON() / fromJSON() are provided by jsonlite.
library(jsonlite)

model = readRDS("my-model")
data = t1$data()[1, -1]
# Round-trip the observation through JSON to mimic a request payload.
json = fromJSON(toJSON(data, na = "string"))
# This call is expected to fail: after the round trip the column types no
# longer match the ones the learner was trained with.
model$predict_newdata(newdata = json)
出现的错误:
错误:无法 rbind 任务:列的类型不匹配:cmedv (numeric != integer)
更新 可重现的例子
library(mlr3learners)
library(mlr3)
library(mlr3pipelines)
library(jsonlite)

# Preprocessing PipeOps and the learner.
imp_missind = po("missind")
imp_fct = po("imputenewlvl",
  param_vals = list(affect_columns = selector_type("factor")))
imp_num = po("imputehist",
  param_vals = list(affect_columns = selector_type("numeric")))
learner = lrn("regr.ranger")

# Assemble the pipeline.
graph = po("copy", 2) %>>%
  gunion(list(imp_missind, imp_num %>>% imp_fct)) %>>%
  po("featureunion") %>>%
  po(learner)

task = tsk("boston_housing")
graphlearner = GraphLearner$new(graph)

# Train the model.
graphlearner$train(task)

# Create the data to predict on (just one observation).
data = task$data()
data[1, chas := NA]
data = data[1, -1]

# Look at the column types.
str(data)

# Predicting on the original data works fine.
predict(graphlearner, data)

# Simulate the case where JSON data is received.
json_data = toJSON(data, na = "string")
print(json_data)

# Get R data back from the JSON-formatted data.
data_from_json = fromJSON(json_data)

# Look at the column types again: some differ
# (numeric != integer, factor != character).
str(data_from_json)

# Trying to predict does not work; the error is: cmedv (numeric != integer).
predict(graphlearner, data_from_json)
我想我们可能会在某个时候修复这个问题,但在修复可用之前,我建议按照任务中保存的架构(task$feature_types)来修复数据的列类型,以此绕过该问题:
library(mlr3misc)
#' Coerce the columns of `data` to the types listed in `feature_types`.
#'
#' @param data A data.frame-like object (e.g. the result of
#'   `jsonlite::fromJSON()`) whose columns should be converted.
#' @param feature_types A table with the columns `id` (feature name) and
#'   `type` (target type name), such as `task$feature_types`.
#' @return The result of `imap_dtc()` over the columns of `data`, with every
#'   column whose name has a supported type in `feature_types` converted to
#'   that type. Columns that are not listed in `feature_types`, or whose
#'   listed type has no converter below, are returned unchanged.
repair_schema = function(data, feature_types) {
  # Converters keyed by the type names used in `feature_types`.
  coercers = list(
    character = as.character,
    factor = as.factor,
    ordered = as.ordered,
    numeric = as.numeric,
    integer = as.integer,
    logical = as.logical
  )
  imap_dtc(data, function(v, k) {
    ft_type = feature_types[["type"]][feature_types[["id"]] == k]
    # A column missing from the schema (for example a target column sent
    # along with the features) has no type to repair: pass it through
    # instead of failing on a zero-length condition.
    if (length(ft_type) != 1L || is.na(ft_type)) {
      return(v)
    }
    fn = coercers[[ft_type]]
    # Types without a converter are left as they are rather than calling NULL.
    if (is.null(fn)) {
      return(v)
    }
    # typeof() reports "double" / "integer" and never "numeric" / "factor",
    # so it cannot be compared with the schema type; the converters leave a
    # column that already has the right type as it is, so they are always
    # applied.
    fn(v)
  })
}
# Convert the JSON-decoded columns to the task's feature types, then predict.
data_from_json2 = repair_schema(
  data_from_json,
  task$feature_types
)
predict(graphlearner, data_from_json2)
这种方法还可以为您提供更大的灵活性,因为您可能会遇到一系列无法总是预料到的编码问题。