Microsoft SQL 和 R、存储过程和 k-means
Microsoft SQL and R, stored procedure and k-means
我是新来的,希望得到帮助和帮助。
但是,我正在开发新的 Microsoft Sql Server Management Studio (2016),使用它的新功能,这意味着与 R 的集成。
首先,我的目标是创建一个存储过程,该过程使用 x 和 y 列执行 K-Means 聚类。
问题是我被卡在了中间,因为我无法拒绝对我的案例的在线文档。
这里是脚本
CREATE TABLE [dbo].[ModelTable]
(
column_name1 varchar(8000)
)
;
CREATE TABLE [dbo].[ResultTable]
(
column_name1 varchar(8000),
column_name2 varchar(8000),
column_name3 varchar(8000),
column_name4 varchar(8000)
)
;
CREATE PROCEDURE [dbo].[kmean]
AS
BEGIN
DECLARE @inquery nvarchar(max) = N'
select name,x,y FROM [dbtable]
'
-- then I decide to insert the model in a table: this is similar to the documentation, but I am not sure it fits well.
INSERT INTO [dbo].[ModelTable]
EXEC sp_execute_external_script @language = N'R',
@script = N'
## Here I create model: this is one of the biggest problem, because I tried to create a data frame with the data, but I do not know if here,
## in the R code, the data are read in this way. Generally in "pure" R, I write data.frame(sourcedata$x,sourcedata$y), but here, where is source of data?
## In the documentation it is used ImputDataSet, so maybe I could do:
trained_model <- kmeans(data.frame(ImputDataSet$x,ImputDataSet$y),8)
-- If everything is ok (doubtfully) I should have the model. And here, the part that I really cannot handle.
-- I'd like to have a table [ResultTable] with name, variable x, variable y, and trainedmodel$cluster.
',
@input_data_1 = @inquery,
@output_data_1_name = N'trained_model'
;
END
GO
EXEC kmean
嗯,还有很多问题等等,由于这是MSSMS中比较新的功能,所以网上的帮助等也不是很多。
提前致谢
我们可以尝试以下方法:
CREATE TABLE #tempData (x float not null, y float not null);
INSERT INTO #tempData VALUES (0, 0), (0.1, 0.1), (1, 1), (1.1, 1.1);
CREATE TABLE #output (x float, y float, Cluster int);
INSERT INTO #output
EXECUTE sp_execute_external_script
@language = N'R'
, @script = N'
trained_model <- kmeans(df[, c("x", "y")], 2)
df$cluster <- trained_model$cluster
'
, @input_data_1 = N'SELECT * from #tempData'
, @output_data_1_name = N'df'
, @input_data_1_name = N'df';
SELECT *
FROM #output
输出:
x y Cluster
0 0 1
0.1 0.1 1
1 1 2
1.1 1.1 2
请注意,我将输入和输出数据指定为 df
。默认值是 InputDataSet
和 OutputDataSet
.
如果您有更长的 R 脚本:我建议您在 R 环境中编写和测试它们,然后将它们保存在一个包中,然后只需加载并调用它们。
我是新来的,希望得到帮助和帮助。
但是,我正在开发新的 Microsoft Sql Server Management Studio (2016),使用它的新功能,这意味着与 R 的集成。 首先,我的目标是创建一个存储过程,该过程使用 x 和 y 列执行 K-Means 聚类。
问题是我被卡在了中间,因为我无法拒绝对我的案例的在线文档。
这里是脚本
CREATE TABLE [dbo].[ModelTable]
(
column_name1 varchar(8000)
)
;
CREATE TABLE [dbo].[ResultTable]
(
column_name1 varchar(8000),
column_name2 varchar(8000),
column_name3 varchar(8000),
column_name4 varchar(8000)
)
;
CREATE PROCEDURE [dbo].[kmean]
AS
BEGIN
DECLARE @inquery nvarchar(max) = N'
select name,x,y FROM [dbtable]
'
-- then I decide to insert the model in a table: this is similar to the documentation, but I am not sure it fits well.
INSERT INTO [dbo].[ModelTable]
EXEC sp_execute_external_script @language = N'R',
@script = N'
## Here I create model: this is one of the biggest problem, because I tried to create a data frame with the data, but I do not know if here,
## in the R code, the data are read in this way. Generally in "pure" R, I write data.frame(sourcedata$x,sourcedata$y), but here, where is source of data?
## In the documentation it is used ImputDataSet, so maybe I could do:
trained_model <- kmeans(data.frame(ImputDataSet$x,ImputDataSet$y),8)
-- If everything is ok (doubtfully) I should have the model. And here, the part that I really cannot handle.
-- I'd like to have a table [ResultTable] with name, variable x, variable y, and trainedmodel$cluster.
',
@input_data_1 = @inquery,
@output_data_1_name = N'trained_model'
;
END
GO
EXEC kmean
嗯,还有很多问题等等,由于这是MSSMS中比较新的功能,所以网上的帮助等也不是很多。 提前致谢
我们可以尝试以下方法:
CREATE TABLE #tempData (x float not null, y float not null);
INSERT INTO #tempData VALUES (0, 0), (0.1, 0.1), (1, 1), (1.1, 1.1);
CREATE TABLE #output (x float, y float, Cluster int);
INSERT INTO #output
EXECUTE sp_execute_external_script
@language = N'R'
, @script = N'
trained_model <- kmeans(df[, c("x", "y")], 2)
df$cluster <- trained_model$cluster
'
, @input_data_1 = N'SELECT * from #tempData'
, @output_data_1_name = N'df'
, @input_data_1_name = N'df';
SELECT *
FROM #output
输出:
x y Cluster
0 0 1
0.1 0.1 1
1 1 2
1.1 1.1 2
请注意,我将输入和输出数据指定为 df
。默认值是 InputDataSet
和 OutputDataSet
.
如果您有更长的 R 脚本:我建议您在 R 环境中编写和测试它们,然后将它们保存在一个包中,然后只需加载并调用它们。