从sql自动传输数据到R
Automatic transfer data from the sql to R
我从sql服务器获取数据进行回归分析,然后将回归结果ireturn返回到另一个sqltable.
library("RODBC")
library(sqldf)
dbHandle <- odbcDriverConnect("driver={SQL Server};server=MYSERVER;database=MYBASE;trusted_connection=true")
sql <-
"select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo
from dbo.mytable"
df <- sqlQuery(dbHandle, sql)
reg=lm(SaleCount~IsPromo,data=df)
#save to sql table
sqlSave(dbHandle, as.data.frame(reg), "dbo.mytableforecast", verbose = TRUE) # use "append = TRUE" to add rows to an existing table
odbcClose(dbHandle)
问题:
脚本自动运行,即在调度程序中有任务在特定时间启动脚本。
下次如何做到这一点,当脚本运行时,它不能与所有 table 一起工作,而只能与 sql 中加载的数据一起工作?
例如,今天加载了 100 个观测值。
从 01.01.2017-10.04.2017
脚本执行回归并将数据 return 编辑为 sql table。
明天将加载新的 100 个观测值。
11.04.2017-20.07.2017
IE。明天将加载数据并且脚本将在晚上 10 点开始,它必须仅适用于 11.04.2017-20.07.2017 的数据,而不是 01.01.2017-20.07.2017
我该怎么做?
来自 sql
的数据示例
df=structure(list(Dt = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L,
18L, 19L), .Label = c("2017-10-12 00:00:00.000", "2017-10-13 00:00:00.000",
"2017-10-14 00:00:00.000", "2017-10-15 00:00:00.000", "2017-10-16 00:00:00.000",
"2017-10-17 00:00:00.000", "2017-10-18 00:00:00.000", "2017-10-19 00:00:00.000",
"2017-10-20 00:00:00.000", "2017-10-21 00:00:00.000", "2017-10-22 00:00:00.000",
"2017-10-23 00:00:00.000", "2017-10-24 00:00:00.000", "2017-10-25 00:00:00.000",
"2017-10-26 00:00:00.000", "2017-10-27 00:00:00.000", "2017-10-28 00:00:00.000",
"2017-10-29 00:00:00.000", "2017-10-30 00:00:00.000"), class = "factor"),
CustomerName = structure(c(1L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), .Label = c("x1", "x10", "x11", "x12", "x13", "x14",
"x15", "x16", "x17", "x18", "x2", "x3", "x4", "x5", "x6",
"x7", "x8", "x9"), class = "factor"), ItemRelation = c(13322L,
13322L, 13322L, 13322L, 13322L, 13322L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13163L,
13322L, 158010L, 11706L, 13163L, 13322L, 158010L, 11706L,
13163L, 13322L, 158010L, 11706L), SaleCount = c(10L, 3L,
1L, 0L, 9L, 5L, 5L, 11L, 7L, 0L, 5L, 11L, 1L, 0L, 0L, 19L,
10L, 0L, 1L, 12L, 1L, 11L, 6L, 0L, 167L, 7L, 0L, 16L, 165L,
1L, 0L, 0L, 29L, 0L, 0L, 11L), DocumentNum = c(36L, 36L,
36L, 36L, 36L, 36L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L,
36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 131L, 36L,
89L, 51L, 131L, 36L, 89L, 51L, 131L, 36L, 89L, 51L), DocumentYear = c(2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L),
IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Dt", "CustomerName",
"ItemRelation", "SaleCount", "DocumentNum", "DocumentYear", "IsPromo"
), class = "data.frame", row.names = c(NA, -36L))
在结果中添加日期 sql table dbo.mytableforecast
更改 select SQL 语句,使其在
之后只有 selects 数据
select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo
from dbo.mytable
where Dt > (select max(Dt) from dbo.mytableforecast)
我从sql服务器获取数据进行回归分析,然后将回归结果ireturn返回到另一个sqltable.
library("RODBC")
library(sqldf)
dbHandle <- odbcDriverConnect("driver={SQL Server};server=MYSERVER;database=MYBASE;trusted_connection=true")
sql <-
"select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo
from dbo.mytable"
df <- sqlQuery(dbHandle, sql)
reg=lm(SaleCount~IsPromo,data=df)
#save to sql table
sqlSave(dbHandle, as.data.frame(reg), "dbo.mytableforecast", verbose = TRUE) # use "append = TRUE" to add rows to an existing table
odbcClose(dbHandle)
问题:
脚本自动运行,即在调度程序中有任务在特定时间启动脚本。 下次如何做到这一点,当脚本运行时,它不能与所有 table 一起工作,而只能与 sql 中加载的数据一起工作?
例如,今天加载了 100 个观测值。 从 01.01.2017-10.04.2017 脚本执行回归并将数据 return 编辑为 sql table。 明天将加载新的 100 个观测值。 11.04.2017-20.07.2017 IE。明天将加载数据并且脚本将在晚上 10 点开始,它必须仅适用于 11.04.2017-20.07.2017 的数据,而不是 01.01.2017-20.07.2017
我该怎么做?
来自 sql
的数据示例df=structure(list(Dt = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
8L, 9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L,
18L, 19L), .Label = c("2017-10-12 00:00:00.000", "2017-10-13 00:00:00.000",
"2017-10-14 00:00:00.000", "2017-10-15 00:00:00.000", "2017-10-16 00:00:00.000",
"2017-10-17 00:00:00.000", "2017-10-18 00:00:00.000", "2017-10-19 00:00:00.000",
"2017-10-20 00:00:00.000", "2017-10-21 00:00:00.000", "2017-10-22 00:00:00.000",
"2017-10-23 00:00:00.000", "2017-10-24 00:00:00.000", "2017-10-25 00:00:00.000",
"2017-10-26 00:00:00.000", "2017-10-27 00:00:00.000", "2017-10-28 00:00:00.000",
"2017-10-29 00:00:00.000", "2017-10-30 00:00:00.000"), class = "factor"),
CustomerName = structure(c(1L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), .Label = c("x1", "x10", "x11", "x12", "x13", "x14",
"x15", "x16", "x17", "x18", "x2", "x3", "x4", "x5", "x6",
"x7", "x8", "x9"), class = "factor"), ItemRelation = c(13322L,
13322L, 13322L, 13322L, 13322L, 13322L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13322L,
11706L, 13322L, 11706L, 13322L, 11706L, 13322L, 11706L, 13163L,
13322L, 158010L, 11706L, 13163L, 13322L, 158010L, 11706L,
13163L, 13322L, 158010L, 11706L), SaleCount = c(10L, 3L,
1L, 0L, 9L, 5L, 5L, 11L, 7L, 0L, 5L, 11L, 1L, 0L, 0L, 19L,
10L, 0L, 1L, 12L, 1L, 11L, 6L, 0L, 167L, 7L, 0L, 16L, 165L,
1L, 0L, 0L, 29L, 0L, 0L, 11L), DocumentNum = c(36L, 36L,
36L, 36L, 36L, 36L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L,
36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 36L, 51L, 131L, 36L,
89L, 51L, 131L, 36L, 89L, 51L, 131L, 36L, 89L, 51L), DocumentYear = c(2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L),
IsPromo = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Dt", "CustomerName",
"ItemRelation", "SaleCount", "DocumentNum", "DocumentYear", "IsPromo"
), class = "data.frame", row.names = c(NA, -36L))
在结果中添加日期 sql table dbo.mytableforecast
更改 select SQL 语句,使其在
之后只有 selects 数据select
Dt
,CustomerName
,ItemRelation
,SaleCount
,DocumentNum
,DocumentYear
,IsPromo
from dbo.mytable
where Dt > (select max(Dt) from dbo.mytableforecast)