从 R 更新到 MS SQL 服务器语法
Upsert from R to MS SQL Server Syntax
我希望能够使用更新插入,在其中更新现有的条件更改行或在新行可用时追加它们。我想在 R studio 中对此进行编码,并与 MS SQL 服务器配对。我已经使用 iris 数据集复制了下面的工作流程。我想我快到了,但还不能完全完成 SQL 查询。此外,欢迎就工作流程提出建议。
> pacman::p_load(DBI, dbplyr, dplyr, odbc)
>
> # Connection
> con <- DBI::dbConnect(odbc::odbc(),
+ Driver = "SQL Server",
+ Server = "localhost\SQLEXPRESS",
+ Database = "master",
+ Trusted_Connection = "True")
>
> Data.DWH <- dbReadTable(con, "iris") %>%
+ unite("Lookup_Key", colnames(select(., - ID)), remove = FALSE)
>
> # Data in data warehouse
> head(Data.DWH)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_0.2_setosa 5.1 3.5 1.4 0.2 setosa 1
2 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
3 4.7_3.2_1.3_0.2_setosa 4.7 3.2 1.3 0.2 setosa 3
4 4.6_3.1_1.5_0.2_setosa 4.6 3.1 1.5 0.2 setosa 4
5 5_3.6_1.4_0.2_setosa 5.0 3.6 1.4 0.2 setosa 5
6 5.4_3.9_1.7_0.4_setosa 5.4 3.9 1.7 0.4 setosa 6
>
> # New data example (created), 1 entry to append, 1 entry to ignore, 1 entry to update
>
> New.Data.Raw <- data.frame(stringsAsFactors=FALSE,
+ Sepal.Length = c(5.1, 1, 4.9),
+ Sepal.Width = c(3.5, 2, 3),
+ Petal.Length = c(1.4, 3, 1.4),
+ Petal.Width = c(2, 4, 0.2),
+ Species = c("setosa", "setosa", "setosa"),
+ ID = c(1, 151, 2)
+ ) %>% unite("Lookup_Key", colnames(select(., - ID)), remove = FALSE)
>
> head(New.Data.Raw)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2.0 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4.0 setosa 151
3 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
>
> # Ready for insert/update
> # check for changes in the look up key (mash up of row values) or new entries according to ID
> New.Data <- New.Data.Raw %>%
+ filter(!ID %in% Data.DWH$ID |
+ (ID %in% Data.DWH$ID & !Lookup_Key %in% Data.DWH$Lookup_Key))
>
> head(New.Data)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4 setosa 151
>
> # Construct sql query for ms sql ===========================================================================
>
> # construct columns for query - produces 1 string
> cols <- paste0('(',paste0(colnames(New.Data), collapse=', '),')')
>
> # construct values for query - produce 1 string
> vals <- paste0(
+ apply(New.Data,1,function(x) paste0("('", paste0(x, collapse = "', '"), "')")), collapse = ", ")
>
> # construct update values for query
> insertVals <- paste0('(',paste0('s.',colnames(New.Data), collapse=', '),')')
>
> # construct update set for query
> updateSet <- paste0(colnames(New.Data%>%select(-ID)),
+ ' = s.',colnames(New.Data%>%select(-ID)), collapse=', ')
>
> # construct upsert query (does not currently work!)
> queryNew.Data <- paste0('MERGE iris AS t ',
+ 'USING (VALUES ',vals,') AS s',cols,
+ ' ON t.ID = s.ID ',
+ 'WHEN MATCHED THEN ',
+ 'UPDATE SET ',updateSet,
+ ' WHEN NOT MATCHED THEN ',
+ 'INSERT',cols,
+ ' VALUES',insertVals,';')
>
> queryNew.Data <- gsub("\b'\b","",queryNew.Data)
> queryNew.Data <- gsub("'NA'", 'NULL', queryNew.Data)
>
> # send the query to the database (no lucky currently)
> DBI::dbGetQuery(con, queryNew.Data)
解决了,代码工作正常。只需要修复列的命名约定。 SQL 不处理 '.'在名称中,因此更改为“_”,代码将按需要工作。
我希望能够使用更新插入,在其中更新现有的条件更改行或在新行可用时追加它们。我想在 R studio 中对此进行编码,并与 MS SQL 服务器配对。我已经使用 iris 数据集复制了下面的工作流程。我想我快到了,但还不能完全完成 SQL 查询。此外,欢迎就工作流程提出建议。
> pacman::p_load(DBI, dbplyr, dplyr, odbc)
>
> # Connection
> con <- DBI::dbConnect(odbc::odbc(),
+ Driver = "SQL Server",
+ Server = "localhost\SQLEXPRESS",
+ Database = "master",
+ Trusted_Connection = "True")
>
> Data.DWH <- dbReadTable(con, "iris") %>%
+ unite("Lookup_Key", colnames(select(., - ID)), remove = FALSE)
>
> # Data in data warehouse
> head(Data.DWH)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_0.2_setosa 5.1 3.5 1.4 0.2 setosa 1
2 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
3 4.7_3.2_1.3_0.2_setosa 4.7 3.2 1.3 0.2 setosa 3
4 4.6_3.1_1.5_0.2_setosa 4.6 3.1 1.5 0.2 setosa 4
5 5_3.6_1.4_0.2_setosa 5.0 3.6 1.4 0.2 setosa 5
6 5.4_3.9_1.7_0.4_setosa 5.4 3.9 1.7 0.4 setosa 6
>
> # New data example (created), 1 entry to append, 1 entry to ignore, 1 entry to update
>
> New.Data.Raw <- data.frame(stringsAsFactors=FALSE,
+ Sepal.Length = c(5.1, 1, 4.9),
+ Sepal.Width = c(3.5, 2, 3),
+ Petal.Length = c(1.4, 3, 1.4),
+ Petal.Width = c(2, 4, 0.2),
+ Species = c("setosa", "setosa", "setosa"),
+ ID = c(1, 151, 2)
+ ) %>% unite("Lookup_Key", colnames(select(., - ID)), remove = FALSE)
>
> head(New.Data.Raw)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2.0 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4.0 setosa 151
3 4.9_3_1.4_0.2_setosa 4.9 3.0 1.4 0.2 setosa 2
>
> # Ready for insert/update
> # check for changes in the look up key (mash up of row values) or new entries according to ID
> New.Data <- New.Data.Raw %>%
+ filter(!ID %in% Data.DWH$ID |
+ (ID %in% Data.DWH$ID & !Lookup_Key %in% Data.DWH$Lookup_Key))
>
> head(New.Data)
Lookup_Key Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID
1 5.1_3.5_1.4_2_setosa 5.1 3.5 1.4 2 setosa 1
2 1_2_3_4_setosa 1.0 2.0 3.0 4 setosa 151
>
> # Construct sql query for ms sql ===========================================================================
>
> # construct columns for query - produces 1 string
> cols <- paste0('(',paste0(colnames(New.Data), collapse=', '),')')
>
> # construct values for query - produce 1 string
> vals <- paste0(
+ apply(New.Data,1,function(x) paste0("('", paste0(x, collapse = "', '"), "')")), collapse = ", ")
>
> # construct update values for query
> insertVals <- paste0('(',paste0('s.',colnames(New.Data), collapse=', '),')')
>
> # construct update set for query
> updateSet <- paste0(colnames(New.Data%>%select(-ID)),
+ ' = s.',colnames(New.Data%>%select(-ID)), collapse=', ')
>
> # construct upsert query (does not currently work!)
> queryNew.Data <- paste0('MERGE iris AS t ',
+ 'USING (VALUES ',vals,') AS s',cols,
+ ' ON t.ID = s.ID ',
+ 'WHEN MATCHED THEN ',
+ 'UPDATE SET ',updateSet,
+ ' WHEN NOT MATCHED THEN ',
+ 'INSERT',cols,
+ ' VALUES',insertVals,';')
>
> queryNew.Data <- gsub("\b'\b","",queryNew.Data)
> queryNew.Data <- gsub("'NA'", 'NULL', queryNew.Data)
>
> # send the query to the database (no lucky currently)
> DBI::dbGetQuery(con, queryNew.Data)
解决了,代码工作正常。只需要修复列的命名约定。 SQL 不处理 '.'在名称中,因此更改为“_”,代码将按需要工作。