Microsoft R 服务器解析错误
Microsoft R Server Parsing errors
你不能只是 copy/paste 从 RStudio 无缝地编码到 SQL 服务器存储过程,这真的很蹩脚,但是,唉,否则就不会是微软了,对吧?
基本上,我在解析时遇到了非常奇怪的错误:
Error in source(revoScriptConnection) :
revoScriptConnection:116:111: unexpected INCOMPLETE_STRING
116: dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_
这是实际的代码行(之前包含了几行):
}
}
##save dud results if no geocoding occurred
dtCumulativeResults <- data.table(City_Name_RAW=NA)
dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=character()]
在此之前的唯一行是 for 循环和 if 语句的结束括号。
此代码在 RStudio 中运行良好。不幸的是,如果我只是隔离这几行,存储过程就会运行。所以它与上面的几行有关,但我可能不知道从哪里开始。我确实知道一件事:解析大括号 {
或 }
时遇到问题。如果我不缩进大括号,它会抛出错误。但并非总是如此。
无论如何,我希望有人已经意识到银弹是什么。也许把大括号放在一条线上。等等。我在下面添加了完整的 SPROC,但同样,这是一些奇怪的格式问题。
USE master
GO
/****** Object: StoredProcedure [dbo].[Rscript_geocodeUSACities] Script Date: 8/8/2017 10:31:15 AM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[Rscript_geocodeUSACities]
@usrOutputFilePath varchar(150)
,@usrOutputFileName varchar(150)
AS
BEGIN
SET NOCOUNT ON;
DECLARE @rScript nvarchar(max) = N'
#### USER INPUTS ####
usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + '"
#### ESTABLISH ENVIRONMENT ####
library(data.table)
library(foreach)
library(XML)
library(RCurl)
library(RJSONIO)
##turn off scientific notation
options(scipen=999)
#### GEOCODE ####
##assign input data set
dtInputData <- data.table(InputDataSet)
##initialize cumulative results data table
dtCumulativeResults <- dtInputData[0,]
##find out where to start
startindex <- 1
##if a temp file exists - load it up and count the rows
if (file.exists(usrOutputFile)) {
print("Found temp file - resuming from index")
dtCumulativeResults <- fread(usrOutputFile,colClasses="character")
startindex <- ifelse(is.na(max(as.numeric(dtCumulativeResults$index),na.rm=T)),1,max(as.numeric(dtCumulativeResults$index),na.rm=T)+1)
print(startindex)
} else { print("No temp file") }
if (startindex<nrow(dtInputData) | startindex==nrow(dtInputData)) {
for (ii in seq(startindex,nrow(dtInputData))) {
##initialize temporary results data table
dtTempResults <- dtInputData[ii,]
##geocode address
vcSearchInput <- paste0(gsub("[[:punct:]]","",unlist(strsplit(paste(dtInputData$City_Name_RAW[ii],dtInputData$State_Name_RAW[ii],"USA")," "))),collapse="+")
vcSearchURL <- URLencode(paste("http://maps.google.com/maps/api/geocode/json?address=",vcSearchInput,"&sensor=false",sep=""))
vcSearchOutput <- getURL(vcSearchURL)
vcSearchOutput <- fromJSON(vcSearchOutput,simplify=F)
if (vcSearchOutput$status=="OVER_QUERY_LIMIT") {
dtTempResults[1,c("City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=NA]
#save temporary results as we are going along
dtCumulativeResults <- rbind(dtCumulativeResults,dtTempResults[0,],fill=T)
write.csv(dtCumulativeResults,usrOutputFile,row.names=F)
break
}
if (vcSearchOutput$status=="OK" & !(tolower(dtTempResults$City_Name_RAW) %in% tolower(state.name)) & !(tolower(dtTempResults$City_Name_RAW) %in% tolower(state.abb)) & !(tolower(vcSearchOutput$results[[1]]$address_components[[1]]$long_name) %in% tolower(state.name))) {
dtTempResults[1,City_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[1]]$long_name]
if (regexpr("County|Parish|Borough",vcSearchOutput$results[[1]]$address_components[[2]]$long_name)>0) {
dtTempResults[1,County_Name_MDM:=gsub(" County","",vcSearchOutput$results[[1]]$address_components[[2]]$long_name)]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[3]]$long_name]
} else if (regexpr("United States",vcSearchOutput$results[[1]]$address_components[[3]]$long_name)>0) {
dtTempResults[1,County_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[1]]$long_name]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[2]]$long_name]
} else {
dtTempResults[1,County_Name_MDM:=gsub(" County","",vcSearchOutput$results[[1]]$address_components[[3]]$long_name)]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[4]]$long_name]
}
dtTempResults[1,City_Address_MDM:=vcSearchOutput$results[[1]]$formatted_address]
dtTempResults[1,City_Latitude:=vcSearchOutput$results[[1]]$geometry$location$lat]
dtTempResults[1,City_Longitude:=vcSearchOutput$results[[1]]$geometry$location$lng]
dtTempResults[1,index:=ii]
}
##save temporary results as we are going along
dtCumulativeResults <- rbind(dtCumulativeResults,dtTempResults,fill=T)
write.csv(dtCumulativeResults,usrOutputFile,row.names=F)
}
} else {
##save dud results if no geocoding occurred
dtCumulativeResults <- data.table(City_Name_RAW=NA)
dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=character()]
write.csv(dtCumulativeResults[0,],usrOutputFile,row.names=F)
}
'
EXECUTE sp_execute_external_script
@language = N'R'
, @script = @rScript
,@input_data_1 =N'select distinct [City_Name_RAW]
,[State_Name_RAW]
from [External].[mdm].[dim_USA_Cities_Map]
where Mod_DT is null'
;
END
GO
请注意,您的大部分脚本都指定为非 Unicode 字符串文字。
之后的每一天
usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + '" . . .
因为在 '+' 之后开始下一个没有 N' 的字符串文字。应该是
usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + N'" . . .
要将结果插入 table,在 TSQL 中使用 INSERT ... EXEC 。参见 https://docs.microsoft.com/en-us/sql/t-sql/statements/insert-transact-sql
你不能只是 copy/paste 从 RStudio 无缝地编码到 SQL 服务器存储过程,这真的很蹩脚,但是,唉,否则就不会是微软了,对吧?
基本上,我在解析时遇到了非常奇怪的错误:
Error in source(revoScriptConnection) :
revoScriptConnection:116:111: unexpected INCOMPLETE_STRING
116: dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_
这是实际的代码行(之前包含了几行):
}
}
##save dud results if no geocoding occurred
dtCumulativeResults <- data.table(City_Name_RAW=NA)
dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=character()]
在此之前的唯一行是 for 循环和 if 语句的结束括号。
此代码在 RStudio 中运行良好。不幸的是,如果我只是隔离这几行,存储过程就会运行。所以它与上面的几行有关,但我可能不知道从哪里开始。我确实知道一件事:解析大括号 {
或 }
时遇到问题。如果我不缩进大括号,它会抛出错误。但并非总是如此。
无论如何,我希望有人已经意识到银弹是什么。也许把大括号放在一条线上。等等。我在下面添加了完整的 SPROC,但同样,这是一些奇怪的格式问题。
USE master
GO
/****** Object: StoredProcedure [dbo].[Rscript_geocodeUSACities] Script Date: 8/8/2017 10:31:15 AM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[Rscript_geocodeUSACities]
@usrOutputFilePath varchar(150)
,@usrOutputFileName varchar(150)
AS
BEGIN
SET NOCOUNT ON;
DECLARE @rScript nvarchar(max) = N'
#### USER INPUTS ####
usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + '"
#### ESTABLISH ENVIRONMENT ####
library(data.table)
library(foreach)
library(XML)
library(RCurl)
library(RJSONIO)
##turn off scientific notation
options(scipen=999)
#### GEOCODE ####
##assign input data set
dtInputData <- data.table(InputDataSet)
##initialize cumulative results data table
dtCumulativeResults <- dtInputData[0,]
##find out where to start
startindex <- 1
##if a temp file exists - load it up and count the rows
if (file.exists(usrOutputFile)) {
print("Found temp file - resuming from index")
dtCumulativeResults <- fread(usrOutputFile,colClasses="character")
startindex <- ifelse(is.na(max(as.numeric(dtCumulativeResults$index),na.rm=T)),1,max(as.numeric(dtCumulativeResults$index),na.rm=T)+1)
print(startindex)
} else { print("No temp file") }
if (startindex<nrow(dtInputData) | startindex==nrow(dtInputData)) {
for (ii in seq(startindex,nrow(dtInputData))) {
##initialize temporary results data table
dtTempResults <- dtInputData[ii,]
##geocode address
vcSearchInput <- paste0(gsub("[[:punct:]]","",unlist(strsplit(paste(dtInputData$City_Name_RAW[ii],dtInputData$State_Name_RAW[ii],"USA")," "))),collapse="+")
vcSearchURL <- URLencode(paste("http://maps.google.com/maps/api/geocode/json?address=",vcSearchInput,"&sensor=false",sep=""))
vcSearchOutput <- getURL(vcSearchURL)
vcSearchOutput <- fromJSON(vcSearchOutput,simplify=F)
if (vcSearchOutput$status=="OVER_QUERY_LIMIT") {
dtTempResults[1,c("City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=NA]
#save temporary results as we are going along
dtCumulativeResults <- rbind(dtCumulativeResults,dtTempResults[0,],fill=T)
write.csv(dtCumulativeResults,usrOutputFile,row.names=F)
break
}
if (vcSearchOutput$status=="OK" & !(tolower(dtTempResults$City_Name_RAW) %in% tolower(state.name)) & !(tolower(dtTempResults$City_Name_RAW) %in% tolower(state.abb)) & !(tolower(vcSearchOutput$results[[1]]$address_components[[1]]$long_name) %in% tolower(state.name))) {
dtTempResults[1,City_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[1]]$long_name]
if (regexpr("County|Parish|Borough",vcSearchOutput$results[[1]]$address_components[[2]]$long_name)>0) {
dtTempResults[1,County_Name_MDM:=gsub(" County","",vcSearchOutput$results[[1]]$address_components[[2]]$long_name)]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[3]]$long_name]
} else if (regexpr("United States",vcSearchOutput$results[[1]]$address_components[[3]]$long_name)>0) {
dtTempResults[1,County_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[1]]$long_name]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[2]]$long_name]
} else {
dtTempResults[1,County_Name_MDM:=gsub(" County","",vcSearchOutput$results[[1]]$address_components[[3]]$long_name)]
dtTempResults[1,State_Name_MDM:=vcSearchOutput$results[[1]]$address_components[[4]]$long_name]
}
dtTempResults[1,City_Address_MDM:=vcSearchOutput$results[[1]]$formatted_address]
dtTempResults[1,City_Latitude:=vcSearchOutput$results[[1]]$geometry$location$lat]
dtTempResults[1,City_Longitude:=vcSearchOutput$results[[1]]$geometry$location$lng]
dtTempResults[1,index:=ii]
}
##save temporary results as we are going along
dtCumulativeResults <- rbind(dtCumulativeResults,dtTempResults,fill=T)
write.csv(dtCumulativeResults,usrOutputFile,row.names=F)
}
} else {
##save dud results if no geocoding occurred
dtCumulativeResults <- data.table(City_Name_RAW=NA)
dtCumulativeResults[,c("State_Name_RAW","City_Name_MDM","County_Name_MDM","State_Name_MDM","City_Address_MDM","City_Latitude","City_Longitude","index"):=character()]
write.csv(dtCumulativeResults[0,],usrOutputFile,row.names=F)
}
'
EXECUTE sp_execute_external_script
@language = N'R'
, @script = @rScript
,@input_data_1 =N'select distinct [City_Name_RAW]
,[State_Name_RAW]
from [External].[mdm].[dim_USA_Cities_Map]
where Mod_DT is null'
;
END
GO
请注意,您的大部分脚本都指定为非 Unicode 字符串文字。
之后的每一天usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + '" . . .
因为在 '+' 之后开始下一个没有 N' 的字符串文字。应该是
usrOutputFile <- "' + @usrOutputFilePath + @usrOutputFileName + N'" . . .
要将结果插入 table,在 TSQL 中使用 INSERT ... EXEC 。参见 https://docs.microsoft.com/en-us/sql/t-sql/statements/insert-transact-sql