RODBC:为什么 sqlQuery() 对空字符串和仅含空格的值返回 "NA"?
RODBC: Why are the values "NA" for empty and space-only values in sqlQuery()?
我是 R 的新手。我正在查看从以下返回的 RODBC 结果:
> library(RODBC)
> dbcon <- odbcDriverConnect("DRIVER={SQL SERVER};SERVER=MYSERV;DATABASE=SOME", tabQuote='', colQuote='')
> sqlQuery(dbcon, "SELECT 3, 'a', ' ', '', NULL")
.1 .2 .3 .4
1 3 a NA NA NA
为什么 ' '
和 ''
返回 NA
?我在文档中找不到有关此行为的任何内容。我错过了什么?
于是我决定查看 RODBC 中 sqlQuery 函数的源代码,因为我记得它是开源的:
## Run 'query' on an open RODBC channel and hand back its result.
##
## channel       an open RODBC channel (checked with odbcValidChannel()).
## query         the statement to submit; must be supplied.
## errors        controls what a failed submission returns: the value of
##               odbcGetErrMsg() when TRUE, the status code (invisibly)
##               when FALSE. Also forwarded to sqlGetResults().
## ...           forwarded untouched to sqlGetResults().
## rows_at_time  when omitted, the channel's "rows_at_time" attribute is
##               used; when given, it is clamped to the range 1..1024.
sqlQuery <- function(channel, query, errors = TRUE, ..., rows_at_time) {
  ## Guard clauses: a usable channel first, then a query to run.
  if (!odbcValidChannel(channel)) {
    stop("first argument is not an open RODBC channel")
  }
  if (missing(query)) {
    stop("missing argument 'query'")
  }

  ## could argue that 'max' should restrict rows_at_time
  if (missing(rows_at_time)) {
    batch_size <- attr(channel, "rows_at_time")
  } else {
    batch_size <- max(1, min(1024, rows_at_time))
  }

  status <- odbcQuery(channel, query, batch_size)

  ## Any status other than -1 means there are results to collect.
  if (status != -1L) {
    return(sqlGetResults(channel, errors = errors, ...))
  }

  ## Submission failed: report it in the form the caller asked for.
  if (errors) odbcGetErrMsg(channel) else invisible(status)
}
所以它调用 sqlGetResults
:
## Fetch the pending result set on 'channel' and return it as a data frame.
##
## Arguments (as used in the body below):
##   channel           RODBC channel; must pass odbcValidChannel().
##   as.is             which columns to leave unconverted: a logical vector
##                     (recycled to the number of columns), numeric column
##                     indices, or otherwise a vector with one entry per
##                     column.
##   errors            if TRUE, failures return a message value; if FALSE
##                     they return a negative status code invisibly.
##   max, buffsize, nullstring, believeNRows
##                     passed straight through to odbcFetchRows().
##   na.strings        strings replaced by NA in character columns; also
##                     forwarded to type.convert().
##   dec               forwarded to type.convert().
##   stringsAsFactors  type.convert() is called with as.is = !stringsAsFactors.
##
## Returns the converted data frame. On failure returns "No data" or the
## value of odbcGetErrMsg() when 'errors' is TRUE, and an invisible negative
## status otherwise.
sqlGetResults <-
function (channel, as.is = FALSE,
errors = FALSE, max = 0, buffsize = 1000,
nullstring = NA_character_, na.strings = "NA",
believeNRows = TRUE, dec = getOption("dec"),
stringsAsFactors = default.stringsAsFactors())
{
if(!odbcValidChannel(channel))
stop("first argument is not an open RODBC channel")
## Helper: turn the fetched list of columns into a data frame in place,
## by setting class, names and row names directly on the list.
as.df <- function(value, colnames) {
## list-valued columns are tagged with class "ODBC_binary"
for(i in seq_along(value))
if(is.list(value[[i]])) class(value[[i]]) <- "ODBC_binary"
## convert list to data frame
class(value) <- "data.frame"
## duplicate column names are made unique
names(value) <- make.unique(colnames)
## one row name per element of the first column
row.names(value) <- seq(along=value[[1L]])
value
}
## column count is read through the channel's handle pointer
cols <- .Call(C_RODBCNumCols, attr(channel, "handle_ptr"))
## FIXME: should this be <= 0L?
if(cols < 0L) {
if(errors) return("No data")
else return(invisible(-1L))
}
## column descriptions; $names and $type are used below
cData <- .Call(C_RODBCColData, attr(channel, "handle_ptr"))
dbdata <- odbcFetchRows(channel,
max = max,
buffsize = buffsize,
nullstring = nullstring,
believeNRows = believeNRows)
## a negative fetch status is reported the same way as in sqlQuery()
if(dbdata$stat < 0L) {
if(errors) return(odbcGetErrMsg(channel))
else return(invisible(dbdata$stat))
}
data <- as.df(dbdata$data, cData$names)
## all post-processing below is skipped for an empty result
if(nrow(data) > 0L) {
## from here on 'cols' is the column count of the fetched data frame
cols <- ncol(data)
enc <- attr(channel, "encoding")
## in character columns, entries equal to one of na.strings become NA
if(length(na.strings))
for (i in 1L:cols)
if(is.character(data[,i]))
data[data[,i] %in% na.strings, i] <- NA
## normalise 'as.is': logical is recycled to one flag per column, numeric
## is treated as column indices and expanded to a logical mask, anything
## else must already have exactly one entry per column
if(is.logical(as.is)) {
as.is <- rep(as.is, length = cols)
} else if(is.numeric(as.is)) {
if(any(as.is < 1 | as.is > cols))
stop("invalid numeric 'as.is' expression")
i <- rep(FALSE, cols)
i[as.is] <- TRUE
as.is <- i
} else if(length(as.is) != cols)
stop("'as.is' has the wrong length ", length(as.is),
" != cols = ", cols)
## per-column conversion
for (i in seq_len(cols)) {
## re-encode character columns when the channel's encoding is non-empty;
## this happens before the as.is check, so it applies to as.is columns too
if(is.character(data[[i]]) && nchar(enc))
data[[i]] <- iconv(data[[i]], from = enc)
## as.is columns and list (binary) columns are left exactly as fetched
if(as.is[i] || is.list(data[[i]])) next
## numeric columns need no conversion
if(is.numeric(data[[i]])) next
## "date" -> Date, "timestamp" -> POSIXct, everything else goes through
## type.convert(). NOTE: as the console examples in this write-up show,
## type.convert("") and type.convert(" ") both give NA, so an all-blank
## string column comes back as NA unless its as.is flag is set.
if(cData$type[i] == "date")
data[[i]] <- as.Date(data[[i]])
else if(cData$type[i] == "timestamp")
data[[i]] <- as.POSIXct(data[[i]])
else
data[[i]] <- type.convert(as.character(data[[i]]),
na.strings = na.strings,
as.is = !stringsAsFactors,
dec = dec)
}
}
data
}
所以对于 string/varchar 值,它全部归结为:
data[[i]] <- type.convert(as.character(data[[i]]),
na.strings = na.strings,
as.is = !stringsAsFactors,
dec = dec)
所以让我们试试 type.convert
:
> type.convert("a")
[1] a
好的,这是预期的。现在让我们试试奇怪的情况:
> type.convert("")
[1] NA
嗯...好的,这就是我们如何得到 NA
> type.convert(" ")
[1] NA
好的,那也是 NA
。
现在,为什么设置 as.is 之后就不会返回 NA?
if(as.is[i] || is.list(data[[i]])) next
啊哈,如果设置了 as.is,这一列会被直接跳过(next),根本不会调用 type.convert()。
这就解释了为什么在设置 as.is 标志时不会返回 NA。
我是 R 的新手。我正在查看从以下返回的 RODBC 结果:
> library(RODBC)
> dbcon <- odbcDriverConnect("DRIVER={SQL SERVER};SERVER=MYSERV;DATABASE=SOME", tabQuote='', colQuote='')
> sqlQuery(dbcon, "SELECT 3, 'a', ' ', '', NULL")
.1 .2 .3 .4
1 3 a NA NA NA
为什么 ' '
和 ''
返回 NA
?我在文档中找不到有关此行为的任何内容。我错过了什么?
于是我决定查看 RODBC 中 sqlQuery 函数的源代码,因为我记得它是开源的:
## Run 'query' on an open RODBC channel and hand back its result.
##
## channel       an open RODBC channel (checked with odbcValidChannel()).
## query         the statement to submit; must be supplied.
## errors        controls what a failed submission returns: the value of
##               odbcGetErrMsg() when TRUE, the status code (invisibly)
##               when FALSE. Also forwarded to sqlGetResults().
## ...           forwarded untouched to sqlGetResults().
## rows_at_time  when omitted, the channel's "rows_at_time" attribute is
##               used; when given, it is clamped to the range 1..1024.
sqlQuery <- function(channel, query, errors = TRUE, ..., rows_at_time) {
  ## Guard clauses: a usable channel first, then a query to run.
  if (!odbcValidChannel(channel)) {
    stop("first argument is not an open RODBC channel")
  }
  if (missing(query)) {
    stop("missing argument 'query'")
  }

  ## could argue that 'max' should restrict rows_at_time
  if (missing(rows_at_time)) {
    batch_size <- attr(channel, "rows_at_time")
  } else {
    batch_size <- max(1, min(1024, rows_at_time))
  }

  status <- odbcQuery(channel, query, batch_size)

  ## Any status other than -1 means there are results to collect.
  if (status != -1L) {
    return(sqlGetResults(channel, errors = errors, ...))
  }

  ## Submission failed: report it in the form the caller asked for.
  if (errors) odbcGetErrMsg(channel) else invisible(status)
}
所以它调用 sqlGetResults
:
## Fetch the pending result set on 'channel' and return it as a data frame.
##
## Arguments (as used in the body below):
##   channel           RODBC channel; must pass odbcValidChannel().
##   as.is             which columns to leave unconverted: a logical vector
##                     (recycled to the number of columns), numeric column
##                     indices, or otherwise a vector with one entry per
##                     column.
##   errors            if TRUE, failures return a message value; if FALSE
##                     they return a negative status code invisibly.
##   max, buffsize, nullstring, believeNRows
##                     passed straight through to odbcFetchRows().
##   na.strings        strings replaced by NA in character columns; also
##                     forwarded to type.convert().
##   dec               forwarded to type.convert().
##   stringsAsFactors  type.convert() is called with as.is = !stringsAsFactors.
##
## Returns the converted data frame. On failure returns "No data" or the
## value of odbcGetErrMsg() when 'errors' is TRUE, and an invisible negative
## status otherwise.
sqlGetResults <-
function (channel, as.is = FALSE,
errors = FALSE, max = 0, buffsize = 1000,
nullstring = NA_character_, na.strings = "NA",
believeNRows = TRUE, dec = getOption("dec"),
stringsAsFactors = default.stringsAsFactors())
{
if(!odbcValidChannel(channel))
stop("first argument is not an open RODBC channel")
## Helper: turn the fetched list of columns into a data frame in place,
## by setting class, names and row names directly on the list.
as.df <- function(value, colnames) {
## list-valued columns are tagged with class "ODBC_binary"
for(i in seq_along(value))
if(is.list(value[[i]])) class(value[[i]]) <- "ODBC_binary"
## convert list to data frame
class(value) <- "data.frame"
## duplicate column names are made unique
names(value) <- make.unique(colnames)
## one row name per element of the first column
row.names(value) <- seq(along=value[[1L]])
value
}
## column count is read through the channel's handle pointer
cols <- .Call(C_RODBCNumCols, attr(channel, "handle_ptr"))
## FIXME: should this be <= 0L?
if(cols < 0L) {
if(errors) return("No data")
else return(invisible(-1L))
}
## column descriptions; $names and $type are used below
cData <- .Call(C_RODBCColData, attr(channel, "handle_ptr"))
dbdata <- odbcFetchRows(channel,
max = max,
buffsize = buffsize,
nullstring = nullstring,
believeNRows = believeNRows)
## a negative fetch status is reported the same way as in sqlQuery()
if(dbdata$stat < 0L) {
if(errors) return(odbcGetErrMsg(channel))
else return(invisible(dbdata$stat))
}
data <- as.df(dbdata$data, cData$names)
## all post-processing below is skipped for an empty result
if(nrow(data) > 0L) {
## from here on 'cols' is the column count of the fetched data frame
cols <- ncol(data)
enc <- attr(channel, "encoding")
## in character columns, entries equal to one of na.strings become NA
if(length(na.strings))
for (i in 1L:cols)
if(is.character(data[,i]))
data[data[,i] %in% na.strings, i] <- NA
## normalise 'as.is': logical is recycled to one flag per column, numeric
## is treated as column indices and expanded to a logical mask, anything
## else must already have exactly one entry per column
if(is.logical(as.is)) {
as.is <- rep(as.is, length = cols)
} else if(is.numeric(as.is)) {
if(any(as.is < 1 | as.is > cols))
stop("invalid numeric 'as.is' expression")
i <- rep(FALSE, cols)
i[as.is] <- TRUE
as.is <- i
} else if(length(as.is) != cols)
stop("'as.is' has the wrong length ", length(as.is),
" != cols = ", cols)
## per-column conversion
for (i in seq_len(cols)) {
## re-encode character columns when the channel's encoding is non-empty;
## this happens before the as.is check, so it applies to as.is columns too
if(is.character(data[[i]]) && nchar(enc))
data[[i]] <- iconv(data[[i]], from = enc)
## as.is columns and list (binary) columns are left exactly as fetched
if(as.is[i] || is.list(data[[i]])) next
## numeric columns need no conversion
if(is.numeric(data[[i]])) next
## "date" -> Date, "timestamp" -> POSIXct, everything else goes through
## type.convert(). NOTE: as the console examples in this write-up show,
## type.convert("") and type.convert(" ") both give NA, so an all-blank
## string column comes back as NA unless its as.is flag is set.
if(cData$type[i] == "date")
data[[i]] <- as.Date(data[[i]])
else if(cData$type[i] == "timestamp")
data[[i]] <- as.POSIXct(data[[i]])
else
data[[i]] <- type.convert(as.character(data[[i]]),
na.strings = na.strings,
as.is = !stringsAsFactors,
dec = dec)
}
}
data
}
所以对于 string/varchar 值,它全部归结为:
data[[i]] <- type.convert(as.character(data[[i]]),
na.strings = na.strings,
as.is = !stringsAsFactors,
dec = dec)
所以让我们试试 type.convert
:
> type.convert("a")
[1] a
好的,这是预期的。现在让我们试试奇怪的情况:
> type.convert("")
[1] NA
嗯...好的,这就是我们如何得到 NA
> type.convert(" ")
[1] NA
好的,那也是 NA
。
现在,为什么设置 as.is 之后就不会返回 NA?
if(as.is[i] || is.list(data[[i]])) next
啊哈,如果设置了 as.is,这一列会被直接跳过(next),根本不会调用 type.convert()。
这就解释了为什么在设置 as.is 标志时不会返回 NA。