SQL - 在包含数字和字母的列中搜索特定范围的最佳方法是什么?
SQL - What is the best way to search for a specific range in a column with numbers and letters?
我有一张表,其中一列的代码由字母和数字组成,取值范围是 aa00 到 ZZ99。我无法确定在该列中搜索 dd01 - GG99 这类范围的最佳方法。最好的做法是什么?(我在 RStudio 中使用 sqldf)
我试过使用 BETWEEN 语句,但结果不是我想要的。实际上它返回的结果正好相反,而且不包含带小写字母的代码:
-- Asker's first attempt: a plain string BETWEEN on the code column.
-- NOTE(review): the bounds are written as 'GG99' .. 'dd01'. Under a byte-wise,
-- case-sensitive collation upper-case letters sort before lower-case ones,
-- which presumably explains the unexpected rows; confirm against the
-- collation actually in use.
-- NOTE(review): the query selects prodcode but filters on prodcat; confirm
-- that both columns exist in `data`.
SELECT prodcode
FROM data
WHERE prodcat BETWEEN 'GG99' AND 'dd01';
编辑(内容太长,无法写在评论里):
# Asker's attempt to combine a two-table join with a range filter on the
# product code. Kept as posted.
library(ggvis)
library(readr)
library(dplyr)
library(knitr)
library(sqldf)
library(tidyr)
# Read the two input tables from CSV files.
data <- read_csv("C:/Users/name/Documents/test1.csv")
compn <-read_csv("C:/Users/name/Documents/test2.csv")
# Grid of every combination of one upper-case letter, one lower-case letter
# and two digits.
prodcode <- expand.grid(x1 = LETTERS,
x2 = letters,
x3 = 0:9,
x4 = 0:9)
# NOTE(review): this pastes together the rows of `data`, not the rows of the
# `prodcode` grid built just above; presumably a typo for `prodcode` - confirm.
prodcode$prodcat <- apply(data, 1, paste0, collapse = "")
# NOTE(review): the query below looks malformed as written: the subquery is
# used as a bare boolean term (no IN / EXISTS), the parenthesis opened before
# SELECT is never closed, and a ';' appears before GROUP BY. Confirm the
# intended join/filter columns before repairing it.
test <- sqldf("SELECT prod
FROM data, compn
WHERE data.cono = compn.cono
AND (SELECT * FROM prodcode
WHERE (SUBSTR(UPPER(prodcat), 1, 2) >= 'DD' AND
CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 00 ) AND
(SUBSTR(UPPER(prodcat), 1, 2) <= 'GG' AND
CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99);
GROUP BY prod
ORDER BY prod ASC;")
# Print the query result.
test
下面的 SQL 代码可以实现您想要的效果。在 SQL 中,需要将字母部分和数字部分分开进行比较。由于您的数字部分宽度固定,其实可以不必转换为 INT。如果数字部分的宽度不固定,则必须先决定合适的排序行为是什么。
# Build a demo table holding every code of the shape
# <UPPER letter><lower letter><digit><digit>, e.g. "Aa00" ... "Zz99".
prodcode <- expand.grid(
  x1 = LETTERS,
  x2 = letters,
  x3 = 0:9,
  x4 = 0:9
)
# paste0() is vectorised over the columns, so no row-wise apply() is needed
# (apply() would first coerce the whole data frame to a character matrix).
prodcode$prodcat <- with(prodcode, paste0(x1, x2, x3, x4))

library(sqldf)

# Case-insensitive range filter for codes from dd01 up to GG99.
#
# A range over (letters, number) is a lexicographic comparison: the number
# only matters when the letter part sits exactly on a boundary. ANDing
# independent letter and number bounds is not equivalent: with a lower bound
# of 01 it would either let DD00 through or wrongly drop EE00, FF00, ...
sqldf(
  "SELECT * FROM prodcode
   WHERE (SUBSTR(UPPER(prodcat), 1, 2) > 'DD' OR
          (SUBSTR(UPPER(prodcat), 1, 2) = 'DD' AND
           CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 1)) AND
         (SUBSTR(UPPER(prodcat), 1, 2) < 'GG' OR
          (SUBSTR(UPPER(prodcat), 1, 2) = 'GG' AND
           CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99))"
)
在子查询中使用的示例:
# Example data to be filtered. `value` is drawn with rnorm(), so it differs
# from run to run.
proddata <- data.frame(
  prodcode = c("DD15", "BB08", "FQ17", "NN11"),
  value = rnorm(4, 100, 15)
)

# Lookup table holding every code of the shape
# <UPPER letter><lower letter><digit><digit>, e.g. "Aa00" ... "Zz99".
prodcode <- expand.grid(
  x1 = LETTERS,
  x2 = letters,
  x3 = 0:9,
  x4 = 0:9
)
# paste0() is vectorised over the columns, so no row-wise apply() is needed
# (apply() would first coerce the whole data frame to a character matrix).
prodcode$prodcat <- with(prodcode, paste0(x1, x2, x3, x4))

library(sqldf)

# Keep the rows of proddata whose code appears in the sub-selected range
# dd01 .. GG99 (case-insensitive; the lookup codes are upper-cased before
# matching).
#
# The range test is lexicographic on (letters, number): the number is only
# checked when the letter part sits exactly on a boundary. ANDing independent
# letter and number bounds would either admit DD00 or wrongly drop EE00, ...
sqldf(
  "SELECT *
   FROM proddata
   WHERE prodcode IN (
     SELECT UPPER(prodcat) FROM prodcode
     WHERE (SUBSTR(UPPER(prodcat), 1, 2) > 'DD' OR
            (SUBSTR(UPPER(prodcat), 1, 2) = 'DD' AND
             CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 1)) AND
           (SUBSTR(UPPER(prodcat), 1, 2) < 'GG' OR
            (SUBSTR(UPPER(prodcat), 1, 2) = 'GG' AND
             CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99)))"
)
您也可以考虑使用 grepl 配合一个合适的正则表达式,并与 dplyr 的 select 命令搭配使用。由于无法查看 "test" 数据框实际包含的内容,目前很难为您提供更具体的帮助(即:没有可重现的示例)。
我有一张表,其中一列的代码由字母和数字组成,取值范围是 aa00 到 ZZ99。我无法确定在该列中搜索 dd01 - GG99 这类范围的最佳方法。最好的做法是什么?(我在 RStudio 中使用 sqldf)
我试过使用 BETWEEN 语句,但结果不是我想要的。实际上它返回的结果正好相反,而且不包含带小写字母的代码:
-- Asker's first attempt: a plain string BETWEEN on the code column.
-- NOTE(review): the bounds are written as 'GG99' .. 'dd01'. Under a byte-wise,
-- case-sensitive collation upper-case letters sort before lower-case ones,
-- which presumably explains the unexpected rows; confirm against the
-- collation actually in use.
-- NOTE(review): the query selects prodcode but filters on prodcat; confirm
-- that both columns exist in `data`.
SELECT prodcode
FROM data
WHERE prodcat BETWEEN 'GG99' AND 'dd01';
编辑(内容太长,无法写在评论里):
# Asker's attempt to combine a two-table join with a range filter on the
# product code. Kept as posted.
library(ggvis)
library(readr)
library(dplyr)
library(knitr)
library(sqldf)
library(tidyr)
# Read the two input tables from CSV files.
data <- read_csv("C:/Users/name/Documents/test1.csv")
compn <-read_csv("C:/Users/name/Documents/test2.csv")
# Grid of every combination of one upper-case letter, one lower-case letter
# and two digits.
prodcode <- expand.grid(x1 = LETTERS,
x2 = letters,
x3 = 0:9,
x4 = 0:9)
# NOTE(review): this pastes together the rows of `data`, not the rows of the
# `prodcode` grid built just above; presumably a typo for `prodcode` - confirm.
prodcode$prodcat <- apply(data, 1, paste0, collapse = "")
# NOTE(review): the query below looks malformed as written: the subquery is
# used as a bare boolean term (no IN / EXISTS), the parenthesis opened before
# SELECT is never closed, and a ';' appears before GROUP BY. Confirm the
# intended join/filter columns before repairing it.
test <- sqldf("SELECT prod
FROM data, compn
WHERE data.cono = compn.cono
AND (SELECT * FROM prodcode
WHERE (SUBSTR(UPPER(prodcat), 1, 2) >= 'DD' AND
CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 00 ) AND
(SUBSTR(UPPER(prodcat), 1, 2) <= 'GG' AND
CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99);
GROUP BY prod
ORDER BY prod ASC;")
# Print the query result.
test
下面的 SQL 代码可以实现您想要的效果。在 SQL 中,需要将字母部分和数字部分分开进行比较。由于您的数字部分宽度固定,其实可以不必转换为 INT。如果数字部分的宽度不固定,则必须先决定合适的排序行为是什么。
# Build a demo table holding every code of the shape
# <UPPER letter><lower letter><digit><digit>, e.g. "Aa00" ... "Zz99".
prodcode <- expand.grid(
  x1 = LETTERS,
  x2 = letters,
  x3 = 0:9,
  x4 = 0:9
)
# paste0() is vectorised over the columns, so no row-wise apply() is needed
# (apply() would first coerce the whole data frame to a character matrix).
prodcode$prodcat <- with(prodcode, paste0(x1, x2, x3, x4))

library(sqldf)

# Case-insensitive range filter for codes from dd01 up to GG99.
#
# A range over (letters, number) is a lexicographic comparison: the number
# only matters when the letter part sits exactly on a boundary. ANDing
# independent letter and number bounds is not equivalent: with a lower bound
# of 01 it would either let DD00 through or wrongly drop EE00, FF00, ...
sqldf(
  "SELECT * FROM prodcode
   WHERE (SUBSTR(UPPER(prodcat), 1, 2) > 'DD' OR
          (SUBSTR(UPPER(prodcat), 1, 2) = 'DD' AND
           CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 1)) AND
         (SUBSTR(UPPER(prodcat), 1, 2) < 'GG' OR
          (SUBSTR(UPPER(prodcat), 1, 2) = 'GG' AND
           CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99))"
)
在子查询中使用的示例:
# Example data to be filtered. `value` is drawn with rnorm(), so it differs
# from run to run.
proddata <- data.frame(
  prodcode = c("DD15", "BB08", "FQ17", "NN11"),
  value = rnorm(4, 100, 15)
)

# Lookup table holding every code of the shape
# <UPPER letter><lower letter><digit><digit>, e.g. "Aa00" ... "Zz99".
prodcode <- expand.grid(
  x1 = LETTERS,
  x2 = letters,
  x3 = 0:9,
  x4 = 0:9
)
# paste0() is vectorised over the columns, so no row-wise apply() is needed
# (apply() would first coerce the whole data frame to a character matrix).
prodcode$prodcat <- with(prodcode, paste0(x1, x2, x3, x4))

library(sqldf)

# Keep the rows of proddata whose code appears in the sub-selected range
# dd01 .. GG99 (case-insensitive; the lookup codes are upper-cased before
# matching).
#
# The range test is lexicographic on (letters, number): the number is only
# checked when the letter part sits exactly on a boundary. ANDing independent
# letter and number bounds would either admit DD00 or wrongly drop EE00, ...
sqldf(
  "SELECT *
   FROM proddata
   WHERE prodcode IN (
     SELECT UPPER(prodcat) FROM prodcode
     WHERE (SUBSTR(UPPER(prodcat), 1, 2) > 'DD' OR
            (SUBSTR(UPPER(prodcat), 1, 2) = 'DD' AND
             CAST(SUBSTR(prodcat, 3, 2) AS INT) >= 1)) AND
           (SUBSTR(UPPER(prodcat), 1, 2) < 'GG' OR
            (SUBSTR(UPPER(prodcat), 1, 2) = 'GG' AND
             CAST(SUBSTR(prodcat, 3, 2) AS INT) <= 99)))"
)
您也可以考虑使用 grepl 配合一个合适的正则表达式,并与 dplyr 的 select 命令搭配使用。由于无法查看 "test" 数据框实际包含的内容,目前很难为您提供更具体的帮助(即:没有可重现的示例)。