在 SQLite 中使用子查询更新 table
Update a table using subquery in SQLite
我想使用 ALTER TABLE
和 UPDATE
语句向我的 table 添加一列,而不是重新创建完整的 table.
在我的 UPDATE
语句中使用子查询时,我没有得到预期的输出。
构建可重现的数据
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
tbl(con,"iris")
# # Source: table<iris> [?? x 5]
# # Database: sqlite 3.19.3 []
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <dbl> <dbl> <dbl> <dbl> <chr>
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3.0 1.4 0.2 setosa
# 3 7.0 3.2 4.7 1.4 versicolor
在单独的 table
中创建新列
DBI::dbSendQuery(con, "CREATE TABLE new_table AS SELECT t2.new_col from
iris t1 inner join
(SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species) t2
on t1.Species = t2.Species")
tbl(con,"new_table")
# # Source: table<new_table> [?? x 1]
# # Database: sqlite 3.19.3 []
# new_col
# <dbl>
# 1 6.5
# 2 6.5
# 3 3.2
在旧 table
中创建新列
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
尝试从 new_table
那里插入新列
DBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table)")
tbl(con,"iris")
# # Source: table<iris> [?? x 6]
# # Database: sqlite 3.19.3 []
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species new_col
# <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
# 1 5.1 3.5 1.4 0.2 setosa 6.5
# 2 4.9 3.0 1.4 0.2 setosa 6.5
# 3 7.0 3.2 4.7 1.4 versicolor 6.5
如您所见,我的 new_col
仅包含值 6.5
,而我希望在最后一行具有 3.2
。我该如何解决这个问题?
SQL 数据库中的 table 中的行没有固有顺序。因此,您不能像在 R 中那样分配 "vector" 个值。但是,您可以稍微修改查询:
library(dplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
使用聚合数据创建一个单独的table
DBI::dbSendQuery(con, "CREATE TABLE new_table AS
SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species")
tbl(con,"new_table")
#> # Source: table<new_table> [?? x 2]
#> # Database: sqlite 3.22.0 []
#> Species new_col
#> <chr> <dbl>
#> 1 setosa 6.5
#> 2 versicolor 3.2
在旧 table
中创建新列
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
使用相关子查询
将数据移动到原始table
DBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table t2
WHERE iris.Species = t2.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 6]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species new_col
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5
#> 2 4.9 3 1.4 0.2 setosa 6.5
#> 3 7 3.2 4.7 1.4 versicolor 3.2
如果您有多个计算列,您可以像这样使用 UPDATE ... SET (c1, c2, ...) = (...)
:
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
DBI::dbSendQuery(con, "CREATE TABLE aggs AS
SELECT Species,
SUM(`Sepal.Width`) AS sw_sum,
AVG(`Sepal.Width`) AS sw_avg
FROM iris GROUP BY Species")
tbl(con,"aggs")
#> # Source: table<aggs> [?? x 3]
#> # Database: sqlite 3.22.0 []
#> Species sw_sum sw_avg
#> <chr> <dbl> <dbl>
#> 1 setosa 6.5 3.25
#> 2 versicolor 3.2 3.2
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
DBI::dbSendQuery(con, "UPDATE iris
SET (sw_sum, sw_avg) = (SELECT sw_sum, sw_avg
FROM aggs WHERE iris.Species = aggs.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 7]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sw_sum sw_avg
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5 3.25
#> 2 4.9 3 1.4 0.2 setosa 6.5 3.25
#> 3 7 3.2 4.7 1.4 versico… 3.2 3.2
这也适用于 Postgres,但可能不适用于 SQL 服务器。
实际上,在这种情况下不需要中间table:
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
DBI::dbSendQuery(con, "UPDATE iris
SET (sw_sum, sw_avg) =
(SELECT sw_sum, sw_avg FROM
(SELECT Species,
SUM(`Sepal.Width`) AS sw_sum,
AVG(`Sepal.Width`) AS sw_avg
FROM iris GROUP BY Species) aggs
WHERE iris.Species = aggs.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 7]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sw_sum sw_avg
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5 3.25
#> 2 4.9 3 1.4 0.2 setosa 6.5 3.25
#> 3 7 3.2 4.7 1.4 versico… 3.2 3.2
不过,中间 table 在其他情况下可能会有帮助。例如,当它是在链接问题中使用 R 创建时。
我想使用 ALTER TABLE
和 UPDATE
语句向我的 table 添加一列,而不是重新创建完整的 table.
在我的 UPDATE
语句中使用子查询时,我没有得到预期的输出。
构建可重现的数据
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
tbl(con,"iris")
# # Source: table<iris> [?? x 5]
# # Database: sqlite 3.19.3 []
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <dbl> <dbl> <dbl> <dbl> <chr>
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3.0 1.4 0.2 setosa
# 3 7.0 3.2 4.7 1.4 versicolor
在单独的 table
中创建新列DBI::dbSendQuery(con, "CREATE TABLE new_table AS SELECT t2.new_col from
iris t1 inner join
(SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species) t2
on t1.Species = t2.Species")
tbl(con,"new_table")
# # Source: table<new_table> [?? x 1]
# # Database: sqlite 3.19.3 []
# new_col
# <dbl>
# 1 6.5
# 2 6.5
# 3 3.2
在旧 table
中创建新列DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
尝试从 new_table
那里插入新列
DBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table)")
tbl(con,"iris")
# # Source: table<iris> [?? x 6]
# # Database: sqlite 3.19.3 []
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species new_col
# <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
# 1 5.1 3.5 1.4 0.2 setosa 6.5
# 2 4.9 3.0 1.4 0.2 setosa 6.5
# 3 7.0 3.2 4.7 1.4 versicolor 6.5
如您所见,我的 new_col
仅包含值 6.5
,而我希望在最后一行具有 3.2
。我该如何解决这个问题?
SQL 数据库中的 table 中的行没有固有顺序。因此,您不能像在 R 中那样分配 "vector" 个值。但是,您可以稍微修改查询:
library(dplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
使用聚合数据创建一个单独的table
DBI::dbSendQuery(con, "CREATE TABLE new_table AS
SELECT Species, sum(`Sepal.Width`) as new_col FROM iris GROUP BY Species")
tbl(con,"new_table")
#> # Source: table<new_table> [?? x 2]
#> # Database: sqlite 3.22.0 []
#> Species new_col
#> <chr> <dbl>
#> 1 setosa 6.5
#> 2 versicolor 3.2
在旧 table
中创建新列DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN new_col DOUBLE")
使用相关子查询
将数据移动到原始tableDBI::dbSendQuery(con, "UPDATE iris SET new_col = (SELECT new_col FROM new_table t2
WHERE iris.Species = t2.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 6]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species new_col
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5
#> 2 4.9 3 1.4 0.2 setosa 6.5
#> 3 7 3.2 4.7 1.4 versicolor 3.2
如果您有多个计算列,您可以像这样使用 UPDATE ... SET (c1, c2, ...) = (...)
:
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
DBI::dbSendQuery(con, "CREATE TABLE aggs AS
SELECT Species,
SUM(`Sepal.Width`) AS sw_sum,
AVG(`Sepal.Width`) AS sw_avg
FROM iris GROUP BY Species")
tbl(con,"aggs")
#> # Source: table<aggs> [?? x 3]
#> # Database: sqlite 3.22.0 []
#> Species sw_sum sw_avg
#> <chr> <dbl> <dbl>
#> 1 setosa 6.5 3.25
#> 2 versicolor 3.2 3.2
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
DBI::dbSendQuery(con, "UPDATE iris
SET (sw_sum, sw_avg) = (SELECT sw_sum, sw_avg
FROM aggs WHERE iris.Species = aggs.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 7]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sw_sum sw_avg
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5 3.25
#> 2 4.9 3 1.4 0.2 setosa 6.5 3.25
#> 3 7 3.2 4.7 1.4 versico… 3.2 3.2
这也适用于 Postgres,但可能不适用于 SQL 服务器。
实际上,在这种情况下不需要中间table:
library(dplyr)
library(dbplyr)
library(DBI)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, iris[c(1,2,51),],"iris")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_sum DOUBLE")
DBI::dbSendQuery(con, "ALTER TABLE iris ADD COLUMN sw_avg DOUBLE")
DBI::dbSendQuery(con, "UPDATE iris
SET (sw_sum, sw_avg) =
(SELECT sw_sum, sw_avg FROM
(SELECT Species,
SUM(`Sepal.Width`) AS sw_sum,
AVG(`Sepal.Width`) AS sw_avg
FROM iris GROUP BY Species) aggs
WHERE iris.Species = aggs.Species)")
tbl(con,"iris")
#> # Source: table<iris> [?? x 7]
#> # Database: sqlite 3.22.0 []
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species sw_sum sw_avg
#> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 5.1 3.5 1.4 0.2 setosa 6.5 3.25
#> 2 4.9 3 1.4 0.2 setosa 6.5 3.25
#> 3 7 3.2 4.7 1.4 versico… 3.2 3.2
不过,中间 table 在其他情况下可能会有帮助。例如,当它是在链接问题中使用 R 创建时。