Azure SQL DW 中的 UPDATE FROM?
UPDATE FROM in Azure SQL DW?
我在 Azure SQL DW 中尝试执行 UPDATE FROM 查询时收到一个错误。错误信息是"FROM clause in UPDATE and DELETE statements cannot contain subquery sources or joins"。
这是 SQL DW 特有的限制吗?否则我看不出这个查询有什么问题。如果这确实是 SQL DW 的限制,有什么替代方案?
-- Permanent fact table (about 5 billion rows), hash-distributed on EmailAddress
CREATE TABLE FactTable
(
    Id1          INT,
    Id2          INT,
    EmailAddress NVARCHAR(100),
    Value1       INT
)
WITH (DISTRIBUTION = HASH(EmailAddress));

-- Staging table (about 10 million rows), same distribution key, stored as a heap
CREATE TABLE StageTable
(
    Id1          INT,
    Id2          INT,
    EmailAddress NVARCHAR(100),
    Value1       INT
)
WITH (DISTRIBUTION = HASH(EmailAddress), HEAP);

-- Secondary index on the join columns, intended to help the join to StageTable
CREATE NONCLUSTERED INDEX ix
    ON FactTable (Id1, Id2);
-- Statement from the question: updates FactTable through an ANSI INNER JOIN
-- to StageTable, keeping the larger of the existing and the staged Value1.
-- Azure SQL DW rejects it with: "FROM clause in UPDATE and DELETE statements
-- cannot contain subquery sources or joins".
UPDATE fact
SET
Value1 = CASE WHEN stage.Value1 > fact.Value1 THEN stage.Value1 ELSE fact.Value1 END
FROM FactTable AS fact
INNER JOIN StageTable AS stage ON fact.Id1 = stage.Id1 AND fact.Id2 = stage.Id2
根据文档,Azure SQL 数据仓库支持 UPDATE,但不支持在 FROM 子句中使用 ANSI 连接。您可以使用 CTAS 来变通。简单的双表更新:
-- Two-table update without an ANSI join: StageTable is the only FROM source
-- and the join predicates live in the WHERE clause.
-- Only rows whose staged value is strictly larger are touched. The previous
-- CASE expression wrote FactTable.Value1 back onto every other matched row,
-- so the resulting data is the same while fewer rows are rewritten.
UPDATE dbo.FactTable
SET
    Value1 = stage.Value1
FROM dbo.StageTable AS stage
WHERE dbo.FactTable.Id1 = stage.Id1
  AND dbo.FactTable.Id2 = stage.Id2
  AND stage.Value1 > dbo.FactTable.Value1;
使用 CTAS 的更复杂示例,整段复制自 UPDATE 主文档页面:
-- Build an interim table holding the 2004 sales total per product category
-- NOTE(review): ISNULL (rather than COALESCE) presumably makes the CTAS
-- result columns NOT NULL - confirm before swapping the function.
CREATE TABLE CTAS_acs
WITH (DISTRIBUTION = ROUND_ROBIN)
AS
SELECT
    ISNULL(CAST(EnglishProductCategoryName AS NVARCHAR(50)), 0) AS EnglishProductCategoryName,
    ISNULL(CAST(CalendarYear AS SMALLINT), 0)                   AS CalendarYear,
    ISNULL(CAST(SUM(SalesAmount) AS MONEY), 0)                  AS TotalSalesAmount
FROM dbo.FactInternetSales AS s
INNER JOIN dbo.DimDate AS d
    ON s.OrderDateKey = d.DateKey
INNER JOIN dbo.DimProduct AS p
    ON s.ProductKey = p.ProductKey
INNER JOIN dbo.DimProductSubCategory AS u
    ON p.ProductSubcategoryKey = u.ProductSubcategoryKey
INNER JOIN dbo.DimProductCategory AS c
    ON u.ProductCategoryKey = c.ProductCategoryKey
WHERE CalendarYear = 2004
GROUP BY
    EnglishProductCategoryName,
    CalendarYear;
-- Update AnnualCategorySales from the interim table using an implicit join
-- (join predicates in WHERE instead of an ANSI JOIN).
-- The interim table is referenced as CTAS_acs throughout, matching the name
-- it was created with, so the statement also resolves under a case-sensitive
-- collation.
UPDATE AnnualCategorySales
SET AnnualCategorySales.TotalSalesAmount = CTAS_acs.TotalSalesAmount
FROM CTAS_acs
WHERE CTAS_acs.[EnglishProductCategoryName] = AnnualCategorySales.[EnglishProductCategoryName]
  AND CTAS_acs.[CalendarYear] = AnnualCategorySales.[CalendarYear]
;
-- Remove the interim table once the update has been applied
DROP TABLE CTAS_acs;
我发现,在使用 ASDW(以及 APS/PDW)时,像躲避瘟疫一样避免批量更新是一个很好的做法。
这是一个纯粹的 CTAS 替代方案,在您更新大量行的情况下速度会更快。
它假设 id1 是一个相对较好的分布键,并且暂存行数少于事实行数,使复制可行。此策略应消除节点之间的数据移动。
如果您的暂存表非常大,请在两个表中各创建一个代理列(由 id1 和 id2 组合而成),然后让两个表都按该列进行散列分布,这样可以获得更好的性能。
-- Fact table, hash-distributed on id1
create table FactTable
(
    id1    int,
    id2    int,
    value1 int
)
with (distribution = hash(id1));

-- Staging table, created with the replicate distribution
create table StageTable
(
    id1    int,
    id2    int,
    value1 int
)
with (distribution = replicate);
-- Build the post-update image of FactTable with CTAS: every fact row is kept,
-- and value1 is replaced by the staged value when a matching stage row has a
-- larger one.
create table UpdatedFact
with (distribution = hash(id1))
as
select
    fact.id1,
    fact.id2,
    -- stage.id1 is null when no stage row matched; the existing value is kept then
    case
        when stage.id1 is not null and stage.value1 > fact.value1 then stage.value1
        else fact.value1
    end as value1
from FactTable as fact
left join StageTable as stage
    on  stage.id1 = fact.id1
    and stage.id2 = fact.id2;
-- Swap the rebuilt data into place; the three statements run in this order.
-- NOTE(review): SWITCH presumably requires an empty target, hence the
-- truncate first - confirm against the ALTER TABLE documentation.
-- 1. Empty the original fact table.
truncate table FactTable;
-- 2. Switch the rebuilt rows from UpdatedFact into FactTable.
alter table UpdatedFact switch to FactTable;
-- 3. Drop the interim table.
drop table UpdatedFact;
把您尝试的语句简化一下就能奏效:只需去掉 JOIN,直接用另一个表来更新目标表即可。
-- Template of the join-free form; `this`, `that` and `something` stand in
-- for real column names.
update FactTable
set this = that
from StageTable as s
where s.something = FactTable.something
这是否是最佳方法取决于您的情况,但它会执行而不会引发错误。
我在 Azure SQL DW 中尝试执行 UPDATE FROM 查询时收到一个错误。错误信息是"FROM clause in UPDATE and DELETE statements cannot contain subquery sources or joins"。
这是 SQL DW 特有的限制吗?否则我看不出这个查询有什么问题。如果这确实是 SQL DW 的限制,有什么替代方案?
-- Permanent fact table (about 5 billion rows), hash-distributed on EmailAddress
CREATE TABLE FactTable
(
    Id1          INT,
    Id2          INT,
    EmailAddress NVARCHAR(100),
    Value1       INT
)
WITH (DISTRIBUTION = HASH(EmailAddress));

-- Staging table (about 10 million rows), same distribution key, stored as a heap
CREATE TABLE StageTable
(
    Id1          INT,
    Id2          INT,
    EmailAddress NVARCHAR(100),
    Value1       INT
)
WITH (DISTRIBUTION = HASH(EmailAddress), HEAP);

-- Secondary index on the join columns, intended to help the join to StageTable
CREATE NONCLUSTERED INDEX ix
    ON FactTable (Id1, Id2);
-- Statement from the question: updates FactTable through an ANSI INNER JOIN
-- to StageTable, keeping the larger of the existing and the staged Value1.
-- Azure SQL DW rejects it with: "FROM clause in UPDATE and DELETE statements
-- cannot contain subquery sources or joins".
UPDATE fact
SET
Value1 = CASE WHEN stage.Value1 > fact.Value1 THEN stage.Value1 ELSE fact.Value1 END
FROM FactTable AS fact
INNER JOIN StageTable AS stage ON fact.Id1 = stage.Id1 AND fact.Id2 = stage.Id2
根据文档,Azure SQL 数据仓库支持 UPDATE,但不支持在 FROM 子句中使用 ANSI 连接。您可以使用 CTAS 来变通。简单的双表更新:
-- Two-table update without an ANSI join: StageTable is the only FROM source
-- and the join predicates live in the WHERE clause.
-- Only rows whose staged value is strictly larger are touched. The previous
-- CASE expression wrote FactTable.Value1 back onto every other matched row,
-- so the resulting data is the same while fewer rows are rewritten.
UPDATE dbo.FactTable
SET
    Value1 = stage.Value1
FROM dbo.StageTable AS stage
WHERE dbo.FactTable.Id1 = stage.Id1
  AND dbo.FactTable.Id2 = stage.Id2
  AND stage.Value1 > dbo.FactTable.Value1;
使用 CTAS 的更复杂示例,整段复制自 UPDATE 主文档页面:
-- Create an interim table
-- Aggregates 2004 sales per product category into CTAS_acs
-- NOTE(review): ISNULL (rather than COALESCE) presumably makes the CTAS
-- result columns NOT NULL - confirm before swapping the function.
CREATE TABLE CTAS_acs
WITH (DISTRIBUTION = ROUND_ROBIN)
AS
SELECT
    ISNULL(CAST(EnglishProductCategoryName AS NVARCHAR(50)), 0) AS EnglishProductCategoryName,
    ISNULL(CAST(CalendarYear AS SMALLINT), 0)                   AS CalendarYear,
    ISNULL(CAST(SUM(SalesAmount) AS MONEY), 0)                  AS TotalSalesAmount
FROM dbo.FactInternetSales AS s
INNER JOIN dbo.DimDate AS d
    ON s.OrderDateKey = d.DateKey
INNER JOIN dbo.DimProduct AS p
    ON s.ProductKey = p.ProductKey
INNER JOIN dbo.DimProductSubCategory AS u
    ON p.ProductSubcategoryKey = u.ProductSubcategoryKey
INNER JOIN dbo.DimProductCategory AS c
    ON u.ProductCategoryKey = c.ProductCategoryKey
WHERE CalendarYear = 2004
GROUP BY
    EnglishProductCategoryName,
    CalendarYear;
-- Update AnnualCategorySales from the interim table using an implicit join
-- (join predicates in WHERE instead of an ANSI JOIN).
-- The interim table is referenced as CTAS_acs throughout, matching the name
-- it was created with, so the statement also resolves under a case-sensitive
-- collation.
UPDATE AnnualCategorySales
SET AnnualCategorySales.TotalSalesAmount = CTAS_acs.TotalSalesAmount
FROM CTAS_acs
WHERE CTAS_acs.[EnglishProductCategoryName] = AnnualCategorySales.[EnglishProductCategoryName]
  AND CTAS_acs.[CalendarYear] = AnnualCategorySales.[CalendarYear]
;
-- Remove the interim table once the update has been applied
DROP TABLE CTAS_acs;
我发现,在使用 ASDW(以及 APS/PDW)时,像躲避瘟疫一样避免批量更新是一个很好的做法。
这是一个纯粹的 CTAS 替代方案,在您更新大量行的情况下速度会更快。
它假设 id1 是一个相对较好的分布键,并且暂存行数少于事实行数,使复制可行。此策略应消除节点之间的数据移动。
如果您的暂存表非常大,请在两个表中各创建一个代理列(由 id1 和 id2 组合而成),然后让两个表都按该列进行散列分布,这样可以获得更好的性能。
-- Fact table, hash-distributed on id1
create table FactTable
(
    id1    int,
    id2    int,
    value1 int
)
with (distribution = hash(id1));

-- Staging table, created with the replicate distribution
create table StageTable
(
    id1    int,
    id2    int,
    value1 int
)
with (distribution = replicate);
-- Build the post-update image of FactTable with CTAS: every fact row is kept,
-- and value1 is replaced by the staged value when a matching stage row has a
-- larger one.
create table UpdatedFact
with (distribution = hash(id1))
as
select
    fact.id1,
    fact.id2,
    -- stage.id1 is null when no stage row matched; the existing value is kept then
    case
        when stage.id1 is not null and stage.value1 > fact.value1 then stage.value1
        else fact.value1
    end as value1
from FactTable as fact
left join StageTable as stage
    on  stage.id1 = fact.id1
    and stage.id2 = fact.id2;
-- Swap the rebuilt data into place; the three statements run in this order.
-- NOTE(review): SWITCH presumably requires an empty target, hence the
-- truncate first - confirm against the ALTER TABLE documentation.
-- 1. Empty the original fact table.
truncate table FactTable;
-- 2. Switch the rebuilt rows from UpdatedFact into FactTable.
alter table UpdatedFact switch to FactTable;
-- 3. Drop the interim table.
drop table UpdatedFact;
把您尝试的语句简化一下就能奏效:只需去掉 JOIN,直接用另一个表来更新目标表即可。
-- Template of the join-free form; `this`, `that` and `something` stand in
-- for real column names.
update FactTable
set this = that
from StageTable as s
where s.something = FactTable.something
这是否是最佳方法取决于您的情况,但它会执行而不会引发错误。