使用索引连接临时表
Joining Temp Tables Using Indexes
我刚刚开始了解 INDEX,我听说它可以加快结果,但我的测试却产生了相反的效果。
关于数据的几点:
- 我的公司使用 SQL Server,但除此之外我了解得不多
- 我只是一名员工,没有任何管理员权限,甚至只能查看某些文件夹
- #TABLE1 包含超过 23,000 行并在 1 秒内运行
- #TABLE2 包含超过 310 万行,运行时间约为 1.5 分钟
- 使用连接大约需要 1.75 到 2.5 分钟
- 我尝试使用 INDEX 之后,需要 2.5 到 3 分钟甚至更久
- 之后我会加入更多的表格,但这 2 个基本上是其他一切的基础
我在下面尝试的查询花费的时间比我执行普通 JOIN 时要长
-- Start clean: remove any copies of both temp tables left over from an earlier run.
IF OBJECT_ID('tempdb..#TABLE1') IS NOT NULL
    DROP TABLE #TABLE1;
IF OBJECT_ID('tempdb..#TABLE2') IS NOT NULL
    DROP TABLE #TABLE2;

-- Load #TABLE1 from CompanyTable (Name = 'Canada', Division = '100'),
-- casting every column to an explicit type.
SELECT
    CAST(T1.[ID]             AS varchar(20)) AS [ID],
    CAST(T1.[Division]       AS int)         AS [Division],
    CAST(T1.[Category]       AS int)         AS [Category],
    CAST(T1.[Platform]       AS int)         AS [Platform],
    CAST(T1.[Condition]      AS tinyint)     AS [Condition],
    CAST(T1.[First Received] AS date)        AS [First Received],
    CAST(T1.[Last Received]  AS date)        AS [Last Received]
INTO #TABLE1
FROM CompanyTable AS T1
WHERE T1.[Name] = 'Canada'
  AND T1.[Division] = '100';

-- Composite index over the four columns that the later join compares.
CREATE NONCLUSTERED INDEX IX_TABLE1
    ON #TABLE1 ([ID], [Division], [Category], [Platform]);
-- Build #TABLE2: combine SalesTable1 and SalesTable2 (Division = '100'), pre-aggregated
-- per ID/Date/Division/Category/Platform, then aggregate the combined rows again so each
-- key combination appears once.
-- Fixes: the derived table now carries the alias SE that the SELECT list and GROUP BY
-- reference (it was declared as T2, so SE.* could not be bound), and the redundant
-- DISTINCT is gone because GROUP BY already yields one row per key combination.
SELECT
    SE.[Date]        AS [Date],
    SE.[ID]          AS [ID],
    SE.[Division]    AS [Division],
    SE.[Category]    AS [Category],
    SE.[Platform]    AS [Platform],
    SUM(SE.[Units])  AS [Units],
    SUM(SE.[Sales])  AS [Sales],
    SUM(SE.[Retail]) AS [Retail],
    SUM(SE.[Cost])   AS [Cost]
INTO #TABLE2
FROM (
    -- Quantity, Net Amount and Cost Amount are sign-flipped (* -1); Price is not.
    SELECT
        CAST(S1.[Date]     AS date)        AS [Date],
        CAST(S1.[ID]       AS varchar(20)) AS [ID],
        CAST(S1.[Division] AS int)         AS [Division],
        CAST(S1.[Category] AS int)         AS [Category],
        CAST(S1.[Platform] AS int)         AS [Platform],
        CAST(SUM(S1.[Quantity])    * -1 AS decimal(38,20)) AS [Units],
        CAST(SUM(S1.[Net Amount])  * -1 AS decimal(38,20)) AS [Sales],
        CAST(SUM(S1.[Cost Amount]) * -1 AS decimal(38,20)) AS [Cost],
        CAST(SUM(S1.[Price])            AS decimal(38,20)) AS [Retail]
    FROM SalesTable1 AS S1
    WHERE S1.[Division] = '100'
    GROUP BY
        S1.[ID], S1.[Date], S1.[Division], S1.[Category], S1.[Platform]

    UNION ALL

    -- Same shape and column order as the SalesTable1 branch (UNION ALL matches by position).
    SELECT
        CAST(S2.[Date]     AS date)        AS [Date],
        CAST(S2.[ID]       AS varchar(20)) AS [ID],
        CAST(S2.[Division] AS int)         AS [Division],
        CAST(S2.[Category] AS int)         AS [Category],
        CAST(S2.[Platform] AS int)         AS [Platform],
        CAST(SUM(S2.[Quantity])    * -1 AS decimal(38,20)) AS [Units],
        CAST(SUM(S2.[Net Amount])  * -1 AS decimal(38,20)) AS [Sales],
        CAST(SUM(S2.[Cost Amount]) * -1 AS decimal(38,20)) AS [Cost],
        CAST(SUM(S2.[Price])            AS decimal(38,20)) AS [Retail]
    FROM SalesTable2 AS S2
    WHERE S2.[Division] = '100'
    GROUP BY
        S2.[ID], S2.[Date], S2.[Division], S2.[Category], S2.[Platform]
) AS SE
GROUP BY
    SE.[ID], SE.[Date], SE.[Division], SE.[Category], SE.[Platform];

-- Composite index over the four columns that the later join compares.
CREATE NONCLUSTERED INDEX IX_TABLE2
    ON #TABLE2 ([ID], [Division], [Category], [Platform]);
-- Match #TABLE1 rows to #TABLE2 rows on the four shared key columns.
SELECT *
FROM #TABLE1 AS T1
INNER JOIN #TABLE2 AS T2
    ON  T1.[ID]       = T2.[ID]
    AND T1.[Division] = T2.[Division]
    AND T1.[Category] = T2.[Category]
    AND T1.[Platform] = T2.[Platform];

-- Remove the helper indexes once the join has run.
DROP INDEX IX_TABLE1 ON #TABLE1;
DROP INDEX IX_TABLE2 ON #TABLE2;
我是不是做错了什么导致速度变慢?
如下所示,在 #TABLE1 和 #TABLE2 上各创建 4 个不同的索引,然后运行您的查询。
-- One single-column nonclustered index per join column, on each temp table.
-- Fixes: malformed brackets around [Category] / [Platform], the non-existent
-- #TABLE3 / #TABLE4 targets, and #TABLE2 indexes that were named IX_TABLE1_*.
CREATE NONCLUSTERED INDEX IX_TABLE1_id  ON #TABLE1 ([ID]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Div ON #TABLE1 ([Division]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Cat ON #TABLE1 ([Category]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Plt ON #TABLE1 ([Platform]);
CREATE NONCLUSTERED INDEX IX_TABLE2_id  ON #TABLE2 ([ID]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Div ON #TABLE2 ([Division]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Cat ON #TABLE2 ([Category]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Plt ON #TABLE2 ([Platform]);
您的临时表需要一个具有唯一值的主键,用作 CLUSTERED INDEX。然后您只需要在 JOIN 列上添加一个 NONCLUSTERED INDEX。假设 #TABLE1 中的 ID 是唯一的,像这样创建索引:
-- Unique clustered index on [ID], standing in for a PRIMARY KEY constraint.
-- Why not ADD CONSTRAINT ... PRIMARY KEY: [ID] is produced by a CAST in SELECT ... INTO,
-- which leaves the column nullable, and a PRIMARY KEY cannot be defined on a nullable
-- column. A unique index has no such restriction and avoids a named constraint on a
-- temp table. Still requires [ID] to be unique in #TABLE1.
CREATE UNIQUE CLUSTERED INDEX CIX_TABLE1 ON #TABLE1 ([ID]);
-- Secondary index on the remaining join columns.
CREATE NONCLUSTERED INDEX IX_TABLE1 ON #TABLE1 ([Division], [Category], [Platform]);
#TABLE2 同理,同样假设 ID 是唯一的:
-- Unique clustered index on [ID], standing in for a PRIMARY KEY constraint.
-- Why not ADD CONSTRAINT ... PRIMARY KEY: [ID] is produced by a CAST in SELECT ... INTO,
-- which leaves the column nullable, and a PRIMARY KEY cannot be defined on a nullable
-- column. A unique index has no such restriction and avoids a named constraint on a
-- temp table.
-- NOTE(review): this fails with a duplicate-key error if [ID] repeats in #TABLE2. The
-- query that fills #TABLE2 groups by [Date] as well as [ID], so confirm uniqueness first;
-- if [ID] is not unique, remove the UNIQUE keyword.
CREATE UNIQUE CLUSTERED INDEX CIX_TABLE2 ON #TABLE2 ([ID]);
-- Secondary index on the remaining join columns.
CREATE NONCLUSTERED INDEX IX_TABLE2 ON #TABLE2 ([Division], [Category], [Platform]);
但是,如果实际瓶颈在非临时表上,这可能无法解决您的性能问题。您应该分别运行脚本中的各个 SELECT 部分。如果它们单独运行时表现良好,再尝试这些索引。如果不是,那么您需要联系您的 DBA,找出 SELECT 运行缓慢的原因。
我刚刚开始了解 INDEX,我听说它可以加快结果,但我的测试却产生了相反的效果。
关于数据的几点:
- 我的公司使用 SQL Server,但除此之外我了解得不多
- 我只是一名员工,没有任何管理员权限,甚至只能查看某些文件夹
- #TABLE1 包含超过 23,000 行并在 1 秒内运行
- #TABLE2 包含超过 310 万行,运行时间约为 1.5 分钟
- 使用连接大约需要 1.75 到 2.5 分钟
- 我尝试使用 INDEX 之后,需要 2.5 到 3 分钟甚至更久
- 之后我会加入更多的表格,但这 2 个基本上是其他一切的基础
我在下面尝试的查询花费的时间比我执行普通 JOIN 时要长
-- Start clean: remove any copies of both temp tables left over from an earlier run.
IF OBJECT_ID('tempdb..#TABLE1') IS NOT NULL
    DROP TABLE #TABLE1;
IF OBJECT_ID('tempdb..#TABLE2') IS NOT NULL
    DROP TABLE #TABLE2;

-- Load #TABLE1 from CompanyTable (Name = 'Canada', Division = '100'),
-- casting every column to an explicit type.
SELECT
    CAST(T1.[ID]             AS varchar(20)) AS [ID],
    CAST(T1.[Division]       AS int)         AS [Division],
    CAST(T1.[Category]       AS int)         AS [Category],
    CAST(T1.[Platform]       AS int)         AS [Platform],
    CAST(T1.[Condition]      AS tinyint)     AS [Condition],
    CAST(T1.[First Received] AS date)        AS [First Received],
    CAST(T1.[Last Received]  AS date)        AS [Last Received]
INTO #TABLE1
FROM CompanyTable AS T1
WHERE T1.[Name] = 'Canada'
  AND T1.[Division] = '100';

-- Composite index over the four columns that the later join compares.
CREATE NONCLUSTERED INDEX IX_TABLE1
    ON #TABLE1 ([ID], [Division], [Category], [Platform]);
-- Build #TABLE2: combine SalesTable1 and SalesTable2 (Division = '100'), pre-aggregated
-- per ID/Date/Division/Category/Platform, then aggregate the combined rows again so each
-- key combination appears once.
-- Fixes: the derived table now carries the alias SE that the SELECT list and GROUP BY
-- reference (it was declared as T2, so SE.* could not be bound), and the redundant
-- DISTINCT is gone because GROUP BY already yields one row per key combination.
SELECT
    SE.[Date]        AS [Date],
    SE.[ID]          AS [ID],
    SE.[Division]    AS [Division],
    SE.[Category]    AS [Category],
    SE.[Platform]    AS [Platform],
    SUM(SE.[Units])  AS [Units],
    SUM(SE.[Sales])  AS [Sales],
    SUM(SE.[Retail]) AS [Retail],
    SUM(SE.[Cost])   AS [Cost]
INTO #TABLE2
FROM (
    -- Quantity, Net Amount and Cost Amount are sign-flipped (* -1); Price is not.
    SELECT
        CAST(S1.[Date]     AS date)        AS [Date],
        CAST(S1.[ID]       AS varchar(20)) AS [ID],
        CAST(S1.[Division] AS int)         AS [Division],
        CAST(S1.[Category] AS int)         AS [Category],
        CAST(S1.[Platform] AS int)         AS [Platform],
        CAST(SUM(S1.[Quantity])    * -1 AS decimal(38,20)) AS [Units],
        CAST(SUM(S1.[Net Amount])  * -1 AS decimal(38,20)) AS [Sales],
        CAST(SUM(S1.[Cost Amount]) * -1 AS decimal(38,20)) AS [Cost],
        CAST(SUM(S1.[Price])            AS decimal(38,20)) AS [Retail]
    FROM SalesTable1 AS S1
    WHERE S1.[Division] = '100'
    GROUP BY
        S1.[ID], S1.[Date], S1.[Division], S1.[Category], S1.[Platform]

    UNION ALL

    -- Same shape and column order as the SalesTable1 branch (UNION ALL matches by position).
    SELECT
        CAST(S2.[Date]     AS date)        AS [Date],
        CAST(S2.[ID]       AS varchar(20)) AS [ID],
        CAST(S2.[Division] AS int)         AS [Division],
        CAST(S2.[Category] AS int)         AS [Category],
        CAST(S2.[Platform] AS int)         AS [Platform],
        CAST(SUM(S2.[Quantity])    * -1 AS decimal(38,20)) AS [Units],
        CAST(SUM(S2.[Net Amount])  * -1 AS decimal(38,20)) AS [Sales],
        CAST(SUM(S2.[Cost Amount]) * -1 AS decimal(38,20)) AS [Cost],
        CAST(SUM(S2.[Price])            AS decimal(38,20)) AS [Retail]
    FROM SalesTable2 AS S2
    WHERE S2.[Division] = '100'
    GROUP BY
        S2.[ID], S2.[Date], S2.[Division], S2.[Category], S2.[Platform]
) AS SE
GROUP BY
    SE.[ID], SE.[Date], SE.[Division], SE.[Category], SE.[Platform];

-- Composite index over the four columns that the later join compares.
CREATE NONCLUSTERED INDEX IX_TABLE2
    ON #TABLE2 ([ID], [Division], [Category], [Platform]);
-- Match #TABLE1 rows to #TABLE2 rows on the four shared key columns.
SELECT *
FROM #TABLE1 AS T1
INNER JOIN #TABLE2 AS T2
    ON  T1.[ID]       = T2.[ID]
    AND T1.[Division] = T2.[Division]
    AND T1.[Category] = T2.[Category]
    AND T1.[Platform] = T2.[Platform];

-- Remove the helper indexes once the join has run.
DROP INDEX IX_TABLE1 ON #TABLE1;
DROP INDEX IX_TABLE2 ON #TABLE2;
我是不是做错了什么导致速度变慢?
如下所示,在 #TABLE1 和 #TABLE2 上各创建 4 个不同的索引,然后运行您的查询。
-- One single-column nonclustered index per join column, on each temp table.
-- Fixes: malformed brackets around [Category] / [Platform], the non-existent
-- #TABLE3 / #TABLE4 targets, and #TABLE2 indexes that were named IX_TABLE1_*.
CREATE NONCLUSTERED INDEX IX_TABLE1_id  ON #TABLE1 ([ID]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Div ON #TABLE1 ([Division]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Cat ON #TABLE1 ([Category]);
CREATE NONCLUSTERED INDEX IX_TABLE1_Plt ON #TABLE1 ([Platform]);
CREATE NONCLUSTERED INDEX IX_TABLE2_id  ON #TABLE2 ([ID]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Div ON #TABLE2 ([Division]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Cat ON #TABLE2 ([Category]);
CREATE NONCLUSTERED INDEX IX_TABLE2_Plt ON #TABLE2 ([Platform]);
您的临时表需要一个具有唯一值的主键,用作 CLUSTERED INDEX。然后您只需要在 JOIN 列上添加一个 NONCLUSTERED INDEX。假设 #TABLE1 中的 ID 是唯一的,像这样创建索引:
-- Unique clustered index on [ID], standing in for a PRIMARY KEY constraint.
-- Why not ADD CONSTRAINT ... PRIMARY KEY: [ID] is produced by a CAST in SELECT ... INTO,
-- which leaves the column nullable, and a PRIMARY KEY cannot be defined on a nullable
-- column. A unique index has no such restriction and avoids a named constraint on a
-- temp table. Still requires [ID] to be unique in #TABLE1.
CREATE UNIQUE CLUSTERED INDEX CIX_TABLE1 ON #TABLE1 ([ID]);
-- Secondary index on the remaining join columns.
CREATE NONCLUSTERED INDEX IX_TABLE1 ON #TABLE1 ([Division], [Category], [Platform]);
#TABLE2 同理,同样假设 ID 是唯一的:
-- Unique clustered index on [ID], standing in for a PRIMARY KEY constraint.
-- Why not ADD CONSTRAINT ... PRIMARY KEY: [ID] is produced by a CAST in SELECT ... INTO,
-- which leaves the column nullable, and a PRIMARY KEY cannot be defined on a nullable
-- column. A unique index has no such restriction and avoids a named constraint on a
-- temp table.
-- NOTE(review): this fails with a duplicate-key error if [ID] repeats in #TABLE2. The
-- query that fills #TABLE2 groups by [Date] as well as [ID], so confirm uniqueness first;
-- if [ID] is not unique, remove the UNIQUE keyword.
CREATE UNIQUE CLUSTERED INDEX CIX_TABLE2 ON #TABLE2 ([ID]);
-- Secondary index on the remaining join columns.
CREATE NONCLUSTERED INDEX IX_TABLE2 ON #TABLE2 ([Division], [Category], [Platform]);
但是,如果实际瓶颈在非临时表上,这可能无法解决您的性能问题。您应该分别运行脚本中的各个 SELECT 部分。如果它们单独运行时表现良好,再尝试这些索引。如果不是,那么您需要联系您的 DBA,找出 SELECT 运行缓慢的原因。