改进查询查看多个表的存在
Improving Query looking at multiple tables for existence
给定以下 table 定义:
-- Staging table that the session-scoped queries below read from.
-- Corrections relative to the original sketch:
--   * A table may declare only one PRIMARY KEY, so the column-level
--     PRIMARY KEY on [SessionKey] is dropped in favour of the composite key
--     (a key on [SessionKey] alone would allow just one row per session).
--   * CLUSTERED belongs before the key column list, not after it.
--   * Temp tables are created with CREATE TABLE #name, not CREATE #name TABLE.
CREATE TABLE [dbo].[ConsolidatedRecords]
(
      [SessionKey]   UNIQUEIDENTIFIER NOT NULL
    , [EntityID]     UNIQUEIDENTIFIER NOT NULL FOREIGN KEY REFERENCES [dbo].[EntityList] ( [EntityID] )
    , [EntityName]   NVARCHAR(128)    NOT NULL
    , [SurrogateKey] UNIQUEIDENTIFIER NOT NULL
    , [RecordID]     UNIQUEIDENTIFIER NOT NULL
    , PRIMARY KEY CLUSTERED ( [SessionKey], [EntityName], [RecordID], [SurrogateKey] )
);
GO
-- Holds the key of the session currently being processed; later queries join
-- to it to restrict ConsolidatedRecords to that session.
CREATE TABLE #CurrentSession ([SessionKey] UNIQUEIDENTIFIER NOT NULL);
INSERT INTO #CurrentSession ([SessionKey]) VALUES (NEWID());
-- ... long, involved process to populate ConsolidatedRecords
我有一个针对 ConsolidatedRecords 表的查询,用于检查各个实体是否存在对应的有效记录,但它很难维护,坦率地说也很丑陋。我一直在尝试解决这些问题,但没有成功,所以向您求助:
-- Returns SessionKey/SurrogateKey for every consolidated record of the current
-- session whose RecordID exists in the entity table named by [EntityName].
-- Each EXISTS probes one entity table and is guarded by the matching
-- EntityName literal, so at most one branch can succeed per record.
-- Fix: every subquery now correlates through its own alias. The original
-- referenced alias "two" inside the Entity3..Entity9 subqueries, where that
-- alias is not in scope.
SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session ON records.[SessionKey] = session.[SessionKey]
WHERE (    EXISTS( SELECT 1 FROM [dbo].[Entity1] one   WHERE records.RecordID = one.[Entity1ID]   AND records.[EntityName] = N'Entity1' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity2] two   WHERE records.RecordID = two.[Entity2ID]   AND records.[EntityName] = N'Entity2' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity3] three WHERE records.RecordID = three.[Entity3ID] AND records.[EntityName] = N'Entity3' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity4] four  WHERE records.RecordID = four.[Entity4ID]  AND records.[EntityName] = N'Entity4' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity5] five  WHERE records.RecordID = five.[Entity5ID]  AND records.[EntityName] = N'Entity5' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity6] six   WHERE records.RecordID = six.[Entity6ID]   AND records.[EntityName] = N'Entity6' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity7] seven WHERE records.RecordID = seven.[Entity7ID] AND records.[EntityName] = N'Entity7' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity8] eight WHERE records.RecordID = eight.[Entity8ID] AND records.[EntityName] = N'Entity8' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity9] nine  WHERE records.RecordID = nine.[Entity9ID]  AND records.[EntityName] = N'Entity9' )
      );
其中一个问题是,在实际场景中,涉及的实体数量远不止九个。
我尝试了以下方法,但性能更差——其中一些实体表相当大,有 100,000 条或更多记录。在所有情况下,我查询的都是主键。
-- LEFT JOIN formulation of the same existence check: each entity table is
-- outer-joined on its key, guarded by the matching EntityName literal, and a
-- record is kept when at least one of the joins found a row.
-- Fixes:
--   * six.[Entity6D] was a typo for six.[Entity6ID].
--   * The WHERE clause tested [Entity1]/[Entity2] on every alias; it now tests
--     each table's own join key, which is NULL exactly when the outer join
--     found no match (the question states these keys are primary keys).
SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session ON records.[SessionKey] = session.[SessionKey]
LEFT OUTER JOIN [dbo].[Entity1] one   ON records.RecordID = one.[Entity1ID]   AND records.[EntityName] = N'Entity1'
LEFT OUTER JOIN [dbo].[Entity2] two   ON records.RecordID = two.[Entity2ID]   AND records.[EntityName] = N'Entity2'
LEFT OUTER JOIN [dbo].[Entity3] three ON records.RecordID = three.[Entity3ID] AND records.[EntityName] = N'Entity3'
LEFT OUTER JOIN [dbo].[Entity4] four  ON records.RecordID = four.[Entity4ID]  AND records.[EntityName] = N'Entity4'
LEFT OUTER JOIN [dbo].[Entity5] five  ON records.RecordID = five.[Entity5ID]  AND records.[EntityName] = N'Entity5'
LEFT OUTER JOIN [dbo].[Entity6] six   ON records.RecordID = six.[Entity6ID]   AND records.[EntityName] = N'Entity6'
LEFT OUTER JOIN [dbo].[Entity7] seven ON records.RecordID = seven.[Entity7ID] AND records.[EntityName] = N'Entity7'
LEFT OUTER JOIN [dbo].[Entity8] eight ON records.RecordID = eight.[Entity8ID] AND records.[EntityName] = N'Entity8'
LEFT OUTER JOIN [dbo].[Entity9] nine  ON records.RecordID = nine.[Entity9ID]  AND records.[EntityName] = N'Entity9'
WHERE one.[Entity1ID]   IS NOT NULL
   OR two.[Entity2ID]   IS NOT NULL
   OR three.[Entity3ID] IS NOT NULL
   OR four.[Entity4ID]  IS NOT NULL
   OR five.[Entity5ID]  IS NOT NULL
   OR six.[Entity6ID]   IS NOT NULL
   OR seven.[Entity7ID] IS NOT NULL
   OR eight.[Entity8ID] IS NOT NULL
   OR nine.[Entity9ID]  IS NOT NULL;
你可以使用 UNION ALL:
-- Sketch: collapse the per-entity EXISTS tests into a single EXISTS whose
-- subquery is a UNION ALL of one correlated probe per entity table.
-- A record qualifies as soon as any branch returns a row.
SELECT [SessionKey] = records.[SessionKey]
, [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session
ON records.[SessionKey] = session.[SessionKey]
-- Branch 1: Entity1 probe, guarded by the matching EntityName literal.
WHERE EXISTS(SELECT 1
FROM [dbo].[Entity1] one
WHERE records.RecordID = one.[Entity1ID]
AND records.[EntityName] = N'Entity1'
UNION ALL
-- Branch 2: Entity2 probe, same shape.
SELECT 1
FROM [dbo].[Entity2] two
WHERE records.RecordID = two.[Entity2ID]
AND records.[EntityName] = N'Entity2'
UNION ALL
-- NOTE: the "..." below is an elision from the original answer, not valid SQL;
-- presumably one branch per remaining entity table goes here.
...
);
您可以使用 UNION ALL 并创建所有实体表的视图。您可以在查询中使用视图。我不确定它是否有助于提高性能,但它变得更易于维护。
如果不知道表的定义,则很难判断。如果 Entity1 到 Entity9 具有公共属性(也就是列),可以将它们合并成一个表,并像 ConsolidatedRecords 那样增加一个名为 "EntityName" 的列。然后按 EntityName 进行联接,查询就减少到只涉及 2 个表。如果 Entity1 到 Entity9 确实各不相同,那么您可以用 UNION 合并所有单独的子查询!
HTH,
肖恩
如果这是批处理类型的操作(您不经常执行它),我建议创建一个临时(或物理)表,其中包含在任何这些实体表中存在的所有 RecordID。这个带有 CLUSTERED INDEX(或 PRIMARY KEY)的表将比您的原始查询执行得更快。
-- Builds a keyed lookup of every (RecordID, EntityType) pair that exists in
-- the entity tables, so the main query needs a single EXISTS against it.
-- Fix: the column types now match what they are compared with in
-- [dbo].[ConsolidatedRecords] ([RecordID] UNIQUEIDENTIFIER, [EntityName]
-- NVARCHAR(128)). The original used INT / VARCHAR(100), which cannot hold
-- GUID keys and forces an implicit conversion on the name comparison.
-- NOTE(review): assumes each [EntityNID] column is UNIQUEIDENTIFIER, since the
-- question compares them directly with RecordID; confirm against the schema.
IF OBJECT_ID('tempdb..#ValidRecords') IS NOT NULL
    DROP TABLE #ValidRecords;
CREATE TABLE #ValidRecords (
    RecordID   UNIQUEIDENTIFIER NOT NULL,
    EntityType NVARCHAR(128)    NOT NULL,
    PRIMARY KEY (RecordID, EntityType));
-- Unicode literals (N'...') keep EntityType the same type family as EntityName.
INSERT INTO #ValidRecords (RecordID, EntityType)
SELECT [Entity1ID] AS RecordID, N'Entity1' AS EntityType FROM [dbo].[Entity1] UNION ALL
SELECT [Entity2ID] AS RecordID, N'Entity2' AS EntityType FROM [dbo].[Entity2] UNION ALL
SELECT [Entity3ID] AS RecordID, N'Entity3' AS EntityType FROM [dbo].[Entity3] UNION ALL
SELECT [Entity4ID] AS RecordID, N'Entity4' AS EntityType FROM [dbo].[Entity4]
-- ...... (elided in the original: one UNION ALL branch per remaining entity table)
-- Keys of the current session's consolidated records that have a matching
-- (RecordID, EntityType) row in #ValidRecords.
SELECT r.[SessionKey]   AS [SessionKey]
     , r.[SurrogateKey] AS [SurrogateKey]
FROM [dbo].[ConsolidatedRecords] AS r
INNER JOIN #CurrentSession AS s
    ON s.[SessionKey] = r.[SessionKey]
WHERE EXISTS (
          -- The select list is irrelevant to EXISTS; only row presence matters.
          SELECT 'valid record'
          FROM #ValidRecords AS vr
          WHERE vr.RecordID   = r.RecordID
            AND vr.EntityType = r.[EntityName]
      )
另一方面,如果创建该表所花费的时间是您无法承受的,您可以尝试将多个 EXISTS 替换为一个 EXISTS 加多个 UNION ALL,尽管性能未必会提高,这取决于完整查询的复杂程度。
给定以下 table 定义:
-- Staging table that the session-scoped queries below read from.
-- Corrections relative to the original sketch:
--   * A table may declare only one PRIMARY KEY, so the column-level
--     PRIMARY KEY on [SessionKey] is dropped in favour of the composite key
--     (a key on [SessionKey] alone would allow just one row per session).
--   * CLUSTERED belongs before the key column list, not after it.
--   * Temp tables are created with CREATE TABLE #name, not CREATE #name TABLE.
CREATE TABLE [dbo].[ConsolidatedRecords]
(
      [SessionKey]   UNIQUEIDENTIFIER NOT NULL
    , [EntityID]     UNIQUEIDENTIFIER NOT NULL FOREIGN KEY REFERENCES [dbo].[EntityList] ( [EntityID] )
    , [EntityName]   NVARCHAR(128)    NOT NULL
    , [SurrogateKey] UNIQUEIDENTIFIER NOT NULL
    , [RecordID]     UNIQUEIDENTIFIER NOT NULL
    , PRIMARY KEY CLUSTERED ( [SessionKey], [EntityName], [RecordID], [SurrogateKey] )
);
GO
-- Holds the key of the session currently being processed; later queries join
-- to it to restrict ConsolidatedRecords to that session.
CREATE TABLE #CurrentSession ([SessionKey] UNIQUEIDENTIFIER NOT NULL);
INSERT INTO #CurrentSession ([SessionKey]) VALUES (NEWID());
-- ... long, involved process to populate ConsolidatedRecords
我有一个针对 ConsolidatedRecords 表的查询,用于检查各个实体是否存在对应的有效记录,但它很难维护,坦率地说也很丑陋。我一直在尝试解决这些问题,但没有成功,所以向您求助:
-- Returns SessionKey/SurrogateKey for every consolidated record of the current
-- session whose RecordID exists in the entity table named by [EntityName].
-- Each EXISTS probes one entity table and is guarded by the matching
-- EntityName literal, so at most one branch can succeed per record.
-- Fix: every subquery now correlates through its own alias. The original
-- referenced alias "two" inside the Entity3..Entity9 subqueries, where that
-- alias is not in scope.
SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session ON records.[SessionKey] = session.[SessionKey]
WHERE (    EXISTS( SELECT 1 FROM [dbo].[Entity1] one   WHERE records.RecordID = one.[Entity1ID]   AND records.[EntityName] = N'Entity1' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity2] two   WHERE records.RecordID = two.[Entity2ID]   AND records.[EntityName] = N'Entity2' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity3] three WHERE records.RecordID = three.[Entity3ID] AND records.[EntityName] = N'Entity3' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity4] four  WHERE records.RecordID = four.[Entity4ID]  AND records.[EntityName] = N'Entity4' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity5] five  WHERE records.RecordID = five.[Entity5ID]  AND records.[EntityName] = N'Entity5' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity6] six   WHERE records.RecordID = six.[Entity6ID]   AND records.[EntityName] = N'Entity6' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity7] seven WHERE records.RecordID = seven.[Entity7ID] AND records.[EntityName] = N'Entity7' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity8] eight WHERE records.RecordID = eight.[Entity8ID] AND records.[EntityName] = N'Entity8' )
        OR EXISTS( SELECT 1 FROM [dbo].[Entity9] nine  WHERE records.RecordID = nine.[Entity9ID]  AND records.[EntityName] = N'Entity9' )
      );
其中一个问题是,在实际场景中,涉及的实体数量远不止九个。
我尝试了以下方法,但性能更差——其中一些实体表相当大,有 100,000 条或更多记录。在所有情况下,我查询的都是主键。
-- LEFT JOIN formulation of the same existence check: each entity table is
-- outer-joined on its key, guarded by the matching EntityName literal, and a
-- record is kept when at least one of the joins found a row.
-- Fixes:
--   * six.[Entity6D] was a typo for six.[Entity6ID].
--   * The WHERE clause tested [Entity1]/[Entity2] on every alias; it now tests
--     each table's own join key, which is NULL exactly when the outer join
--     found no match (the question states these keys are primary keys).
SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session ON records.[SessionKey] = session.[SessionKey]
LEFT OUTER JOIN [dbo].[Entity1] one   ON records.RecordID = one.[Entity1ID]   AND records.[EntityName] = N'Entity1'
LEFT OUTER JOIN [dbo].[Entity2] two   ON records.RecordID = two.[Entity2ID]   AND records.[EntityName] = N'Entity2'
LEFT OUTER JOIN [dbo].[Entity3] three ON records.RecordID = three.[Entity3ID] AND records.[EntityName] = N'Entity3'
LEFT OUTER JOIN [dbo].[Entity4] four  ON records.RecordID = four.[Entity4ID]  AND records.[EntityName] = N'Entity4'
LEFT OUTER JOIN [dbo].[Entity5] five  ON records.RecordID = five.[Entity5ID]  AND records.[EntityName] = N'Entity5'
LEFT OUTER JOIN [dbo].[Entity6] six   ON records.RecordID = six.[Entity6ID]   AND records.[EntityName] = N'Entity6'
LEFT OUTER JOIN [dbo].[Entity7] seven ON records.RecordID = seven.[Entity7ID] AND records.[EntityName] = N'Entity7'
LEFT OUTER JOIN [dbo].[Entity8] eight ON records.RecordID = eight.[Entity8ID] AND records.[EntityName] = N'Entity8'
LEFT OUTER JOIN [dbo].[Entity9] nine  ON records.RecordID = nine.[Entity9ID]  AND records.[EntityName] = N'Entity9'
WHERE one.[Entity1ID]   IS NOT NULL
   OR two.[Entity2ID]   IS NOT NULL
   OR three.[Entity3ID] IS NOT NULL
   OR four.[Entity4ID]  IS NOT NULL
   OR five.[Entity5ID]  IS NOT NULL
   OR six.[Entity6ID]   IS NOT NULL
   OR seven.[Entity7ID] IS NOT NULL
   OR eight.[Entity8ID] IS NOT NULL
   OR nine.[Entity9ID]  IS NOT NULL;
你可以使用 UNION ALL:
-- Sketch: collapse the per-entity EXISTS tests into a single EXISTS whose
-- subquery is a UNION ALL of one correlated probe per entity table.
-- A record qualifies as soon as any branch returns a row.
SELECT [SessionKey] = records.[SessionKey]
, [SurrogateKey] = records.[SurrogateKey]
FROM [dbo].[ConsolidatedRecords] records
JOIN #CurrentSession session
ON records.[SessionKey] = session.[SessionKey]
-- Branch 1: Entity1 probe, guarded by the matching EntityName literal.
WHERE EXISTS(SELECT 1
FROM [dbo].[Entity1] one
WHERE records.RecordID = one.[Entity1ID]
AND records.[EntityName] = N'Entity1'
UNION ALL
-- Branch 2: Entity2 probe, same shape.
SELECT 1
FROM [dbo].[Entity2] two
WHERE records.RecordID = two.[Entity2ID]
AND records.[EntityName] = N'Entity2'
UNION ALL
-- NOTE: the "..." below is an elision from the original answer, not valid SQL;
-- presumably one branch per remaining entity table goes here.
...
);
您可以使用 UNION ALL 并创建所有实体表的视图。您可以在查询中使用视图。我不确定它是否有助于提高性能,但它变得更易于维护。
如果不知道表的定义,则很难判断。如果 Entity1 到 Entity9 具有公共属性(也就是列),可以将它们合并成一个表,并像 ConsolidatedRecords 那样增加一个名为 "EntityName" 的列。然后按 EntityName 进行联接,查询就减少到只涉及 2 个表。如果 Entity1 到 Entity9 确实各不相同,那么您可以用 UNION 合并所有单独的子查询!
HTH, 肖恩
如果这是批处理类型的操作(您不经常执行它),我建议创建一个临时(或物理)表,其中包含在任何这些实体表中存在的所有 RecordID。这个带有 CLUSTERED INDEX(或 PRIMARY KEY)的表将比您的原始查询执行得更快。
-- Builds a keyed lookup of every (RecordID, EntityType) pair that exists in
-- the entity tables, so the main query needs a single EXISTS against it.
-- Fix: the column types now match what they are compared with in
-- [dbo].[ConsolidatedRecords] ([RecordID] UNIQUEIDENTIFIER, [EntityName]
-- NVARCHAR(128)). The original used INT / VARCHAR(100), which cannot hold
-- GUID keys and forces an implicit conversion on the name comparison.
-- NOTE(review): assumes each [EntityNID] column is UNIQUEIDENTIFIER, since the
-- question compares them directly with RecordID; confirm against the schema.
IF OBJECT_ID('tempdb..#ValidRecords') IS NOT NULL
    DROP TABLE #ValidRecords;
CREATE TABLE #ValidRecords (
    RecordID   UNIQUEIDENTIFIER NOT NULL,
    EntityType NVARCHAR(128)    NOT NULL,
    PRIMARY KEY (RecordID, EntityType));
-- Unicode literals (N'...') keep EntityType the same type family as EntityName.
INSERT INTO #ValidRecords (RecordID, EntityType)
SELECT [Entity1ID] AS RecordID, N'Entity1' AS EntityType FROM [dbo].[Entity1] UNION ALL
SELECT [Entity2ID] AS RecordID, N'Entity2' AS EntityType FROM [dbo].[Entity2] UNION ALL
SELECT [Entity3ID] AS RecordID, N'Entity3' AS EntityType FROM [dbo].[Entity3] UNION ALL
SELECT [Entity4ID] AS RecordID, N'Entity4' AS EntityType FROM [dbo].[Entity4]
-- ...... (elided in the original: one UNION ALL branch per remaining entity table)
-- Keys of the current session's consolidated records that have a matching
-- (RecordID, EntityType) row in #ValidRecords.
SELECT r.[SessionKey]   AS [SessionKey]
     , r.[SurrogateKey] AS [SurrogateKey]
FROM [dbo].[ConsolidatedRecords] AS r
INNER JOIN #CurrentSession AS s
    ON s.[SessionKey] = r.[SessionKey]
WHERE EXISTS (
          -- The select list is irrelevant to EXISTS; only row presence matters.
          SELECT 'valid record'
          FROM #ValidRecords AS vr
          WHERE vr.RecordID   = r.RecordID
            AND vr.EntityType = r.[EntityName]
      )
另一方面,如果创建该表所花费的时间是您无法承受的,您可以尝试将多个 EXISTS 替换为一个 EXISTS 加多个 UNION ALL,尽管性能未必会提高,这取决于完整查询的复杂程度。