如何加速使用 DISTINCT 的 SQL 查询
How to speed up a SQL query which is using DISTINCT
据我了解,最好的解决方案似乎是创建一个 INDEX,但我不确定应该为哪些列创建索引。这是我第一次使用 SQL 索引。
如果我从此查询中去掉 DISTINCT,不到一秒钟就能得到 1000 多条结果。然而,加上 DISTINCT 之后,查询需要大约 10 秒才返回结果(当然,结果中没有重复项)。
如果有人有任何替代解决方案,我会洗耳恭听。
这是查询(DISTINCT 用在第二个 SELECT 中):
-- Lists chart SKUs (name, polygon text, market availability, coverage, range) whose
-- polygon lies within 100 units of a fixed point, where the unit is STDistance / 1609.344
-- (presumably metres converted to miles), ordered by polygon area, largest first.
SELECT
Sku,
Name,
ccp.Polygon,
MarketAvailability,
Coverage,
Range
FROM
-- CmapResults: one row per distinct combination of the six columns selected below.
-- DISTINCT has to compare every selected column, including the two LongString values
-- produced by the correlated scalar subqueries.
(SELECT DISTINCT
dbo.CatalogEntry.CatalogEntryId as Id,
dbo.CatalogEntry.Code as Sku,
CoverageNode.Name as Coverage,
RangeNode.Name as [Range],
-- Correlated scalar subquery: the entry's non-NULL 'ItemChartName' property text.
-- NOTE(review): a scalar subquery raises an error if it returns more than one row;
-- confirm each entry has at most one such property.
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'ItemChartName'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS [Name],
-- Same pattern for the 'MarketAvailabilityDetailsCollection' property text.
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'MarketAvailabilityDetailsCollection'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS MarketAvailability
FROM
dbo.CatalogEntry
INNER JOIN
dbo.NodeEntryRelation ON dbo.CatalogEntry.CatalogEntryId = dbo.NodeEntryRelation.CatalogEntryId
-- The node the entry is related to is treated as the coverage; its parent is the range.
INNER JOIN
dbo.CatalogNode AS CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
-- This join only requires that the entry has an 'ItemIsChart' property row (see WHERE);
-- no column from it is selected, and extra matching rows are collapsed by DISTINCT.
INNER JOIN
dbo.CatalogContentProperty ON dbo.CatalogEntry.CatalogEntryId = dbo.CatalogContentProperty.ObjectId
-- Model and brand ancestors of the range node. Because these are INNER joins, entries
-- whose range node lacks a parent or grandparent are dropped; the ancestor names are
-- referenced only by the always-true predicates below.
INNER JOIN
dbo.CatalogNode AS ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE
(dbo.CatalogEntry.ClassTypeId = N'Variation') AND
(dbo.CatalogContentProperty.MetaFieldName = N'ItemIsChart') AND
RangeNode.Name != 'C-MAP' AND
-- Each of the next four predicates contains '' = '', which is always true, so they
-- filter nothing. NOTE(review): they look like optional filter parameters that were
-- substituted with empty strings; confirm with the calling code.
(BrandNode.Name = '' OR '' = '' OR '' IS NULL) AND
(ModelNode.Name = '' OR '' = '' OR '' IS NULL) AND
(CoverageNode.Name = '' OR '' = '' OR '' IS NULL) AND
(RangeNode.Name = '' OR '' = '' OR '' IS NULL)
) AS CmapResults
INNER JOIN
-- ccp: the non-NULL 'ItemChartCoordinates' property rows. LongString is parsed as
-- well-known text into a GEOMETRY value (SRID 4326) and also returned as raw text.
(SELECT
GEOMETRY::STGeomFromText(CatalogContentProperty.LongString,4326) AS PolygonGeometry,
CatalogContentProperty.LongString AS Polygon,
CatalogContentProperty.ObjectId
FROM
CatalogContentProperty
WHERE
MetaFieldName = 'ItemChartCoordinates' AND
(CatalogContentProperty.LongString IS NOT NULL)) ccp ON ccp.ObjectId = CmapResults.Id
WHERE
-- Distance filter: the geometry is repaired with MakeValid(), unioned with its own start
-- point, round-tripped through well-known text into a GEOGRAPHY value, and its
-- STDistance to the fixed point is divided by 1609.344 and compared with 100.
-- The trailing "OR '...' IS NULL" tests a non-NULL literal and is therefore never true.
-- NOTE(review): geography well-known text is 'POINT(longitude latitude)'; the values
-- 50.98 / -1.42 look like latitude/longitude order; confirm against how the polygons
-- themselves are stored before changing anything.
((GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 OR 'POINT(50.9835929 -1.4205852)' IS NULL)
-- Entries for which the MarketAvailability subquery found no row are excluded.
AND MarketAvailability IS NOT NULL
ORDER BY
-- Largest area first; the GEOGRAPHY value is rebuilt here with the same expression
-- that the WHERE clause uses.
GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
我正在使用 SQL Server Management Studio 2012。目标是让带 DISTINCT 的查询与不带 DISTINCT 的查询在相同的时间内返回结果。
您的查询看起来有点复杂。这个会产生相同的输出吗?
-- Chart SKUs within 100 miles of a fixed point, largest chart polygon first.
--
-- Output columns: Sku, Name, MarketAvailability, Polygon, Coverage, Range.
--
-- The property rows of each entry are pivoted with MAX(CASE ...) under a GROUP BY on
-- the short key columns, so no DISTINCT over the long LongString values is needed.
-- Assumptions to confirm against the schema:
--   * LongString is nvarchar(max) (MAX() is not defined for ntext/text).
--   * Each entry has at most one row per MetaFieldName; if there are several, MAX()
--     picks the greatest value instead of failing.
WITH ChartEntry AS (
    SELECT
        ce.CatalogEntryId,
        ce.Code AS Sku,
        MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartName' THEN ccp.LongString END) AS [Name],
        MAX(CASE WHEN ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' THEN ccp.LongString END) AS MarketAvailability,
        MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartCoordinates' THEN ccp.LongString END) AS Polygon,
        CoverageNode.Name AS Coverage,
        RangeNode.Name AS [Range]
    FROM dbo.CatalogEntry AS ce
    INNER JOIN dbo.NodeEntryRelation AS ner
        ON ner.CatalogEntryId = ce.CatalogEntryId
    -- Once a table has an alias, only the alias may be used to reference it.
    INNER JOIN dbo.CatalogNode AS CoverageNode
        ON CoverageNode.CatalogNodeId = ner.CatalogNodeId
    INNER JOIN dbo.CatalogNode AS RangeNode
        ON RangeNode.CatalogNodeId = CoverageNode.ParentNodeId
    -- Model and brand are not selected, but the INNER joins keep only entries whose
    -- range node has both ancestors.
    INNER JOIN dbo.CatalogNode AS ModelNode
        ON ModelNode.CatalogNodeId = RangeNode.ParentNodeId
    INNER JOIN dbo.CatalogNode AS BrandNode
        ON BrandNode.CatalogNodeId = ModelNode.ParentNodeId
    -- Only the three properties that are pivoted above.
    INNER JOIN dbo.CatalogContentProperty AS ccp
        ON ccp.ObjectId = ce.CatalogEntryId
       AND ccp.MetaFieldName IN ('ItemChartName',
                                 'MarketAvailabilityDetailsCollection',
                                 'ItemChartCoordinates')
    WHERE ce.ClassTypeId = N'Variation'
      AND RangeNode.Name != 'C-MAP'
      -- The entry must be flagged as a chart. EXISTS is used instead of filtering ccp
      -- itself, which would discard the rows the pivot needs.
      AND EXISTS (SELECT 1
                  FROM dbo.CatalogContentProperty AS chartFlag
                  WHERE chartFlag.ObjectId = ce.CatalogEntryId
                    AND chartFlag.MetaFieldName = N'ItemIsChart')
    GROUP BY
        ce.CatalogEntryId,
        ce.Code,
        CoverageNode.Name,
        RangeNode.Name
    -- Conditions on aggregates belong in HAVING, not WHERE.
    HAVING MAX(CASE WHEN ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' THEN ccp.LongString END) IS NOT NULL
       AND MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartCoordinates' THEN ccp.LongString END) IS NOT NULL
)
SELECT
    chart.Sku,
    chart.[Name],
    chart.MarketAvailability,
    chart.Polygon,
    chart.Coverage,
    chart.[Range]
FROM ChartEntry AS chart
-- Parse and repair the polygon text once as planar GEOMETRY ...
CROSS APPLY (
    SELECT GEOMETRY::STGeomFromText(chart.Polygon, 4326).MakeValid() AS ValidShape
) AS planar
-- ... then union it with its own start point and round-trip through well-known text
-- to obtain the GEOGRAPHY value used for both the distance filter and the ordering.
CROSS APPLY (
    SELECT GEOGRAPHY::STGeomFromText(
               planar.ValidShape.STUnion(planar.ValidShape.STStartPoint()).STAsText(),
               4326) AS Shape
) AS globe
-- STDistance is divided by 1609.344 (metres per mile) and limited to 100.
-- NOTE(review): geography well-known text is 'POINT(longitude latitude)'; 50.98 / -1.42
-- looks like latitude/longitude order; confirm against how the polygons are stored.
WHERE globe.Shape.STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344 <= 100
ORDER BY globe.Shape.STArea() DESC;
所以,就我的情况而言,解决办法是去掉 DISTINCT,改用 LINQ 去重!加载时间从 10–11 秒减少到 4–5 秒。
据我了解,最好的解决方案似乎是创建一个 INDEX,但我不确定应该为哪些列创建索引。这是我第一次使用 SQL 索引。
如果我从此查询中去掉 DISTINCT,不到一秒钟就能得到 1000 多条结果。然而,加上 DISTINCT 之后,查询需要大约 10 秒才返回结果(当然,结果中没有重复项)。
如果有人有任何替代解决方案,我会洗耳恭听。
这是查询(DISTINCT 用在第二个 SELECT 中):
-- Lists chart SKUs (name, polygon text, market availability, coverage, range) whose
-- polygon lies within 100 units of a fixed point, where the unit is STDistance / 1609.344
-- (presumably metres converted to miles), ordered by polygon area, largest first.
SELECT
Sku,
Name,
ccp.Polygon,
MarketAvailability,
Coverage,
Range
FROM
-- CmapResults: one row per distinct combination of the six columns selected below.
-- DISTINCT has to compare every selected column, including the two LongString values
-- produced by the correlated scalar subqueries.
(SELECT DISTINCT
dbo.CatalogEntry.CatalogEntryId as Id,
dbo.CatalogEntry.Code as Sku,
CoverageNode.Name as Coverage,
RangeNode.Name as [Range],
-- Correlated scalar subquery: the entry's non-NULL 'ItemChartName' property text.
-- NOTE(review): a scalar subquery raises an error if it returns more than one row;
-- confirm each entry has at most one such property.
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'ItemChartName'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS [Name],
-- Same pattern for the 'MarketAvailabilityDetailsCollection' property text.
(SELECT CatalogContentProperty.LongString
FROM CatalogContentProperty
WHERE MetaFieldName = 'MarketAvailabilityDetailsCollection'
AND (CatalogContentProperty.LongString IS NOT NULL)
AND (CatalogContentProperty.ObjectId = dbo.CatalogEntry.CatalogEntryId)) AS MarketAvailability
FROM
dbo.CatalogEntry
INNER JOIN
dbo.NodeEntryRelation ON dbo.CatalogEntry.CatalogEntryId = dbo.NodeEntryRelation.CatalogEntryId
-- The node the entry is related to is treated as the coverage; its parent is the range.
INNER JOIN
dbo.CatalogNode AS CoverageNode ON dbo.NodeEntryRelation.CatalogNodeId = CoverageNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS RangeNode ON CoverageNode.ParentNodeId = RangeNode.CatalogNodeId
-- This join only requires that the entry has an 'ItemIsChart' property row (see WHERE);
-- no column from it is selected, and extra matching rows are collapsed by DISTINCT.
INNER JOIN
dbo.CatalogContentProperty ON dbo.CatalogEntry.CatalogEntryId = dbo.CatalogContentProperty.ObjectId
-- Model and brand ancestors of the range node. Because these are INNER joins, entries
-- whose range node lacks a parent or grandparent are dropped; the ancestor names are
-- referenced only by the always-true predicates below.
INNER JOIN
dbo.CatalogNode AS ModelNode ON RangeNode.ParentNodeId = ModelNode.CatalogNodeId
INNER JOIN
dbo.CatalogNode AS BrandNode ON ModelNode.ParentNodeId = BrandNode.CatalogNodeId
WHERE
(dbo.CatalogEntry.ClassTypeId = N'Variation') AND
(dbo.CatalogContentProperty.MetaFieldName = N'ItemIsChart') AND
RangeNode.Name != 'C-MAP' AND
-- Each of the next four predicates contains '' = '', which is always true, so they
-- filter nothing. NOTE(review): they look like optional filter parameters that were
-- substituted with empty strings; confirm with the calling code.
(BrandNode.Name = '' OR '' = '' OR '' IS NULL) AND
(ModelNode.Name = '' OR '' = '' OR '' IS NULL) AND
(CoverageNode.Name = '' OR '' = '' OR '' IS NULL) AND
(RangeNode.Name = '' OR '' = '' OR '' IS NULL)
) AS CmapResults
INNER JOIN
-- ccp: the non-NULL 'ItemChartCoordinates' property rows. LongString is parsed as
-- well-known text into a GEOMETRY value (SRID 4326) and also returned as raw text.
(SELECT
GEOMETRY::STGeomFromText(CatalogContentProperty.LongString,4326) AS PolygonGeometry,
CatalogContentProperty.LongString AS Polygon,
CatalogContentProperty.ObjectId
FROM
CatalogContentProperty
WHERE
MetaFieldName = 'ItemChartCoordinates' AND
(CatalogContentProperty.LongString IS NOT NULL)) ccp ON ccp.ObjectId = CmapResults.Id
WHERE
-- Distance filter: the geometry is repaired with MakeValid(), unioned with its own start
-- point, round-tripped through well-known text into a GEOGRAPHY value, and its
-- STDistance to the fixed point is divided by 1609.344 and compared with 100.
-- The trailing "OR '...' IS NULL" tests a non-NULL literal and is therefore never true.
-- NOTE(review): geography well-known text is 'POINT(longitude latitude)'; the values
-- 50.98 / -1.42 look like latitude/longitude order; confirm against how the polygons
-- themselves are stored before changing anything.
((GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344) <= 100 OR 'POINT(50.9835929 -1.4205852)' IS NULL)
-- Entries for which the MarketAvailability subquery found no row are excluded.
AND MarketAvailability IS NOT NULL
ORDER BY
-- Largest area first; the GEOGRAPHY value is rebuilt here with the same expression
-- that the WHERE clause uses.
GEOGRAPHY::STGeomFromText(PolygonGeometry.MakeValid().STUnion(PolygonGeometry.MakeValid().STStartPoint()).STAsText(), 4326).STArea() DESC;
我正在使用 SQL Server Management Studio 2012。目标是让带 DISTINCT 的查询与不带 DISTINCT 的查询在相同的时间内返回结果。
您的查询看起来有点复杂。这个会产生相同的输出吗?
-- Chart SKUs within 100 miles of a fixed point, largest chart polygon first.
--
-- Output columns: Sku, Name, MarketAvailability, Polygon, Coverage, Range.
--
-- The property rows of each entry are pivoted with MAX(CASE ...) under a GROUP BY on
-- the short key columns, so no DISTINCT over the long LongString values is needed.
-- Assumptions to confirm against the schema:
--   * LongString is nvarchar(max) (MAX() is not defined for ntext/text).
--   * Each entry has at most one row per MetaFieldName; if there are several, MAX()
--     picks the greatest value instead of failing.
WITH ChartEntry AS (
    SELECT
        ce.CatalogEntryId,
        ce.Code AS Sku,
        MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartName' THEN ccp.LongString END) AS [Name],
        MAX(CASE WHEN ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' THEN ccp.LongString END) AS MarketAvailability,
        MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartCoordinates' THEN ccp.LongString END) AS Polygon,
        CoverageNode.Name AS Coverage,
        RangeNode.Name AS [Range]
    FROM dbo.CatalogEntry AS ce
    INNER JOIN dbo.NodeEntryRelation AS ner
        ON ner.CatalogEntryId = ce.CatalogEntryId
    -- Once a table has an alias, only the alias may be used to reference it.
    INNER JOIN dbo.CatalogNode AS CoverageNode
        ON CoverageNode.CatalogNodeId = ner.CatalogNodeId
    INNER JOIN dbo.CatalogNode AS RangeNode
        ON RangeNode.CatalogNodeId = CoverageNode.ParentNodeId
    -- Model and brand are not selected, but the INNER joins keep only entries whose
    -- range node has both ancestors.
    INNER JOIN dbo.CatalogNode AS ModelNode
        ON ModelNode.CatalogNodeId = RangeNode.ParentNodeId
    INNER JOIN dbo.CatalogNode AS BrandNode
        ON BrandNode.CatalogNodeId = ModelNode.ParentNodeId
    -- Only the three properties that are pivoted above.
    INNER JOIN dbo.CatalogContentProperty AS ccp
        ON ccp.ObjectId = ce.CatalogEntryId
       AND ccp.MetaFieldName IN ('ItemChartName',
                                 'MarketAvailabilityDetailsCollection',
                                 'ItemChartCoordinates')
    WHERE ce.ClassTypeId = N'Variation'
      AND RangeNode.Name != 'C-MAP'
      -- The entry must be flagged as a chart. EXISTS is used instead of filtering ccp
      -- itself, which would discard the rows the pivot needs.
      AND EXISTS (SELECT 1
                  FROM dbo.CatalogContentProperty AS chartFlag
                  WHERE chartFlag.ObjectId = ce.CatalogEntryId
                    AND chartFlag.MetaFieldName = N'ItemIsChart')
    GROUP BY
        ce.CatalogEntryId,
        ce.Code,
        CoverageNode.Name,
        RangeNode.Name
    -- Conditions on aggregates belong in HAVING, not WHERE.
    HAVING MAX(CASE WHEN ccp.MetaFieldName = 'MarketAvailabilityDetailsCollection' THEN ccp.LongString END) IS NOT NULL
       AND MAX(CASE WHEN ccp.MetaFieldName = 'ItemChartCoordinates' THEN ccp.LongString END) IS NOT NULL
)
SELECT
    chart.Sku,
    chart.[Name],
    chart.MarketAvailability,
    chart.Polygon,
    chart.Coverage,
    chart.[Range]
FROM ChartEntry AS chart
-- Parse and repair the polygon text once as planar GEOMETRY ...
CROSS APPLY (
    SELECT GEOMETRY::STGeomFromText(chart.Polygon, 4326).MakeValid() AS ValidShape
) AS planar
-- ... then union it with its own start point and round-trip through well-known text
-- to obtain the GEOGRAPHY value used for both the distance filter and the ordering.
CROSS APPLY (
    SELECT GEOGRAPHY::STGeomFromText(
               planar.ValidShape.STUnion(planar.ValidShape.STStartPoint()).STAsText(),
               4326) AS Shape
) AS globe
-- STDistance is divided by 1609.344 (metres per mile) and limited to 100.
-- NOTE(review): geography well-known text is 'POINT(longitude latitude)'; 50.98 / -1.42
-- looks like latitude/longitude order; confirm against how the polygons are stored.
WHERE globe.Shape.STDistance(GEOGRAPHY::STGeomFromText('POINT(50.9835929 -1.4205852)', 4326)) / 1609.344 <= 100
ORDER BY globe.Shape.STArea() DESC;
所以,就我的情况而言,解决办法是去掉 DISTINCT,改用 LINQ 去重!加载时间从 10–11 秒减少到 4–5 秒。