SORT 操作的瓶颈
Bottleneck on SORT operation
我有以下查询,事实证明它的开销非常大,执行一次需要 6-8 秒。从执行计划来看,SORT 操作占了总成本的 79%。这里还有什么可以改进的地方吗?

-- Average duration per stage (in minutes) for every calendar month in the
-- requested range, sorted ascending or descending according to @Ordering.
SELECT
A.StageName,
C.Month,
C.MonthName as Label,
C.Year,
-- Calendar months with no matching row in A come back as NULL from the RIGHT JOIN; report 0 instead.
isnull(A.Average,0) as Data
FROM
(
-- A: one row per stage and month of TimeIn, carrying the average duration.
SELECT
S.StageName,
MONTH(TimeIn) as MonthNumber,
DATENAME(MONTH,TimeIn) as Month,
YEAR(TimeIn) as Year,
-- Minutes between TimeIn and TimeOut; rows whose TimeOut is NULL are measured up to @TodayDate.
ISNULL(AVG(DATEDIFF(mi,TimeIn,isnull(TimeOut,@TodayDate))),0) as Average
FROM
VisitMovement VM
INNER JOIN Stage S on
VM.StageID = S.StageID
WHERE
-- Half-open range: TimeIn from @StartDate up to, but excluding, the day after @EndDate.
(VM.TimeIn >= @StartDate AND
VM.TimeIn < DATEADD (d,1,@EndDate)) AND
-- Rows with a NULL TimeOut are kept; rows whose TimeOut falls after the range are excluded.
(VM.TimeOut < DATEADD (d,1,@EndDate) OR VM.TimeOut IS NULL)
GROUP BY
S.StageNumber,
S.StageName,
MONTH(TimeIn),
DATENAME(MONTH,TimeIn),
YEAR(TimeIn)
) A
-- C: the distinct months of the Calendar table inside the requested range.
-- RIGHT JOIN keeps every such month even when A has no row for it.
RIGHT JOIN (select distinct Month,MonthName,Year from Calendar WHERE DATE >= @StartDate AND DATE < DATEADD (d,1,@EndDate)) C on
A.MonthNumber = C.Month and
A.Month = C.MonthName and
A.Year = C.Year
-- No aggregate is computed at this level; this GROUP BY only collapses duplicate output rows.
GROUP BY
A.StageName,
C.Month,
C.MonthName,
C.Year,
A.Average
-- Only the CASE expressions whose condition matches @Ordering yield values; the others
-- evaluate to NULL for every row and therefore do not influence the order.
ORDER BY
CASE WHEN @Ordering = 'asc' THEN C.Year END ASC,
CASE WHEN @Ordering = 'asc' THEN C.Month END ASC,
CASE WHEN @Ordering = 'asc' THEN A.StageName END ASC,
CASE WHEN @Ordering = 'desc' THEN C.Year END DESC,
CASE WHEN @Ordering = 'desc' THEN C.Month END DESC,
CASE WHEN @Ordering = 'desc' THEN A.StageName END DESC
因为 order by 中的 CASE 表达式必须逐行求值,我认为它无法利用索引进行优化。改用 row_number() 生成默认(升序)的排序序号,然后只在最外层按需反转一次,至少可以避免对每一行多次判断 @Ordering。
在下面的伪代码中,原始查询被放进 CTE;row_number() 确定升序的排序序号,CTE 之后的外层查询在需要时将顺序反转:
-- Pseudo-code: the "..." lines stand for the parts of the original query that are not repeated here.
;with cte as
(
SELECT
A.StageName,
C.Month,
...,
-- Ascending position of each row; the outer query flips it when a descending order is requested.
row_number() over (order by C.Year,C.Month,A.StageName) sortOrder
FROM
...rest of the query, excluding the order by
)
select * --or list the columns without the sortOrder
from cte
-- Multiplying by -1 reverses the order when @Ordering is 'desc'; any other value keeps it ascending.
order by sortOrder * case @Ordering when 'desc' then -1 else 1 end
虽然我知道子查询中针对各列的 GROUP BY 无法去掉,但你可以减轻系统的负担。
目前的分组列是:
-- GROUP BY columns of the inner query (derived table A) as currently written.
S.StageNumber,
S.StageName,
MONTH(TimeIn),
DATENAME(MONTH,TimeIn),
YEAR(TimeIn)
我认为这意味着需要处理相当多的数据。请允许我对各列的大小做一些猜测:
-- Sizes below are guesses; the actual column types are not shown.
S.StageNumber, -- int, 4 bytes
S.StageName, -- string, 20 bytes
MONTH(TimeIn), -- int, 4 bytes
DATENAME(MONTH,TimeIn), -- string, 5 bytes
YEAR(TimeIn) -- int, 4 bytes
现在,这些列之间存在一些依赖关系:
- 如果知道了月份的数字(MONTH),也就知道了它的名称
- 我猜 StageName + StageNumber 是唯一的,并且与 StageID 一一对应。如果不是,您可能需要在外层再次 GROUP BY。
这样分组列就可以精简为:
-- Reduced grouping key: three integer columns, 12 bytes per row under the guessed sizes.
S.StageID, -- int, 4 bytes
MONTH(TimeIn), -- int, 4 bytes
YEAR(TimeIn) -- int, 4 bytes
这意味着 GROUP BY 的排序每条记录只需处理 12 个字节,而不是之前的 37 个字节;而且数字排序本来就比字符串排序快得多(字符串比较还要考虑大小写、重音符号等)。
我已尝试按此思路重写查询(未经测试!)。我还把月份信息的获取移到了一个单独的临时表中,这应该对查询优化器有一点帮助。
-- Months (number, name, year) covered by the requested date range, stored once
-- in a temp table so the main query joins against a small, indexed set.
SELECT DISTINCT Month,MonthName,Year
INTO #dates
FROM Calendar
WHERE DATE >= @StartDate AND DATE < DATEADD (d,1,@EndDate);

-- Assumes every Month/Year pair has exactly one MonthName; otherwise creating this unique index fails.
CREATE UNIQUE CLUSTERED INDEX uq0_#dates ON #dates (Month,Year);

-- Average duration per stage (in minutes) for every month in #dates,
-- sorted ascending or descending according to @Ordering.
SELECT
    A.StageName,
    C.Month,
    C.MonthName as Label,
    C.Year,
    -- Months with no matching row in A come back as NULL from the RIGHT JOIN; report 0 instead.
    isnull(A.Average,0) as Data
FROM
(
    -- Stage names are attached only after aggregating, so the GROUP BY below
    -- works on three integer expressions instead of strings.
    -- NOTE(review): assumes StageID maps one-to-one to StageNumber + StageName;
    -- if it does not, an additional outer GROUP BY is needed.
    SELECT
        S.StageName,
        grp.MonthNumber,
        grp.Year,
        grp.Average
    FROM (
        SELECT
            VM.StageID,
            MONTH(TimeIn) as MonthNumber,
            YEAR(TimeIn) as Year,
            -- Minutes between TimeIn and TimeOut; rows whose TimeOut is NULL are measured up to @TodayDate.
            ISNULL(AVG(DATEDIFF(mi,TimeIn,isnull(TimeOut,@TodayDate))),0) as Average
        FROM
            VisitMovement VM
        WHERE
            -- Half-open range: TimeIn from @StartDate up to, but excluding, the day after @EndDate.
            (VM.TimeIn >= @StartDate AND
             VM.TimeIn < DATEADD (d,1,@EndDate)) AND
            (VM.TimeOut < DATEADD (d,1,@EndDate) OR VM.TimeOut IS NULL)
        GROUP BY
            VM.StageID,
            MONTH(TimeIn),
            YEAR(TimeIn)
    ) grp
    INNER JOIN Stage S
        ON S.StageID = grp.StageID
) A
RIGHT OUTER JOIN #dates C
    on
    -- The month name is not compared: it is determined by the month number.
    A.MonthNumber = C.Month and
    A.Year = C.Year
-- Only the CASE expressions whose condition matches @Ordering yield values; the others
-- evaluate to NULL for every row and therefore do not influence the order.
ORDER BY
    CASE WHEN @Ordering = 'asc' THEN C.Year END ASC,
    CASE WHEN @Ordering = 'asc' THEN C.Month END ASC,
    CASE WHEN @Ordering = 'asc' THEN A.StageName END ASC,
    CASE WHEN @Ordering = 'desc' THEN C.Year END DESC,
    CASE WHEN @Ordering = 'desc' THEN C.Month END DESC,
    CASE WHEN @Ordering = 'desc' THEN A.StageName END DESC;
希望对您有所帮助。
我有以下查询,事实证明它的开销非常大,执行一次需要 6-8 秒。从执行计划来看,SORT 操作占了总成本的 79%。这里还有什么可以改进的地方吗?
-- Average duration per stage (in minutes) for every calendar month in the
-- requested range, sorted ascending or descending according to @Ordering.
SELECT
A.StageName,
C.Month,
C.MonthName as Label,
C.Year,
-- Calendar months with no matching row in A come back as NULL from the RIGHT JOIN; report 0 instead.
isnull(A.Average,0) as Data
FROM
(
-- A: one row per stage and month of TimeIn, carrying the average duration.
SELECT
S.StageName,
MONTH(TimeIn) as MonthNumber,
DATENAME(MONTH,TimeIn) as Month,
YEAR(TimeIn) as Year,
-- Minutes between TimeIn and TimeOut; rows whose TimeOut is NULL are measured up to @TodayDate.
ISNULL(AVG(DATEDIFF(mi,TimeIn,isnull(TimeOut,@TodayDate))),0) as Average
FROM
VisitMovement VM
INNER JOIN Stage S on
VM.StageID = S.StageID
WHERE
-- Half-open range: TimeIn from @StartDate up to, but excluding, the day after @EndDate.
(VM.TimeIn >= @StartDate AND
VM.TimeIn < DATEADD (d,1,@EndDate)) AND
-- Rows with a NULL TimeOut are kept; rows whose TimeOut falls after the range are excluded.
(VM.TimeOut < DATEADD (d,1,@EndDate) OR VM.TimeOut IS NULL)
GROUP BY
S.StageNumber,
S.StageName,
MONTH(TimeIn),
DATENAME(MONTH,TimeIn),
YEAR(TimeIn)
) A
-- C: the distinct months of the Calendar table inside the requested range.
-- RIGHT JOIN keeps every such month even when A has no row for it.
RIGHT JOIN (select distinct Month,MonthName,Year from Calendar WHERE DATE >= @StartDate AND DATE < DATEADD (d,1,@EndDate)) C on
A.MonthNumber = C.Month and
A.Month = C.MonthName and
A.Year = C.Year
-- No aggregate is computed at this level; this GROUP BY only collapses duplicate output rows.
GROUP BY
A.StageName,
C.Month,
C.MonthName,
C.Year,
A.Average
-- Only the CASE expressions whose condition matches @Ordering yield values; the others
-- evaluate to NULL for every row and therefore do not influence the order.
ORDER BY
CASE WHEN @Ordering = 'asc' THEN C.Year END ASC,
CASE WHEN @Ordering = 'asc' THEN C.Month END ASC,
CASE WHEN @Ordering = 'asc' THEN A.StageName END ASC,
CASE WHEN @Ordering = 'desc' THEN C.Year END DESC,
CASE WHEN @Ordering = 'desc' THEN C.Month END DESC,
CASE WHEN @Ordering = 'desc' THEN A.StageName END DESC
因为 order by 中的 CASE 表达式必须逐行求值,我认为它无法利用索引进行优化。改用 row_number() 生成默认(升序)的排序序号,然后只在最外层按需反转一次,至少可以避免对每一行多次判断 @Ordering。
在下面的伪代码中,原始查询被放进 CTE;row_number() 确定升序的排序序号,CTE 之后的外层查询在需要时将顺序反转:
-- Pseudo-code: the "..." lines stand for the parts of the original query that are not repeated here.
;with cte as
(
SELECT
A.StageName,
C.Month,
...,
-- Ascending position of each row; the outer query flips it when a descending order is requested.
row_number() over (order by C.Year,C.Month,A.StageName) sortOrder
FROM
...rest of the query, excluding the order by
)
select * --or list the columns without the sortOrder
from cte
-- Multiplying by -1 reverses the order when @Ordering is 'desc'; any other value keeps it ascending.
order by sortOrder * case @Ordering when 'desc' then -1 else 1 end
虽然我知道子查询中针对各列的 GROUP BY 无法去掉,但你可以减轻系统的负担。
目前的分组列是:
-- GROUP BY columns of the inner query (derived table A) as currently written.
S.StageNumber,
S.StageName,
MONTH(TimeIn),
DATENAME(MONTH,TimeIn),
YEAR(TimeIn)
我认为这意味着需要处理相当多的数据。请允许我对各列的大小做一些猜测:
-- Sizes below are guesses; the actual column types are not shown.
S.StageNumber, -- int, 4 bytes
S.StageName, -- string, 20 bytes
MONTH(TimeIn), -- int, 4 bytes
DATENAME(MONTH,TimeIn), -- string, 5 bytes
YEAR(TimeIn) -- int, 4 bytes
现在,这些列之间存在一些依赖关系:
- 如果知道了月份的数字(MONTH),也就知道了它的名称
- 我猜 StageName + StageNumber 是唯一的,并且与 StageID 一一对应。如果不是,您可能需要在外层再次 GROUP BY。
这样分组列就可以精简为:
-- Reduced grouping key: three integer columns, 12 bytes per row under the guessed sizes.
S.StageID, -- int, 4 bytes
MONTH(TimeIn), -- int, 4 bytes
YEAR(TimeIn) -- int, 4 bytes
这意味着 GROUP BY 的排序每条记录只需处理 12 个字节,而不是之前的 37 个字节;而且数字排序本来就比字符串排序快得多(字符串比较还要考虑大小写、重音符号等)。
我已尝试按此思路重写查询(未经测试!)。我还把月份信息的获取移到了一个单独的临时表中,这应该对查询优化器有一点帮助。
-- Months (number, name, year) covered by the requested date range, stored once
-- in a temp table so the main query joins against a small, indexed set.
SELECT DISTINCT Month,MonthName,Year
INTO #dates
FROM Calendar
WHERE DATE >= @StartDate AND DATE < DATEADD (d,1,@EndDate);

-- Assumes every Month/Year pair has exactly one MonthName; otherwise creating this unique index fails.
CREATE UNIQUE CLUSTERED INDEX uq0_#dates ON #dates (Month,Year);

-- Average duration per stage (in minutes) for every month in #dates,
-- sorted ascending or descending according to @Ordering.
SELECT
    A.StageName,
    C.Month,
    C.MonthName as Label,
    C.Year,
    -- Months with no matching row in A come back as NULL from the RIGHT JOIN; report 0 instead.
    isnull(A.Average,0) as Data
FROM
(
    -- Stage names are attached only after aggregating, so the GROUP BY below
    -- works on three integer expressions instead of strings.
    -- NOTE(review): assumes StageID maps one-to-one to StageNumber + StageName;
    -- if it does not, an additional outer GROUP BY is needed.
    SELECT
        S.StageName,
        grp.MonthNumber,
        grp.Year,
        grp.Average
    FROM (
        SELECT
            VM.StageID,
            MONTH(TimeIn) as MonthNumber,
            YEAR(TimeIn) as Year,
            -- Minutes between TimeIn and TimeOut; rows whose TimeOut is NULL are measured up to @TodayDate.
            ISNULL(AVG(DATEDIFF(mi,TimeIn,isnull(TimeOut,@TodayDate))),0) as Average
        FROM
            VisitMovement VM
        WHERE
            -- Half-open range: TimeIn from @StartDate up to, but excluding, the day after @EndDate.
            (VM.TimeIn >= @StartDate AND
             VM.TimeIn < DATEADD (d,1,@EndDate)) AND
            (VM.TimeOut < DATEADD (d,1,@EndDate) OR VM.TimeOut IS NULL)
        GROUP BY
            VM.StageID,
            MONTH(TimeIn),
            YEAR(TimeIn)
    ) grp
    INNER JOIN Stage S
        ON S.StageID = grp.StageID
) A
RIGHT OUTER JOIN #dates C
    on
    -- The month name is not compared: it is determined by the month number.
    A.MonthNumber = C.Month and
    A.Year = C.Year
-- Only the CASE expressions whose condition matches @Ordering yield values; the others
-- evaluate to NULL for every row and therefore do not influence the order.
ORDER BY
    CASE WHEN @Ordering = 'asc' THEN C.Year END ASC,
    CASE WHEN @Ordering = 'asc' THEN C.Month END ASC,
    CASE WHEN @Ordering = 'asc' THEN A.StageName END ASC,
    CASE WHEN @Ordering = 'desc' THEN C.Year END DESC,
    CASE WHEN @Ordering = 'desc' THEN C.Month END DESC,
    CASE WHEN @Ordering = 'desc' THEN A.StageName END DESC;
希望对您有所帮助。