获取我的用户在 SO 上某个徽章的 "rank"(排名)- 查询很慢 - 可以加速吗?
Get "rank" of badge on SO for my user - query is slow - speedup possible?
我很好奇有多少人在我之前得到了 - 我能够得到这些信息
python 2019-01-02 09:09:15 Gold 454
用这个(慢运行)query:
(我无法在 Data Explorer 上用我的主账号进行单点/跨站登录,因此使用匿名登录)
-- Set this to the id of the user whose badges should be ranked.
DECLARE @uid int = 7505395

-- The inner query numbers every badge award (all users) within its
-- (name, class) group by award date; the outer query then keeps only
-- the rows that belong to @uid.
SELECT
    Name,
    Date,
    [Gold/Silver/Else],
    [Row#]
FROM (
    SELECT
        Name,
        Date,
        userId,
        -- Classes 1/2/3 are labelled; any other class is shown as its number.
        CASE class
            WHEN 1 THEN 'Gold'
            WHEN 2 THEN 'Silver'
            WHEN 3 THEN 'Bronze'
            ELSE CONVERT(varchar(10), class)
        END AS [Gold/Silver/Else],
        ROW_NUMBER() OVER (PARTITION BY name, class ORDER BY date ASC) AS [Row#]
    FROM badges
    -- "1 = 1" is a no-op anchor so optional filters can be appended with AND:
    WHERE 1 = 1
    --   AND Class = 1            -- only one class: gold == 1, silver == 2, bronze == 3
    --   AND name LIKE 'python%'  -- only certain named badges
) AS tmp
WHERE userID = @uid
ORDER BY name ASC, Date ASC
(按原样查询给了我所有的徽章,以及有多少人在我之前得到它,并且必须对所有可能的徽章进行排序)
问题:
我尝试过 CTE(只有错误,没有成功),但我的 sql 技能生疏了 - 如何加速此查询?
您可以将聚合与过滤一起使用:
-- Counts how many 'python' class-2 (silver) badges were awarded strictly
-- before the given user's own award of that badge.
-- Expects @uid to be declared beforehand, e.g.  declare @uid int = 7505395
-- Returns 0 when the user does not hold the badge (the subquery yields NULL).
select count(*)
from badges b
where b.name = 'python' and b.class = 2 and
      -- MIN() keeps the subquery scalar: a bare "select b2.date" raises
      -- "Subquery returned more than 1 value" if the user holds this
      -- badge more than once.
      b.date < (select min(b2.date)
                from badges b2
                where b2.name = 'python' and b2.class = 2 and
                      b2.userID = @uid
               );
问题是 Table 似乎没有对此有用的索引。我们得到如下执行计划:
-- 索引扫描不是最优的。我们要索引查找。
不过,您可以通过以下方式将时间缩短几乎一半:
- 预先筛选出该用户的徽章。
- 对排名使用相关子查询。
- 使用 Id 作为 Date 的代理。(Id 是唯一的、递增的,而且通常排序速度更快。)
另请注意:
- 使用了 magic 参数 ##UserId:INT##。
- Class 列只有 3 个值。
- 您可以通过省略 ORDER BY 子句将查询时间再缩短几秒钟。
无论如何,这个查询表现更好:
-- Step 1: pre-select only the requesting user's badges.
-- ##UserId:INT## is the query parameter placeholder filled in at run time.
WITH zUsersBadges AS (
    SELECT
        own.Id,
        own.UserId,
        own.Name,
        own.Date,
        own.Class,
        -- No ELSE branch: classes other than 1-3 yield NULL.
        CASE own.Class
            WHEN 1 THEN 'Gold'
            WHEN 2 THEN 'Silver'
            WHEN 3 THEN 'Bronze'
        END AS [Badge Class],
        CASE WHEN own.TagBased = 1 THEN 'Yes' ELSE 'No' END AS [Is tag badge]
    FROM Badges AS own
    WHERE own.UserId = ##UserId:INT##
)
-- Step 2: for each of those badges, count the awards of the same badge
-- (same name and class) that were handed out no later than the user's own.
SELECT
    mine.Name AS [Badge Name],
    mine.[Badge Class],
    mine.[Is tag badge],
    mine.Date AS [Date Earned],
    (
        SELECT COUNT(everyone.Id)
        FROM Badges AS everyone
        -- Id is used as a stand-in for Date: faster, but may give a slightly higher rank.
        -- For the exact rank (slower), compare everyone.Date <= mine.Date instead.
        WHERE everyone.Name = mine.Name
          AND everyone.Class = mine.Class
          AND everyone.Id <= mine.Id
    ) AS [In Top N of earners]
FROM zUsersBadges AS mine
ORDER BY mine.Name, mine.Date
更新:这个查询表现更好,因为它聚合了多次获得的徽章:
-- Rank of each of the user's badges among all awards of that badge,
-- with badges the user earned several times collapsed into one row.
-- ##UserId:INT## is the query parameter placeholder filled in at run time.
WITH zUsersBadges AS (
-- One row per (UserId, Class, Name, TagBased) for the selected user.
SELECT b.UserId
, b.Name
-- Smallest Id in the group; used below as a stand-in for the earliest date.
, minId = MIN (b.Id)
, [First Earned] = MIN (b.Date)
-- Number of rows in the group that have a non-NULL Date.
, [Earned N times] = COUNT (b.Date)
, b.Class
-- No ELSE branch: classes other than 1-3 yield NULL.
, [Badge Class] = (
CASE WHEN b.Class = 1 THEN 'Gold'
WHEN b.Class = 2 THEN 'Silver'
WHEN b.Class = 3 THEN 'Bronze'
END
)
, [Is tag badge] = IIF (b.TagBased = 1, 'Yes', 'No')
FROM Badges b
WHERE b.UserId = ##UserId:INT##
GROUP BY b.UserId, b.Class, b.Name, b.TagBased
)
SELECT ub.Name AS [Badge Name]
, ub.[Badge Class]
, ub.[Is tag badge]
, ub.[First Earned]
, ub.[Earned N times]
-- Correlated subquery: counts all Badges rows with the same class and name
-- whose Id is not greater than the user's smallest Id for that badge.
, [In Top N of earners] = (
SELECT COUNT (ob.ID)
FROM Badges ob
WHERE (ob.Class = ub.Class AND ob.Id <= Ub.minId AND ob.Name = ub.Name) -- Faster but may give slightly higher rank
--WHERE (ob.Class = ub.Class AND ob.Date <= Ub.[First Earned] AND ob.Name = ub.Name) -- Slower, but gives exact rank.
)
FROM zUsersBadges ub
ORDER BY ub.Name, ub.[First Earned]
我很好奇有多少人在我之前得到了
python 2019-01-02 09:09:15 Gold 454
用这个(慢运行)query:
(我无法在 Data Explorer 上用我的主账号进行单点/跨站登录,因此使用匿名登录)
-- Set this to the id of the user whose badges should be ranked.
DECLARE @uid int = 7505395

-- The inner query numbers every badge award (all users) within its
-- (name, class) group by award date; the outer query then keeps only
-- the rows that belong to @uid.
SELECT
    Name,
    Date,
    [Gold/Silver/Else],
    [Row#]
FROM (
    SELECT
        Name,
        Date,
        userId,
        -- Classes 1/2/3 are labelled; any other class is shown as its number.
        CASE class
            WHEN 1 THEN 'Gold'
            WHEN 2 THEN 'Silver'
            WHEN 3 THEN 'Bronze'
            ELSE CONVERT(varchar(10), class)
        END AS [Gold/Silver/Else],
        ROW_NUMBER() OVER (PARTITION BY name, class ORDER BY date ASC) AS [Row#]
    FROM badges
    -- "1 = 1" is a no-op anchor so optional filters can be appended with AND:
    WHERE 1 = 1
    --   AND Class = 1            -- only one class: gold == 1, silver == 2, bronze == 3
    --   AND name LIKE 'python%'  -- only certain named badges
) AS tmp
WHERE userID = @uid
ORDER BY name ASC, Date ASC
(按原样查询给了我所有的徽章,以及有多少人在我之前得到它,并且必须对所有可能的徽章进行排序)
问题:
我尝试过 CTE(只有错误,没有成功),但我的 sql 技能生疏了 - 如何加速此查询?
您可以将聚合与过滤一起使用:
-- Counts how many 'python' class-2 (silver) badges were awarded strictly
-- before the given user's own award of that badge.
-- Expects @uid to be declared beforehand, e.g.  declare @uid int = 7505395
-- Returns 0 when the user does not hold the badge (the subquery yields NULL).
select count(*)
from badges b
where b.name = 'python' and b.class = 2 and
      -- MIN() keeps the subquery scalar: a bare "select b2.date" raises
      -- "Subquery returned more than 1 value" if the user holds this
      -- badge more than once.
      b.date < (select min(b2.date)
                from badges b2
                where b2.name = 'python' and b2.class = 2 and
                      b2.userID = @uid
               );
问题是 Table 似乎没有对此有用的索引。我们得到如下执行计划:
-- 索引扫描不是最优的。我们要索引查找。
不过,您可以通过以下方式将时间缩短几乎一半:
- 预先筛选出该用户的徽章。
- 对排名使用相关子查询。
- 使用 Id 作为 Date 的代理。(Id 是唯一的、递增的,而且通常排序速度更快。)
另请注意:
- 使用了 magic 参数 ##UserId:INT##。
- Class 列只有 3 个值。
- 您可以通过省略 ORDER BY 子句将查询时间再缩短几秒钟。
无论如何,这个查询表现更好:
-- Step 1: pre-select only the requesting user's badges.
-- ##UserId:INT## is the query parameter placeholder filled in at run time.
WITH zUsersBadges AS (
    SELECT
        own.Id,
        own.UserId,
        own.Name,
        own.Date,
        own.Class,
        -- No ELSE branch: classes other than 1-3 yield NULL.
        CASE own.Class
            WHEN 1 THEN 'Gold'
            WHEN 2 THEN 'Silver'
            WHEN 3 THEN 'Bronze'
        END AS [Badge Class],
        CASE WHEN own.TagBased = 1 THEN 'Yes' ELSE 'No' END AS [Is tag badge]
    FROM Badges AS own
    WHERE own.UserId = ##UserId:INT##
)
-- Step 2: for each of those badges, count the awards of the same badge
-- (same name and class) that were handed out no later than the user's own.
SELECT
    mine.Name AS [Badge Name],
    mine.[Badge Class],
    mine.[Is tag badge],
    mine.Date AS [Date Earned],
    (
        SELECT COUNT(everyone.Id)
        FROM Badges AS everyone
        -- Id is used as a stand-in for Date: faster, but may give a slightly higher rank.
        -- For the exact rank (slower), compare everyone.Date <= mine.Date instead.
        WHERE everyone.Name = mine.Name
          AND everyone.Class = mine.Class
          AND everyone.Id <= mine.Id
    ) AS [In Top N of earners]
FROM zUsersBadges AS mine
ORDER BY mine.Name, mine.Date
更新:这个查询表现更好,因为它聚合了多次获得的徽章:
-- Rank of each of the user's badges among all awards of that badge,
-- with badges the user earned several times collapsed into one row.
-- ##UserId:INT## is the query parameter placeholder filled in at run time.
WITH zUsersBadges AS (
-- One row per (UserId, Class, Name, TagBased) for the selected user.
SELECT b.UserId
, b.Name
-- Smallest Id in the group; used below as a stand-in for the earliest date.
, minId = MIN (b.Id)
, [First Earned] = MIN (b.Date)
-- Number of rows in the group that have a non-NULL Date.
, [Earned N times] = COUNT (b.Date)
, b.Class
-- No ELSE branch: classes other than 1-3 yield NULL.
, [Badge Class] = (
CASE WHEN b.Class = 1 THEN 'Gold'
WHEN b.Class = 2 THEN 'Silver'
WHEN b.Class = 3 THEN 'Bronze'
END
)
, [Is tag badge] = IIF (b.TagBased = 1, 'Yes', 'No')
FROM Badges b
WHERE b.UserId = ##UserId:INT##
GROUP BY b.UserId, b.Class, b.Name, b.TagBased
)
SELECT ub.Name AS [Badge Name]
, ub.[Badge Class]
, ub.[Is tag badge]
, ub.[First Earned]
, ub.[Earned N times]
-- Correlated subquery: counts all Badges rows with the same class and name
-- whose Id is not greater than the user's smallest Id for that badge.
, [In Top N of earners] = (
SELECT COUNT (ob.ID)
FROM Badges ob
WHERE (ob.Class = ub.Class AND ob.Id <= Ub.minId AND ob.Name = ub.Name) -- Faster but may give slightly higher rank
--WHERE (ob.Class = ub.Class AND ob.Date <= Ub.[First Earned] AND ob.Name = ub.Name) -- Slower, but gives exact rank.
)
FROM zUsersBadges ub
ORDER BY ub.Name, ub.[First Earned]