如何优化查询,使手动工作可以动态和优化的方式进行
How to optimize the query so the manual work can be dyanmic and in optimized way
我有以下 table 喜欢:SQL fiddle
我可以通过 XML 获得此输出,但我不确定如何为更多用户(大约 200 万用户)正确获得低于输出的值。
稍后我想通过每个 id 的计数获得前 3 个名称,因此 RANK 或 OrderBy 子句将进入 SQL 并且不确定当数据量很大时需要多少次迭代用户。
我试过的工作代码:
-----------SQL Raw Table Creation------------------------
CREATE TABLE tb
(
Id INT,
Name VARCHAR(50) NOT NULL
);
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'bb');
INSERT INTO tb (Id, Name) VALUES (1, 'cc');
INSERT INTO tb (Id, Name) VALUES (1, 'cc');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (2, 'aa');
INSERT INTO tb (Id, Name) VALUES (2, 'bb');
INSERT INTO tb (Id, Name) VALUES (2, 'bb');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (3, 'aa');
INSERT INTO tb (Id, Name) VALUES (3, 'bb');
INSERT INTO tb (Id, Name) VALUES (3, 'cc');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
-----------------Want to RANK or get only top 3 rows for each Id when group by Name--------------
select f.* into #t1
from(
select f.*
from(
select f.*
from (
select top 3 id,name,count(name) as total
from tb
where id = 1
group by id,name
order by id,total desc
)f
Union
select top 3 id,name,count(name) as total
from tb
where id = 2
group by id,name
order by id,total desc
)f
Union
select top 3 id,name,count(name) as total
from tb
where id = 3
group by id,name
order by id,total desc
) f
/* Output is moved in temp table #t1 which looks like
id name total
1 aa 5
1 cc 2
1 dd 3
2 aa 1
2 bb 2
2 ee 4
3 bb 1
3 cc 1
3 dd 3
*/
---------Final Joining for each Top3Names and RespectiveTotal -----
select a.id as ID, a.listStr as Top3Names , b.Total as RespectiveTotal
from
(
SELECT id,STUFF((SELECT ',' + name
FROM #t1 EE
WHERE EE.id=E.id
FOR XML PATH('')), 1, 1, '') AS listStr
FROM #t1 E
GROUP BY E.id
)a
left Join
(
SELECT id,STUFF((SELECT ',' + cast(total as Varchar)
FROM #t1 EE
WHERE EE.id=E.id
FOR XML PATH('')), 1, 1, '') AS Total
FROM #t1 E
GROUP BY E.id
)b
on a.id=b.id
输出:
ID Top3Names RespectiveTotal
1 aa,cc,dd 5,2,3
2 aa,bb,ee 1,2,4
3 bb,cc,dd 1,1,3
这里我为每个 ID 使用 UNION,这是不正确的做法。我想要一个优化的方式。我还使用临时 table 来存储我的结果。这是一个好方法吗?让我知道任何正确的解决方案或替代方案,以便我可以在更大的设备上进行测试。
在我的 SQL 服务器机器上,对于给定的示例数据,您的查询统计信息如下所示:
- 逻辑读取总数:13
- 总 CPU 时间:00:00:00.007
如果您使用的是 SQL SERVER 2017+,您可以使用 STRING_AGG
功能:
SELECT
id
, STRING_AGG(name,',') WITHIN GROUP (order by name asc) Top3Names
, STRING_AGG(countx,',') WITHIN GROUP (order by name asc) RespectiveTotal
FROM (
SELECT
id
, name
, count(*) countx
, ROW_NUMBER() over (partition by id order by count(*) desc) rownumber
FROM tb
GROUP BY name, id
) result1
WHERE
result1.rownumber < 4
GROUP BY id
统计数据如下:
- 逻辑读取总数:1
- 总 CPU 时间:00:00:00.000
对于 SQL 服务器 2016- :
select id
, STUFF((
SELECT ',' + t1.Name
FROM cte t1
WHERE t1.id = t2.id
and t1.rownumber < 4
ORDER BY t1.name
FOR XML PATH('')), 1, LEN(','), '') AS Top3Names
, STUFF((
SELECT ',' + cast(t1.countx as varchar(50))
FROM cte t1
WHERE t1.id = t2.id --and t1.name = t2.name
and t1.rownumber < 4
ORDER BY t1.name
FOR XML PATH('')), 1, LEN(','), '') AS RespectiveTotal
from cte t2
group by id
统计数据如下:
- 逻辑读取总数:7
- 总CPU时间:00:00:00.006
因此无论 sql 服务器版本如何,它都会提高性能,如果您使用 sql 服务器 2017 或更高版本使用上面的查询,您将获得最佳性能。
我有以下 table 喜欢:SQL fiddle
我可以通过 XML 获得此输出,但我不确定如何为更多用户(大约 200 万用户)正确获得低于输出的值。
稍后我想通过每个 id 的计数获得前 3 个名称,因此 RANK 或 OrderBy 子句将进入 SQL 并且不确定当数据量很大时需要多少次迭代用户。
我试过的工作代码:
-----------SQL Raw Table Creation------------------------
CREATE TABLE tb
(
Id INT,
Name VARCHAR(50) NOT NULL
);
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'aa');
INSERT INTO tb (Id, Name) VALUES (1, 'bb');
INSERT INTO tb (Id, Name) VALUES (1, 'cc');
INSERT INTO tb (Id, Name) VALUES (1, 'cc');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (1, 'dd');
INSERT INTO tb (Id, Name) VALUES (2, 'aa');
INSERT INTO tb (Id, Name) VALUES (2, 'bb');
INSERT INTO tb (Id, Name) VALUES (2, 'bb');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (2, 'ee');
INSERT INTO tb (Id, Name) VALUES (3, 'aa');
INSERT INTO tb (Id, Name) VALUES (3, 'bb');
INSERT INTO tb (Id, Name) VALUES (3, 'cc');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
INSERT INTO tb (Id, Name) VALUES (3, 'dd');
-----------------Want to RANK or get only top 3 rows for each Id when group by Name--------------
select f.* into #t1
from(
select f.*
from(
select f.*
from (
select top 3 id,name,count(name) as total
from tb
where id = 1
group by id,name
order by id,total desc
)f
Union
select top 3 id,name,count(name) as total
from tb
where id = 2
group by id,name
order by id,total desc
)f
Union
select top 3 id,name,count(name) as total
from tb
where id = 3
group by id,name
order by id,total desc
) f
/* Output is moved in temp table #t1 which looks like
id name total
1 aa 5
1 cc 2
1 dd 3
2 aa 1
2 bb 2
2 ee 4
3 bb 1
3 cc 1
3 dd 3
*/
---------Final Joining for each Top3Names and RespectiveTotal -----
select a.id as ID, a.listStr as Top3Names , b.Total as RespectiveTotal
from
(
SELECT id,STUFF((SELECT ',' + name
FROM #t1 EE
WHERE EE.id=E.id
FOR XML PATH('')), 1, 1, '') AS listStr
FROM #t1 E
GROUP BY E.id
)a
left Join
(
SELECT id,STUFF((SELECT ',' + cast(total as Varchar)
FROM #t1 EE
WHERE EE.id=E.id
FOR XML PATH('')), 1, 1, '') AS Total
FROM #t1 E
GROUP BY E.id
)b
on a.id=b.id
输出:
ID Top3Names RespectiveTotal
1 aa,cc,dd 5,2,3
2 aa,bb,ee 1,2,4
3 bb,cc,dd 1,1,3
这里我为每个 ID 使用 UNION,这是不正确的做法。我想要一个优化的方式。我还使用临时 table 来存储我的结果。这是一个好方法吗?让我知道任何正确的解决方案或替代方案,以便我可以在更大的设备上进行测试。
在我的 SQL 服务器机器上,对于给定的示例数据,您的查询统计信息如下所示:
- 逻辑读取总数:13
- 总 CPU 时间:00:00:00.007
如果您使用的是 SQL SERVER 2017+,您可以使用 STRING_AGG
功能:
SELECT
id
, STRING_AGG(name,',') WITHIN GROUP (order by name asc) Top3Names
, STRING_AGG(countx,',') WITHIN GROUP (order by name asc) RespectiveTotal
FROM (
SELECT
id
, name
, count(*) countx
, ROW_NUMBER() over (partition by id order by count(*) desc) rownumber
FROM tb
GROUP BY name, id
) result1
WHERE
result1.rownumber < 4
GROUP BY id
统计数据如下:
- 逻辑读取总数:1
- 总 CPU 时间:00:00:00.000
对于 SQL 服务器 2016- :
select id
, STUFF((
SELECT ',' + t1.Name
FROM cte t1
WHERE t1.id = t2.id
and t1.rownumber < 4
ORDER BY t1.name
FOR XML PATH('')), 1, LEN(','), '') AS Top3Names
, STUFF((
SELECT ',' + cast(t1.countx as varchar(50))
FROM cte t1
WHERE t1.id = t2.id --and t1.name = t2.name
and t1.rownumber < 4
ORDER BY t1.name
FOR XML PATH('')), 1, LEN(','), '') AS RespectiveTotal
from cte t2
group by id
统计数据如下:
- 逻辑读取总数:7
- 总CPU时间:00:00:00.006
因此无论 sql 服务器版本如何,它都会提高性能,如果您使用 sql 服务器 2017 或更高版本使用上面的查询,您将获得最佳性能。