如何避免多次使用 STUFF ... FOR XML PATH?
How can I avoid using STUFF ... FOR XML PATH multiple times?
我在 SQL 查询时遇到问题。我想要做的是获取商店列表,其中包含分配给用户的任务,每个角色各占一列,列中是用户名。我正在使用 STUFF 来获取连接的行,但对于表中有 100K+ 行的情况来说,这似乎是个坏主意。
这是简化的结构:
用户table
-- Sample users table; id is an identity column and the clustered primary key.
CREATE TABLE #temp_Users
(
    id        INT IDENTITY(1, 1) NOT NULL,
    user_name NVARCHAR(250)      NULL,
    CONSTRAINT PK_Users PRIMARY KEY CLUSTERED (id ASC)
);

-- Seed three sample users (identity seed 1, increment 1).
INSERT INTO #temp_Users (user_name)
VALUES ('Joe'),
       ('Jeff'),
       ('Jimm');
任务table
-- Sample tasks table; each row links a user to a store under a role id.
CREATE TABLE #temp_Tasks
(
    id       INT IDENTITY(1, 1) NOT NULL,
    id_user  INT                NULL,
    id_store INT                NULL,
    id_role  INT                NULL,
    CONSTRAINT PK_Tasks PRIMARY KEY CLUSTERED (id ASC)
);

-- Each tuple is (id_user, id_store, id_role).
INSERT INTO #temp_Tasks (id_user, id_store, id_role)
VALUES (1, 1, 0),
       (1, 2, 0),
       (2, 1, 0),
       (2, 2, 0),
       (1, 1, 1),
       (2, 2, 1),
       (3, 1, 0),
       (3, 2, 0),
       (3, 2, 1);
Select
-- One row per store; each role_N column is a ', '-separated list of the user
-- names attached to that store with id_role = N.
-- FOR XML PATH('') concatenates the ', ' + name rows into a single string and
-- STUFF(..., 1, 2, '') removes the leading ', '.
SELECT DISTINCT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t;
结果
问题是当 table 填充得越来越多时,select 变得越来越慢,因为每个 STUFF 都是另一个嵌套循环。如果我需要在这个 select 中添加一些“角色”,我必须添加另一个 STUFF,如下所示:
-- Same query extended to five roles: every additional role needs one more
-- correlated STUFF(... FOR XML PATH('')) subquery.
-- Each select-list item must be separated by a comma, including role_1..role_3.
SELECT DISTINCT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 2
        FOR XML PATH('')
    ), 1, 2, '') AS role_2,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 3
        FOR XML PATH('')
    ), 1, 2, '') AS role_3,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 4
        FOR XML PATH('')
    ), 1, 2, '') AS role_4
FROM #temp_Tasks AS t;
问题我可以避免使用多个 STUFF 函数吗?
或者我可能需要在 tables 任务上创建非聚集索引?也许为每个角色过滤索引?
提前致谢!
编辑:我正在使用 MSSQL 2016。
正如 Gordon Linoff 所建议的那样,我已经将 Select 从 Distinct 更改为 Group By,如下所示:
-- GROUP BY variant of the per-store role lists: stores are collapsed with
-- GROUP BY instead of SELECT DISTINCT; the correlated subqueries are unchanged.
SELECT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t
GROUP BY t.id_store;
并且执行时间从 20 秒减少到 2 秒。
所以问题不在于 Stuff,而在于 Distinct。
从 SQL Server 2017 开始,您可以使用 string_agg():
-- SQL Server 2017+: one join plus conditional aggregation instead of one
-- correlated subquery per role. The CASE yields NULL for rows of other roles,
-- and STRING_AGG ignores NULLs, so each column lists only its own role's users.
SELECT
    t.id_store,
    STRING_AGG(CASE WHEN t.id_role = 0 THEN u.user_name END, ',')
        WITHIN GROUP (ORDER BY u.user_name) AS role_0,
    STRING_AGG(CASE WHEN t.id_role = 1 THEN u.user_name END, ',')
        WITHIN GROUP (ORDER BY u.user_name) AS role_1
FROM #temp_Tasks AS t
INNER JOIN #temp_Users AS u
    -- The tasks alias in this query is t and its user column is id_user.
    ON u.id = t.id_user
GROUP BY t.id_store;
在旧版本中,正如 Aaron Bertrand 所解释的那样,使用 GROUP BY 而不是 SELECT DISTINCT 可能会带来好处。我还将子查询中的 LEFT JOIN 替换为 INNER JOIN:
-- Pre-2017 form: GROUP BY collapses the stores, and each role list is built
-- by a correlated STUFF(... FOR XML PATH('')) subquery using an inner join.
SELECT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        INNER JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        INNER JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t
GROUP BY t.id_store;
我在 SQL 查询时遇到问题。我想要做的是获取商店列表,其中包含分配给用户的任务,每个角色各占一列,列中是用户名。我正在使用 STUFF 来获取连接的行,但对于表中有 100K+ 行的情况来说,这似乎是个坏主意。
这是简化的结构:
用户table
-- Sample users table; id is an identity column and the clustered primary key.
CREATE TABLE #temp_Users
(
    id        INT IDENTITY(1, 1) NOT NULL,
    user_name NVARCHAR(250)      NULL,
    CONSTRAINT PK_Users PRIMARY KEY CLUSTERED (id ASC)
);

-- Seed three sample users (identity seed 1, increment 1).
INSERT INTO #temp_Users (user_name)
VALUES ('Joe'),
       ('Jeff'),
       ('Jimm');
任务table
-- Sample tasks table; each row links a user to a store under a role id.
CREATE TABLE #temp_Tasks
(
    id       INT IDENTITY(1, 1) NOT NULL,
    id_user  INT                NULL,
    id_store INT                NULL,
    id_role  INT                NULL,
    CONSTRAINT PK_Tasks PRIMARY KEY CLUSTERED (id ASC)
);

-- Each tuple is (id_user, id_store, id_role).
INSERT INTO #temp_Tasks (id_user, id_store, id_role)
VALUES (1, 1, 0),
       (1, 2, 0),
       (2, 1, 0),
       (2, 2, 0),
       (1, 1, 1),
       (2, 2, 1),
       (3, 1, 0),
       (3, 2, 0),
       (3, 2, 1);
Select
-- One row per store; each role_N column is a ', '-separated list of the user
-- names attached to that store with id_role = N.
-- FOR XML PATH('') concatenates the ', ' + name rows into a single string and
-- STUFF(..., 1, 2, '') removes the leading ', '.
SELECT DISTINCT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t;
结果
问题是当 table 填充得越来越多时,select 变得越来越慢,因为每个 STUFF 都是另一个嵌套循环。如果我需要在这个 select 中添加一些“角色”,我必须添加另一个 STUFF,如下所示:
-- Same query extended to five roles: every additional role needs one more
-- correlated STUFF(... FOR XML PATH('')) subquery.
-- Each select-list item must be separated by a comma, including role_1..role_3.
SELECT DISTINCT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 2
        FOR XML PATH('')
    ), 1, 2, '') AS role_2,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 3
        FOR XML PATH('')
    ), 1, 2, '') AS role_3,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 4
        FOR XML PATH('')
    ), 1, 2, '') AS role_4
FROM #temp_Tasks AS t;
问题我可以避免使用多个 STUFF 函数吗? 或者我可能需要在 tables 任务上创建非聚集索引?也许为每个角色过滤索引? 提前致谢!
编辑:我正在使用 MSSQL 2016。
正如 Gordon Linoff 所建议的那样,我已经将 Select 从 Distinct 更改为 Group By,如下所示:
-- GROUP BY variant of the per-store role lists: stores are collapsed with
-- GROUP BY instead of SELECT DISTINCT; the correlated subqueries are unchanged.
SELECT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        LEFT JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t
GROUP BY t.id_store;
并且执行时间从 20 秒减少到 2 秒。 所以问题不在于 Stuff,而在于 Distinct。
从 SQL Server 2017 开始,您可以使用 string_agg():
-- SQL Server 2017+: one join plus conditional aggregation instead of one
-- correlated subquery per role. The CASE yields NULL for rows of other roles,
-- and STRING_AGG ignores NULLs, so each column lists only its own role's users.
SELECT
    t.id_store,
    STRING_AGG(CASE WHEN t.id_role = 0 THEN u.user_name END, ',')
        WITHIN GROUP (ORDER BY u.user_name) AS role_0,
    STRING_AGG(CASE WHEN t.id_role = 1 THEN u.user_name END, ',')
        WITHIN GROUP (ORDER BY u.user_name) AS role_1
FROM #temp_Tasks AS t
INNER JOIN #temp_Users AS u
    -- The tasks alias in this query is t and its user column is id_user.
    ON u.id = t.id_user
GROUP BY t.id_store;
在旧版本中,正如 Aaron Bertrand 所解释的那样,使用 GROUP BY 而不是 SELECT DISTINCT 可能会带来好处。我还将子查询中的 LEFT JOIN 替换为 INNER JOIN:
-- Pre-2017 form: GROUP BY collapses the stores, and each role list is built
-- by a correlated STUFF(... FOR XML PATH('')) subquery using an inner join.
SELECT
    t.id_store,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        INNER JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 0
        FOR XML PATH('')
    ), 1, 2, '') AS role_0,
    STUFF((
        SELECT ', ' + u.user_name
        FROM #temp_Tasks AS t2
        INNER JOIN #temp_Users AS u
            ON u.id = t2.id_user
        WHERE t2.id_store = t.id_store
          AND t2.id_role = 1
        FOR XML PATH('')
    ), 1, 2, '') AS role_1
FROM #temp_Tasks AS t
GROUP BY t.id_store;