在 SQL Server 中使用 STRING_AGG 获取唯一值
Get unique values using STRING_AGG in SQL Server
下面的查询返回如下结果:
-- Expand each row's semicolon-delimited [bID] list into one row per element
-- and keep only the two identifiers of interest. Duplicates are retained.
-- NOTE(review): NOLOCK permits dirty reads -- confirm that is acceptable here.
SELECT
    d.ProjectID,
    newID.value
FROM [dbo].[Data] AS d WITH (NOLOCK)
CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
WHERE newID.value IN ('O95833', 'Q96NY7-2');
结果:
ProjectID value
---------------------
2 Q96NY7-2
2 O95833
2 O95833
2 Q96NY7-2
2 O95833
2 Q96NY7-2
4 Q96NY7-2
4 Q96NY7-2
使用新添加的 STRING_AGG 函数(SQL Server 2017 引入),如下面的查询所示,我能够得到下面的结果集。
-- Concatenate the matching identifiers of each project into one
-- comma-separated string, sorted by value.
-- Duplicates are NOT removed: STRING_AGG aggregates every row it receives.
SELECT
    d.ProjectID,
    STRING_AGG(newID.value, ',') WITHIN GROUP (ORDER BY newID.value) AS NewField
FROM [dbo].[Data] AS d WITH (NOLOCK)
CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
WHERE newID.value IN ('O95833', 'Q96NY7-2')
GROUP BY d.ProjectID
ORDER BY d.ProjectID;
结果:
ProjectID NewField
-------------------------------------------------------------
2 O95833,O95833,O95833,Q96NY7-2,Q96NY7-2,Q96NY7-2
4 Q96NY7-2,Q96NY7-2
我希望我的最终输出只有如下独特的元素:
ProjectID NewField
-------------------------------
2 O95833, Q96NY7-2
4 Q96NY7-2
关于如何得到这个结果,有什么建议吗?如有需要,请随意改进我的查询,或者从头重新设计。
在合并结果之前,在子查询中使用 DISTINCT
关键字删除重复项:SQL Fiddle
-- Inner query: collapse the split rows to one row per (ProjectID, value) pair.
-- Outer query: concatenate the de-duplicated values of each project.
SELECT
    dedup.ProjectID,
    STRING_AGG(dedup.value, ',') WITHIN GROUP (ORDER BY dedup.value) AS NewField
FROM (
    -- GROUP BY on every selected column yields the same rows as SELECT DISTINCT.
    SELECT
        d.ProjectID,
        newID.value
    FROM [dbo].[Data] AS d WITH (NOLOCK)
    CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
    WHERE newID.value IN ('O95833', 'Q96NY7-2')
    GROUP BY
        d.ProjectID,
        newID.value
) AS dedup
GROUP BY dedup.ProjectID
ORDER BY dedup.ProjectID;
您可以在用于 apply 的子查询中使用 distinct:
-- De-duplicate inside the APPLY: the DISTINCT runs once per [Data] row, over
-- that row's own split values, before the per-project aggregation.
-- NOTE(review): if one ProjectID spans several [Data] rows, the same value can
-- still reach STRING_AGG more than once -- confirm against the real data.
SELECT
    d.ProjectID,
    STRING_AGG(ids.value, ',') WITHIN GROUP (ORDER BY ids.value) AS NewField
FROM [dbo].[Data] AS d
CROSS APPLY (
    SELECT DISTINCT parts.value
    FROM STRING_SPLIT(d.[bID], ';') AS parts
) AS ids
WHERE ids.value IN ('O95833', 'Q96NY7-2')
GROUP BY d.ProjectID;
正如 @SeanLange 在评论中指出的那样,这是一种糟糕的数据提取方式;但如果您不得不这样做,只需把它拆成两层查询,如下所示:
-- Two-stage query: the derived table removes duplicate (ProjectID, value)
-- pairs, then the outer query joins each project's values into one string.
SELECT
    t.ProjectID,
    STRING_AGG(t.val, ',') WITHIN GROUP (ORDER BY t.val) AS NewField
FROM (
    SELECT DISTINCT
        d.ProjectID,
        newID.value AS val
    FROM [dbo].[Data] AS d WITH (NOLOCK)
    CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
    WHERE newID.value IN ('O95833', 'Q96NY7-2')
) AS t
GROUP BY t.ProjectID;
应该这样做。
您可以在包含待聚合值的表上创建一个去重(DISTINCT)视图,这样甚至更简单:
-- Demo: de-duplicate through a view, then aggregate from the view.
CREATE TABLE Test (field1 varchar(1), field2 varchar(1));
GO
-- One row per distinct (field1, field2) pair. DISTINCT alone is sufficient;
-- a GROUP BY on the same columns adds nothing. The table name is spelled
-- exactly as in CREATE TABLE so it also resolves under a case-sensitive collation.
CREATE VIEW DistinctTest AS
    SELECT DISTINCT field1, field2
    FROM Test;
GO
-- Explicit column list so the inserts do not depend on column order.
INSERT INTO Test (field1, field2)
VALUES
    ('A', '1'),
    ('A', '2'),
    ('A', '2'),
    ('A', '2'),
    ('D', '1'),
    ('D', '1');
-- WITHIN GROUP fixes the concatenation order so the outputs noted below are
-- reproducible; STRING_AGG alone does not guarantee an order.
SELECT STRING_AGG(field1, ',') WITHIN GROUP (ORDER BY field1) FROM Test WHERE field2 = '1';         /* duplicates: A,D,D */
SELECT STRING_AGG(field1, ',') WITHIN GROUP (ORDER BY field1) FROM DistinctTest WHERE field2 = '1'; /* no duplicates: A,D */
从 STRING_AGG 获取唯一字符串的另一种方法,是在得到逗号分隔的字符串后执行以下三个步骤:
- 拆分字符串(STRING_SPLIT)
- 从拆分结果中 SELECT DISTINCT
- 在按单一键分组的 select 上再次应用 STRING_AGG
示例:
-- Select-list fragment (not a standalone statement): aggregates, splits,
-- de-duplicates and re-aggregates a comma-separated list.
-- NOTE(review): customer_division is presumably a column of the enclosing
-- grouped query, which is not shown here -- confirm.
-- Outermost step: join the distinct values back together with ','.
(select STRING_AGG(CAST(value as VARCHAR(MAX)), ',')
-- Middle step: keep distinct values; the constant single_key puts them all in one group.
from (SELECT distinct 1 single_key, value
-- Innermost step: aggregate customer_division with ',' and split it again on ','.
FROM STRING_SPLIT(STRING_AGG(CAST(customer_division as VARCHAR(MAX)), ','), ','))
q group by single_key) as customer_division
这是我编写的函数,用于回答 OP 标题:
欢迎改进!
-- Returns the distinct space-separated words of @String, re-joined with single
-- spaces in ascending order.
--   @String : list of words separated by a single space.
--   Returns : the de-duplicated list; NULL when @String is NULL.
-- Empty tokens produced by consecutive spaces are kept as one distinct
-- (empty) value, because STRING_SPLIT returns them as empty strings.
CREATE OR ALTER FUNCTION [dbo].[fn_DistinctWords]
(
    @String NVARCHAR(MAX)
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @Result NVARCHAR(MAX);

    -- STRING_AGG gives no ordering guarantee on its own; WITHIN GROUP makes
    -- the result reproducible from one call to the next.
    WITH distinct_words AS (
        SELECT DISTINCT value
        FROM STRING_SPLIT(@String, ' ')
    )
    SELECT @Result = STRING_AGG(value, ' ') WITHIN GROUP (ORDER BY value)
    FROM distinct_words;

    RETURN @Result;
END
GO
像这样使用:
-- Example call: yields the distinct words of the argument (One, Three, Two)
-- joined by spaces; the order of the words is decided by fn_DistinctWords.
SELECT
    dbo.fn_DistinctWords('One Two Three Two One');
Oracle(自 19c 版本起)支持 listagg (DISTINCT ...),但 Microsoft SQL Server 可能不支持。
这是我对@ttugates 的改进,使其更通用:
-- Returns the distinct elements of a delimited list, re-joined with the same
-- delimiter in ascending order.
--   @String    : the delimited list.
--   @Delimiter : single-character separator. Declared NCHAR(1) so a Unicode
--                separator is not degraded by conversion to a code page;
--                callers that pass a CHAR(1) value keep working unchanged.
--   Returns    : the de-duplicated list; NULL when @String is NULL.
-- Empty elements produced by consecutive delimiters are kept as one distinct
-- (empty) value, because STRING_SPLIT returns them as empty strings.
CREATE OR ALTER FUNCTION [dbo].[fn_DistinctList]
(
    @String NVARCHAR(MAX),
    @Delimiter NCHAR(1)
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @Result NVARCHAR(MAX);

    -- STRING_AGG gives no ordering guarantee on its own; WITHIN GROUP makes
    -- the result reproducible from one call to the next.
    WITH distinct_items AS (
        SELECT DISTINCT value
        FROM STRING_SPLIT(@String, @Delimiter)
    )
    SELECT @Result = STRING_AGG(value, @Delimiter) WITHIN GROUP (ORDER BY value)
    FROM distinct_items;

    RETURN @Result;
END
下面的查询返回如下结果:
-- Expand each row's semicolon-delimited [bID] list into one row per element
-- and keep only the two identifiers of interest. Duplicates are retained.
-- NOTE(review): NOLOCK permits dirty reads -- confirm that is acceptable here.
SELECT
    d.ProjectID,
    newID.value
FROM [dbo].[Data] AS d WITH (NOLOCK)
CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
WHERE newID.value IN ('O95833', 'Q96NY7-2');
结果:
ProjectID value
---------------------
2 Q96NY7-2
2 O95833
2 O95833
2 Q96NY7-2
2 O95833
2 Q96NY7-2
4 Q96NY7-2
4 Q96NY7-2
使用新添加的 STRING_AGG 函数(SQL Server 2017 引入),如下面的查询所示,我能够得到下面的结果集。
-- Concatenate the matching identifiers of each project into one
-- comma-separated string, sorted by value.
-- Duplicates are NOT removed: STRING_AGG aggregates every row it receives.
SELECT
    d.ProjectID,
    STRING_AGG(newID.value, ',') WITHIN GROUP (ORDER BY newID.value) AS NewField
FROM [dbo].[Data] AS d WITH (NOLOCK)
CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
WHERE newID.value IN ('O95833', 'Q96NY7-2')
GROUP BY d.ProjectID
ORDER BY d.ProjectID;
结果:
ProjectID NewField
-------------------------------------------------------------
2 O95833,O95833,O95833,Q96NY7-2,Q96NY7-2,Q96NY7-2
4 Q96NY7-2,Q96NY7-2
我希望我的最终输出只有如下独特的元素:
ProjectID NewField
-------------------------------
2 O95833, Q96NY7-2
4 Q96NY7-2
关于如何得到这个结果,有什么建议吗?如有需要,请随意改进我的查询,或者从头重新设计。
在合并结果之前,在子查询中使用 DISTINCT
关键字删除重复项:SQL Fiddle
-- Inner query: collapse the split rows to one row per (ProjectID, value) pair.
-- Outer query: concatenate the de-duplicated values of each project.
SELECT
    dedup.ProjectID,
    STRING_AGG(dedup.value, ',') WITHIN GROUP (ORDER BY dedup.value) AS NewField
FROM (
    -- GROUP BY on every selected column yields the same rows as SELECT DISTINCT.
    SELECT
        d.ProjectID,
        newID.value
    FROM [dbo].[Data] AS d WITH (NOLOCK)
    CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
    WHERE newID.value IN ('O95833', 'Q96NY7-2')
    GROUP BY
        d.ProjectID,
        newID.value
) AS dedup
GROUP BY dedup.ProjectID
ORDER BY dedup.ProjectID;
您可以在用于 apply 的子查询中使用 distinct:
-- De-duplicate inside the APPLY: the DISTINCT runs once per [Data] row, over
-- that row's own split values, before the per-project aggregation.
-- NOTE(review): if one ProjectID spans several [Data] rows, the same value can
-- still reach STRING_AGG more than once -- confirm against the real data.
SELECT
    d.ProjectID,
    STRING_AGG(ids.value, ',') WITHIN GROUP (ORDER BY ids.value) AS NewField
FROM [dbo].[Data] AS d
CROSS APPLY (
    SELECT DISTINCT parts.value
    FROM STRING_SPLIT(d.[bID], ';') AS parts
) AS ids
WHERE ids.value IN ('O95833', 'Q96NY7-2')
GROUP BY d.ProjectID;
正如 @SeanLange 在评论中指出的那样,这是一种糟糕的数据提取方式;但如果您不得不这样做,只需把它拆成两层查询,如下所示:
-- Two-stage query: the derived table removes duplicate (ProjectID, value)
-- pairs, then the outer query joins each project's values into one string.
SELECT
    t.ProjectID,
    STRING_AGG(t.val, ',') WITHIN GROUP (ORDER BY t.val) AS NewField
FROM (
    SELECT DISTINCT
        d.ProjectID,
        newID.value AS val
    FROM [dbo].[Data] AS d WITH (NOLOCK)
    CROSS APPLY STRING_SPLIT(d.[bID], ';') AS newID
    WHERE newID.value IN ('O95833', 'Q96NY7-2')
) AS t
GROUP BY t.ProjectID;
应该这样做。
您可以在包含待聚合值的表上创建一个去重(DISTINCT)视图,这样甚至更简单:
-- Demo: de-duplicate through a view, then aggregate from the view.
CREATE TABLE Test (field1 varchar(1), field2 varchar(1));
GO
-- One row per distinct (field1, field2) pair. DISTINCT alone is sufficient;
-- a GROUP BY on the same columns adds nothing. The table name is spelled
-- exactly as in CREATE TABLE so it also resolves under a case-sensitive collation.
CREATE VIEW DistinctTest AS
    SELECT DISTINCT field1, field2
    FROM Test;
GO
-- Explicit column list so the inserts do not depend on column order.
INSERT INTO Test (field1, field2)
VALUES
    ('A', '1'),
    ('A', '2'),
    ('A', '2'),
    ('A', '2'),
    ('D', '1'),
    ('D', '1');
-- WITHIN GROUP fixes the concatenation order so the outputs noted below are
-- reproducible; STRING_AGG alone does not guarantee an order.
SELECT STRING_AGG(field1, ',') WITHIN GROUP (ORDER BY field1) FROM Test WHERE field2 = '1';         /* duplicates: A,D,D */
SELECT STRING_AGG(field1, ',') WITHIN GROUP (ORDER BY field1) FROM DistinctTest WHERE field2 = '1'; /* no duplicates: A,D */
从 STRING_AGG 获取唯一字符串的另一种方法,是在得到逗号分隔的字符串后执行以下三个步骤:
- 拆分字符串(STRING_SPLIT)
- 从拆分结果中 SELECT DISTINCT
- 在按单一键分组的 select 上再次应用 STRING_AGG
示例:
-- Select-list fragment (not a standalone statement): aggregates, splits,
-- de-duplicates and re-aggregates a comma-separated list.
-- NOTE(review): customer_division is presumably a column of the enclosing
-- grouped query, which is not shown here -- confirm.
-- Outermost step: join the distinct values back together with ','.
(select STRING_AGG(CAST(value as VARCHAR(MAX)), ',')
-- Middle step: keep distinct values; the constant single_key puts them all in one group.
from (SELECT distinct 1 single_key, value
-- Innermost step: aggregate customer_division with ',' and split it again on ','.
FROM STRING_SPLIT(STRING_AGG(CAST(customer_division as VARCHAR(MAX)), ','), ','))
q group by single_key) as customer_division
这是我编写的函数,用于回答 OP 标题: 欢迎改进!
-- Returns the distinct space-separated words of @String, re-joined with single
-- spaces in ascending order.
--   @String : list of words separated by a single space.
--   Returns : the de-duplicated list; NULL when @String is NULL.
-- Empty tokens produced by consecutive spaces are kept as one distinct
-- (empty) value, because STRING_SPLIT returns them as empty strings.
CREATE OR ALTER FUNCTION [dbo].[fn_DistinctWords]
(
    @String NVARCHAR(MAX)
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @Result NVARCHAR(MAX);

    -- STRING_AGG gives no ordering guarantee on its own; WITHIN GROUP makes
    -- the result reproducible from one call to the next.
    WITH distinct_words AS (
        SELECT DISTINCT value
        FROM STRING_SPLIT(@String, ' ')
    )
    SELECT @Result = STRING_AGG(value, ' ') WITHIN GROUP (ORDER BY value)
    FROM distinct_words;

    RETURN @Result;
END
GO
像这样使用:
-- Example call: yields the distinct words of the argument (One, Three, Two)
-- joined by spaces; the order of the words is decided by fn_DistinctWords.
SELECT
    dbo.fn_DistinctWords('One Two Three Two One');
Oracle(自 19c 版本起)支持 listagg (DISTINCT ...),但 Microsoft SQL Server 可能不支持。
这是我对@ttugates 的改进,使其更通用:
-- Returns the distinct elements of a delimited list, re-joined with the same
-- delimiter in ascending order.
--   @String    : the delimited list.
--   @Delimiter : single-character separator. Declared NCHAR(1) so a Unicode
--                separator is not degraded by conversion to a code page;
--                callers that pass a CHAR(1) value keep working unchanged.
--   Returns    : the de-duplicated list; NULL when @String is NULL.
-- Empty elements produced by consecutive delimiters are kept as one distinct
-- (empty) value, because STRING_SPLIT returns them as empty strings.
CREATE OR ALTER FUNCTION [dbo].[fn_DistinctList]
(
    @String NVARCHAR(MAX),
    @Delimiter NCHAR(1)
)
RETURNS NVARCHAR(MAX)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @Result NVARCHAR(MAX);

    -- STRING_AGG gives no ordering guarantee on its own; WITHIN GROUP makes
    -- the result reproducible from one call to the next.
    WITH distinct_items AS (
        SELECT DISTINCT value
        FROM STRING_SPLIT(@String, @Delimiter)
    )
    SELECT @Result = STRING_AGG(value, @Delimiter) WITHIN GROUP (ORDER BY value)
    FROM distinct_items;

    RETURN @Result;
END