在 STRING_AGG 中生成 DISTINCT 值
Produce DISTINCT values in STRING_AGG
我在 SQL Server 2017 中使用 STRING_AGG 函数。我想创建与 COUNT(DISTINCT <column>)
相同的效果。我试过 STRING_AGG(DISTINCT <column>,',')
但这不是合法的语法。
我想知道是否有 T-SQL 解决方法。这是我的样本:
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
SELECT State, City, COUNT(DISTINCT Siting) [# Of Types], STRING_AGG(Siting,',') Animals
FROM Sitings
GROUP BY State, City
上面的结果如下:
+---------+-----------+--------------+-------------------------+
| State | City | # Of Types | Animals |
+---------+-----------+--------------+-------------------------+
| Arizona | Flagstaff | 1 | dog |
| Florida | Orlando | 2 | dog,bird |
| Arizona | Phoenix | 2 | bird,bird,bird,dog,bird |
+---------+-----------+--------------+-------------------------+
输出正是我想要的,只是我希望为 Phoenix Arizona 列出的串联 "Animals" 是不同的,如下所示:
+---------+-----------+--------------+--------------------+
| State | City | # Of Types | Animals |
+---------+-----------+--------------+--------------------+
| Arizona | Flagstaff | 1 | dog |
| Florida | Orlando | 2 | dog,bird |
| Arizona | Phoenix | 2 | bird,dog |
+---------+-----------+--------------+--------------------+
有什么想法吗?
当我使用更大的真实数据集时,我收到关于 "Animals" 列超过 8000 个字符的错误。
我认为我的问题与this one相同,只是我的示例简单得多。
这是一种方法。
由于您还需要非重复计数,因此只需将行分组两次即可完成。第一个 GROUP BY
将删除重复项,第二个 GROUP BY
将产生最终结果。
WITH
Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
,CTE_Animals
AS
(
SELECT
State, City, Siting
FROM Sitings
GROUP BY State, City, Siting
)
SELECT
State, City, COUNT(1) AS [# Of Sitings], STRING_AGG(Siting,',') AS Animals
FROM CTE_Animals
GROUP BY State, City
ORDER BY
State
,City
;
结果
+---------+-----------+--------------+----------+
| State | City | # Of Sitings | Animals |
+---------+-----------+--------------+----------+
| Arizona | Flagstaff | 1 | dog |
| Arizona | Phoenix | 2 | bird,dog |
| Florida | Orlando | 2 | bird,dog |
+---------+-----------+--------------+----------+
如果您仍然收到有关超过 8000 个字符的错误消息,请在 STRING_AGG
.
之前将值转换为 varchar(max)
类似
STRING_AGG(CAST(Siting AS varchar(max)),',') AS Animals
只需使用 sub-query
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
select State,City,count(*) as [# Of Types],STRING_AGG(Siting,',') AS Animals from
(
SELECT State, City, Siting
FROM Sitings
GROUP BY State, City,Siting
) as T group by State,City
http://sqlfiddle.com/#!18/ba4b8/11
State City # Of Types Animals
Arizona Flagstaff 1 dog
Florida Orlando 2 bird,dog
Arizona Phoenix 2 bird,dog
当然是很晚才回复。
这是另一种方式。 STRING_AGG(Siting,',')
中的 Siting 可以被子查询以返回 SITTING 的 DISTINCT 列表,其中分组键匹配 STATE 和 CITIES。
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
SELECT
S.State,
S.City,
COUNT(DISTINCT S.Siting) AS [# Of Types],
--STRING_AGG(S.Siting,',') AS Animals
(
SELECT STRING_AGG(U.SITING, ',')
FROM
(
SELECT DISTINCT T.Siting
FROM Sitings AS T
WHERE
T.State = S.State AND
T.City = S.City
) AS U
) AS ANIMAL
FROM
Sitings AS S
GROUP BY
S.State,
S.City
ORDER BY
S.State,
S.City
这是另一种方法 (sql fiddle):
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
select State,City,count(*) as [# Of Sitings],(select string_agg(value,', ') from (select distinct value from string_split(string_agg(Siting, ','),',')) t) AS Animals
FROM Sitings
GROUP BY State, City
您可以轻松地将拆分和合并部分转换为可重用的标量值函数。
注意
这不是最佳解决方案,如果您先分组然后进行聚合(如上面的答案),效果会更好。此外,它不会得到 # of Types
,而是得到 # of Sitings
。但是,作为快速内联函数,它变得很方便。
我在 SQL Server 2017 中使用 STRING_AGG 函数。我想创建与 COUNT(DISTINCT <column>)
相同的效果。我试过 STRING_AGG(DISTINCT <column>,',')
但这不是合法的语法。
我想知道是否有 T-SQL 解决方法。这是我的样本:
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
SELECT State, City, COUNT(DISTINCT Siting) [# Of Types], STRING_AGG(Siting,',') Animals
FROM Sitings
GROUP BY State, City
上面的结果如下:
+---------+-----------+--------------+-------------------------+
| State | City | # Of Types | Animals |
+---------+-----------+--------------+-------------------------+
| Arizona | Flagstaff | 1 | dog |
| Florida | Orlando | 2 | dog,bird |
| Arizona | Phoenix | 2 | bird,bird,bird,dog,bird |
+---------+-----------+--------------+-------------------------+
输出正是我想要的,只是我希望为 Phoenix Arizona 列出的串联 "Animals" 是不同的,如下所示:
+---------+-----------+--------------+--------------------+
| State | City | # Of Types | Animals |
+---------+-----------+--------------+--------------------+
| Arizona | Flagstaff | 1 | dog |
| Florida | Orlando | 2 | dog,bird |
| Arizona | Phoenix | 2 | bird,dog |
+---------+-----------+--------------+--------------------+
有什么想法吗?
当我使用更大的真实数据集时,我收到关于 "Animals" 列超过 8000 个字符的错误。
我认为我的问题与this one相同,只是我的示例简单得多。
这是一种方法。
由于您还需要非重复计数,因此只需将行分组两次即可完成。第一个 GROUP BY
将删除重复项,第二个 GROUP BY
将产生最终结果。
WITH
Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
,CTE_Animals
AS
(
SELECT
State, City, Siting
FROM Sitings
GROUP BY State, City, Siting
)
SELECT
State, City, COUNT(1) AS [# Of Sitings], STRING_AGG(Siting,',') AS Animals
FROM CTE_Animals
GROUP BY State, City
ORDER BY
State
,City
;
结果
+---------+-----------+--------------+----------+
| State | City | # Of Sitings | Animals |
+---------+-----------+--------------+----------+
| Arizona | Flagstaff | 1 | dog |
| Arizona | Phoenix | 2 | bird,dog |
| Florida | Orlando | 2 | bird,dog |
+---------+-----------+--------------+----------+
如果您仍然收到有关超过 8000 个字符的错误消息,请在 STRING_AGG
.
varchar(max)
类似
STRING_AGG(CAST(Siting AS varchar(max)),',') AS Animals
只需使用 sub-query
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
select State,City,count(*) as [# Of Types],STRING_AGG(Siting,',') AS Animals from
(
SELECT State, City, Siting
FROM Sitings
GROUP BY State, City,Siting
) as T group by State,City
http://sqlfiddle.com/#!18/ba4b8/11
State City # Of Types Animals
Arizona Flagstaff 1 dog
Florida Orlando 2 bird,dog
Arizona Phoenix 2 bird,dog
当然是很晚才回复。
这是另一种方式。 STRING_AGG(Siting,',')
中的 Siting 可以被子查询以返回 SITTING 的 DISTINCT 列表,其中分组键匹配 STATE 和 CITIES。
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
SELECT
S.State,
S.City,
COUNT(DISTINCT S.Siting) AS [# Of Types],
--STRING_AGG(S.Siting,',') AS Animals
(
SELECT STRING_AGG(U.SITING, ',')
FROM
(
SELECT DISTINCT T.Siting
FROM Sitings AS T
WHERE
T.State = S.State AND
T.City = S.City
) AS U
) AS ANIMAL
FROM
Sitings AS S
GROUP BY
S.State,
S.City
ORDER BY
S.State,
S.City
这是另一种方法 (sql fiddle):
WITH Sitings
AS
(
SELECT * FROM (VALUES
(1, 'Florida', 'Orlando', 'bird'),
(2, 'Florida', 'Orlando', 'dog'),
(3, 'Arizona', 'Phoenix', 'bird'),
(4, 'Arizona', 'Phoenix', 'dog'),
(5, 'Arizona', 'Phoenix', 'bird'),
(6, 'Arizona', 'Phoenix', 'bird'),
(7, 'Arizona', 'Phoenix', 'bird'),
(8, 'Arizona', 'Flagstaff', 'dog')
) F (ID, State, City, Siting)
)
select State,City,count(*) as [# Of Sitings],(select string_agg(value,', ') from (select distinct value from string_split(string_agg(Siting, ','),',')) t) AS Animals
FROM Sitings
GROUP BY State, City
您可以轻松地将拆分和合并部分转换为可重用的标量值函数。
注意
这不是最佳解决方案,如果您先分组然后进行聚合(如上面的答案),效果会更好。此外,它不会得到 # of Types
,而是得到 # of Sitings
。但是,作为快速内联函数,它变得很方便。