T-SQL - 使用 STUFF 连接分组列并删除重复项
T-SQL - using STUFF to concatenate grouped columns and removing duplicates
我有一张表,结构如下:
EmailAddress: nvarchar(255)
MarketingEmailOptIn: nvarchar(50)
NewsletterOptIn: nvarchar(50)
ThoughtLeaderOptIn: nvarchar(50)
下面是我的 SQL 语句,它读取上述数据,并以逗号作为分隔符把各个“订阅类型”连接起来:
-- Builds one comma-separated Subscriptions string per EmailAddress by
-- concatenating a label for every matching source row via FOR XML PATH('').
-- NOTE: each source row contributes at most one label (the first CASE branch
-- that matches), and a label that occurs on several rows is repeated in the
-- output -- this query does not de-duplicate.
SELECT
    EmailAddress,
    STUFF(
        (
            SELECT ',' +
                CASE
                    WHEN B.MarketingEmailOptIn = 'TRUE' THEN 'MarketingEmail'
                    WHEN B.ThoughtLeaderOptIn = 'TRUE' THEN 'ThoughtLeader'
                    WHEN B.NewsletterOptIn = 'TRUE' THEN 'Newsletter'
                END
            FROM UK_AGT_AgentForms_TEST_DE B
            -- ISNULL on both sides so that rows with a NULL address still correlate.
            WHERE ISNULL(B.EmailAddress, '') = ISNULL(A.EmailAddress, '')
            FOR XML PATH('')
        ),
        -- The separator is the single character ',', so strip exactly one
        -- leading character; a length of 2 would also eat the first letter.
        1, 1, ''
    ) AS Subscriptions
FROM
    UK_AGT_AgentForms_TEST_DE A
GROUP BY
    EmailAddress
运行这段 SQL 会得到以下输出:
但是请注意,MarketingEmail
被列出了两次,因为它在源表中也出现了两次(第一行和第二行)。我需要忽略所有重复项,使最终得到的结果表如下所示:
我对 STUFF
函数还很陌生,不太清楚如何在运行时检测重复项——任何建议都将不胜感激。谢谢
尝试这样的事情:
-- Self-contained demo: collapse several opt-in rows per address into a single
-- de-duplicated, comma-separated Subscriptions string.
DECLARE @Data table (
    EmailAddress nvarchar(255),
    MarketingEmailOptIn nvarchar(50),
    NewsletterOptIn nvarchar(50),
    ThoughtLeaderOptIn nvarchar(50)
);

-- Sample rows: the same address opts in to MarketingEmail on every row.
INSERT INTO @Data (EmailAddress, MarketingEmailOptIn, NewsletterOptIn, ThoughtLeaderOptIn)
VALUES
    ( 'mike@mikemarks.com', 'TRUE', NULL, NULL ),
    ( 'mike@mikemarks.com', 'TRUE', 'TRUE', NULL ),
    ( 'mike@mikemarks.com', 'TRUE', NULL, 'TRUE' );

SELECT
    EmailAddress
    -- Every label carries a leading comma; STUFF removes the first one.
    -- If no flag is set the inner string is empty and STUFF yields NULL.
    , STUFF ( ( CASE WHEN EOptIn = 1 THEN ',MarketingEmail' ELSE '' END
              + CASE WHEN NOptIn = 1 THEN ',Newsletter' ELSE '' END
              + CASE WHEN TOptIn = 1 THEN ',ThoughtLeader' ELSE '' END
              ), 1, 1, '' ) AS Subscriptions
FROM (
    -- One row per address; each flag is 1 when ANY source row has 'TRUE'.
    -- Comparing inside the aggregate avoids depending on how 'TRUE' sorts
    -- relative to other strings, which MAX() over the raw text would.
    SELECT
        EmailAddress
        , MAX ( CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS EOptIn
        , MAX ( CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS NOptIn
        , MAX ( CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS TOptIn
    FROM @Data A -- stand-in for UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS x
-- Only the outermost ORDER BY determines the order of the result set.
ORDER BY
    EmailAddress;
Returns
+--------------------+-----------------------------------------+
| EmailAddress | Subscriptions |
+--------------------+-----------------------------------------+
| mike@mikemarks.com | MarketingEmail,Newsletter,ThoughtLeader |
+--------------------+-----------------------------------------+
如果您使用的是 SQL Server 2017 或更高版本,可以使用 STRING_AGG()
来简化此操作:
-- One row per EmailAddress with a de-duplicated, comma-separated list of the
-- subscriptions that address opted in to. Requires STRING_AGG (SQL Server 2017+).
SELECT
    s.EmailAddress,
    -- STRING_AGG skips NULL inputs, so an address with no opt-ins yields NULL.
    -- WITHIN GROUP makes the label order deterministic.
    STRING_AGG(s.Subscription, ', ') WITHIN GROUP (ORDER BY s.Subscription) AS Subscriptions
FROM (
    -- Unpivot the three flags into one label per (row, flag) so that a row with
    -- several opt-ins contributes all of them; DISTINCT then removes labels
    -- repeated across rows of the same address.
    SELECT DISTINCT
        f.EmailAddress,
        v.Subscription
    FROM UK_AGT_AgentForms_TEST_DE AS f
    CROSS APPLY (VALUES
        (CASE WHEN f.MarketingEmailOptIn = 'TRUE' THEN 'MarketingEmail' END),
        (CASE WHEN f.ThoughtLeaderOptIn = 'TRUE' THEN 'ThoughtLeader' END),
        (CASE WHEN f.NewsletterOptIn = 'TRUE' THEN 'Newsletter' END)
    ) AS v (Subscription)
) AS s
GROUP BY
    s.EmailAddress
如果仍然看到重复项,可以在嵌套查询中使用条件聚合先将其汇总:
-- One row per EmailAddress with a comma-separated list of subscriptions,
-- built from per-address opt-in counts computed in the derived table.
SELECT
    EmailAddress,
    -- Each label carries a LEADING comma and STUFF strips the first one, so no
    -- dangling separator remains whichever labels are present.
    -- STUFF returns NULL for an empty input; COALESCE maps that back to ''.
    COALESCE(
        STUFF(
            CASE WHEN MarketingEmailCount > 0 THEN ',MarketingEmail' ELSE '' END
            + CASE WHEN ThoughtLeaderCount > 0 THEN ',ThoughtLeader' ELSE '' END
            + CASE WHEN NewsletterCount > 0 THEN ',Newsletter' ELSE '' END,
            1, 1, ''
        ),
        ''
    ) AS Subscriptions
FROM (
    -- Count, per address, how many source rows have each flag set to 'TRUE'.
    SELECT
        EmailAddress
        , SUM(CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END) AS MarketingEmailCount
        , SUM(CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END) AS ThoughtLeaderCount
        , SUM(CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END) AS NewsletterCount
    FROM UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS T
呼,这个问题我折腾了一会儿。也许不是完美的解决方案,但我认为它能实现您想要的效果。不过它没有使用 STUFF 函数,而是把各个字符串连接起来,然后删除末尾的逗号。
-- One row per EmailAddress with a ', '-separated list of subscriptions.
-- Does not use STUFF: every label is emitted with a trailing ', ' and the
-- final separator is trimmed afterwards.
SELECT
    EmailAddress,
    CASE
        -- LEN does not count trailing spaces, so LEN(...) - 1 drops both the
        -- final comma and the space that follows it.
        WHEN LEN(Subscriptions) > 0 THEN LEFT(Subscriptions, LEN(Subscriptions) - 1)
        ELSE ''
    END AS Subscriptions
FROM (
    SELECT
        EmailAddress,
        -- A label is included when at least one row of the address has the
        -- corresponding flag set to 'TRUE'.
        CONCAT(
            CASE WHEN SUM(CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'MarketingEmail, ' ELSE '' END,
            CASE WHEN SUM(CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'Newsletter, ' ELSE '' END,
            CASE WHEN SUM(CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'ThoughtLeader, ' ELSE '' END
        ) AS Subscriptions
    FROM UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS a
我有一张表,结构如下:
EmailAddress: nvarchar(255)
MarketingEmailOptIn: nvarchar(50)
NewsletterOptIn: nvarchar(50)
ThoughtLeaderOptIn: nvarchar(50)
下面是我的 SQL 语句,它读取上述数据,并以逗号作为分隔符把各个“订阅类型”连接起来:
-- Builds one comma-separated Subscriptions string per EmailAddress by
-- concatenating a label for every matching source row via FOR XML PATH('').
-- NOTE: each source row contributes at most one label (the first CASE branch
-- that matches), and a label that occurs on several rows is repeated in the
-- output -- this query does not de-duplicate.
SELECT
    EmailAddress,
    STUFF(
        (
            SELECT ',' +
                CASE
                    WHEN B.MarketingEmailOptIn = 'TRUE' THEN 'MarketingEmail'
                    WHEN B.ThoughtLeaderOptIn = 'TRUE' THEN 'ThoughtLeader'
                    WHEN B.NewsletterOptIn = 'TRUE' THEN 'Newsletter'
                END
            FROM UK_AGT_AgentForms_TEST_DE B
            -- ISNULL on both sides so that rows with a NULL address still correlate.
            WHERE ISNULL(B.EmailAddress, '') = ISNULL(A.EmailAddress, '')
            FOR XML PATH('')
        ),
        -- The separator is the single character ',', so strip exactly one
        -- leading character; a length of 2 would also eat the first letter.
        1, 1, ''
    ) AS Subscriptions
FROM
    UK_AGT_AgentForms_TEST_DE A
GROUP BY
    EmailAddress
运行这段 SQL 会得到以下输出:
但是请注意,MarketingEmail
被列出了两次,因为它在源表中也出现了两次(第一行和第二行)。我需要忽略所有重复项,使最终得到的结果表如下所示:
我对 STUFF
函数还很陌生,不太清楚如何在运行时检测重复项——任何建议都将不胜感激。谢谢
尝试这样的事情:
-- Self-contained demo: collapse several opt-in rows per address into a single
-- de-duplicated, comma-separated Subscriptions string.
DECLARE @Data table (
    EmailAddress nvarchar(255),
    MarketingEmailOptIn nvarchar(50),
    NewsletterOptIn nvarchar(50),
    ThoughtLeaderOptIn nvarchar(50)
);

-- Sample rows: the same address opts in to MarketingEmail on every row.
INSERT INTO @Data (EmailAddress, MarketingEmailOptIn, NewsletterOptIn, ThoughtLeaderOptIn)
VALUES
    ( 'mike@mikemarks.com', 'TRUE', NULL, NULL ),
    ( 'mike@mikemarks.com', 'TRUE', 'TRUE', NULL ),
    ( 'mike@mikemarks.com', 'TRUE', NULL, 'TRUE' );

SELECT
    EmailAddress
    -- Every label carries a leading comma; STUFF removes the first one.
    -- If no flag is set the inner string is empty and STUFF yields NULL.
    , STUFF ( ( CASE WHEN EOptIn = 1 THEN ',MarketingEmail' ELSE '' END
              + CASE WHEN NOptIn = 1 THEN ',Newsletter' ELSE '' END
              + CASE WHEN TOptIn = 1 THEN ',ThoughtLeader' ELSE '' END
              ), 1, 1, '' ) AS Subscriptions
FROM (
    -- One row per address; each flag is 1 when ANY source row has 'TRUE'.
    -- Comparing inside the aggregate avoids depending on how 'TRUE' sorts
    -- relative to other strings, which MAX() over the raw text would.
    SELECT
        EmailAddress
        , MAX ( CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS EOptIn
        , MAX ( CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS NOptIn
        , MAX ( CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END ) AS TOptIn
    FROM @Data A -- stand-in for UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS x
-- Only the outermost ORDER BY determines the order of the result set.
ORDER BY
    EmailAddress;
Returns
+--------------------+-----------------------------------------+
| EmailAddress | Subscriptions |
+--------------------+-----------------------------------------+
| mike@mikemarks.com | MarketingEmail,Newsletter,ThoughtLeader |
+--------------------+-----------------------------------------+
如果您使用的是 SQL Server 2017 或更高版本,可以使用 STRING_AGG()
来简化此操作:
-- One row per EmailAddress with a de-duplicated, comma-separated list of the
-- subscriptions that address opted in to. Requires STRING_AGG (SQL Server 2017+).
SELECT
    s.EmailAddress,
    -- STRING_AGG skips NULL inputs, so an address with no opt-ins yields NULL.
    -- WITHIN GROUP makes the label order deterministic.
    STRING_AGG(s.Subscription, ', ') WITHIN GROUP (ORDER BY s.Subscription) AS Subscriptions
FROM (
    -- Unpivot the three flags into one label per (row, flag) so that a row with
    -- several opt-ins contributes all of them; DISTINCT then removes labels
    -- repeated across rows of the same address.
    SELECT DISTINCT
        f.EmailAddress,
        v.Subscription
    FROM UK_AGT_AgentForms_TEST_DE AS f
    CROSS APPLY (VALUES
        (CASE WHEN f.MarketingEmailOptIn = 'TRUE' THEN 'MarketingEmail' END),
        (CASE WHEN f.ThoughtLeaderOptIn = 'TRUE' THEN 'ThoughtLeader' END),
        (CASE WHEN f.NewsletterOptIn = 'TRUE' THEN 'Newsletter' END)
    ) AS v (Subscription)
) AS s
GROUP BY
    s.EmailAddress
如果仍然看到重复项,可以在嵌套查询中使用条件聚合先将其汇总:
-- One row per EmailAddress with a comma-separated list of subscriptions,
-- built from per-address opt-in counts computed in the derived table.
SELECT
    EmailAddress,
    -- Each label carries a LEADING comma and STUFF strips the first one, so no
    -- dangling separator remains whichever labels are present.
    -- STUFF returns NULL for an empty input; COALESCE maps that back to ''.
    COALESCE(
        STUFF(
            CASE WHEN MarketingEmailCount > 0 THEN ',MarketingEmail' ELSE '' END
            + CASE WHEN ThoughtLeaderCount > 0 THEN ',ThoughtLeader' ELSE '' END
            + CASE WHEN NewsletterCount > 0 THEN ',Newsletter' ELSE '' END,
            1, 1, ''
        ),
        ''
    ) AS Subscriptions
FROM (
    -- Count, per address, how many source rows have each flag set to 'TRUE'.
    SELECT
        EmailAddress
        , SUM(CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END) AS MarketingEmailCount
        , SUM(CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END) AS ThoughtLeaderCount
        , SUM(CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END) AS NewsletterCount
    FROM UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS T
呼,这个问题我折腾了一会儿。也许不是完美的解决方案,但我认为它能实现您想要的效果。不过它没有使用 STUFF 函数,而是把各个字符串连接起来,然后删除末尾的逗号。
-- One row per EmailAddress with a ', '-separated list of subscriptions.
-- Does not use STUFF: every label is emitted with a trailing ', ' and the
-- final separator is trimmed afterwards.
SELECT
    EmailAddress,
    CASE
        -- LEN does not count trailing spaces, so LEN(...) - 1 drops both the
        -- final comma and the space that follows it.
        WHEN LEN(Subscriptions) > 0 THEN LEFT(Subscriptions, LEN(Subscriptions) - 1)
        ELSE ''
    END AS Subscriptions
FROM (
    SELECT
        EmailAddress,
        -- A label is included when at least one row of the address has the
        -- corresponding flag set to 'TRUE'.
        CONCAT(
            CASE WHEN SUM(CASE WHEN MarketingEmailOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'MarketingEmail, ' ELSE '' END,
            CASE WHEN SUM(CASE WHEN NewsletterOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'Newsletter, ' ELSE '' END,
            CASE WHEN SUM(CASE WHEN ThoughtLeaderOptIn = 'TRUE' THEN 1 ELSE 0 END) > 0 THEN 'ThoughtLeader, ' ELSE '' END
        ) AS Subscriptions
    FROM UK_AGT_AgentForms_TEST_DE
    GROUP BY EmailAddress
) AS a