SQL Server:比较两行,如果第一行的单元格为 NULL,则用第二行对应的单元格填充
SQL Server compare two rows and combine cells from the second if first is null
我已经在论坛上搜索过了,但是要么不能正确提问,要么看不懂答案,需要有人带我一步步解决这个问题。
问题是:
我的数据库中有一个 Users 表。按电子邮件比较时,表中存在一些重复记录。根据注册日期,其中一些记录具有更高的优先级(注册日期较早的记录会被忽略),但一些优先级较低的记录填写了更多信息(例如性别、地址、电话等等)。
我想得到的流程是:
-> 根据电子邮件查找重复项
-> 优先考虑最新注册日期的行
-> 如果此行中的单元格为空,则用优先级较低的行中的数据填充它
P.S.
另一个问题是,同一个电子邮件最多可能对应三个重复的帐户。
我无法理解这个..
What I have
what I want
-- Sample schema for the duplicate-email question.
-- userID carries an explicit length: a bare [nvarchar] column is nvarchar(1),
-- which cannot hold an identifier longer than one character.
CREATE TABLE [dbo].[Person](
    [userID]    [nvarchar](10) NOT NULL PRIMARY KEY,
    [email]     [nvarchar](50),
    [priority]  [nvarchar](2),
    [FirstName] [nvarchar](50),
    [LastName]  [nvarchar](50)
)
GO
-- Fixture rows. Missing names are stored as empty strings here, not as NULL.
-- The column list is explicit so the statement does not depend on column order,
-- and userID values are written as strings to match the column type.
INSERT INTO [dbo].[Person] ([userID], [email], [priority], [FirstName], [LastName])
VALUES
    ('1', 'a@a.com', '1', '', ''),
    ('2', 'a@a.com', '2', 'Dennis', 'Li'),
    ('3', 'b@b.com', '1', 'Brent', 'Li'),
    ('4', 'c@c.com', '1', '', ''),
    ('5', 'c@c.com', '2', '', 'Raji'),
    ('6', 'c@c.com', '3', 'Ben', 'Raji');
GO
使用下面的脚本,我们保留 regDate 最新的那条记录,
并用同一电子邮件的上一条记录来填充其中的 NULL 值。
但是请注意,如果有三个或更多用户使用相同的电子邮件,更旧的行将被忽略——脚本只会把最新的一行与第二新的一行合并:
-- Fixture rows for Users: two registrations per email, one INSERT per row.
-- For both emails the row with the later regDate leaves columns NULL that the
-- earlier row has filled in.
INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('a@a.com', 'Andrew', NULL, NULL, NULL, NULL, NULL, NULL, '2018-03-09 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('a@a.com', 'ANDREEW', 'Lopez', NULL, 'Santos', NULL, NULL, NULL, '2018-03-08 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('b@b.com', 'Bob', 'Wilk', NULL, NULL, NULL, NULL, NULL, '2018-03-10 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('b@b.com', 'Robert', NULL, 'Sandiego Street', 'Santos', NULL, NULL, '456 123 789', '2018-03-05 00:00:00');

-- Show the current contents of Users.
SELECT * FROM Users;
-- Collapse accounts that share an email into one, most complete row.
--
-- Step 1 inserts one merged row per duplicated email. It keeps the email and
-- regDate of the newest registration; every other column is taken from the
-- newest row in which that column IS NOT NULL. Unlike the two-row merge
-- described in the text above, every duplicate is consulted, so a third (or
-- later) account can still contribute data.
-- Step 2 deletes the rows the merged row supersedes.
--
-- Both steps run in one transaction so a failure cannot leave the merged row
-- sitting next to the rows it was built from.
--
-- NOTE(review): assumes Users.Id is an ascending auto-generated key (the INSERT
-- column list omits Id), so the merged row gets the highest Id for its email --
-- confirm against the table definition.
BEGIN TRY
    BEGIN TRANSACTION;

    INSERT INTO Users ([email],[firstName],[lastName],[street],[city],[code],[country],[phone],[regDate])
    SELECT
        newest.[email],
        (SELECT TOP (1) src.firstName FROM Users AS src
          WHERE src.email = newest.email AND src.firstName IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.lastName FROM Users AS src
          WHERE src.email = newest.email AND src.lastName IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.street FROM Users AS src
          WHERE src.email = newest.email AND src.street IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.city FROM Users AS src
          WHERE src.email = newest.email AND src.city IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.code FROM Users AS src
          WHERE src.email = newest.email AND src.code IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.country FROM Users AS src
          WHERE src.email = newest.email AND src.country IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.phone FROM Users AS src
          WHERE src.email = newest.email AND src.phone IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        newest.regDate
    FROM Users AS newest
    -- Keep only the newest registration per email; Id breaks regDate ties so
    -- the choice is deterministic.
    WHERE newest.Id = (
            SELECT TOP (1) latest.Id
            FROM Users AS latest
            WHERE latest.email = newest.email
            ORDER BY latest.regDate DESC, latest.Id DESC
          )
      -- Emails that occur only once need no merged row.
      AND EXISTS (
            SELECT 1
            FROM Users AS dup
            WHERE dup.email = newest.email
              AND dup.Id <> newest.Id
          );

    -- Remove every row that has a higher-Id row with the same email. Rows with
    -- a NULL email never match each other here, so they are left untouched
    -- (they were not merged above either).
    DELETE stale
    FROM Users AS stale
    WHERE EXISTS (
            SELECT 1
            FROM Users AS kept
            WHERE kept.email = stale.email
              AND kept.Id > stale.Id
          );

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;
    THROW;
END CATCH;

-- Show the de-duplicated table.
SELECT * FROM Users;
这里有一个可运行的 fiddle 示例。
应该这样做
-- Self-contained demo: one row per email, with the first and last name taken
-- from the lowest-priority-number row that has a non-NULL value.
DECLARE @T TABLE (
    [userID]    INT PRIMARY KEY,
    [email]     NVARCHAR(50),
    [priority]  TINYINT,
    [FirstName] NVARCHAR(50),
    [LastName]  NVARCHAR(50)
);

INSERT INTO @T ([userID], [email], [priority], [FirstName], [LastName])
VALUES
    (1, 'a@a.com', 1, NULL, NULL),
    (2, 'a@a.com', 2, 'Dennis', 'Li'),
    (3, 'b@b.com', 1, 'Brent', 'Li'),
    (4, 'c@c.com', 1, NULL, NULL),
    (5, 'c@c.com', 2, NULL, 'Raji'),
    (6, 'c@c.com', 3, 'Ben', 'Raji');

-- Distinct emails drive the result; each name column is looked up separately,
-- so the two values may come from different rows of the same email.
WITH emails AS (
    SELECT DISTINCT src.email
    FROM @T AS src
)
SELECT
    e.email,
    first_pick.FirstName AS FN,
    last_pick.LastName   AS LN
FROM emails AS e
OUTER APPLY (
    SELECT TOP (1) cand.FirstName
    FROM @T AS cand
    WHERE cand.email = e.email
      AND cand.FirstName IS NOT NULL
    ORDER BY cand.priority ASC
) AS first_pick
OUTER APPLY (
    SELECT TOP (1) cand.LastName
    FROM @T AS cand
    WHERE cand.email = e.email
      AND cand.LastName IS NOT NULL
    ORDER BY cand.priority ASC
) AS last_pick
ORDER BY e.email;
下面的第一个 CTE 只是为了把结果限制在存在重复电子邮件的数据上。如果您需要一个同时适用于重复和非重复电子邮件的查询,只需删除第一个 CTE(以及对它的联接)即可。
-- For every email that occurs more than once in Person, return the top-ranked
-- row with its blank name columns filled from the next-ranked rows (up to two
-- fallbacks, i.e. three accounts per email).
;WITH DuplicatedEmails AS
(
    -- Emails that appear on more than one Person row.
    SELECT
        P.Email
    FROM
        Person AS P
    GROUP BY
        P.Email
    HAVING
        COUNT(*) > 1
),
DuplicatedEmailUserData AS
(
    -- Rows of the duplicated emails, ranked within each email.
    SELECT
        P.UserID,
        P.Email,
        P.Priority,
        -- The sample rows store missing names as '' rather than NULL;
        -- NULLIF turns those blanks into NULL so COALESCE below can skip them.
        FirstName = NULLIF(P.FirstName, ''),
        LastName = NULLIF(P.LastName, ''),
        -- Email is qualified because both P and E expose a column of that name.
        -- Assuming a higher priority comes first.
        -- NOTE(review): Priority is declared nvarchar(2) in the Person DDL, so
        -- this is a string comparison -- fine for single-digit values only.
        EmailRanking = ROW_NUMBER() OVER (PARTITION BY P.Email ORDER BY P.Priority DESC)
    FROM
        Person AS P
        INNER JOIN DuplicatedEmails AS E ON P.Email = E.Email
)
SELECT
    D1.UserID,
    D1.Email,
    D1.Priority,
    FirstName = COALESCE(D1.FirstName, D2.FirstName, D3.FirstName), -- first non-blank value in ranking order
    LastName = COALESCE(D1.LastName, D2.LastName, D3.LastName)
FROM
    DuplicatedEmailUserData AS D1
    -- Second-ranked row of the same email, if any.
    LEFT JOIN DuplicatedEmailUserData AS D2 ON
        D2.Email = D1.Email AND
        D2.EmailRanking = D1.EmailRanking + 1
    -- Third-ranked row of the same email, if any.
    LEFT JOIN DuplicatedEmailUserData AS D3 ON
        D3.Email = D1.Email AND
        D3.EmailRanking = D2.EmailRanking + 1
WHERE
    D1.EmailRanking = 1;
使用这种方法,同一电子邮件最多有多少条重复记录,
您就需要相应次数的 LEFT JOIN。
我已经在论坛上搜索过了,但是要么不能正确提问,要么看不懂答案,需要有人带我一步步解决这个问题。
问题是: 我的数据库中有一个 Users 表。按电子邮件比较时,表中存在一些重复记录。根据注册日期,其中一些记录具有更高的优先级(注册日期较早的记录会被忽略),但一些优先级较低的记录填写了更多信息(例如性别、地址、电话等等)。
我想得到的流程是: -> 根据电子邮件查找重复项 -> 优先考虑最新注册日期的行 -> 如果此行中的单元格为空,则用优先级较低的行中的数据填充它
P.S. 另一个问题是,同一个电子邮件最多可能对应三个重复的帐户。
我无法理解这个.. What I have what I want
-- Sample schema for the duplicate-email question.
-- userID carries an explicit length: a bare [nvarchar] column is nvarchar(1),
-- which cannot hold an identifier longer than one character.
CREATE TABLE [dbo].[Person](
    [userID]    [nvarchar](10) NOT NULL PRIMARY KEY,
    [email]     [nvarchar](50),
    [priority]  [nvarchar](2),
    [FirstName] [nvarchar](50),
    [LastName]  [nvarchar](50)
)
GO
-- Fixture rows. Missing names are stored as empty strings here, not as NULL.
-- The column list is explicit so the statement does not depend on column order,
-- and userID values are written as strings to match the column type.
INSERT INTO [dbo].[Person] ([userID], [email], [priority], [FirstName], [LastName])
VALUES
    ('1', 'a@a.com', '1', '', ''),
    ('2', 'a@a.com', '2', 'Dennis', 'Li'),
    ('3', 'b@b.com', '1', 'Brent', 'Li'),
    ('4', 'c@c.com', '1', '', ''),
    ('5', 'c@c.com', '2', '', 'Raji'),
    ('6', 'c@c.com', '3', 'Ben', 'Raji');
GO
使用下面的脚本,我们保留 regDate 最新的那条记录,
并用同一电子邮件的上一条记录来填充其中的 NULL 值。
但是请注意,如果有三个或更多用户使用相同的电子邮件,更旧的行将被忽略——脚本只会把最新的一行与第二新的一行合并:
-- Fixture rows for Users: two registrations per email, one INSERT per row.
-- For both emails the row with the later regDate leaves columns NULL that the
-- earlier row has filled in.
INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('a@a.com', 'Andrew', NULL, NULL, NULL, NULL, NULL, NULL, '2018-03-09 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('a@a.com', 'ANDREEW', 'Lopez', NULL, 'Santos', NULL, NULL, NULL, '2018-03-08 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('b@b.com', 'Bob', 'Wilk', NULL, NULL, NULL, NULL, NULL, '2018-03-10 00:00:00');

INSERT INTO Users
    ([email], [firstName], [lastName], [street], [city], [code], [country], [phone], [regDate])
VALUES
    ('b@b.com', 'Robert', NULL, 'Sandiego Street', 'Santos', NULL, NULL, '456 123 789', '2018-03-05 00:00:00');

-- Show the current contents of Users.
SELECT * FROM Users;
-- Collapse accounts that share an email into one, most complete row.
--
-- Step 1 inserts one merged row per duplicated email. It keeps the email and
-- regDate of the newest registration; every other column is taken from the
-- newest row in which that column IS NOT NULL. Unlike the two-row merge
-- described in the text above, every duplicate is consulted, so a third (or
-- later) account can still contribute data.
-- Step 2 deletes the rows the merged row supersedes.
--
-- Both steps run in one transaction so a failure cannot leave the merged row
-- sitting next to the rows it was built from.
--
-- NOTE(review): assumes Users.Id is an ascending auto-generated key (the INSERT
-- column list omits Id), so the merged row gets the highest Id for its email --
-- confirm against the table definition.
BEGIN TRY
    BEGIN TRANSACTION;

    INSERT INTO Users ([email],[firstName],[lastName],[street],[city],[code],[country],[phone],[regDate])
    SELECT
        newest.[email],
        (SELECT TOP (1) src.firstName FROM Users AS src
          WHERE src.email = newest.email AND src.firstName IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.lastName FROM Users AS src
          WHERE src.email = newest.email AND src.lastName IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.street FROM Users AS src
          WHERE src.email = newest.email AND src.street IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.city FROM Users AS src
          WHERE src.email = newest.email AND src.city IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.code FROM Users AS src
          WHERE src.email = newest.email AND src.code IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.country FROM Users AS src
          WHERE src.email = newest.email AND src.country IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        (SELECT TOP (1) src.phone FROM Users AS src
          WHERE src.email = newest.email AND src.phone IS NOT NULL
          ORDER BY src.regDate DESC, src.Id DESC),
        newest.regDate
    FROM Users AS newest
    -- Keep only the newest registration per email; Id breaks regDate ties so
    -- the choice is deterministic.
    WHERE newest.Id = (
            SELECT TOP (1) latest.Id
            FROM Users AS latest
            WHERE latest.email = newest.email
            ORDER BY latest.regDate DESC, latest.Id DESC
          )
      -- Emails that occur only once need no merged row.
      AND EXISTS (
            SELECT 1
            FROM Users AS dup
            WHERE dup.email = newest.email
              AND dup.Id <> newest.Id
          );

    -- Remove every row that has a higher-Id row with the same email. Rows with
    -- a NULL email never match each other here, so they are left untouched
    -- (they were not merged above either).
    DELETE stale
    FROM Users AS stale
    WHERE EXISTS (
            SELECT 1
            FROM Users AS kept
            WHERE kept.email = stale.email
              AND kept.Id > stale.Id
          );

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;
    THROW;
END CATCH;

-- Show the de-duplicated table.
SELECT * FROM Users;
这里有一个可运行的 fiddle 示例。
应该这样做
-- Self-contained demo: one row per email, with the first and last name taken
-- from the lowest-priority-number row that has a non-NULL value.
DECLARE @T TABLE (
    [userID]    INT PRIMARY KEY,
    [email]     NVARCHAR(50),
    [priority]  TINYINT,
    [FirstName] NVARCHAR(50),
    [LastName]  NVARCHAR(50)
);

INSERT INTO @T ([userID], [email], [priority], [FirstName], [LastName])
VALUES
    (1, 'a@a.com', 1, NULL, NULL),
    (2, 'a@a.com', 2, 'Dennis', 'Li'),
    (3, 'b@b.com', 1, 'Brent', 'Li'),
    (4, 'c@c.com', 1, NULL, NULL),
    (5, 'c@c.com', 2, NULL, 'Raji'),
    (6, 'c@c.com', 3, 'Ben', 'Raji');

-- Distinct emails drive the result; each name column is looked up separately,
-- so the two values may come from different rows of the same email.
WITH emails AS (
    SELECT DISTINCT src.email
    FROM @T AS src
)
SELECT
    e.email,
    first_pick.FirstName AS FN,
    last_pick.LastName   AS LN
FROM emails AS e
OUTER APPLY (
    SELECT TOP (1) cand.FirstName
    FROM @T AS cand
    WHERE cand.email = e.email
      AND cand.FirstName IS NOT NULL
    ORDER BY cand.priority ASC
) AS first_pick
OUTER APPLY (
    SELECT TOP (1) cand.LastName
    FROM @T AS cand
    WHERE cand.email = e.email
      AND cand.LastName IS NOT NULL
    ORDER BY cand.priority ASC
) AS last_pick
ORDER BY e.email;
下面的第一个 CTE 只是为了把结果限制在存在重复电子邮件的数据上。如果您需要一个同时适用于重复和非重复电子邮件的查询,只需删除第一个 CTE(以及对它的联接)即可。
-- For every email that occurs more than once in Person, return the top-ranked
-- row with its blank name columns filled from the next-ranked rows (up to two
-- fallbacks, i.e. three accounts per email).
;WITH DuplicatedEmails AS
(
    -- Emails that appear on more than one Person row.
    SELECT
        P.Email
    FROM
        Person AS P
    GROUP BY
        P.Email
    HAVING
        COUNT(*) > 1
),
DuplicatedEmailUserData AS
(
    -- Rows of the duplicated emails, ranked within each email.
    SELECT
        P.UserID,
        P.Email,
        P.Priority,
        -- The sample rows store missing names as '' rather than NULL;
        -- NULLIF turns those blanks into NULL so COALESCE below can skip them.
        FirstName = NULLIF(P.FirstName, ''),
        LastName = NULLIF(P.LastName, ''),
        -- Email is qualified because both P and E expose a column of that name.
        -- Assuming a higher priority comes first.
        -- NOTE(review): Priority is declared nvarchar(2) in the Person DDL, so
        -- this is a string comparison -- fine for single-digit values only.
        EmailRanking = ROW_NUMBER() OVER (PARTITION BY P.Email ORDER BY P.Priority DESC)
    FROM
        Person AS P
        INNER JOIN DuplicatedEmails AS E ON P.Email = E.Email
)
SELECT
    D1.UserID,
    D1.Email,
    D1.Priority,
    FirstName = COALESCE(D1.FirstName, D2.FirstName, D3.FirstName), -- first non-blank value in ranking order
    LastName = COALESCE(D1.LastName, D2.LastName, D3.LastName)
FROM
    DuplicatedEmailUserData AS D1
    -- Second-ranked row of the same email, if any.
    LEFT JOIN DuplicatedEmailUserData AS D2 ON
        D2.Email = D1.Email AND
        D2.EmailRanking = D1.EmailRanking + 1
    -- Third-ranked row of the same email, if any.
    LEFT JOIN DuplicatedEmailUserData AS D3 ON
        D3.Email = D1.Email AND
        D3.EmailRanking = D2.EmailRanking + 1
WHERE
    D1.EmailRanking = 1;
使用这种方法,同一电子邮件最多有多少条重复记录,
您就需要相应次数的 LEFT JOIN。