将包含不定数量值的列拆分为多个列(SQL Server)
Splitting a column containing varying number of values into multiple columns, SQL Server
我有以下 table 名为 Company,它有很多列,但这是有问题的列:
Team
Frodo B (manager), Gandalf G (director), Batman C (cleaner)
John Doe (secretary), Mark Jacobs(manager), Lilly Hopes(director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR)
Rick Ross (cleaner)
Orlando Bloom (manager), Keira Knightly (secretary)
每一行都有不同的人数,后面是括号中他们在公司中的职位。我们知道有7种可能的职位:cleaner, director, manager, supervisor, secretary, HR, owner
要求的结果是将此列替换为多列——每种职位对应一列;对于每一行,如果有人担任该职位,其姓名将出现在相应的列中。一旦姓名落入正确的列,就不再需要括号中的职位;我只是将其保留在结果表中,以便更好地说明结果。其余单元格可以为空白、N/A 或其他内容。
cleaner
director
manager
supervisor
secretary
HR
owner
Batman C
Gandalf G
Frodo B
Rihanna Williams (cleaner)
Lilly Hopes(director)
Mark Jacobs(manager)
Maddy James (supervisor)
John Doe (secretary)
Merry Poppins (HR)
Rick Ross (cleaner)
Orlando Bloom (manager)
Keira Knightly (secretary)
我知道如何在 Python 中执行此操作,但我需要在 SQL Server 中实现它,不幸的是,这不是我的强项之一。我查找了类似的问题,但未能解决。请帮忙。
PIVOT
关系运算符是一个选项:
示例数据:
-- Sample data: materialise four example teams into a new Company table.
-- Each Team value is a comma-separated list of "Name (position)" entries.
SELECT
    d.Id,
    d.Team
INTO Company
FROM (
    VALUES
        (1, 'Frodo B (manager), Gandalf G (director), Batman C (cleaner)'),
        (2, 'John Doe (secretary), Mark Jacobs (manager), Lilly Hopes (director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR)'),
        (3, 'Rick Ross (cleaner)'),
        (4, 'Orlando Bloom (manager), Keira Knightly (secretary)')
) AS d (Id, Team);
语句:
-- Split Company.Team on commas and pivot every "Name (position)" entry into
-- the column named after its position. Produces one row per Company.Id;
-- positions nobody holds come back as NULL.
-- Requires STRING_SPLIT (SQL Server 2016+, compatibility level 130).
SELECT
    p.Id,
    p.[cleaner],
    p.[director],
    p.[manager],
    p.[supervisor],
    p.[secretary],
    p.[HR],
    p.[owner]
FROM (
    SELECT
        c.Id,
        -- STRING_SPLIT keeps the blanks around each comma, so trim both sides.
        LTRIM(RTRIM(s.[value])) AS [Name],
        pos.Position
    FROM Company AS c
    CROSS APPLY STRING_SPLIT(c.Team, ',') AS s
    INNER JOIN (VALUES
        ('cleaner'), ('director'), ('manager'), ('supervisor'), ('secretary'), ('HR'), ('owner')
    ) AS pos (Position)
        -- Match the parenthesised token only. Searching for the bare word
        -- would also hit names that merely contain it (e.g. 'HR' inside
        -- "Chris" under a case-insensitive collation).
        ON CHARINDEX('(' + pos.Position + ')', s.[value]) > 0
) AS t
PIVOT (
    MAX([Name])
    FOR Position IN ([cleaner], [director], [manager], [supervisor], [secretary], [HR], [owner])
) AS p
ORDER BY p.Id;
结果:
Id
cleaner
director
manager
supervisor
secretary
HR
owner
1
Batman C (cleaner)
Gandalf G (director)
Frodo B (manager)
2
Rihanna Williams (cleaner)
Lilly Hopes (director)
Mark Jacobs (manager)
Maddy James (supervisor)
John Doe (secretary)
Merry Poppins (HR)
3
Rick Ross (cleaner)
4
Orlando Bloom (manager)
Keira Knightly (secretary)
对于 SQL Server 2016 之前的版本(但支持 XML),您可以使用众所周知的 XML 技术来拆分存储的文本:
-- Same pivot as the STRING_SPLIT version, for servers without STRING_SPLIT:
-- Team is turned into an XML fragment (<x>entry</x><x>entry</x>...) and each
-- <x> node becomes one "Name (position)" row before pivoting.
SELECT
    p.Id,
    p.[cleaner],
    p.[director],
    p.[manager],
    p.[supervisor],
    p.[secretary],
    p.[HR],
    p.[owner]
FROM (
    SELECT
        c.Id,
        LTRIM(RTRIM(m.Member)) AS [Name],
        pos.Position
    FROM (
        SELECT
            src.Id,
            CAST(
                '<x>'
                -- FOR XML PATH('') entitizes &, < and > so that a Team value
                -- containing them still casts to well-formed XML; .value()
                -- below decodes the entities again.
                + REPLACE((SELECT src.Team AS [*] FOR XML PATH('')), ',', '</x><x>')
                + '</x>' AS XML
            ) AS XmlTeam
        FROM Company AS src
    ) AS c
    CROSS APPLY c.XmlTeam.nodes('./x') AS s ([value])
    -- Extract the node text once and reuse it for both the name and the match.
    CROSS APPLY (SELECT s.[value].value('.', 'varchar(1000)') AS Member) AS m
    INNER JOIN (VALUES
        ('cleaner'), ('director'), ('manager'), ('supervisor'), ('secretary'), ('HR'), ('owner')
    ) AS pos (Position)
        -- Match the parenthesised token only, so a name that merely contains
        -- a position word is not assigned to that position.
        ON CHARINDEX('(' + pos.Position + ')', m.Member) > 0
) AS t
PIVOT (
    MAX([Name])
    FOR Position IN ([cleaner], [director], [manager], [supervisor], [secretary], [HR], [owner])
) AS p
ORDER BY p.Id;
递归是另一种方法(已知最多只需 7 次迭代):
-- Split each Team string into "Name (position)" members with a recursive CTE
-- that walks from one closing bracket to the next, then spread the names into
-- one column per position using conditional aggregation.
with MyTable as (
    select T.Id, T.Team
    from (values
         (1, 'Frodo B (manager), Gandalf G (director), Batman C (cleaner)')
        ,(2, 'John Doe (secretary), Mark Jacobs(manager), Lilly Hopes(director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR)')
        ,(3, 'Rick Ross (cleaner)')
        ,(4,'Orlando Bloom (manager), Keira Knightly (secretary)')
    ) T (Id, Team)
),
RCTE as ( -- Find closing bracket positions; FromPos..BctPos delimits one member
    select Id, FromPos = 1, BctPos = charindex(')', Team), Team
    from MyTable
    union all
    -- Team is already carried along in the CTE, so no join back to MyTable is needed.
    -- +2 skips the comma that follows the previous closing bracket.
    select R.Id, FromPos = R.BctPos + 2, BctPos = charindex(')', R.Team, R.BctPos + 1), R.Team
    from RCTE R
    where charindex(')', R.Team, R.BctPos + 1) > 0
),
Parsed as (
    select Id, Member = substring(Team, FromPos, BctPos - FromPos + 1)
    from RCTE
),
Named as (
    select
        Id
        , Member
        -- The name is everything before the opening bracket, trimmed on both
        -- sides (members after the first start with a blank). NULLIF yields
        -- NULL instead of an invalid negative length when there is no bracket.
        , PersonName = ltrim(rtrim(left(Member, nullif(charindex('(', Member), 0) - 1)))
    from Parsed
)
select
    Id
    , director   = min(case when Member like '%(director)'   then PersonName end)
    , cleaner    = min(case when Member like '%(cleaner)'    then PersonName end)
    , manager    = min(case when Member like '%(manager)'    then PersonName end)
    , supervisor = min(case when Member like '%(supervisor)' then PersonName end)
    , secretary  = min(case when Member like '%(secretary)'  then PersonName end)
    , HR         = min(case when Member like '%(HR)'         then PersonName end)
    , owner      = min(case when Member like '%(owner)'      then PersonName end)
from Named
group by Id
order by Id;
我假设它们都格式正确(括号正确地成对出现、空格使用一致,等等)。
我有以下 table 名为 Company,它有很多列,但这是有问题的列:
Team |
---|
Frodo B (manager), Gandalf G (director), Batman C (cleaner) |
John Doe (secretary), Mark Jacobs(manager), Lilly Hopes(director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR) |
Rick Ross (cleaner) |
Orlando Bloom (manager), Keira Knightly (secretary) |
每一行都有不同的人数,后面是括号中他们在公司中的职位。我们知道有7种可能的职位:cleaner, director, manager, supervisor, secretary, HR, owner
要求的结果是将此列替换为多列——每种职位对应一列;对于每一行,如果有人担任该职位,其姓名将出现在相应的列中。一旦姓名落入正确的列,就不再需要括号中的职位;我只是将其保留在结果表中,以便更好地说明结果。其余单元格可以为空白、N/A 或其他内容。
cleaner | director | manager | supervisor | secretary | HR | owner |
---|---|---|---|---|---|---|
Batman C | Gandalf G | Frodo B | ||||
Rihanna Williams (cleaner) | Lilly Hopes(director) | Mark Jacobs(manager) | Maddy James (supervisor) | John Doe (secretary) | Merry Poppins (HR) | |
Rick Ross (cleaner) | ||||||
| | | Orlando Bloom (manager) | | Keira Knightly (secretary) | | |
我知道如何在 Python 中执行此操作,但我需要在 SQL Server 中实现它,不幸的是,这不是我的强项之一。我查找了类似的问题,但未能解决。请帮忙。
PIVOT
关系运算符是一个选项:
示例数据:
-- Sample data: materialise four example teams into a new Company table.
-- Each Team value is a comma-separated list of "Name (position)" entries.
SELECT
    d.Id,
    d.Team
INTO Company
FROM (
    VALUES
        (1, 'Frodo B (manager), Gandalf G (director), Batman C (cleaner)'),
        (2, 'John Doe (secretary), Mark Jacobs (manager), Lilly Hopes (director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR)'),
        (3, 'Rick Ross (cleaner)'),
        (4, 'Orlando Bloom (manager), Keira Knightly (secretary)')
) AS d (Id, Team);
语句:
-- Split Company.Team on commas and pivot every "Name (position)" entry into
-- the column named after its position. Produces one row per Company.Id;
-- positions nobody holds come back as NULL.
-- Requires STRING_SPLIT (SQL Server 2016+, compatibility level 130).
SELECT
    p.Id,
    p.[cleaner],
    p.[director],
    p.[manager],
    p.[supervisor],
    p.[secretary],
    p.[HR],
    p.[owner]
FROM (
    SELECT
        c.Id,
        -- STRING_SPLIT keeps the blanks around each comma, so trim both sides.
        LTRIM(RTRIM(s.[value])) AS [Name],
        pos.Position
    FROM Company AS c
    CROSS APPLY STRING_SPLIT(c.Team, ',') AS s
    INNER JOIN (VALUES
        ('cleaner'), ('director'), ('manager'), ('supervisor'), ('secretary'), ('HR'), ('owner')
    ) AS pos (Position)
        -- Match the parenthesised token only. Searching for the bare word
        -- would also hit names that merely contain it (e.g. 'HR' inside
        -- "Chris" under a case-insensitive collation).
        ON CHARINDEX('(' + pos.Position + ')', s.[value]) > 0
) AS t
PIVOT (
    MAX([Name])
    FOR Position IN ([cleaner], [director], [manager], [supervisor], [secretary], [HR], [owner])
) AS p
ORDER BY p.Id;
结果:
Id | cleaner | director | manager | supervisor | secretary | HR | owner |
---|---|---|---|---|---|---|---|
1 | Batman C (cleaner) | Gandalf G (director) | Frodo B (manager) | ||||
2 | Rihanna Williams (cleaner) | Lilly Hopes (director) | Mark Jacobs (manager) | Maddy James (supervisor) | John Doe (secretary) | Merry Poppins (HR) | |
3 | Rick Ross (cleaner) | ||||||
4 | | | Orlando Bloom (manager) | | Keira Knightly (secretary) | | |
对于 SQL Server 2016 之前的版本(但支持 XML),您可以使用众所周知的 XML 技术来拆分存储的文本:
-- Same pivot as the STRING_SPLIT version, for servers without STRING_SPLIT:
-- Team is turned into an XML fragment (<x>entry</x><x>entry</x>...) and each
-- <x> node becomes one "Name (position)" row before pivoting.
SELECT
    p.Id,
    p.[cleaner],
    p.[director],
    p.[manager],
    p.[supervisor],
    p.[secretary],
    p.[HR],
    p.[owner]
FROM (
    SELECT
        c.Id,
        LTRIM(RTRIM(m.Member)) AS [Name],
        pos.Position
    FROM (
        SELECT
            src.Id,
            CAST(
                '<x>'
                -- FOR XML PATH('') entitizes &, < and > so that a Team value
                -- containing them still casts to well-formed XML; .value()
                -- below decodes the entities again.
                + REPLACE((SELECT src.Team AS [*] FOR XML PATH('')), ',', '</x><x>')
                + '</x>' AS XML
            ) AS XmlTeam
        FROM Company AS src
    ) AS c
    CROSS APPLY c.XmlTeam.nodes('./x') AS s ([value])
    -- Extract the node text once and reuse it for both the name and the match.
    CROSS APPLY (SELECT s.[value].value('.', 'varchar(1000)') AS Member) AS m
    INNER JOIN (VALUES
        ('cleaner'), ('director'), ('manager'), ('supervisor'), ('secretary'), ('HR'), ('owner')
    ) AS pos (Position)
        -- Match the parenthesised token only, so a name that merely contains
        -- a position word is not assigned to that position.
        ON CHARINDEX('(' + pos.Position + ')', m.Member) > 0
) AS t
PIVOT (
    MAX([Name])
    FOR Position IN ([cleaner], [director], [manager], [supervisor], [secretary], [HR], [owner])
) AS p
ORDER BY p.Id;
递归是另一种方法(已知最多只需 7 次迭代):
-- Split each Team string into "Name (position)" members with a recursive CTE
-- that walks from one closing bracket to the next, then spread the names into
-- one column per position using conditional aggregation.
with MyTable as (
    select T.Id, T.Team
    from (values
         (1, 'Frodo B (manager), Gandalf G (director), Batman C (cleaner)')
        ,(2, 'John Doe (secretary), Mark Jacobs(manager), Lilly Hopes(director), Rihanna Williams (cleaner), Maddy James (supervisor), Merry Poppins (HR)')
        ,(3, 'Rick Ross (cleaner)')
        ,(4,'Orlando Bloom (manager), Keira Knightly (secretary)')
    ) T (Id, Team)
),
RCTE as ( -- Find closing bracket positions; FromPos..BctPos delimits one member
    select Id, FromPos = 1, BctPos = charindex(')', Team), Team
    from MyTable
    union all
    -- Team is already carried along in the CTE, so no join back to MyTable is needed.
    -- +2 skips the comma that follows the previous closing bracket.
    select R.Id, FromPos = R.BctPos + 2, BctPos = charindex(')', R.Team, R.BctPos + 1), R.Team
    from RCTE R
    where charindex(')', R.Team, R.BctPos + 1) > 0
),
Parsed as (
    select Id, Member = substring(Team, FromPos, BctPos - FromPos + 1)
    from RCTE
),
Named as (
    select
        Id
        , Member
        -- The name is everything before the opening bracket, trimmed on both
        -- sides (members after the first start with a blank). NULLIF yields
        -- NULL instead of an invalid negative length when there is no bracket.
        , PersonName = ltrim(rtrim(left(Member, nullif(charindex('(', Member), 0) - 1)))
    from Parsed
)
select
    Id
    , director   = min(case when Member like '%(director)'   then PersonName end)
    , cleaner    = min(case when Member like '%(cleaner)'    then PersonName end)
    , manager    = min(case when Member like '%(manager)'    then PersonName end)
    , supervisor = min(case when Member like '%(supervisor)' then PersonName end)
    , secretary  = min(case when Member like '%(secretary)'  then PersonName end)
    , HR         = min(case when Member like '%(HR)'         then PersonName end)
    , owner      = min(case when Member like '%(owner)'      then PersonName end)
from Named
group by Id
order by Id;
我假设它们都格式正确(括号正确地成对出现、空格使用一致,等等)。