在存在缺失值的情况下,使用 APPLY 从多条多值记录构建数据集
Building data sets from multiple multivalue records using applys with missing values
我有一个 SQL Server 2012 数据库,它是从多值环境中导入的,这让我头疼得比我想象的还要多,但它就是这样,我必须使用它。
我正在尝试使用这些多值记录构建数据集,但遇到了绊脚石。这是我的场景
我有一个自定义拆分字符串 TVF,它将 "Test,String" 的字符串拆分为
Rowno | Item
------+---------
1 | Test
2 | String
我有以下数据:
客户Table
Ref | Names | Surname | DOB | IdNo
----+-----------+-----------+---------------------+------
123 |John,Sally |Smith | DOB1,DOB2 | 45,56
456 |Dave,Paul |Jones,Dann| DOB1,DOB2 | 98
789 |Mary,Moe,Al|Lee | DOB1 | NULL
我需要创建的数据集如下所示:
Ref | Names | Surname | DOB | IdNo
----+-----------+-----------+---------------------+------
123 | John | Smith | DOB1 | 45
123 | Sally | Smith | DOB2 | 56
456 | Dave | Jones | DOB1 | 98
456 | Paul | Dann | DOB2 |
789 | Mary | Lee | DOB1 |
789 | Moe | Lee | |
789 | Al | Lee | |
过去,为了解决类似的问题,我会使用这样的查询来解决这个问题:
-- Pairs each forename with the surname, DOB and ID found at the same list position.
-- The RowNo equalities sit in WHERE, so a forename row is returned only when
-- all three other lists also have an entry at that position.
SELECT
    c.Ref,
    SplitForenames.ITEM AS names,
    SplitSurname.ITEM AS Surname,
    SplitDOB.ITEM AS dob,
    SplitNI.ITEM AS ID
FROM Clients AS c
CROSS APPLY dbo.udf_SplitString(c.Names, ',') AS SplitForenames
OUTER APPLY dbo.udf_SplitString(c.Surname, ',') AS SplitSurname
OUTER APPLY dbo.udf_SplitString(c.DOB, ',') AS SplitDOB
OUTER APPLY dbo.udf_SplitString(c.ID, ',') AS SplitNI
WHERE SplitForenames.RowNo = SplitSurname.RowNo
  AND SplitForenames.RowNo = SplitDOB.RowNo
  AND SplitForenames.RowNo = SplitNI.RowNo
ORDER BY c.Ref;
但是,由于姓氏与名字的数量并不总是一致,而且 DOB 和 ID 字段可能有缺失,我无法用这种方式把它们匹配起来。
我需要在有对应项时进行匹配;没有对应项时,DOB 和 ID 留空,姓氏则使用第一个值。我只是不知道该如何实现。
有没有人能就如何从原始数据创建所需的数据集给我一些建议?
提前致谢
我认为您可以使用子查询,在 OUTER APPLY 之前先进行 RowNo 比较来处理此问题:
-- FROM clause only: each correlated subquery keeps just the split item at the
-- forename's position, so OUTER APPLY returns NULL (instead of dropping the
-- row) when a list has no entry at that position.
FROM Clients c CROSS APPLY
     dbo.udf_SplitString(c.Names, ',') SplitForenames OUTER APPLY
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.Surname, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitSurname OUTER APPLY
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.DOB, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitDOB OUTER APPLY
     -- Splits the ID list (IdNo in the sample table), not DOB a second time.
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.IdNo, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitNI
我不清楚 DOB 列在什么条件下需要拆分、什么条件下不需要拆分。
不过,可以使用如下的拆分函数 SplitF:
-- Splits a comma-separated string into one row per item.
--   @str    : the list to split; a NULL or empty string yields no rows.
--   returns : (Rowno, Item) where Rowno is the 1-based position of Item.
-- An item is emitted only while text remains, so a trailing comma does not
-- produce a final empty row.
CREATE FUNCTION SplitF(@str AS NVARCHAR(max))
RETURNS @People TABLE
(Rowno INT, Item NVARCHAR(max))  -- same width as @subname, so long items are not rejected
AS
BEGIN
    DECLARE @i INT, @pos INT
    DECLARE @subname NVARCHAR(max)
    -- Variable names are spelled exactly as declared so the function also
    -- compiles under a case-sensitive collation.
    SET @i = 0;
    WHILE (LEN(@str) > 0)
    BEGIN
        SET @pos = CHARINDEX(',', @str)
        -- No comma left: treat the rest of the string as the last item.
        IF @pos = 0 SET @pos = LEN(@str) + 1
        SET @subname = SUBSTRING(@str, 1, @pos - 1)
        -- Drop the consumed item together with its separator.
        SET @str = SUBSTRING(@str, @pos + 1, LEN(@str))
        SET @i = @i + 1
        INSERT INTO @People (Rowno, Item) VALUES (@i, @subname)
    END
    RETURN
END
GO
-- Demo call: returns one (Rowno, Item) row per comma-separated item.
SELECT Rowno, Item FROM SplitF('test,my,function')
Rowno Item
----------- ----------
1 test
2 my
3 function
和基础数据:
-- Builds the #clients temp table from the three sample rows, then lists it.
SELECT A.Ref, A.Names, A.Surname, A.DOB, A.IdNo
INTO #clients
FROM (VALUES
        (123, 'John,Sally',  'Smith',      'DOB1,DOB2', '45,56'),
        (456, 'Dave,Paul',   'Jones,Dann', 'DOB1,DOB2', '98'),
        (789, 'Mary,Moe,Al', 'Lee',        'DOB1',      NULL)
     ) AS A (Ref, Names, Surname, DOB, IdNo)
SELECT Ref, Names, Surname, DOB, IdNo FROM #clients
Ref Names Surname DOB IdNo
----------- ----------- ---------- --------- -----
123 John,Sally Smith DOB1,DOB2 45,56
456 Dave,Paul Jones,Dann DOB1,DOB2 98
789 Mary,Moe,Al Lee DOB1 NULL
使用下面的代码你会得到这样的结果:
-- Produces one row per forename of each client. Surname, DOB and IdNo are
-- taken from the same list position as the forename; a missing DOB or IdNo
-- becomes an empty string.
SELECT
    MAIN.Ref,
    RTRIM(S_NAM.Item) AS Names,
    COALESCE(S_SURNAM.Item, S_SURNAM_FIRST.Item) AS Surname,
    COALESCE(split_dob.Item, '') AS DOB,
    COALESCE(split_IdNo.Item, '') AS IdNo
FROM #clients AS MAIN
OUTER APPLY (SELECT Rowno, Item FROM SplitF(MAIN.Names)) AS S_NAM
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.Surname) WHERE Rowno = S_NAM.Rowno) AS S_SURNAM
-- Fallback when there is no surname at the forename's position: use the
-- first surname in the list, which is what the question asks for.
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.Surname) ORDER BY Rowno) AS S_SURNAM_FIRST
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.IdNo) WHERE Rowno = S_NAM.Rowno) AS split_IdNo
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.DOB) WHERE Rowno = S_NAM.Rowno) AS split_dob
ORDER BY MAIN.Ref, S_NAM.Rowno
Ref Names Surname DOB IdNo
----------- ---------- ---------- ---------- ----------
123 John Smith DOB1 45
123 Sally Smith DOB2 56
456 Dave Jones DOB1 98
456 Paul Dann DOB2
789 Mary Lee DOB1
789 Moe Lee
789 Al Lee
我有一个 SQL Server 2012 数据库,它是从多值环境中导入的,这让我头疼得比我想象的还要多,但它就是这样,我必须使用它。
我正在尝试使用这些多值记录构建数据集,但遇到了绊脚石。这是我的场景
我有一个自定义拆分字符串 TVF,它将 "Test,String" 的字符串拆分为
Rowno | Item
------+---------
1 | Test
2 | String
我有以下数据:
客户Table
Ref | Names | Surname | DOB | IdNo
----+-----------+-----------+---------------------+------
123 |John,Sally |Smith | DOB1,DOB2 | 45,56
456 |Dave,Paul |Jones,Dann| DOB1,DOB2 | 98
789 |Mary,Moe,Al|Lee | DOB1 | NULL
我需要创建的数据集如下所示:
Ref | Names | Surname | DOB | IdNo
----+-----------+-----------+---------------------+------
123 | John | Smith | DOB1 | 45
123 | Sally | Smith | DOB2 | 56
456 | Dave | Jones | DOB1 | 98
456 | Paul | Dann | DOB2 |
789 | Mary | Lee | DOB1 |
789 | Moe | Lee | |
789 | Al | Lee | |
过去,为了解决类似的问题,我会使用这样的查询来解决这个问题:
-- Pairs each forename with the surname, DOB and ID found at the same list position.
-- The RowNo equalities sit in WHERE, so a forename row is returned only when
-- all three other lists also have an entry at that position.
SELECT
    c.Ref,
    SplitForenames.ITEM AS names,
    SplitSurname.ITEM AS Surname,
    SplitDOB.ITEM AS dob,
    SplitNI.ITEM AS ID
FROM Clients AS c
CROSS APPLY dbo.udf_SplitString(c.Names, ',') AS SplitForenames
OUTER APPLY dbo.udf_SplitString(c.Surname, ',') AS SplitSurname
OUTER APPLY dbo.udf_SplitString(c.DOB, ',') AS SplitDOB
OUTER APPLY dbo.udf_SplitString(c.ID, ',') AS SplitNI
WHERE SplitForenames.RowNo = SplitSurname.RowNo
  AND SplitForenames.RowNo = SplitDOB.RowNo
  AND SplitForenames.RowNo = SplitNI.RowNo
ORDER BY c.Ref;
但是,由于姓氏与名字的数量并不总是一致,而且 DOB 和 ID 字段可能有缺失,我无法用这种方式把它们匹配起来。
我需要在有对应项时进行匹配;没有对应项时,DOB 和 ID 留空,姓氏则使用第一个值。我只是不知道该如何实现。
有没有人能就如何从原始数据创建所需的数据集给我一些建议?
提前致谢
我认为您可以使用子查询,在 OUTER APPLY 之前先进行 RowNo 比较来处理此问题:
-- FROM clause only: each correlated subquery keeps just the split item at the
-- forename's position, so OUTER APPLY returns NULL (instead of dropping the
-- row) when a list has no entry at that position.
FROM Clients c CROSS APPLY
     dbo.udf_SplitString(c.Names, ',') SplitForenames OUTER APPLY
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.Surname, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitSurname OUTER APPLY
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.DOB, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitDOB OUTER APPLY
     -- Splits the ID list (IdNo in the sample table), not DOB a second time.
     (SELECT s.Item
      FROM dbo.udf_SplitString(c.IdNo, ',') s
      WHERE s.RowNo = SplitForenames.RowNo
     ) SplitNI
我不清楚 DOB 列在什么条件下需要拆分、什么条件下不需要拆分。不过,可以使用如下的拆分函数 SplitF:
-- Splits a comma-separated string into one row per item.
--   @str    : the list to split; a NULL or empty string yields no rows.
--   returns : (Rowno, Item) where Rowno is the 1-based position of Item.
-- An item is emitted only while text remains, so a trailing comma does not
-- produce a final empty row.
CREATE FUNCTION SplitF(@str AS NVARCHAR(max))
RETURNS @People TABLE
(Rowno INT, Item NVARCHAR(max))  -- same width as @subname, so long items are not rejected
AS
BEGIN
    DECLARE @i INT, @pos INT
    DECLARE @subname NVARCHAR(max)
    -- Variable names are spelled exactly as declared so the function also
    -- compiles under a case-sensitive collation.
    SET @i = 0;
    WHILE (LEN(@str) > 0)
    BEGIN
        SET @pos = CHARINDEX(',', @str)
        -- No comma left: treat the rest of the string as the last item.
        IF @pos = 0 SET @pos = LEN(@str) + 1
        SET @subname = SUBSTRING(@str, 1, @pos - 1)
        -- Drop the consumed item together with its separator.
        SET @str = SUBSTRING(@str, @pos + 1, LEN(@str))
        SET @i = @i + 1
        INSERT INTO @People (Rowno, Item) VALUES (@i, @subname)
    END
    RETURN
END
GO
-- Demo call: returns one (Rowno, Item) row per comma-separated item.
SELECT Rowno, Item FROM SplitF('test,my,function')
Rowno Item
----------- ----------
1 test
2 my
3 function
和基础数据:
-- Builds the #clients temp table from the three sample rows, then lists it.
SELECT A.Ref, A.Names, A.Surname, A.DOB, A.IdNo
INTO #clients
FROM (VALUES
        (123, 'John,Sally',  'Smith',      'DOB1,DOB2', '45,56'),
        (456, 'Dave,Paul',   'Jones,Dann', 'DOB1,DOB2', '98'),
        (789, 'Mary,Moe,Al', 'Lee',        'DOB1',      NULL)
     ) AS A (Ref, Names, Surname, DOB, IdNo)
SELECT Ref, Names, Surname, DOB, IdNo FROM #clients
Ref Names Surname DOB IdNo
----------- ----------- ---------- --------- -----
123 John,Sally Smith DOB1,DOB2 45,56
456 Dave,Paul Jones,Dann DOB1,DOB2 98
789 Mary,Moe,Al Lee DOB1 NULL
使用下面的代码你会得到这样的结果:
-- Produces one row per forename of each client. Surname, DOB and IdNo are
-- taken from the same list position as the forename; a missing DOB or IdNo
-- becomes an empty string.
SELECT
    MAIN.Ref,
    RTRIM(S_NAM.Item) AS Names,
    COALESCE(S_SURNAM.Item, S_SURNAM_FIRST.Item) AS Surname,
    COALESCE(split_dob.Item, '') AS DOB,
    COALESCE(split_IdNo.Item, '') AS IdNo
FROM #clients AS MAIN
OUTER APPLY (SELECT Rowno, Item FROM SplitF(MAIN.Names)) AS S_NAM
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.Surname) WHERE Rowno = S_NAM.Rowno) AS S_SURNAM
-- Fallback when there is no surname at the forename's position: use the
-- first surname in the list, which is what the question asks for.
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.Surname) ORDER BY Rowno) AS S_SURNAM_FIRST
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.IdNo) WHERE Rowno = S_NAM.Rowno) AS split_IdNo
OUTER APPLY (SELECT TOP 1 Item FROM SplitF(MAIN.DOB) WHERE Rowno = S_NAM.Rowno) AS split_dob
ORDER BY MAIN.Ref, S_NAM.Rowno
Ref Names Surname DOB IdNo
----------- ---------- ---------- ---------- ----------
123 John Smith DOB1 45
123 Sally Smith DOB2 56
456 Dave Jones DOB1 98
456 Paul Dann DOB2
789 Mary Lee DOB1
789 Moe Lee
789 Al Lee