将 SQL Server 中单列(多行)的字符串拆分为多列(存在缺失数据)
Split string in single column (& multiple rows) into multiple columns in SQL Server (missing data exists)
以下是我的数据:
一栏多行:
[column]
variable1:A,variable2:B,variable3:C,variable4:D,...,variable9:I
variable1:A,variable2:B,variable4:D,.....variable7:G
variable8:H,variable9:I
variable9:I
.
.
.
.
我正在尝试这样拆分:
variable1 | variable2 | variable3 | variable4 | ... variable7 | variable8 | variable9
A B C D G H I
A B D
H I
I
我正在处理这段代码:
--Approach 1. cross apply
-- Positional split: find successive comma positions p1..p9 with CHARINDEX, then
-- cut the text between consecutive commas into variable1..variable9.
-- variableN receives the N-th comma-separated item, whatever "variableN:" label
-- that item actually carries, so rows that omit a variable do not line up by name.
select Finaldata.*
from [dbo].[table_name]
-- Pad with two commas so that p1 and p2 always find a delimiter (for a non-NULL value).
cross apply ( select str = [dbo].[table_name].attributes + ',,' ) f1
-- Each search starts one character after the previous comma. CHARINDEX returns 0
-- when no further comma exists; the next search then starts at 0 + 1.
cross apply ( select p1 = charindex( ',', str ) ) ap1
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2
cross apply ( select p3 = charindex( ',', str, p2 + 1 ) ) ap3
cross apply ( select p4 = charindex( ',', str, p3 + 1 ) ) ap4
cross apply ( select p5 = charindex( ',', str, p4 + 1 ) ) ap5
cross apply ( select p6 = charindex( ',', str, p5 + 1 ) ) ap6
cross apply ( select p7 = charindex( ',', str, p6 + 1 ) ) ap7
cross apply ( select p8 = charindex( ',', str, p7 + 1 ) ) ap8
cross apply ( select p9 = charindex( ',', str, p8 + 1 ) ) ap9
-- Source of "Invalid length parameter passed to the LEFT or SUBSTRING function":
-- once a position is 0 the length expression goes negative. Example: for
-- 'variable9:I,,' p1 = 12, p2 = 13, p3 = 0, so p3-p2-1 = -14 for variable3.
cross apply ( select variable1 = substring( str, 1, p1-1 )
,variable2 = substring( str, p1+1, p2-p1-1 )
,variable3 = substring( str, p2+1, p3-p2-1 )
,variable4 = substring( str, p3+1, p4-p3-1 )
,variable5 = substring( str, p4+1, p5-p4-1 )
,variable6 = substring( str, p5+1, p6-p5-1 )
,variable7 = substring( str, p6+1, p7-p6-1 )
,variable8 = substring( str, p7+1, p8-p7-1 )
,variable9 = substring( str, p8+1, p9-p8-1 )
)Finaldata
--Approach 2. substring & charindex
--strings are in column 'attributes'
-- NOTE(review): only the select-list expressions are shown; the SELECT keyword,
-- the commas between the expressions and the FROM clause are not part of this fragment.
-- CHARINDEX takes (expressionToFind, expressionToSearch). The arguments below are
-- swapped: each call looks for the whole attributes value inside 'variableN' or ','.
-- That will normally return 0, so the length argument becomes 0 - 1 = -1 and
-- SUBSTRING raises "Invalid length parameter passed to the LEFT or SUBSTRING function".
-- The length is also derived from the first comma only, not from the comma that
-- follows the variable being extracted.
substring(attributes, charindex(attributes, 'variable1'),charindex(attributes, ',')-1) variable1
substring(attributes, charindex(attributes, 'variable2'),charindex(attributes, ',')-1) variable2
substring(attributes, charindex(attributes, 'variable3'),charindex(attributes, ',')-1) variable3
substring(attributes, charindex(attributes, 'variable4'),charindex(attributes, ',')-1) variable4
substring(attributes, charindex(attributes, 'variable5'), charindex(attributes, ',')-1) variable5
我收到相同的错误消息:
Invalid length parameter passed to the LEFT or SUBSTRING function.
我也尝试过网上已有的其他写法,例如 SUBSTRING 配合 CHARINDEX、PARSENAME 等,但都不起作用。我认为原因是我要把字符串拆分成很多列,同时还要考虑数据缺失的情况。
请帮帮我!!!!
谢谢!!
此答案使用 SplitStrings 函数(即下面代码中的 dbo.SplitStrings)来拆分输入字符串。
该函数取自问题 “Efficient query to split a delimited column into a separate table” 下的这个回答(this answer),并针对不同的分隔符稍作修改(. → ,)。
-- Demo: each row of #tt.col holds comma-separated "name:value" pairs. The script
-- splits every row into its pairs, separates name from value at the first ':',
-- and pivots the names into the fixed columns variable1..variable9. Variables
-- that are absent from a row come back as empty strings.
--
-- Dependency: dbo.SplitStrings is a table-valued function that is NOT defined in
-- this script; it is expected to return one row per list element in a column
-- named [item].
CREATE TABLE #tt(id INT NOT NULL IDENTITY(1,1) PRIMARY KEY,col VARCHAR(4000));

INSERT INTO #tt(col)VALUES
('variable1:A,variable2:B,variable3:C,variable4:D,variable9:I'),
('variable1:A,variable2:B,variable4:D,variable7:G'),
('variable8:H,variable9:I'),
('variable9:I');

SELECT
    variable1=ISNULL(variable1,''),
    variable2=ISNULL(variable2,''),
    variable3=ISNULL(variable3,''),
    variable4=ISNULL(variable4,''),
    variable5=ISNULL(variable5,''),
    variable6=ISNULL(variable6,''),
    variable7=ISNULL(variable7,''),
    variable8=ISNULL(variable8,''),
    variable9=ISNULL(variable9,'')
FROM
(
    SELECT
        id,
        -- NULLIF turns "no colon found" (CHARINDEX = 0) into NULL, so LEFT and
        -- SUBSTRING yield NULL instead of failing with "Invalid length parameter
        -- passed to the LEFT or SUBSTRING function" on items such as '...'.
        -- The name is trimmed so that 'a:1, b:2' still matches column b.
        v_col=LTRIM(RTRIM(LEFT(item,NULLIF(CHARINDEX(':',item),0)-1))),
        v_val=SUBSTRING(item,NULLIF(CHARINDEX(':',item),0)+1,LEN(item))
    FROM
        #tt
        CROSS APPLY dbo.SplitStrings(col)
) AS s
PIVOT (
    -- id is the only column left besides v_col/v_val, so PIVOT produces one
    -- output row per id; MAX just picks the single value for each name.
    MAX(v_val) FOR
    v_col IN (variable1,variable2,variable3,variable4,variable5,variable6,variable7,variable8,variable9)
) AS p
-- Return rows in insertion order; without ORDER BY the order is not guaranteed.
ORDER BY p.id;

DROP TABLE #tt;
结果:
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| variable1 | variable2 | variable3 | variable4 | variable5 | variable6 | variable7 | variable8 | variable9 |
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| A | B | C | D | | | | | I |
| A | B | | D | | | G | | |
| | | | | | | | H | I |
| | | | | | | | | I |
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
以下是我的数据:
一栏多行:
[column]
variable1:A,variable2:B,variable3:C,variable4:D,...,variable9:I
variable1:A,variable2:B,variable4:D,.....variable7:G
variable8:H,variable9:I
variable9:I
.
.
.
.
我正在尝试这样拆分:
variable1 | variable2 | variable3 | variable4 | ... variable7 | variable8 | variable9
A B C D G H I
A B D
H I
I
我正在处理这段代码:
--Approach 1. cross apply
-- Positional split: find successive comma positions p1..p9 with CHARINDEX, then
-- cut the text between consecutive commas into variable1..variable9.
-- variableN receives the N-th comma-separated item, whatever "variableN:" label
-- that item actually carries, so rows that omit a variable do not line up by name.
select Finaldata.*
from [dbo].[table_name]
-- Pad with two commas so that p1 and p2 always find a delimiter (for a non-NULL value).
cross apply ( select str = [dbo].[table_name].attributes + ',,' ) f1
-- Each search starts one character after the previous comma. CHARINDEX returns 0
-- when no further comma exists; the next search then starts at 0 + 1.
cross apply ( select p1 = charindex( ',', str ) ) ap1
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2
cross apply ( select p3 = charindex( ',', str, p2 + 1 ) ) ap3
cross apply ( select p4 = charindex( ',', str, p3 + 1 ) ) ap4
cross apply ( select p5 = charindex( ',', str, p4 + 1 ) ) ap5
cross apply ( select p6 = charindex( ',', str, p5 + 1 ) ) ap6
cross apply ( select p7 = charindex( ',', str, p6 + 1 ) ) ap7
cross apply ( select p8 = charindex( ',', str, p7 + 1 ) ) ap8
cross apply ( select p9 = charindex( ',', str, p8 + 1 ) ) ap9
-- Source of "Invalid length parameter passed to the LEFT or SUBSTRING function":
-- once a position is 0 the length expression goes negative. Example: for
-- 'variable9:I,,' p1 = 12, p2 = 13, p3 = 0, so p3-p2-1 = -14 for variable3.
cross apply ( select variable1 = substring( str, 1, p1-1 )
,variable2 = substring( str, p1+1, p2-p1-1 )
,variable3 = substring( str, p2+1, p3-p2-1 )
,variable4 = substring( str, p3+1, p4-p3-1 )
,variable5 = substring( str, p4+1, p5-p4-1 )
,variable6 = substring( str, p5+1, p6-p5-1 )
,variable7 = substring( str, p6+1, p7-p6-1 )
,variable8 = substring( str, p7+1, p8-p7-1 )
,variable9 = substring( str, p8+1, p9-p8-1 )
)Finaldata
--Approach 2. substring & charindex
--strings are in column 'attributes'
-- NOTE(review): only the select-list expressions are shown; the SELECT keyword,
-- the commas between the expressions and the FROM clause are not part of this fragment.
-- CHARINDEX takes (expressionToFind, expressionToSearch). The arguments below are
-- swapped: each call looks for the whole attributes value inside 'variableN' or ','.
-- That will normally return 0, so the length argument becomes 0 - 1 = -1 and
-- SUBSTRING raises "Invalid length parameter passed to the LEFT or SUBSTRING function".
-- The length is also derived from the first comma only, not from the comma that
-- follows the variable being extracted.
substring(attributes, charindex(attributes, 'variable1'),charindex(attributes, ',')-1) variable1
substring(attributes, charindex(attributes, 'variable2'),charindex(attributes, ',')-1) variable2
substring(attributes, charindex(attributes, 'variable3'),charindex(attributes, ',')-1) variable3
substring(attributes, charindex(attributes, 'variable4'),charindex(attributes, ',')-1) variable4
substring(attributes, charindex(attributes, 'variable5'), charindex(attributes, ',')-1) variable5
我收到相同的错误消息:
Invalid length parameter passed to the LEFT or SUBSTRING function.
我也尝试过网上已有的其他写法,例如 SUBSTRING 配合 CHARINDEX、PARSENAME 等,但都不起作用。我认为原因是我要把字符串拆分成很多列,同时还要考虑数据缺失的情况。
请帮帮我!!!!
谢谢!!
此答案使用 SplitStrings 函数(即下面代码中的 dbo.SplitStrings)来拆分输入字符串。
该函数取自问题 “Efficient query to split a delimited column into a separate table” 下的这个回答(this answer),并针对不同的分隔符稍作修改(. → ,)。
-- Demo: each row of #tt.col holds comma-separated "name:value" pairs. The script
-- splits every row into its pairs, separates name from value at the first ':',
-- and pivots the names into the fixed columns variable1..variable9. Variables
-- that are absent from a row come back as empty strings.
--
-- Dependency: dbo.SplitStrings is a table-valued function that is NOT defined in
-- this script; it is expected to return one row per list element in a column
-- named [item].
CREATE TABLE #tt(id INT NOT NULL IDENTITY(1,1) PRIMARY KEY,col VARCHAR(4000));

INSERT INTO #tt(col)VALUES
('variable1:A,variable2:B,variable3:C,variable4:D,variable9:I'),
('variable1:A,variable2:B,variable4:D,variable7:G'),
('variable8:H,variable9:I'),
('variable9:I');

SELECT
    variable1=ISNULL(variable1,''),
    variable2=ISNULL(variable2,''),
    variable3=ISNULL(variable3,''),
    variable4=ISNULL(variable4,''),
    variable5=ISNULL(variable5,''),
    variable6=ISNULL(variable6,''),
    variable7=ISNULL(variable7,''),
    variable8=ISNULL(variable8,''),
    variable9=ISNULL(variable9,'')
FROM
(
    SELECT
        id,
        -- NULLIF turns "no colon found" (CHARINDEX = 0) into NULL, so LEFT and
        -- SUBSTRING yield NULL instead of failing with "Invalid length parameter
        -- passed to the LEFT or SUBSTRING function" on items such as '...'.
        -- The name is trimmed so that 'a:1, b:2' still matches column b.
        v_col=LTRIM(RTRIM(LEFT(item,NULLIF(CHARINDEX(':',item),0)-1))),
        v_val=SUBSTRING(item,NULLIF(CHARINDEX(':',item),0)+1,LEN(item))
    FROM
        #tt
        CROSS APPLY dbo.SplitStrings(col)
) AS s
PIVOT (
    -- id is the only column left besides v_col/v_val, so PIVOT produces one
    -- output row per id; MAX just picks the single value for each name.
    MAX(v_val) FOR
    v_col IN (variable1,variable2,variable3,variable4,variable5,variable6,variable7,variable8,variable9)
) AS p
-- Return rows in insertion order; without ORDER BY the order is not guaranteed.
ORDER BY p.id;

DROP TABLE #tt;
结果:
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| variable1 | variable2 | variable3 | variable4 | variable5 | variable6 | variable7 | variable8 | variable9 |
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| A | B | C | D | | | | | I |
| A | B | | D | | | G | | |
| | | | | | | | H | I |
| | | | | | | | | I |
+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+