将具有多个定界符的字符串解析为列
Parsing string with multiple delimiters into columns
我想将字符串拆分成列。
我的专栏应该是:
account_id, resource_type, resource_name
我有一个 JSON 文件源,我一直试图通过 ADF 数据流对其进行解析。这对我不起作用,因此我将数据展平并将其放入 SQL 服务器(我愿意通过 ADF 或 SQL 解析值,如果有人可以告诉我如何)。请检查底部的 JSON 文件。
使用此代码查询我正在使用的数据。
CREATE TABLE test.test2
(
resource_type nvarchar(max) NULL
)
INSERT INTO test.test2 ([resource_type])
VALUES
('account_id:224526257458,resource_type:buckets,resource_name:camp-stage-artifactory'),
('account_id:535533456241,resource_type:buckets,resource_name:tni-prod-diva-backups'),
('account_id:369798452057,resource_type:buckets,resource_name:369798452057-s3-manifests'),
('account_id:460085747812,resource_type:buckets,resource_name:vessel-incident-report-nonprod-accesslogs')
我应该能够在 SQL 服务器中查询的输出应该是这样的:
account_id
resource_type
resource_name
224526257458
buckets
camp-stage-artifactory
535533456241
buckets
tni-prod-diva-backups
等等。
请帮助我并在需要时寻求澄清。提前致谢。
编辑:
来源JSON格式:
{
"start_date": "2021-12-01 00:00:00+00:00",
"end_date": "2021-12-31 23:59:59+00:00",
"resource_type": "all",
"records": [
{
"directconnect_connections": [
"account_id:227148359287,resource_type:directconnect_connections,resource_name:'dxcon-fh40evn5'",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:'dxcon-ffxgf6kh'",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:'dxcon-fg5j5v6o'",
"account_id:227148359287,resource_type:directconnect_connections,resource_name:'dxcon-fgvfo1ej'"
]
},
{
"virtual_interfaces": [
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fgvj25vt'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fgbw5gs0'",
"account_id:401311080156,resource_type:virtual_interfaces,resource_name:'dxvif-ffnosohr'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fg18bdhl'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-ffmf6h64'",
"account_id:390251991779,resource_type:virtual_interfaces,resource_name:'dxvif-fgkxjhcj'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-ffp6kl3f'"
]
}
]
}
由于您没有有效的 JSON 字符串并且不想参与字符串操作...也许这会有所帮助。
Select B.*
From test2 A
Cross Apply ( Select account_id = max(case when value like 'account_id:%' then stuff(value,1,11,'') end )
,resource_type = max(case when value like 'resource_type:%' then stuff(value,1,14,'') end )
,resource_name = max(case when value like 'resource_name:%' then stuff(value,1,14,'') end )
from string_split(resource_type,',')
)B
结果
account_id resource_type resource_name
224526257458 buckets camp-stage-artifactory
535533456241 buckets tni-prod-diva-backups
369798452057 buckets 369798452057-s3-manifests
460085747812 buckets vessel-incident-report-nonprod-accesslogs
不幸的是,数组中的值无效 JSON。您可以通过将 {}
添加到 beginning/end 并在 :
和 ,
.
的任一侧添加 "
来修补它们
DECLARE @json nvarchar(max) = N'{
"start_date": "2021-12-01 00:00:00+00:00",
"end_date": "2021-12-31 23:59:59+00:00",
"resource_type": "all",
"records": [
{
"directconnect_connections": [
"account_id:227148359287,resource_type:directconnect_connections,resource_name:''dxcon-fh40evn5''",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:''dxcon-ffxgf6kh''",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:''dxcon-fg5j5v6o''",
"account_id:227148359287,resource_type:directconnect_connections,resource_name:''dxcon-fgvfo1ej''"
]
},
{
"virtual_interfaces": [
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fgvj25vt''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fgbw5gs0''",
"account_id:401311080156,resource_type:virtual_interfaces,resource_name:''dxvif-ffnosohr''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fg18bdhl''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-ffmf6h64''",
"account_id:390251991779,resource_type:virtual_interfaces,resource_name:''dxvif-fgkxjhcj''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-ffp6kl3f''"
]
}
]
}';
SELECT
j4.account_id,
j4.resource_type,
TRIM('''' FROM j4.resource_name) resource_name
FROM OPENJSON(@json, '$.records') j1
CROSS APPLY OPENJSON(j1.value) j2
CROSS APPLY OPENJSON(j2.value) j3
CROSS APPLY OPENJSON('{"' + REPLACE(REPLACE(j3.value, ':', '":"'), ',', '","') + '"}')
WITH (
account_id bigint,
resource_type varchar(20),
resource_name varchar(100)
) j4;
对 OPENJSON
的前三个调用没有架构,因此结果集为三列:key
value
和 type
。对于数组(j1
和 j3
),key
是数组的索引。在单个对象 (j2
) 的情况下,key
是每个 属性 名称。
我想将字符串拆分成列。
我的专栏应该是:
account_id, resource_type, resource_name
我有一个 JSON 文件源,我一直试图通过 ADF 数据流对其进行解析。这对我不起作用,因此我将数据展平并将其放入 SQL 服务器(我愿意通过 ADF 或 SQL 解析值,如果有人可以告诉我如何)。请检查底部的 JSON 文件。
使用此代码查询我正在使用的数据。
CREATE TABLE test.test2
(
resource_type nvarchar(max) NULL
)
INSERT INTO test.test2 ([resource_type])
VALUES
('account_id:224526257458,resource_type:buckets,resource_name:camp-stage-artifactory'),
('account_id:535533456241,resource_type:buckets,resource_name:tni-prod-diva-backups'),
('account_id:369798452057,resource_type:buckets,resource_name:369798452057-s3-manifests'),
('account_id:460085747812,resource_type:buckets,resource_name:vessel-incident-report-nonprod-accesslogs')
我应该能够在 SQL 服务器中查询的输出应该是这样的:
account_id | resource_type | resource_name |
---|---|---|
224526257458 | buckets | camp-stage-artifactory |
535533456241 | buckets | tni-prod-diva-backups |
等等。
请帮助我并在需要时寻求澄清。提前致谢。
编辑:
来源JSON格式:
{
"start_date": "2021-12-01 00:00:00+00:00",
"end_date": "2021-12-31 23:59:59+00:00",
"resource_type": "all",
"records": [
{
"directconnect_connections": [
"account_id:227148359287,resource_type:directconnect_connections,resource_name:'dxcon-fh40evn5'",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:'dxcon-ffxgf6kh'",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:'dxcon-fg5j5v6o'",
"account_id:227148359287,resource_type:directconnect_connections,resource_name:'dxcon-fgvfo1ej'"
]
},
{
"virtual_interfaces": [
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fgvj25vt'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fgbw5gs0'",
"account_id:401311080156,resource_type:virtual_interfaces,resource_name:'dxvif-ffnosohr'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-fg18bdhl'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-ffmf6h64'",
"account_id:390251991779,resource_type:virtual_interfaces,resource_name:'dxvif-fgkxjhcj'",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:'dxvif-ffp6kl3f'"
]
}
]
}
由于您没有有效的 JSON 字符串并且不想参与字符串操作...也许这会有所帮助。
Select B.*
From test2 A
Cross Apply ( Select account_id = max(case when value like 'account_id:%' then stuff(value,1,11,'') end )
,resource_type = max(case when value like 'resource_type:%' then stuff(value,1,14,'') end )
,resource_name = max(case when value like 'resource_name:%' then stuff(value,1,14,'') end )
from string_split(resource_type,',')
)B
结果
account_id resource_type resource_name
224526257458 buckets camp-stage-artifactory
535533456241 buckets tni-prod-diva-backups
369798452057 buckets 369798452057-s3-manifests
460085747812 buckets vessel-incident-report-nonprod-accesslogs
不幸的是,数组中的值无效 JSON。您可以通过将 {}
添加到 beginning/end 并在 :
和 ,
.
"
来修补它们
DECLARE @json nvarchar(max) = N'{
"start_date": "2021-12-01 00:00:00+00:00",
"end_date": "2021-12-31 23:59:59+00:00",
"resource_type": "all",
"records": [
{
"directconnect_connections": [
"account_id:227148359287,resource_type:directconnect_connections,resource_name:''dxcon-fh40evn5''",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:''dxcon-ffxgf6kh''",
"account_id:401311080156,resource_type:directconnect_connections,resource_name:''dxcon-fg5j5v6o''",
"account_id:227148359287,resource_type:directconnect_connections,resource_name:''dxcon-fgvfo1ej''"
]
},
{
"virtual_interfaces": [
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fgvj25vt''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fgbw5gs0''",
"account_id:401311080156,resource_type:virtual_interfaces,resource_name:''dxvif-ffnosohr''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-fg18bdhl''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-ffmf6h64''",
"account_id:390251991779,resource_type:virtual_interfaces,resource_name:''dxvif-fgkxjhcj''",
"account_id:227148359287,resource_type:virtual_interfaces,resource_name:''dxvif-ffp6kl3f''"
]
}
]
}';
SELECT
j4.account_id,
j4.resource_type,
TRIM('''' FROM j4.resource_name) resource_name
FROM OPENJSON(@json, '$.records') j1
CROSS APPLY OPENJSON(j1.value) j2
CROSS APPLY OPENJSON(j2.value) j3
CROSS APPLY OPENJSON('{"' + REPLACE(REPLACE(j3.value, ':', '":"'), ',', '","') + '"}')
WITH (
account_id bigint,
resource_type varchar(20),
resource_name varchar(100)
) j4;
对 OPENJSON
的前三个调用没有架构,因此结果集为三列:key
value
和 type
。对于数组(j1
和 j3
),key
是数组的索引。在单个对象 (j2
) 的情况下,key
是每个 属性 名称。