使用 OPENJSON 解析 JSON 数组到 table
Parse JSON arrays using OPENJSON to table
我很困惑如何将我的 JSON 数据分解成 table,因为没有使用数组的名称进行格式化
实际的 JSON 文件要大得多(19K 行),所以我只提取了一小部分(顶层的前两个基因以及其中的部分突变)。
DECLARE @txt1 varchar(max) = '{ "Rv0005": { "p.Glu540Asp": { "annotations": [ { "type": "drug", "drug": "moxifloxacin", "literature": "10.1128/AAC.00825-17;10.1128/JCM.06860-11", "confers": "resistance" } ], "genome_positions": [ 6857, 6858, 6859 ] }, "p.Ala504Thr": { "annotations": [ { "type": "drug", "drug": "ciprofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "fluoroquinolones", "confers": "resistance" }, { "type": "drug", "drug": "levofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "moxifloxacin", "confers": "resistance" }, { "type": "drug", "drug": "ofloxacin", "confers": "resistance" } ], "genome_positions": [ 6749, 6750, 6751 ] }, "p.Ala504Val": { "annotations": [ { "type": "drug", "drug": "ciprofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "fluoroquinolones", "confers": "resistance" }, { "type": "drug", "drug": "levofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "moxifloxacin", "confers": "resistance" }, { "type": "drug", "drug": "ofloxacin", "confers": "resistance" } ], "genome_positions": [ 6749, 6750, 6751 ] } }, "Rv2043c": { "p.Thr100Ile": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "literature": "10.1128/JCM.01214-17", "confers": "resistance" } ], "genome_positions": [ 2288942, 2288943, 2288944 ] }, "p.Thr160Ala": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "literature": "10.1128/JCM.01214-17", "confers": "resistance" } ], "genome_positions": [ 2288762, 2288763, 2288764 ] }, "c.101_102insT": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "confers": "resistance" } ], "genome_positions": [ 2289140, 2289141 ] } } }'
-- Default-schema OPENJSON call: lists the top-level properties of @txt1.
SELECT
    top_level.[key],
    top_level.[value],
    top_level.[type]
FROM OPENJSON(@txt1) AS top_level;
顶层是一个基因,这只是两个基因的数据(Rv0005 = 基因 1,Rv2043c = 基因 2)。每个基因都可以有多个突变(例如 Rv0005 在 p.Glu540Asp 和 p.Ala504Thr 处有一个突变)并且每个突变都有一些与之相关的数据(文献、抗性、基因组位置等)。我知道我可以通过
解析 JSON 和 JSON 数组的一部分
-- Top-level properties of the document (the gene keys).
SELECT
    doc.[key],
    doc.[value],
    doc.[type]
FROM OPENJSON(@txt1) AS doc;

-- Elements of one known array, addressed by an explicit JSON path.
-- The mutation key contains a dot, so it is double-quoted inside the path.
SELECT
    pos.[key],
    pos.[value],
    pos.[type]
FROM OPENJSON(@txt1, '$.Rv0005."p.Glu540Asp".genome_positions') AS pos;
但我不知道如何在不知道 keys/values 是什么的情况下将整个事情撕成碎片。特别是有 35 个独特的基因(JSON 树的顶部),每个突变都在它们下面命名,但都是独一无二的(例如 p.Glu540Asp 等)。
最终我要么想将数据拉入多个规范化的 tables 但老实说,一个大的 table 就可以了
-- Flat staging table for the shredded JSON: gene, mutation, the four
-- annotation attributes and a single genome position per row.
CREATE TABLE #Muts
(
    gene                  varchar(max),
    mutations             varchar(max),
    annotation_type       varchar(max),
    annotation_drug       varchar(max),
    annotation_literature varchar(max),
    annotation_confers    varchar(max),
    genome_positions      int
);
前几个值的数据如下所示(请注意,某些突变会产生对多种药物的耐药性)
gene | mutations | annotation_type | annotation_drug | annotation_literature | annotation_confers | genome_positions |
---|---|---|---|---|---|---|
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857 |
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6858 |
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6859 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
当您想将 JSON 数组“转换”为表(table)时,必须将 CROSS APPLY 与 OPENJSON 结合使用。
以下查询返回预期结果:
-- Flatten the document into one row per gene / mutation / annotation /
-- genome position. Gene and mutation names are read from the [key] column
-- of the default OPENJSON schema, so they need not be known in advance.
-- CROSS APPLY drops a mutation whose annotations or genome_positions array
-- yields no rows.
SELECT
    gene.[key]     AS gene,
    mutation.[key] AS mutations,
    annotation.annotation_type,
    annotation.annotation_drug,
    annotation.annotation_literature,
    annotation.annotation_confers,
    pos.genome_positions
FROM OPENJSON(@txt1) AS gene
CROSS APPLY OPENJSON(gene.[value]) AS mutation
CROSS APPLY OPENJSON(mutation.[value], '$.annotations')
    WITH (
        annotation_type       nvarchar(100) '$.type',
        annotation_drug       nvarchar(100) '$.drug',
        -- literature is a ';'-separated list of references with no fixed
        -- upper length, so it is not capped at 100 characters
        annotation_literature nvarchar(max) '$.literature',
        annotation_confers    nvarchar(100) '$.confers'
    ) AS annotation
-- '$' addresses the array element itself; typing it as int matches the
-- int genome_positions column of the target table
CROSS APPLY OPENJSON(mutation.[value], '$.genome_positions')
    WITH (genome_positions int '$') AS pos;
结果:
请尝试以下解决方案。
SQL
DECLARE @json NVARCHAR(MAX) =
N'{
"Rv0005": {
"p.Glu540Asp": {
"annotations": [
{
"type": "drug",
"drug": "moxifloxacin",
"literature": "10.1128/AAC.00825-17;10.1128/JCM.06860-11",
"confers": "resistance"
}
],
"genome_positions": [
6857,
6858,
6859
]
},
"p.Ala504Thr": {
"annotations": [
{
"type": "drug",
"drug": "ciprofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "fluoroquinolones",
"confers": "resistance"
},
{
"type": "drug",
"drug": "levofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "moxifloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "ofloxacin",
"confers": "resistance"
}
],
"genome_positions": [
6749,
6750,
6751
]
},
"p.Ala504Val": {
"annotations": [
{
"type": "drug",
"drug": "ciprofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "fluoroquinolones",
"confers": "resistance"
},
{
"type": "drug",
"drug": "levofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "moxifloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "ofloxacin",
"confers": "resistance"
}
],
"genome_positions": [
6749,
6750,
6751
]
}
},
"Rv2043c": {
"p.Thr100Ile": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"literature": "10.1128/JCM.01214-17",
"confers": "resistance"
}
],
"genome_positions": [
2288942,
2288943,
2288944
]
},
"p.Thr160Ala": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"literature": "10.1128/JCM.01214-17",
"confers": "resistance"
}
],
"genome_positions": [
2288762,
2288763,
2288764
]
},
"c.101_102insT": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"confers": "resistance"
}
],
"genome_positions": [
2289140,
2289141
]
}
}
}';
-- test if it is a legit JSON
SELECT ISJSON(@json) AS Result;

-- One row per gene / mutation / annotation, with the first three genome
-- positions spread over fixed columns.
-- NOTE(review): only array indexes 0..2 are read; a genome_positions array
-- with more elements would lose the rest in this shape.
SELECT
    genes.[key]     AS gene,      -- [key] in lower case: the OPENJSON column name
    mutations.[key] AS mutation,
    annotations.[type],
    annotations.[drug],
    annotations.[literature],
    annotations.[confers],
    -- JSON_VALUE returns text; cast so the positions are numeric
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[0]') AS int) AS [gen_pos1],
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[1]') AS int) AS [gen_pos2],
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[2]') AS int) AS [gen_pos3]
FROM OPENJSON(@json) AS genes
CROSS APPLY OPENJSON(genes.[value]) AS mutations
CROSS APPLY OPENJSON(mutations.[value], '$.annotations')
    WITH
    (
        -- widths are generous on purpose: the real file is far larger than
        -- the sample, so value lengths are not known up front
        [type]       NVARCHAR(100) '$.type',
        [drug]       NVARCHAR(100) '$.drug',
        [literature] NVARCHAR(MAX) '$.literature',
        [confers]    NVARCHAR(100) '$.confers'
    ) AS annotations;
输出
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
| gene | mutation | type | drug | literature | confers | gen_pos1 | gen_pos2 | gen_pos3 |
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
| Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857 | 6858 | 6859 |
| Rv0005 | p.Ala504Thr | drug | ciprofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | fluoroquinolones | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | levofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | moxifloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | ofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | ciprofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | fluoroquinolones | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | levofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | moxifloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | ofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv2043c | p.Thr100Ile | drug | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288942 | 2288943 | 2288944 |
| Rv2043c | p.Thr160Ala | drug | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288762 | 2288763 | 2288764 |
| Rv2043c | c.101_102insT | drug | pyrazinamide | NULL | resistance | 2289140 | 2289141 | NULL |
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
使用临时表(temp table)可以更轻松地转换展开后的 JSON 数据。
-- Placeholder: substitute the full JSON document for '{...}'.
DECLARE @txt1 varchar(max) = '{...}';

-- Start from a clean slate when the script is re-run in the same session.
DROP TABLE IF EXISTS #tmpJsonUnfolded;

-- Unfold the document level by level into key/value rows:
--   gene -> mutation -> data_class (annotations | genome_positions)
--        -> array index (num) -> annotation property (col)
SELECT
    gene_lvl.[key]     AS gene,
    mutation_lvl.[key] AS mutations,
    class_lvl.[key]    AS data_class,
    item_lvl.[key]     AS num,
    prop_lvl.[key]     AS col,
    -- genome positions are scalar array items; annotation values sit one
    -- level deeper, inside each annotation object
    CASE class_lvl.[key]
        WHEN 'genome_positions' THEN item_lvl.[value]
        WHEN 'annotations'      THEN prop_lvl.[value]
    END                AS [value]
INTO #tmpJsonUnfolded
FROM OPENJSON(@txt1) AS gene_lvl
CROSS APPLY OPENJSON(gene_lvl.[value]) AS mutation_lvl
CROSS APPLY OPENJSON(mutation_lvl.[value]) AS class_lvl
CROSS APPLY OPENJSON(class_lvl.[value]) AS item_lvl
-- OUTER APPLY keeps the genome_positions rows, whose items have no
-- fifth level to open
OUTER APPLY (
    SELECT *
    FROM OPENJSON(item_lvl.[value])
    WHERE class_lvl.[key] = 'annotations'
) AS prop_lvl;
-- Pivot the unfolded key/value rows of #tmpJsonUnfolded back into one row
-- per annotation. The genome positions of a mutation are collapsed into a
-- single comma-separated list attached to annotation number 0.
select
      gene
    , mutations
    , [type]             as annotation_type
    , [num]              as annotation_num
    , [drug]             as annotation_drug
    , [literature]       as annotation_literature
    , [confers]          as annotation_confers
    , [genome_positions]
from (
    -- annotation properties, keyed by the index of their annotation
    select
          gene
        , mutations
        , cast(num as int) as num   -- array index arrives as text
        , [col]
        , [value]
    from #tmpJsonUnfolded
    where data_class = 'annotations'
    union all
    -- all positions of one mutation as a single pivot cell
    select
          gene
        , mutations
        , 0
        , data_class as [col]
        -- without WITHIN GROUP the concatenation order is not guaranteed;
        -- order by the numeric array index to keep the document order
        , string_agg([value], ', ') within group (order by cast(num as int)) as [value]
    from #tmpJsonUnfolded
    where data_class = 'genome_positions'
    group by gene, mutations, data_class
) src
pivot (
    max([value])
    for [col] in ([type], [drug], [literature], [confers], [genome_positions])
) pvt
-- explicit ordering makes the result set deterministic
order by gene, mutations, [num];
gene | mutations | annotation_type | annotation_num | annotation_drug | annotation_literature | annotation_confers | genome_positions
:------ | :------------ | :-------------- | -------------: | :--------------- | :---------------------------------------- | :----------------- | :------------------------
Rv0005 | p.Ala504Thr | drug | 0 | ciprofloxacin | null | resistance | 6749, 6750, 6751
Rv0005 | p.Ala504Thr | drug | 1 | fluoroquinolones | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 2 | levofloxacin | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 3 | moxifloxacin | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 4 | ofloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 0 | ciprofloxacin | null | resistance | 6749, 6750, 6751
Rv0005 | p.Ala504Val | drug | 1 | fluoroquinolones | null | resistance | null
Rv0005 | p.Ala504Val | drug | 2 | levofloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 3 | moxifloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 4 | ofloxacin | null | resistance | null
Rv0005 | p.Glu540Asp | drug | 0 | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857, 6858, 6859
Rv2043c | c.101_102insT | drug | 0 | pyrazinamide | null | resistance | 2289140, 2289141
Rv2043c | p.Thr100Ile | drug | 0 | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288942, 2288943, 2288944
Rv2043c | p.Thr160Ala | drug | 0 | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288762, 2288763, 2288764
演示 db<>fiddle here
我很困惑如何将我的 JSON 数据分解成 table,因为没有使用数组的名称进行格式化
实际的 JSON 文件要大得多(19K 行),所以我只提取了一小部分(顶层的前两个基因以及其中的部分突变)。
DECLARE @txt1 varchar(max) = '{ "Rv0005": { "p.Glu540Asp": { "annotations": [ { "type": "drug", "drug": "moxifloxacin", "literature": "10.1128/AAC.00825-17;10.1128/JCM.06860-11", "confers": "resistance" } ], "genome_positions": [ 6857, 6858, 6859 ] }, "p.Ala504Thr": { "annotations": [ { "type": "drug", "drug": "ciprofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "fluoroquinolones", "confers": "resistance" }, { "type": "drug", "drug": "levofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "moxifloxacin", "confers": "resistance" }, { "type": "drug", "drug": "ofloxacin", "confers": "resistance" } ], "genome_positions": [ 6749, 6750, 6751 ] }, "p.Ala504Val": { "annotations": [ { "type": "drug", "drug": "ciprofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "fluoroquinolones", "confers": "resistance" }, { "type": "drug", "drug": "levofloxacin", "confers": "resistance" }, { "type": "drug", "drug": "moxifloxacin", "confers": "resistance" }, { "type": "drug", "drug": "ofloxacin", "confers": "resistance" } ], "genome_positions": [ 6749, 6750, 6751 ] } }, "Rv2043c": { "p.Thr100Ile": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "literature": "10.1128/JCM.01214-17", "confers": "resistance" } ], "genome_positions": [ 2288942, 2288943, 2288944 ] }, "p.Thr160Ala": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "literature": "10.1128/JCM.01214-17", "confers": "resistance" } ], "genome_positions": [ 2288762, 2288763, 2288764 ] }, "c.101_102insT": { "annotations": [ { "type": "drug", "drug": "pyrazinamide", "confers": "resistance" } ], "genome_positions": [ 2289140, 2289141 ] } } }'
-- Default-schema OPENJSON call: lists the top-level properties of @txt1.
SELECT
    top_level.[key],
    top_level.[value],
    top_level.[type]
FROM OPENJSON(@txt1) AS top_level;
顶层是一个基因,这只是两个基因的数据(Rv0005 = 基因 1,Rv2043c = 基因 2)。每个基因都可以有多个突变(例如 Rv0005 在 p.Glu540Asp 和 p.Ala504Thr 处有一个突变)并且每个突变都有一些与之相关的数据(文献、抗性、基因组位置等)。我知道我可以通过
解析 JSON 和 JSON 数组的一部分
SELECT * FROM OPENJSON(@txt1)
-- Elements of one known array, addressed by an explicit JSON path.
-- The mutation key contains a dot, so it is double-quoted inside the path.
SELECT
    pos.[key],
    pos.[value],
    pos.[type]
FROM OPENJSON(@txt1, '$.Rv0005."p.Glu540Asp".genome_positions') AS pos;
但我不知道如何在不知道 keys/values 是什么的情况下将整个事情撕成碎片。特别是有 35 个独特的基因(JSON 树的顶部),每个突变都在它们下面命名,但都是独一无二的(例如 p.Glu540Asp 等)。
最终我要么想将数据拉入多个规范化的 tables 但老实说,一个大的 table 就可以了
-- Flat staging table for the shredded JSON: gene, mutation, the four
-- annotation attributes and a single genome position per row.
CREATE TABLE #Muts
(
    gene                  varchar(max),
    mutations             varchar(max),
    annotation_type       varchar(max),
    annotation_drug       varchar(max),
    annotation_literature varchar(max),
    annotation_confers    varchar(max),
    genome_positions      int
);
前几个值的数据如下所示(请注意,某些突变会产生对多种药物的耐药性)
gene | mutations | annotation_type | annotation_drug | annotation_literature | annotation_confers | genome_positions |
---|---|---|---|---|---|---|
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857 |
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6858 |
Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6859 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6849 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6850 |
Rv0005 | p.Ala504Thr | drug | ciprofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | fluoroquinolones | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | levofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
Rv0005 | p.Ala504Thr | drug | ofloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6851 |
当您想将 JSON 数组“转换”为表(table)时,必须将 CROSS APPLY 与 OPENJSON 结合使用。
以下查询返回预期结果:
-- Flatten the document into one row per gene / mutation / annotation /
-- genome position. Gene and mutation names are read from the [key] column
-- of the default OPENJSON schema, so they need not be known in advance.
-- CROSS APPLY drops a mutation whose annotations or genome_positions array
-- yields no rows.
SELECT
    gene.[key]     AS gene,
    mutation.[key] AS mutations,
    annotation.annotation_type,
    annotation.annotation_drug,
    annotation.annotation_literature,
    annotation.annotation_confers,
    pos.genome_positions
FROM OPENJSON(@txt1) AS gene
CROSS APPLY OPENJSON(gene.[value]) AS mutation
CROSS APPLY OPENJSON(mutation.[value], '$.annotations')
    WITH (
        annotation_type       nvarchar(100) '$.type',
        annotation_drug       nvarchar(100) '$.drug',
        -- literature is a ';'-separated list of references with no fixed
        -- upper length, so it is not capped at 100 characters
        annotation_literature nvarchar(max) '$.literature',
        annotation_confers    nvarchar(100) '$.confers'
    ) AS annotation
-- '$' addresses the array element itself; typing it as int matches the
-- int genome_positions column of the target table
CROSS APPLY OPENJSON(mutation.[value], '$.genome_positions')
    WITH (genome_positions int '$') AS pos;
结果:
请尝试以下解决方案。
SQL
DECLARE @json NVARCHAR(MAX) =
N'{
"Rv0005": {
"p.Glu540Asp": {
"annotations": [
{
"type": "drug",
"drug": "moxifloxacin",
"literature": "10.1128/AAC.00825-17;10.1128/JCM.06860-11",
"confers": "resistance"
}
],
"genome_positions": [
6857,
6858,
6859
]
},
"p.Ala504Thr": {
"annotations": [
{
"type": "drug",
"drug": "ciprofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "fluoroquinolones",
"confers": "resistance"
},
{
"type": "drug",
"drug": "levofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "moxifloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "ofloxacin",
"confers": "resistance"
}
],
"genome_positions": [
6749,
6750,
6751
]
},
"p.Ala504Val": {
"annotations": [
{
"type": "drug",
"drug": "ciprofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "fluoroquinolones",
"confers": "resistance"
},
{
"type": "drug",
"drug": "levofloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "moxifloxacin",
"confers": "resistance"
},
{
"type": "drug",
"drug": "ofloxacin",
"confers": "resistance"
}
],
"genome_positions": [
6749,
6750,
6751
]
}
},
"Rv2043c": {
"p.Thr100Ile": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"literature": "10.1128/JCM.01214-17",
"confers": "resistance"
}
],
"genome_positions": [
2288942,
2288943,
2288944
]
},
"p.Thr160Ala": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"literature": "10.1128/JCM.01214-17",
"confers": "resistance"
}
],
"genome_positions": [
2288762,
2288763,
2288764
]
},
"c.101_102insT": {
"annotations": [
{
"type": "drug",
"drug": "pyrazinamide",
"confers": "resistance"
}
],
"genome_positions": [
2289140,
2289141
]
}
}
}';
-- test if it is a legit JSON
SELECT ISJSON(@json) AS Result;

-- One row per gene / mutation / annotation, with the first three genome
-- positions spread over fixed columns.
-- NOTE(review): only array indexes 0..2 are read; a genome_positions array
-- with more elements would lose the rest in this shape.
SELECT
    genes.[key]     AS gene,      -- [key] in lower case: the OPENJSON column name
    mutations.[key] AS mutation,
    annotations.[type],
    annotations.[drug],
    annotations.[literature],
    annotations.[confers],
    -- JSON_VALUE returns text; cast so the positions are numeric
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[0]') AS int) AS [gen_pos1],
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[1]') AS int) AS [gen_pos2],
    CAST(JSON_VALUE(mutations.[value], '$.genome_positions[2]') AS int) AS [gen_pos3]
FROM OPENJSON(@json) AS genes
CROSS APPLY OPENJSON(genes.[value]) AS mutations
CROSS APPLY OPENJSON(mutations.[value], '$.annotations')
    WITH
    (
        -- widths are generous on purpose: the real file is far larger than
        -- the sample, so value lengths are not known up front
        [type]       NVARCHAR(100) '$.type',
        [drug]       NVARCHAR(100) '$.drug',
        [literature] NVARCHAR(MAX) '$.literature',
        [confers]    NVARCHAR(100) '$.confers'
    ) AS annotations;
输出
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
| gene | mutation | type | drug | literature | confers | gen_pos1 | gen_pos2 | gen_pos3 |
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
| Rv0005 | p.Glu540Asp | drug | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857 | 6858 | 6859 |
| Rv0005 | p.Ala504Thr | drug | ciprofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | fluoroquinolones | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | levofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | moxifloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Thr | drug | ofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | ciprofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | fluoroquinolones | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | levofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | moxifloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv0005 | p.Ala504Val | drug | ofloxacin | NULL | resistance | 6749 | 6750 | 6751 |
| Rv2043c | p.Thr100Ile | drug | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288942 | 2288943 | 2288944 |
| Rv2043c | p.Thr160Ala | drug | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288762 | 2288763 | 2288764 |
| Rv2043c | c.101_102insT | drug | pyrazinamide | NULL | resistance | 2289140 | 2289141 | NULL |
+---------+---------------+------+------------------+-------------------------------------------+------------+----------+----------+----------+
使用临时 table 可以更轻松地从展开的 json.
中转换数据
-- Placeholder: substitute the full JSON document for '{...}'.
DECLARE @txt1 varchar(max) = '{...}';

-- Start from a clean slate when the script is re-run in the same session.
DROP TABLE IF EXISTS #tmpJsonUnfolded;

-- Unfold the document level by level into key/value rows:
--   gene -> mutation -> data_class (annotations | genome_positions)
--        -> array index (num) -> annotation property (col)
SELECT
    gene_lvl.[key]     AS gene,
    mutation_lvl.[key] AS mutations,
    class_lvl.[key]    AS data_class,
    item_lvl.[key]     AS num,
    prop_lvl.[key]     AS col,
    -- genome positions are scalar array items; annotation values sit one
    -- level deeper, inside each annotation object
    CASE class_lvl.[key]
        WHEN 'genome_positions' THEN item_lvl.[value]
        WHEN 'annotations'      THEN prop_lvl.[value]
    END                AS [value]
INTO #tmpJsonUnfolded
FROM OPENJSON(@txt1) AS gene_lvl
CROSS APPLY OPENJSON(gene_lvl.[value]) AS mutation_lvl
CROSS APPLY OPENJSON(mutation_lvl.[value]) AS class_lvl
CROSS APPLY OPENJSON(class_lvl.[value]) AS item_lvl
-- OUTER APPLY keeps the genome_positions rows, whose items have no
-- fifth level to open
OUTER APPLY (
    SELECT *
    FROM OPENJSON(item_lvl.[value])
    WHERE class_lvl.[key] = 'annotations'
) AS prop_lvl;
-- Pivot the unfolded key/value rows of #tmpJsonUnfolded back into one row
-- per annotation. The genome positions of a mutation are collapsed into a
-- single comma-separated list attached to annotation number 0.
select
      gene
    , mutations
    , [type]             as annotation_type
    , [num]              as annotation_num
    , [drug]             as annotation_drug
    , [literature]       as annotation_literature
    , [confers]          as annotation_confers
    , [genome_positions]
from (
    -- annotation properties, keyed by the index of their annotation
    select
          gene
        , mutations
        , cast(num as int) as num   -- array index arrives as text
        , [col]
        , [value]
    from #tmpJsonUnfolded
    where data_class = 'annotations'
    union all
    -- all positions of one mutation as a single pivot cell
    select
          gene
        , mutations
        , 0
        , data_class as [col]
        -- without WITHIN GROUP the concatenation order is not guaranteed;
        -- order by the numeric array index to keep the document order
        , string_agg([value], ', ') within group (order by cast(num as int)) as [value]
    from #tmpJsonUnfolded
    where data_class = 'genome_positions'
    group by gene, mutations, data_class
) src
pivot (
    max([value])
    for [col] in ([type], [drug], [literature], [confers], [genome_positions])
) pvt
-- explicit ordering makes the result set deterministic
order by gene, mutations, [num];
gene | mutations | annotation_type | annotation_num | annotation_drug | annotation_literature | annotation_confers | genome_positions
:------ | :------------ | :-------------- | -------------: | :--------------- | :---------------------------------------- | :----------------- | :------------------------
Rv0005 | p.Ala504Thr | drug | 0 | ciprofloxacin | null | resistance | 6749, 6750, 6751
Rv0005 | p.Ala504Thr | drug | 1 | fluoroquinolones | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 2 | levofloxacin | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 3 | moxifloxacin | null | resistance | null
Rv0005 | p.Ala504Thr | drug | 4 | ofloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 0 | ciprofloxacin | null | resistance | 6749, 6750, 6751
Rv0005 | p.Ala504Val | drug | 1 | fluoroquinolones | null | resistance | null
Rv0005 | p.Ala504Val | drug | 2 | levofloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 3 | moxifloxacin | null | resistance | null
Rv0005 | p.Ala504Val | drug | 4 | ofloxacin | null | resistance | null
Rv0005 | p.Glu540Asp | drug | 0 | moxifloxacin | 10.1128/AAC.00825-17;10.1128/JCM.06860-11 | resistance | 6857, 6858, 6859
Rv2043c | c.101_102insT | drug | 0 | pyrazinamide | null | resistance | 2289140, 2289141
Rv2043c | p.Thr100Ile | drug | 0 | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288942, 2288943, 2288944
Rv2043c | p.Thr160Ala | drug | 0 | pyrazinamide | 10.1128/JCM.01214-17 | resistance | 2288762, 2288763, 2288764
演示 db<>fiddle here