将 SQL 服务器中的 XML 列分解到新表中
Shredding XML column in SQL Server into new tables
我有一个源表,其中每一行都有一个 XML 字段,该字段包含数量不定的调查回复,我需要将它们拆分(shred)后写入输出表。我试过 nodes() 和 OPENXML,但始终没能让它们正常工作。根据我查到的资料,我认为需要使用 CROSS APPLY,但我没能以自己能理解的方式把这些组合起来并得到结果。如果我遗漏了什么简单的东西,我深表歉意,但我真的被卡住了;如能提供任何帮助,我将不胜感激。
这是包含导入数据(我无法控制其格式)的源 table,如下所示:
ImportId SurveyId DateImported ResponseData
-------- -------- ------------ ------------
1 11223344 2017-05-21 18:00:00.00 <survey><data><result>...
2 55667788 2017-05-21 18:01:00.00 <survey><data><result>...
3 99009988 2017-05-21 18:02:00.00 <survey><data><result>...
这是 ResponseData 列中 XML 的简化示例:
<survey>
<data>
<result>
<id>12345</id>
<date_submitted>2017-05-10 09:30:15</date_submitted>
<url_variables>
<respondent_id>
<key>respondent_id</key>
<value>987654</value>
<type>url</type>
</respondent_id>
<respondent_level>
<key>respondent_level</key>
<value>5</value>
<type>url</type>
</respondent_level>
</url_variables>
<survey_data>
<question>
<id>1</id>
<answer>Yes</answer>
</question>
<question>
<id>2</id>
<answer>No</answer>
</question>
<question>
<id>3</id>
<subquestions>
<subquestion>
<id>4</id>
<answer>Maybe</answer>
</subquestion>
<subquestion>
<id>5</id>
<answer>I don't know</answer>
</subquestion>
</subquestions>
</question>
... more questions ...
</survey_data>
</result>
<result>
<id>67890</id>
<date_submitted>2017-05-11 10:00:00</date_submitted>
<url_variables>
<respondent_id>
<key>respondent_id</key>
<value>34567</value>
<type>url</type>
</respondent_id>
<respondent_level>
<key>respondent_level</key>
<value>10</value>
<type>url</type>
</respondent_level>
</url_variables>
<survey_data>
<question>
<id>1</id>
<answer>No</answer>
</question>
<question>
<id>2</id>
<answer>Yes</answer>
</question>
<question>
<id>3</id>
<subquestions>
<subquestion>
<id>4</id>
<answer>Definitely not</answer>
</subquestion>
<subquestion>
<id>5</id>
<answer>I object</answer>
</subquestion>
</subquestions>
</question>
... more questions ...
</survey_data>
</result>
... more results ...
</data>
</survey>
我需要读取这些数据,拆分 XML,从每个 ResponseData 字段中取出多个结果,并将它们写入如下两个表中:
ResultId SurveyId RespondentId RespondentLevel DateSubmitted
-------- -------- ------------ --------------- -------------
12345 11223344 987654 5 2017-05-10 09:30:15
67890 11223344 34567 10 2017-05-11 10:00:00
...
(Data extracted from the rest of ImportId 1 followed by ImportId 2, 3, etc)
ResultId QuestionId SubquestionId Answer
---------- ---------- ------------- ------
12345 1 0 Yes
12345 2 0 No
12345 3 4 Maybe
12345 3 5 I don't know
67890 1 0 No
67890 2 0 Yes
67890 3 4 Definitely not
67890 3 5 I object
...
(Data extracted from the rest of ImportId 1 followed by ImportId 2, 3, etc)
对于两个表,您需要两个查询
示例(这里只用了 1 条记录,但同样适用于多条记录)
-- Sample source table: one row per import, with the raw survey XML kept in [ResponseData].
Declare @YourTable Table (
    [ImportId]     int,
    [SurveyId]     int,
    [DateImported] datetime,
    [ResponseData] xml
);

-- The explicit column list keeps this insert valid if the table definition is reordered or extended.
-- The timestamp uses the ISO 8601 'T' form: a datetime literal written as 'yyyy-mm-dd hh:mm:ss'
-- is interpreted according to SET LANGUAGE / SET DATEFORMAT, while the 'T' form is not.
Insert Into @YourTable ([ImportId],[SurveyId],[DateImported],[ResponseData]) Values
(1,11223344,'2017-05-21T18:00:00','<survey><data><result><id>12345</id><date_submitted>2017-05-10 09:30:15</date_submitted><url_variables><respondent_id><key>respondent_id</key><value>987654</value><type>url</type></respondent_id><respondent_level><key>respondent_level</key><value>5</value><type>url</type></respondent_level></url_variables><survey_data><question><id>1</id><answer>Yes</answer></question><question><id>2</id><answer>No</answer></question><question><id>3</id><subquestions><subquestion><id>4</id><answer>Maybe</answer></subquestion><subquestion><id>5</id><answer>I don''t know</answer></subquestion></subquestions></question></survey_data></result><result><id>67890</id><date_submitted>2017-05-11 10:00:00</date_submitted><url_variables><respondent_id><key>respondent_id</key><value>34567</value><type>url</type></respondent_id><respondent_level><key>respondent_level</key><value>10</value><type>url</type></respondent_level></url_variables><survey_data><question><id>1</id><answer>No</answer></question><question><id>2</id><answer>Yes</answer></question><question><id>3</id><subquestions><subquestion><id>4</id><answer>Definitely not</answer></subquestion><subquestion><id>5</id><answer>I object</answer></subquestion></subquestions></question></survey_data></result></data></survey>');
-- Result header rows: emits one row per <result> element found in each import's XML,
-- carrying the parent row's SurveyId alongside the values read from that <result>.
Select res.x.value('(id)[1]','int')                                    As [ResultID]
      ,src.[SurveyId]
      ,res.x.value('(url_variables/respondent_id/value)[1]','int')     As [RespondentId]
      ,res.x.value('(url_variables/respondent_level/value)[1]','int')  As [RespondentLevel]
      ,res.x.value('(date_submitted)[1]','datetime')                   As [DateSubmitted]
 From  @YourTable src
 -- nodes() yields one row per matching <result>; Cross Apply pairs each with its source row.
 Cross Apply src.[ResponseData].nodes('survey/data/result') res(x);
-- Answer rows: one row per <question>, or one row per <subquestion> when a question has any.
-- Answers are read as nvarchar(max) so long or non-ANSI answer text is not truncated
-- or mangled (the previous varchar(50) silently cut answers off at 50 characters).
Select res.x.value('(id)[1]','int')                         As [ResultID]
      ,IsNull(qst.x.value('(id)[1]','int'),0)               As [QuestionId]
       -- Outer Apply leaves sub.x NULL for questions without subquestions; report those as 0.
      ,IsNull(sub.x.value('(id)[1]','int'),0)               As [SubquestionId]
       -- concat() treats NULL as an empty string, so whichever of the two answers exists
       -- comes through. NOTE(review): a question carrying both its own <answer> and
       -- subquestion answers would get the two texts glued together; confirm that cannot occur.
      ,concat(qst.x.value('(answer)[1]','nvarchar(max)')
             ,sub.x.value('(answer)[1]','nvarchar(max)'))   As [answer]
 From  @YourTable src
 Cross Apply src.[ResponseData].nodes('survey/data/result') res(x)
 Cross Apply res.x.nodes('survey_data/question') qst(x)
 Outer Apply qst.x.nodes('subquestions/subquestion') sub(x);
Returns
我有一个源表,其中每一行都有一个 XML 字段,该字段包含数量不定的调查回复,我需要将它们拆分(shred)后写入输出表。我试过 nodes() 和 OPENXML,但始终没能让它们正常工作。根据我查到的资料,我认为需要使用 CROSS APPLY,但我没能以自己能理解的方式把这些组合起来并得到结果。如果我遗漏了什么简单的东西,我深表歉意,但我真的被卡住了;如能提供任何帮助,我将不胜感激。
这是包含导入数据(我无法控制其格式)的源 table,如下所示:
ImportId SurveyId DateImported ResponseData
-------- -------- ------------ ------------
1 11223344 2017-05-21 18:00:00.00 <survey><data><result>...
2 55667788 2017-05-21 18:01:00.00 <survey><data><result>...
3 99009988 2017-05-21 18:02:00.00 <survey><data><result>...
这是 ResponseData 列中 XML 的简化示例:
<survey>
<data>
<result>
<id>12345</id>
<date_submitted>2017-05-10 09:30:15</date_submitted>
<url_variables>
<respondent_id>
<key>respondent_id</key>
<value>987654</value>
<type>url</type>
</respondent_id>
<respondent_level>
<key>respondent_level</key>
<value>5</value>
<type>url</type>
</respondent_level>
</url_variables>
<survey_data>
<question>
<id>1</id>
<answer>Yes</answer>
</question>
<question>
<id>2</id>
<answer>No</answer>
</question>
<question>
<id>3</id>
<subquestions>
<subquestion>
<id>4</id>
<answer>Maybe</answer>
</subquestion>
<subquestion>
<id>5</id>
<answer>I don't know</answer>
</subquestion>
</subquestions>
</question>
... more questions ...
</survey_data>
</result>
<result>
<id>67890</id>
<date_submitted>2017-05-11 10:00:00</date_submitted>
<url_variables>
<respondent_id>
<key>respondent_id</key>
<value>34567</value>
<type>url</type>
</respondent_id>
<respondent_level>
<key>respondent_level</key>
<value>10</value>
<type>url</type>
</respondent_level>
</url_variables>
<survey_data>
<question>
<id>1</id>
<answer>No</answer>
</question>
<question>
<id>2</id>
<answer>Yes</answer>
</question>
<question>
<id>3</id>
<subquestions>
<subquestion>
<id>4</id>
<answer>Definitely not</answer>
</subquestion>
<subquestion>
<id>5</id>
<answer>I object</answer>
</subquestion>
</subquestions>
</question>
... more questions ...
</survey_data>
</result>
... more results ...
</data>
</survey>
我需要读取这些数据,拆分 XML,从每个 ResponseData 字段中取出多个结果,并将它们写入如下两个表中:
ResultId SurveyId RespondentId RespondentLevel DateSubmitted
-------- -------- ------------ --------------- -------------
12345 11223344 987654 5 2017-05-10 09:30:15
67890 11223344 34567 10 2017-05-11 10:00:00
...
(Data extracted from the rest of ImportId 1 followed by ImportId 2, 3, etc)
ResultId QuestionId SubquestionId Answer
---------- ---------- ------------- ------
12345 1 0 Yes
12345 2 0 No
12345 3 4 Maybe
12345 3 5 I don't know
67890 1 0 No
67890 2 0 Yes
67890 3 4 Definitely not
67890 3 5 I object
...
(Data extracted from the rest of ImportId 1 followed by ImportId 2, 3, etc)
对于两个表,您需要两个查询
示例(这里只用了 1 条记录,但同样适用于多条记录)
-- Sample source table: one row per import, with the raw survey XML kept in [ResponseData].
Declare @YourTable Table (
    [ImportId]     int,
    [SurveyId]     int,
    [DateImported] datetime,
    [ResponseData] xml
);

-- The explicit column list keeps this insert valid if the table definition is reordered or extended.
-- The timestamp uses the ISO 8601 'T' form: a datetime literal written as 'yyyy-mm-dd hh:mm:ss'
-- is interpreted according to SET LANGUAGE / SET DATEFORMAT, while the 'T' form is not.
Insert Into @YourTable ([ImportId],[SurveyId],[DateImported],[ResponseData]) Values
(1,11223344,'2017-05-21T18:00:00','<survey><data><result><id>12345</id><date_submitted>2017-05-10 09:30:15</date_submitted><url_variables><respondent_id><key>respondent_id</key><value>987654</value><type>url</type></respondent_id><respondent_level><key>respondent_level</key><value>5</value><type>url</type></respondent_level></url_variables><survey_data><question><id>1</id><answer>Yes</answer></question><question><id>2</id><answer>No</answer></question><question><id>3</id><subquestions><subquestion><id>4</id><answer>Maybe</answer></subquestion><subquestion><id>5</id><answer>I don''t know</answer></subquestion></subquestions></question></survey_data></result><result><id>67890</id><date_submitted>2017-05-11 10:00:00</date_submitted><url_variables><respondent_id><key>respondent_id</key><value>34567</value><type>url</type></respondent_id><respondent_level><key>respondent_level</key><value>10</value><type>url</type></respondent_level></url_variables><survey_data><question><id>1</id><answer>No</answer></question><question><id>2</id><answer>Yes</answer></question><question><id>3</id><subquestions><subquestion><id>4</id><answer>Definitely not</answer></subquestion><subquestion><id>5</id><answer>I object</answer></subquestion></subquestions></question></survey_data></result></data></survey>');
-- Result header rows: emits one row per <result> element found in each import's XML,
-- carrying the parent row's SurveyId alongside the values read from that <result>.
Select res.x.value('(id)[1]','int')                                    As [ResultID]
      ,src.[SurveyId]
      ,res.x.value('(url_variables/respondent_id/value)[1]','int')     As [RespondentId]
      ,res.x.value('(url_variables/respondent_level/value)[1]','int')  As [RespondentLevel]
      ,res.x.value('(date_submitted)[1]','datetime')                   As [DateSubmitted]
 From  @YourTable src
 -- nodes() yields one row per matching <result>; Cross Apply pairs each with its source row.
 Cross Apply src.[ResponseData].nodes('survey/data/result') res(x);
-- Answer rows: one row per <question>, or one row per <subquestion> when a question has any.
-- Answers are read as nvarchar(max) so long or non-ANSI answer text is not truncated
-- or mangled (the previous varchar(50) silently cut answers off at 50 characters).
Select res.x.value('(id)[1]','int')                         As [ResultID]
      ,IsNull(qst.x.value('(id)[1]','int'),0)               As [QuestionId]
       -- Outer Apply leaves sub.x NULL for questions without subquestions; report those as 0.
      ,IsNull(sub.x.value('(id)[1]','int'),0)               As [SubquestionId]
       -- concat() treats NULL as an empty string, so whichever of the two answers exists
       -- comes through. NOTE(review): a question carrying both its own <answer> and
       -- subquestion answers would get the two texts glued together; confirm that cannot occur.
      ,concat(qst.x.value('(answer)[1]','nvarchar(max)')
             ,sub.x.value('(answer)[1]','nvarchar(max)'))   As [answer]
 From  @YourTable src
 Cross Apply src.[ResponseData].nodes('survey/data/result') res(x)
 Cross Apply res.x.nodes('survey_data/question') qst(x)
 Outer Apply qst.x.nodes('subquestions/subquestion') sub(x);
Returns