在 SQL 中拆解(shred)XML 时,CROSS APPLY 会产生多余的记录
Shredding XML in SQL but cross apply adds unwanted records
我需要把这个 XML 源拆解(shred)到一个 SQL Server 表中。每个源文件包含多个 ResponseId,每个 ResponseId 下又有多个 TextAnalyticsItem。我想限制结果,只取得属于各自 ResponseId 的 TextAnalyticsItem。但是,我使用的 CROSS APPLY 写法返回了所有 ResponseId 与所有 TextAnalyticsItem 的组合。怎样才能避免这些多余的记录?
-- Sample document: two <Response> elements. The first one owns two
-- <TextAnalyticsItem> children, the second one owns a single child.
DECLARE @XMLToParse XML = '
<Responses>
<Response>
<ResponseId>7662934</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234909</RespondentId>
<QuestionId>141757</QuestionId>
<ScaleId>3401</ScaleId>
<AnswerId>17130</AnswerId>
<ResponseMemo>Useful</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-06T09:07:40</CompletedDate>
<ModifiedDate>2020-07-06T09:07:41</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Values Standards</Level1>
<Level2>Better/Best/Brilliant</Level2>
<Level3>Positive</Level3>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
<TextAnalyticsItem>
<Level1>All-Behaviors</Level1>
<Level2>Positive_</Level2>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
<Response>
<ResponseId>7662078</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234826</RespondentId>
<QuestionId>141756</QuestionId>
<ScaleId>3400</ScaleId>
<AnswerId>17129</AnswerId>
<ResponseMemo>Ghjlkk</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-03T07:17:31</CompletedDate>
<ModifiedDate>2020-07-03T07:17:31</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Nonactionable</Level1>
<Sentiment>0</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
</Responses>';

-- Pairs every TextAnalyticsItem with every Response.
-- Both nodes() calls start from @XMLToParse, so the two rowsets are not
-- correlated with each other: the 3 items are combined with the 2 responses
-- and 6 rows come back instead of the 3 parent/child pairs.
SELECT
    resp.resp_node.value('ResponseId[1]', 'VARCHAR(100)')        AS ResponseId,
    resp.resp_node.value('SurveyId[1]', 'VARCHAR(100)')          AS SurveyId,
    resp.resp_node.value('RespondentId[1]', 'VARCHAR(100)')      AS RespondentId,
    -- String value of the whole element, i.e. the text of all its descendants run together.
    resp.resp_node.value('TextAnalyticsData[1]', 'VARCHAR(100)') AS TextAnalyticsData,
    item.item_node.value('Level1[1]', 'VARCHAR(100)')            AS Level1,
    item.item_node.value('Level2[1]', 'VARCHAR(100)')            AS Level2,
    item.item_node.value('Level3[1]', 'VARCHAR(100)')            AS Level3,
    item.item_node.value('Sentiment[1]', 'VARCHAR(100)')         AS Sentiment
FROM @XMLToParse.nodes('Responses/Response/TextAnalyticsData/TextAnalyticsItem') AS item(item_node)
CROSS APPLY @XMLToParse.nodes('Responses/Response') AS resp(resp_node)
ORDER BY
    ResponseId,
    RespondentId;
也就是说,我不想得到下面这样的结果:
ResponseId SurveyId RespondentId TextAnalyticsData Level1 Level2 Level3 Sentiment
---------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------
7662078 123 234826 Nonactionable0 Values Standards Better/Best/BrilliPositive 1
7662078 123 234826 Nonactionable0 All-Behaviors Positive_ NULL 1
7662078 123 234826 Nonactionable0 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Values Standards Better/Best/BrilliPositive 1
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 All-Behaviors Positive_ NULL 1
我想得到
ResponseId SurveyId RespondentId TextAnalyticsData Level1 Level2 Level3 Sentiment
---------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------
7662078 123 234826 Nonactionable0 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Values Standards Better/Best/BrilliPositive 1
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 All-Behaviors Positive_ NULL 1
有什么建议吗?谢谢
请参考下面的写法,它可以满足您的需求。
它模拟一对多关系。
我不确定 TextAnalyticsData 列的目的是什么。这就是我注释掉它的原因。
SQL
-- Sample document: two <Response> elements. The first one owns two
-- <TextAnalyticsItem> children, the second one owns a single child.
DECLARE @XMLToParse XML;
SET @XMLToParse =
N'<Responses>
<Response>
<ResponseId>7662934</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234909</RespondentId>
<QuestionId>141757</QuestionId>
<ScaleId>3401</ScaleId>
<AnswerId>17130</AnswerId>
<ResponseMemo>Useful</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-06T09:07:40</CompletedDate>
<ModifiedDate>2020-07-06T09:07:41</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Values Standards</Level1>
<Level2>Better/Best/Brilliant</Level2>
<Level3>Positive</Level3>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
<TextAnalyticsItem>
<Level1>All-Behaviors</Level1>
<Level2>Positive_</Level2>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
<Response>
<ResponseId>7662078</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234826</RespondentId>
<QuestionId>141756</QuestionId>
<ScaleId>3400</ScaleId>
<AnswerId>17129</AnswerId>
<ResponseMemo>Ghjlkk</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-03T07:17:31</CompletedDate>
<ModifiedDate>2020-07-03T07:17:31</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Nonactionable</Level1>
<Sentiment>0</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
</Responses>';

-- One row per TextAnalyticsItem, paired only with the Response that contains it.
-- The second nodes() call is evaluated relative to the current Response node
-- (resp.resp_node), so each Response is joined to its own items only.
SELECT
    resp.resp_node.value('(ResponseId/text())[1]', 'VARCHAR(100)')   AS ResponseId,
    resp.resp_node.value('(SurveyId/text())[1]', 'VARCHAR(100)')     AS SurveyId,
    resp.resp_node.value('(RespondentId/text())[1]', 'VARCHAR(100)') AS RespondentId,
    -- TextAnalyticsData column deliberately left out (its purpose is unclear):
    -- resp.resp_node.value('TextAnalyticsData[1]', 'VARCHAR(100)') AS TextAnalyticsData,
    item.item_node.value('(Level1/text())[1]', 'VARCHAR(100)')       AS Level1,
    item.item_node.value('(Level2/text())[1]', 'VARCHAR(100)')       AS Level2,
    item.item_node.value('(Level3/text())[1]', 'VARCHAR(100)')       AS Level3,
    item.item_node.value('(Sentiment/text())[1]', 'VARCHAR(100)')    AS Sentiment
FROM @XMLToParse.nodes('/Responses/Response') AS resp(resp_node)
-- The [Level1/text()] predicate keeps only items whose Level1 element has a text node.
CROSS APPLY resp.resp_node.nodes('TextAnalyticsData/TextAnalyticsItem[Level1/text()]') AS item(item_node);
-- Sorting is disabled here; to restore it, append
-- "ORDER BY ResponseId, RespondentId" before the terminating semicolon.
输出
+------------+----------+--------------+------------------+-----------------------+----------+-----------+
| ResponseId | SurveyId | RespondentId | Level1 | Level2 | Level3 | Sentiment |
+------------+----------+--------------+------------------+-----------------------+----------+-----------+
| 7662934 | 123 | 234909 | Values Standards | Better/Best/Brilliant | Positive | 1 |
| 7662934 | 123 | 234909 | All-Behaviors | Positive_ | NULL | 1 |
| 7662078 | 123 | 234826 | Nonactionable | NULL | NULL | 0 |
+------------+----------+--------------+------------------+-----------------------+----------+-----------+
我需要把这个 XML 源拆解(shred)到一个 SQL Server 表中。每个源文件包含多个 ResponseId,每个 ResponseId 下又有多个 TextAnalyticsItem。我想限制结果,只取得属于各自 ResponseId 的 TextAnalyticsItem。但是,我使用的 CROSS APPLY 写法返回了所有 ResponseId 与所有 TextAnalyticsItem 的组合。怎样才能避免这些多余的记录?
-- Sample document: two <Response> elements. The first one owns two
-- <TextAnalyticsItem> children, the second one owns a single child.
DECLARE @XMLToParse XML = '
<Responses>
<Response>
<ResponseId>7662934</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234909</RespondentId>
<QuestionId>141757</QuestionId>
<ScaleId>3401</ScaleId>
<AnswerId>17130</AnswerId>
<ResponseMemo>Useful</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-06T09:07:40</CompletedDate>
<ModifiedDate>2020-07-06T09:07:41</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Values Standards</Level1>
<Level2>Better/Best/Brilliant</Level2>
<Level3>Positive</Level3>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
<TextAnalyticsItem>
<Level1>All-Behaviors</Level1>
<Level2>Positive_</Level2>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
<Response>
<ResponseId>7662078</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234826</RespondentId>
<QuestionId>141756</QuestionId>
<ScaleId>3400</ScaleId>
<AnswerId>17129</AnswerId>
<ResponseMemo>Ghjlkk</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-03T07:17:31</CompletedDate>
<ModifiedDate>2020-07-03T07:17:31</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Nonactionable</Level1>
<Sentiment>0</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
</Responses>';

-- Pairs every TextAnalyticsItem with every Response.
-- Both nodes() calls start from @XMLToParse, so the two rowsets are not
-- correlated with each other: the 3 items are combined with the 2 responses
-- and 6 rows come back instead of the 3 parent/child pairs.
SELECT
    resp.resp_node.value('ResponseId[1]', 'VARCHAR(100)')        AS ResponseId,
    resp.resp_node.value('SurveyId[1]', 'VARCHAR(100)')          AS SurveyId,
    resp.resp_node.value('RespondentId[1]', 'VARCHAR(100)')      AS RespondentId,
    -- String value of the whole element, i.e. the text of all its descendants run together.
    resp.resp_node.value('TextAnalyticsData[1]', 'VARCHAR(100)') AS TextAnalyticsData,
    item.item_node.value('Level1[1]', 'VARCHAR(100)')            AS Level1,
    item.item_node.value('Level2[1]', 'VARCHAR(100)')            AS Level2,
    item.item_node.value('Level3[1]', 'VARCHAR(100)')            AS Level3,
    item.item_node.value('Sentiment[1]', 'VARCHAR(100)')         AS Sentiment
FROM @XMLToParse.nodes('Responses/Response/TextAnalyticsData/TextAnalyticsItem') AS item(item_node)
CROSS APPLY @XMLToParse.nodes('Responses/Response') AS resp(resp_node)
ORDER BY
    ResponseId,
    RespondentId;
也就是说,我不想得到下面这样的结果:
ResponseId SurveyId RespondentId TextAnalyticsData Level1 Level2 Level3 Sentiment
---------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------
7662078 123 234826 Nonactionable0 Values Standards Better/Best/BrilliPositive 1
7662078 123 234826 Nonactionable0 All-Behaviors Positive_ NULL 1
7662078 123 234826 Nonactionable0 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Values Standards Better/Best/BrilliPositive 1
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 All-Behaviors Positive_ NULL 1
我想得到
ResponseId SurveyId RespondentId TextAnalyticsData Level1 Level2 Level3 Sentiment
---------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------
7662078 123 234826 Nonactionable0 Nonactionable NULL NULL 0
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 Values Standards Better/Best/BrilliPositive 1
7662934 123 234909 Values StandardsBetter/Best/BrilliantPositive1All-BehaviorsPositive_1 All-Behaviors Positive_ NULL 1
有什么建议吗?谢谢
请参考下面的写法,它可以满足您的需求。 它模拟了一对多关系。
我不确定 TextAnalyticsData 列的目的是什么。这就是我注释掉它的原因。
SQL
-- Sample document: two <Response> elements. The first one owns two
-- <TextAnalyticsItem> children, the second one owns a single child.
DECLARE @XMLToParse XML;
SET @XMLToParse =
N'<Responses>
<Response>
<ResponseId>7662934</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234909</RespondentId>
<QuestionId>141757</QuestionId>
<ScaleId>3401</ScaleId>
<AnswerId>17130</AnswerId>
<ResponseMemo>Useful</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-06T09:07:40</CompletedDate>
<ModifiedDate>2020-07-06T09:07:41</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Values Standards</Level1>
<Level2>Better/Best/Brilliant</Level2>
<Level3>Positive</Level3>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
<TextAnalyticsItem>
<Level1>All-Behaviors</Level1>
<Level2>Positive_</Level2>
<Sentiment>1</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
<Response>
<ResponseId>7662078</ResponseId>
<SurveyId>123</SurveyId>
<RespondentId>234826</RespondentId>
<QuestionId>141756</QuestionId>
<ScaleId>3400</ScaleId>
<AnswerId>17129</AnswerId>
<ResponseMemo>Ghjlkk</ResponseMemo>
<ResponseRank>0</ResponseRank>
<ResponseState>0</ResponseState>
<CompletedDate>2020-07-03T07:17:31</CompletedDate>
<ModifiedDate>2020-07-03T07:17:31</ModifiedDate>
<LanguageId>220</LanguageId>
<ResponseNum>0</ResponseNum>
<ResponseDate />
<TextAnalyticsData>
<TextAnalyticsItem>
<Level1>Nonactionable</Level1>
<Sentiment>0</Sentiment>
</TextAnalyticsItem>
</TextAnalyticsData>
</Response>
</Responses>';

-- One row per TextAnalyticsItem, paired only with the Response that contains it.
-- The second nodes() call is evaluated relative to the current Response node
-- (resp.resp_node), so each Response is joined to its own items only.
SELECT
    resp.resp_node.value('(ResponseId/text())[1]', 'VARCHAR(100)')   AS ResponseId,
    resp.resp_node.value('(SurveyId/text())[1]', 'VARCHAR(100)')     AS SurveyId,
    resp.resp_node.value('(RespondentId/text())[1]', 'VARCHAR(100)') AS RespondentId,
    -- TextAnalyticsData column deliberately left out (its purpose is unclear):
    -- resp.resp_node.value('TextAnalyticsData[1]', 'VARCHAR(100)') AS TextAnalyticsData,
    item.item_node.value('(Level1/text())[1]', 'VARCHAR(100)')       AS Level1,
    item.item_node.value('(Level2/text())[1]', 'VARCHAR(100)')       AS Level2,
    item.item_node.value('(Level3/text())[1]', 'VARCHAR(100)')       AS Level3,
    item.item_node.value('(Sentiment/text())[1]', 'VARCHAR(100)')    AS Sentiment
FROM @XMLToParse.nodes('/Responses/Response') AS resp(resp_node)
-- The [Level1/text()] predicate keeps only items whose Level1 element has a text node.
CROSS APPLY resp.resp_node.nodes('TextAnalyticsData/TextAnalyticsItem[Level1/text()]') AS item(item_node);
-- Sorting is disabled here; to restore it, append
-- "ORDER BY ResponseId, RespondentId" before the terminating semicolon.
输出
+------------+----------+--------------+------------------+-----------------------+----------+-----------+
| ResponseId | SurveyId | RespondentId | Level1 | Level2 | Level3 | Sentiment |
+------------+----------+--------------+------------------+-----------------------+----------+-----------+
| 7662934 | 123 | 234909 | Values Standards | Better/Best/Brilliant | Positive | 1 |
| 7662934 | 123 | 234909 | All-Behaviors | Positive_ | NULL | 1 |
| 7662078 | 123 | 234826 | Nonactionable | NULL | NULL | 0 |
+------------+----------+--------------+------------------+-----------------------+----------+-----------+