SQL XML 在 T-SQL 中解析
SQL XML Parsing in T-SQL
我有一张 SQL Server 表,其中某个 XML 列的数据如下所示,表中会有多条记录。
假设表名为 T1,XML 列名为 C1。
我们的需求是:对 XML 中的每一类节点(如 AAA、BB、CC),分别取出日期最新的那个节点的数据(即 dateReported、dateCreated、dateUpdated 取最大值的节点)。
<XMLDoc>
<AAA>
<Name>Name_A</Name>
<Value>Val_A</Value>
<dateReported>1/1/2001</dateReported>
</AAA>
<AAA>
<Name>Name_B</Name>
<Value>Val_B</Value>
<dateReported>1/1/2014</dateReported>
</AAA>
<AAA>
<Name>Name_C</Name>
<Value>Val_C</Value>
<dateReported>1/1/2012</dateReported>
</AAA>
<AAA>
<Name>Name_D</Name>
<Value>Val_D</Value>
<dateReported>1/1/2011</dateReported>
</AAA>
<BB>
<ID>112</ID>
<dateCreated>1/1/2011</dateCreated>
</BB>
<BB>
<ID>121</ID>
<dateCreated>1/1/2012</dateCreated>
</BB>
<BB>
<ID>12</ID>
<dateCreated>1/1/2015</dateCreated>
</BB>
<CC>
<Type>XML</Type>
<dateUpdated>1/1/2015</dateCreated>
</CC>
<CC>
<Type>TXT</Type>
<dateUpdated>3/3/2015</dateUpdated>
</CC>
<CC>
<Type>XLS</Type>
<dateUpdated>2/2/2015</dateUpdated>
</CC>
</XMLDoc>
我有SQL写在下面
-- Asker's current attempt. For every row of T1 it reads one value per node
-- type from the XML column C1; the [1] predicate picks the first match in
-- document order, so no date element is consulted here.
SELECT
    ID,
    C1.value('(/XMLDoc/AAA/Name)[1]', 'varchar(100)') AS Name,
    C1.value('(/XMLDoc/AAA/Value)[1]', 'varchar(100)') AS Value,
    C1.value('(/XMLDoc/BB/ID)[1]', 'int') AS BB_ID,
    C1.value('(/XMLDoc/CC/Type)[1]', 'varchar(50)') AS CC_Type
FROM T1
我们需要输出为
Id Name, Value, BB_ID, CC_Type
1 Name_B Val_B 12 TXT
需要您的意见
你的 XML 示例问题很多:不少结束标签并不是真正的结束标签,有几个与开始标签不匹配,中间还有一个没有对应开始标签的 </x>……
下次发帖时,请确保贴出的是真正有效的 XML!
尝试使用此代码:
-- Test fixture: a table variable holding one XML document per ID.
DECLARE @T1 TABLE (ID INT NOT NULL, XmlContent XML);

-- Explicit column list so the insert does not depend on the column order of @T1.
-- The document holds four <AAA>, three <BB> and three <CC> nodes, each with its
-- own date element (dateReported / dateCreated / dateUpdated).
INSERT INTO @T1 (ID, XmlContent) VALUES (1, '<XMLDoc>
<AAA>
<Name>Name_A</Name>
<Value>Val_A</Value>
<dateReported>1/1/2001</dateReported>
</AAA>
<AAA>
<Name>Name_B</Name>
<Value>Val_B</Value>
<dateReported>1/1/2014</dateReported>
</AAA>
<AAA>
<Name>Name_C</Name>
<Value>Val_C</Value>
<dateReported>1/1/2012</dateReported>
</AAA>
<AAA>
<Name>Name_D</Name>
<Value>Val_D</Value>
<dateReported>1/1/2011</dateReported>
</AAA>
<BB>
<ID>112</ID>
<dateCreated>1/1/2011</dateCreated>
</BB>
<BB>
<ID>121</ID>
<dateCreated>1/1/2012</dateCreated>
</BB>
<BB>
<ID>12</ID>
<dateCreated>1/1/2015</dateCreated>
</BB>
<CC>
<Type>XML</Type>
<dateUpdated>1/1/2015</dateUpdated>
</CC>
<CC>
<Type>TXT</Type>
<dateUpdated>3/3/2015</dateUpdated>
</CC>
<CC>
<Type>XLS</Type>
<dateUpdated>2/2/2015</dateUpdated>
</CC>
</XMLDoc>');
-- Returns one row per @T1.ID with the Name/Value of the latest <AAA> node,
-- the ID of the latest <BB> node and the Type of the latest <CC> node.
-- The leading semicolon terminates any preceding statement, as WITH requires.
;WITH XmlData AS
(
    -- One row per <AAA> node, dated by <dateReported>.
    SELECT
        ID,
        NodeType = 'AAA',
        RelevantDate = XC.value('(dateReported)[1]', 'datetime'),
        Name = XC.value('(Name)[1]', 'varchar(50)'),
        Value = XC.value('(Value)[1]', 'varchar(50)')
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/AAA') XT(XC)

    -- The three branches carry different NodeType constants, so they are
    -- disjoint and UNION ALL avoids a needless de-duplication step.
    UNION ALL

    -- One row per <BB> node, dated by <dateCreated>; its <ID> travels in Value.
    SELECT
        ID,
        NodeType = 'BB',
        RelevantDate = XC.value('(dateCreated)[1]', 'datetime'),
        Name = null,
        Value = XC.value('(ID)[1]', 'varchar(50)')
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/BB') XT(XC)

    UNION ALL

    -- One row per <CC> node, dated by <dateUpdated>; its <Type> travels in Name.
    SELECT
        ID,
        NodeType = 'CC',
        RelevantDate = XC.value('(dateUpdated)[1]', 'datetime'),
        Name = XC.value('(Type)[1]', 'varchar(50)'),
        Value = null
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/CC') XT(XC)
),
Ranked AS
(
    -- Rank nodes newest-first within each (ID, NodeType), so "latest" is decided
    -- per source row and exactly one node per type is kept even when dates tie.
    SELECT
        ID,
        NodeType,
        Name,
        Value,
        RowNum = ROW_NUMBER() OVER (PARTITION BY ID, NodeType ORDER BY RelevantDate DESC)
    FROM
        XmlData
)
-- Pivot the (up to) three surviving rows per ID into a single output row.
-- Each CASE restricts the aggregate to one node type, so AAA names are never
-- mixed with CC types and BB ids are never mixed with AAA values.
SELECT
    r.ID,
    Name = MAX(CASE WHEN r.NodeType = 'AAA' THEN r.Name END),
    Value = MAX(CASE WHEN r.NodeType = 'AAA' THEN r.Value END),
    BB_ID = MAX(CASE WHEN r.NodeType = 'BB' THEN CAST(r.Value AS int) END),
    CC_Type = MAX(CASE WHEN r.NodeType = 'CC' THEN r.Name END)
FROM
    Ranked r
WHERE
    r.RowNum = 1
GROUP BY
    r.ID;
这给我输出:
基本上,我根据你的 XML 创建了一个 CTE(公用表表达式),为每一种节点类型(<AAA>、<BB> 和 <CC> 节点)提取相关信息。然后从该 CTE 中查询,即可取得每组中日期为 MAX() 的记录。
我在互联网上搜索并找到处理最大值的好方法:
这是 table 我用过的:
-- Temporary table used by this answer: one XML document per ID.
CREATE TABLE #xmlTable
(
    ID      INT,
    xmlData XML
)
Select:
-- For each row of #xmlTable, return the Name/Value of the latest <AAA> node,
-- the ID of the latest <BB> node and the Type of the latest <CC> node.
-- The date elements are converted to datetime and ordered in T-SQL, so the
-- ordering is chronological. Comparing the untyped element text inside an
-- XQuery predicate would order 'M/D/YYYY' values as strings instead.
-- NOTE(review): the sample dates (1/1, 2/2, 3/3) do not reveal whether the
-- format is month/day or day/month; confirm against real data.
SELECT
    xt.ID
    , aaa.Name
    , aaa.Value
    , bb.BB_ID
    , cc.CC_Type
FROM #xmlTable AS xt
-- OUTER APPLY keeps the row (with NULLs) when a node type is absent from the XML.
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(Name)[1]', 'varchar(100)') AS Name
        , n.value('(Value)[1]', 'varchar(100)') AS Value
    FROM xt.xmlData.nodes('/XMLDoc/AAA') AS a(n)
    ORDER BY n.value('(dateReported)[1]', 'datetime') DESC
) AS aaa
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(ID)[1]', 'INT') AS BB_ID
    FROM xt.xmlData.nodes('/XMLDoc/BB') AS b(n)
    ORDER BY n.value('(dateCreated)[1]', 'datetime') DESC
) AS bb
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(Type)[1]', 'varchar(50)') AS CC_Type
    FROM xt.xmlData.nodes('/XMLDoc/CC') AS c(n)
    ORDER BY n.value('(dateUpdated)[1]', 'datetime') DESC
) AS cc
最终结果:
ID Name Value BB_ID CC_Type
1 Name_B Val_B 121 TXT
我在这个例子中找到的解决方案
我有一张 SQL Server 表,其中某个 XML 列的数据如下所示,表中会有多条记录。假设表名为 T1,XML 列名为 C1。
我们的需求是:对 XML 中的每一类节点(如 AAA、BB、CC),分别取出日期最新的那个节点的数据(即 dateReported、dateCreated、dateUpdated 取最大值的节点)。
<XMLDoc>
<AAA>
<Name>Name_A</Name>
<Value>Val_A</Value>
<dateReported>1/1/2001</dateReported>
</AAA>
<AAA>
<Name>Name_B</Name>
<Value>Val_B</Value>
<dateReported>1/1/2014</dateReported>
</AAA>
<AAA>
<Name>Name_C</Name>
<Value>Val_C</Value>
<dateReported>1/1/2012</dateReported>
</AAA>
<AAA>
<Name>Name_D</Name>
<Value>Val_D</Value>
<dateReported>1/1/2011</dateReported>
</AAA>
<BB>
<ID>112</ID>
<dateCreated>1/1/2011</dateCreated>
</BB>
<BB>
<ID>121</ID>
<dateCreated>1/1/2012</dateCreated>
</BB>
<BB>
<ID>12</ID>
<dateCreated>1/1/2015</dateCreated>
</BB>
<CC>
<Type>XML</Type>
<dateUpdated>1/1/2015</dateCreated>
</CC>
<CC>
<Type>TXT</Type>
<dateUpdated>3/3/2015</dateUpdated>
</CC>
<CC>
<Type>XLS</Type>
<dateUpdated>2/2/2015</dateUpdated>
</CC>
</XMLDoc>
我有SQL写在下面
-- Asker's current attempt. For every row of T1 it reads one value per node
-- type from the XML column C1; the [1] predicate picks the first match in
-- document order, so no date element is consulted here.
SELECT
    ID,
    C1.value('(/XMLDoc/AAA/Name)[1]', 'varchar(100)') AS Name,
    C1.value('(/XMLDoc/AAA/Value)[1]', 'varchar(100)') AS Value,
    C1.value('(/XMLDoc/BB/ID)[1]', 'int') AS BB_ID,
    C1.value('(/XMLDoc/CC/Type)[1]', 'varchar(50)') AS CC_Type
FROM T1
我们需要输出为
Id Name, Value, BB_ID, CC_Type
1 Name_B Val_B 12 TXT
需要您的意见
你的 XML 示例问题很多:不少结束标签并不是真正的结束标签,有几个与开始标签不匹配,中间还有一个没有对应开始标签的 </x>……
下次发帖时,请确保贴出的是真正有效的 XML!
尝试使用此代码:
-- Test fixture: a table variable holding one XML document per ID.
DECLARE @T1 TABLE (ID INT NOT NULL, XmlContent XML);

-- Explicit column list so the insert does not depend on the column order of @T1.
-- The document holds four <AAA>, three <BB> and three <CC> nodes, each with its
-- own date element (dateReported / dateCreated / dateUpdated).
INSERT INTO @T1 (ID, XmlContent) VALUES (1, '<XMLDoc>
<AAA>
<Name>Name_A</Name>
<Value>Val_A</Value>
<dateReported>1/1/2001</dateReported>
</AAA>
<AAA>
<Name>Name_B</Name>
<Value>Val_B</Value>
<dateReported>1/1/2014</dateReported>
</AAA>
<AAA>
<Name>Name_C</Name>
<Value>Val_C</Value>
<dateReported>1/1/2012</dateReported>
</AAA>
<AAA>
<Name>Name_D</Name>
<Value>Val_D</Value>
<dateReported>1/1/2011</dateReported>
</AAA>
<BB>
<ID>112</ID>
<dateCreated>1/1/2011</dateCreated>
</BB>
<BB>
<ID>121</ID>
<dateCreated>1/1/2012</dateCreated>
</BB>
<BB>
<ID>12</ID>
<dateCreated>1/1/2015</dateCreated>
</BB>
<CC>
<Type>XML</Type>
<dateUpdated>1/1/2015</dateUpdated>
</CC>
<CC>
<Type>TXT</Type>
<dateUpdated>3/3/2015</dateUpdated>
</CC>
<CC>
<Type>XLS</Type>
<dateUpdated>2/2/2015</dateUpdated>
</CC>
</XMLDoc>');
-- Returns one row per @T1.ID with the Name/Value of the latest <AAA> node,
-- the ID of the latest <BB> node and the Type of the latest <CC> node.
-- The leading semicolon terminates any preceding statement, as WITH requires.
;WITH XmlData AS
(
    -- One row per <AAA> node, dated by <dateReported>.
    SELECT
        ID,
        NodeType = 'AAA',
        RelevantDate = XC.value('(dateReported)[1]', 'datetime'),
        Name = XC.value('(Name)[1]', 'varchar(50)'),
        Value = XC.value('(Value)[1]', 'varchar(50)')
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/AAA') XT(XC)

    -- The three branches carry different NodeType constants, so they are
    -- disjoint and UNION ALL avoids a needless de-duplication step.
    UNION ALL

    -- One row per <BB> node, dated by <dateCreated>; its <ID> travels in Value.
    SELECT
        ID,
        NodeType = 'BB',
        RelevantDate = XC.value('(dateCreated)[1]', 'datetime'),
        Name = null,
        Value = XC.value('(ID)[1]', 'varchar(50)')
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/BB') XT(XC)

    UNION ALL

    -- One row per <CC> node, dated by <dateUpdated>; its <Type> travels in Name.
    SELECT
        ID,
        NodeType = 'CC',
        RelevantDate = XC.value('(dateUpdated)[1]', 'datetime'),
        Name = XC.value('(Type)[1]', 'varchar(50)'),
        Value = null
    FROM
        @T1
    CROSS APPLY
        XmlContent.nodes('/XMLDoc/CC') XT(XC)
),
Ranked AS
(
    -- Rank nodes newest-first within each (ID, NodeType), so "latest" is decided
    -- per source row and exactly one node per type is kept even when dates tie.
    SELECT
        ID,
        NodeType,
        Name,
        Value,
        RowNum = ROW_NUMBER() OVER (PARTITION BY ID, NodeType ORDER BY RelevantDate DESC)
    FROM
        XmlData
)
-- Pivot the (up to) three surviving rows per ID into a single output row.
-- Each CASE restricts the aggregate to one node type, so AAA names are never
-- mixed with CC types and BB ids are never mixed with AAA values.
SELECT
    r.ID,
    Name = MAX(CASE WHEN r.NodeType = 'AAA' THEN r.Name END),
    Value = MAX(CASE WHEN r.NodeType = 'AAA' THEN r.Value END),
    BB_ID = MAX(CASE WHEN r.NodeType = 'BB' THEN CAST(r.Value AS int) END),
    CC_Type = MAX(CASE WHEN r.NodeType = 'CC' THEN r.Name END)
FROM
    Ranked r
WHERE
    r.RowNum = 1
GROUP BY
    r.ID;
这给我输出:
基本上,我根据你的 XML 创建了一个 CTE(公用表表达式),为每一种节点类型(<AAA>、<BB> 和 <CC> 节点)提取相关信息。然后从该 CTE 中查询,即可取得每组中日期为 MAX() 的记录。
我在互联网上搜索并找到处理最大值的好方法:
这是 table 我用过的:
-- Temporary table used by this answer: one XML document per ID.
CREATE TABLE #xmlTable
(
    ID      INT,
    xmlData XML
)
Select:
-- For each row of #xmlTable, return the Name/Value of the latest <AAA> node,
-- the ID of the latest <BB> node and the Type of the latest <CC> node.
-- The date elements are converted to datetime and ordered in T-SQL, so the
-- ordering is chronological. Comparing the untyped element text inside an
-- XQuery predicate would order 'M/D/YYYY' values as strings instead.
-- NOTE(review): the sample dates (1/1, 2/2, 3/3) do not reveal whether the
-- format is month/day or day/month; confirm against real data.
SELECT
    xt.ID
    , aaa.Name
    , aaa.Value
    , bb.BB_ID
    , cc.CC_Type
FROM #xmlTable AS xt
-- OUTER APPLY keeps the row (with NULLs) when a node type is absent from the XML.
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(Name)[1]', 'varchar(100)') AS Name
        , n.value('(Value)[1]', 'varchar(100)') AS Value
    FROM xt.xmlData.nodes('/XMLDoc/AAA') AS a(n)
    ORDER BY n.value('(dateReported)[1]', 'datetime') DESC
) AS aaa
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(ID)[1]', 'INT') AS BB_ID
    FROM xt.xmlData.nodes('/XMLDoc/BB') AS b(n)
    ORDER BY n.value('(dateCreated)[1]', 'datetime') DESC
) AS bb
OUTER APPLY
(
    SELECT TOP (1)
        n.value('(Type)[1]', 'varchar(50)') AS CC_Type
    FROM xt.xmlData.nodes('/XMLDoc/CC') AS c(n)
    ORDER BY n.value('(dateUpdated)[1]', 'datetime') DESC
) AS cc
最终结果:
ID Name Value BB_ID CC_Type
1 Name_B Val_B 121 TXT
我在这个例子中找到的解决方案