如何在 SQL Server 中检查字符串是否为格式正确的 XML?
How to check whether a string is well-formed XML in SQL Server?
我有一个数据库,其中某张表含有一个无架构(非类型化)的 XML 列,而该列中存有任意的非 XML 数据(纯文本)。下面是一个用于创建并填充此类表的示例脚本:
-- Sample table from the question: an identity key plus an untyped XML column.
CREATE TABLE TestTable
(
    ID        INT NOT NULL IDENTITY (1, 1),
    XmlColumn XML NOT NULL,
    CONSTRAINT [PK_TestTable]
        PRIMARY KEY CLUSTERED (ID ASC) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Seed rows: two XML documents and one plain-text value.
INSERT INTO TestTable (XmlColumn)
VALUES ('<root><child /></root>');

INSERT INTO TestTable (XmlColumn)
VALUES ('Foo, this is not XML');

INSERT INTO TestTable (XmlColumn)
VALUES ('<root><parent><child /></parent></root>');
GO
我如何(首选)强制只允许写入格式正确的 XML?
否则,我如何找出哪些条目格式不正确,并将它们置为 NULL?
我读过几篇帖子(例如 stackoverflow.com/questions/14753119),它们建议将 CAST / CONVERT 与 TRY...CATCH 结合使用,但我从未得到任何异常,CAST / CONVERT 总是成功:
-- Probe: try the conversion inside TRY...CATCH.
-- @isValid starts at 1 and is only cleared if CONVERT raises an error.
DECLARE @xml     XML,
        @isValid BIT = 1;

BEGIN TRY
    SET @xml = CONVERT(xml, 'Foo');
END TRY
BEGIN CATCH
    SET @isValid = 0;
END CATCH;

SELECT @isValid; -- returns 1
有什么想法吗?
奇怪的是,SQL Server 确实可以将 'foo' 之类的值转换为 XML,因此仅靠尝试转换并不能解决问题。不过,您可以检查该值是否以 '<' 开头并以 '>' 结尾(格式正确的 XML 应当如此),同时执行 TRY_CONVERT:
-- A value is flagged valid (1) only when it both matches the '<...>' shape
-- and can be parsed by TRY_CONVERT; everything else yields 0.
SELECT
    CASE
        WHEN V.XMLColumn NOT LIKE '<%>' THEN 0
        WHEN TRY_CONVERT(xml, V.XMLColumn) IS NULL THEN 0
        ELSE 1
    END AS IsValid
FROM (
    VALUES
        ('<root><child /></root>'),
        ('Foo, this is not XML'),
        ('<root><parent><child /></parent></root>')
) AS V (XMLColumn);
您可以尝试以下三种方法。
方法#:1
我们将检查是否至少存在一个根级元素节点。
顺便说一句,SQL Server 的 XML 类型允许存储并非格式正确文档的内容,即没有单一根元素的 XML 片段。这就是我将该用例加入示例数据的原因。
此外,为了完整起见,我还添加了一条 XML 注释。
结果的含义:
- 2(或任何大于 1 的数字)- XML 片段
- 1 - 格式正确的 XML(注意:此方法只统计根级元素,不检测根级文本)
- 0 - 没有任何 XML 元素,只有文本或注释节点。
SQL
-- DDL and sample data population, start
DECLARE @TestTable TABLE
(
    ID        INT IDENTITY (1, 1) PRIMARY KEY,
    XmlColumn XML NOT NULL
);

INSERT INTO @TestTable (XmlColumn)
VALUES
    (N'<root><child /></root>'),
    (N'<city>Miami</city><city>Orlando</city>'),
    (N'Foo, this is not XML'),
    (N'<root><child /></root>Foo'),
    (N'<!-- -->'),
    (N'<root><parent><child /></parent></root>');
-- DDL and sample data population, end

-- Result is the number of element nodes found at the root level of each value.
SELECT
    ID,
    XmlColumn,
    XmlColumn.value('count(/*)', 'INT') AS Result
FROM @TestTable;
输出
+----+-----------------------------------------+-----------+
| ID | XmlColumn | Result |
+----+-----------------------------------------+-----------+
| 1 | <root><child /></root> | 1 |
| 2 | <city>Miami</city><city>Orlando</city> | 2 |
| 3 | Foo, this is not XML | 0 |
| 4 | <root><child /></root>Foo | 1 |
| 5 | <!-- --> | 0 |
| 6 | <root><parent><child /></parent></root> | 1 |
+----+-----------------------------------------+-----------+
方法#:2
使用 XQuery 的 instance of element() 构造。
-- Method 2: classify each value with XQuery.
--   Result        : number of root-level element nodes (one <r/> is emitted per element, then counted).
--   [well-formed] : label produced by comparing the whole instance with its first root-level element.
-- Fix: the "else" label previously read "not well- formed" (stray space), which did not match
-- the documented output "not well-formed".
-- NOTE(review): the `eq` comparison appears to work on atomized string values, so two sibling
-- root elements with identical text (e.g. <a/><b/>) would presumably be reported as
-- "well-formed" — confirm before relying on this column alone.
SELECT *
, XmlColumn.query('<root>{
for $x in /*
return
if ($x instance of element()) then <r/> else ()
}</root>').value('count(/root/r)', 'INT') AS Result
, XmlColumn.query('for $x in .
return if ($x eq /*[1]) then "well-formed" else "not well-formed"').value('.','VARCHAR(20)') AS [well-formed]
FROM @TestTable;
输出
+----+-----------------------------------------+--------+-----------------+
| ID | XmlColumn | Result | well-formed |
+----+-----------------------------------------+--------+-----------------+
| 1 | <root><child /></root> | 1 | well-formed |
| 2 | <city>Miami</city><city>Orlando</city> | 2 | not well-formed |
| 3 | Foo, this is not XML | 0 | not well-formed |
| 4 | <root><child /></root>Foo | 1 | not well-formed |
| 5 | <!-- --> | 0 | not well-formed |
| 6 | <root><parent><child /></parent></root> | 1 | well-formed |
+----+-----------------------------------------+--------+-----------------+
方法#: 3
一个完整的解决方案。该算法基于计数比较:任何类型节点的计数与仅元素的计数。此外,它在 NodeList 列中提供了节点类型的细分,以便于了解正在发生的事情。
-- Method 3: compare the count of all root-level nodes with the count of root-level elements.
--   NodeCount    : root-level nodes of any kind.
--   ElementCount : root-level element nodes only.
--   NodeList     : one label per root-level node, in document order (xml-typed result of query()).
-- Fix: NodeList is of type xml, and xml cannot be implicitly converted to a character type,
-- so it is cast explicitly before LEFT() is applied.
;WITH rs AS
(
SELECT *
, XmlColumn.value('count(/node())', 'INT') AS NodeCount -- any type of nodes
, XmlColumn.value('count(/*)', 'INT') AS ElementCount -- elements only
, XmlColumn.query('
for $x in /node()
return if ($x instance of element()) then text {"element()"}
else if ($x instance of text()) then text {"text()"}
else if ($x instance of comment()) then text {"comment()"}
else if ($x instance of processing-instruction()) then text {"processing-instruction()"}
else ()
') AS NodeList
FROM @TestTable
)
SELECT *
, CASE
-- exactly one root-level node and it is an element
WHEN NodeCount = 1 AND ElementCount = 1 THEN 'well-formed'
-- a leading processing instruction followed by the single root element
WHEN NodeCount = 2 AND ElementCount = 1
AND LEFT(CAST(NodeList AS VARCHAR(MAX)), 24) = 'processing-instruction()' THEN 'well-formed'
-- several root-level nodes, all of them elements
WHEN NodeCount > 1 AND (ElementCount = NodeCount) THEN 'XML fragment'
-- at least one root-level node is not an element
WHEN NodeCount > ElementCount THEN 'not well-formed'
ELSE '???'
END AS Result
FROM rs;
输出
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
| ID | XmlColumn | NodeCount | ElementCount | NodeList | Result |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
| 1 | <root><child /></root> | 1 | 1 | element() | well-formed |
| 2 | <city>Miami</city><city>Orlando</city> | 2 | 2 | element()element() | XML fragment |
| 3 | Foo, this is not XML | 1 | 0 | text() | not well-formed |
| 4 | <root><child /></root>Foo | 2 | 1 | element()text() | not well-formed |
| 5 | <!-- --> | 1 | 0 | comment() | not well-formed |
| 6 | <root><parent><child /></parent></root> | 1 | 1 | element() | well-formed |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
-- Work in tempdb and start from a clean slate.
use tempdb
go
drop table if exists TestTable;
drop table if exists TestTablewithcheck;
drop function if exists dbo.mywellformedxml
go
-- Unconstrained source table holding the sample values.
create table TestTable
(
    ID        int not null identity (1, 1),
    XmlColumn xml not null,
    constraint [PK_TestTable]
        primary key clustered (ID asc) on [PRIMARY]
) on [PRIMARY]
GO
-- Nine sample values, inserted one by one so the IDs follow this order.
insert into TestTable (XmlColumn)
values ('<root><child /></root>');

insert into TestTable (XmlColumn)
values ('Foo, this is not XML');

insert into TestTable (XmlColumn)
values ('<root><parent><child /></parent></root>');

insert into TestTable (XmlColumn)
values ('<root><child /></root>Foo, this is not XML');

insert into TestTable (XmlColumn)
values ('<root xmlns="test"><child /></root>');

insert into TestTable (XmlColumn)
values ('<!-- comment -->');

insert into TestTable (XmlColumn)
values ('<!-- comment --><root><child /></root>');

insert into TestTable (XmlColumn)
values ('<noroot><child /></noroot><noroot><child /></noroot>');

insert into TestTable (XmlColumn)
values ('<?pi my processing instruction?>');
GO
-- Returns 1 when @xml has exactly one root-level element and no root-level text node;
-- returns 0 otherwise (including when any of the checks evaluates to NULL).
create or alter function dbo.mywellformedxml(@xml xml)
returns bit
with schemabinding
as
begin
    return
        case
            when @xml.exist('/*[1]') = 1    -- a first root-level element exists
             and @xml.exist('/*[2]') = 0    -- and there is no second one
             and @xml.exist('text()') = 0   -- and no text node sits beside it
            then 1
            else 0
        end
end
go
-- Same shape as TestTable, but every XmlColumn value must pass dbo.mywellformedxml.
create table TestTablewithcheck
(
    ID        int not null identity (1, 1),
    XmlColumn xml not null,
    constraint [PK_TestTablewithcheck]
        primary key clustered (ID asc) on [PRIMARY],
    constraint chkwfxml
        check (dbo.mywellformedxml(XmlColumn) = 1)
) on [PRIMARY]
GO
-- Copy the sample rows (IDs 1 through 9) one INSERT at a time, so each row is
-- evaluated against chkwfxml in its own statement.
declare @i int = 1;

while @i <= 9
begin
    insert into TestTablewithcheck (XmlColumn)
    select src.XmlColumn
    from TestTable as src
    where src.id = @i;

    set @i += 1;
end
go
-- Rows that were accepted by the check constraint.
select
    ID,
    XmlColumn
from TestTablewithcheck;
go
-- Every sample row together with the function's verdict.
select
    ID,
    XmlColumn,
    dbo.mywellformedxml(XmlColumn) as wfxml
from TestTable
go
-- Cleanup.
drop table if exists TestTable;
drop table if exists TestTablewithcheck;
drop function if exists dbo.mywellformedxml
go
我有一个数据库,其中某张表含有一个无架构(非类型化)的 XML 列,而该列中存有任意的非 XML 数据(纯文本)。下面是一个用于创建并填充此类表的示例脚本:
-- Sample table from the question: an identity key plus an untyped XML column.
CREATE TABLE TestTable
(
    ID        INT NOT NULL IDENTITY (1, 1),
    XmlColumn XML NOT NULL,
    CONSTRAINT [PK_TestTable]
        PRIMARY KEY CLUSTERED (ID ASC) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Seed rows: two XML documents and one plain-text value.
INSERT INTO TestTable (XmlColumn)
VALUES ('<root><child /></root>');

INSERT INTO TestTable (XmlColumn)
VALUES ('Foo, this is not XML');

INSERT INTO TestTable (XmlColumn)
VALUES ('<root><parent><child /></parent></root>');
GO
我如何(首选)强制只允许写入格式正确的 XML?
否则,我如何找出哪些条目格式不正确,并将它们置为 NULL?
我读过几篇帖子(例如 stackoverflow.com/questions/14753119),它们建议将 CAST / CONVERT 与 TRY...CATCH 结合使用,但我从未得到任何异常,CAST / CONVERT 总是成功:
-- Probe: try the conversion inside TRY...CATCH.
-- @isValid starts at 1 and is only cleared if CONVERT raises an error.
DECLARE @xml     XML,
        @isValid BIT = 1;

BEGIN TRY
    SET @xml = CONVERT(xml, 'Foo');
END TRY
BEGIN CATCH
    SET @isValid = 0;
END CATCH;

SELECT @isValid; -- returns 1
有什么想法吗?
奇怪的是,SQL Server 确实可以将 'foo' 之类的值转换为 XML,因此仅靠尝试转换并不能解决问题。不过,您可以检查该值是否以 '<' 开头并以 '>' 结尾(格式正确的 XML 应当如此),同时执行 TRY_CONVERT:
-- A value is flagged valid (1) only when it both matches the '<...>' shape
-- and can be parsed by TRY_CONVERT; everything else yields 0.
SELECT
    CASE
        WHEN V.XMLColumn NOT LIKE '<%>' THEN 0
        WHEN TRY_CONVERT(xml, V.XMLColumn) IS NULL THEN 0
        ELSE 1
    END AS IsValid
FROM (
    VALUES
        ('<root><child /></root>'),
        ('Foo, this is not XML'),
        ('<root><parent><child /></parent></root>')
) AS V (XMLColumn);
您可以尝试以下三种方法。
方法#:1
我们将检查是否至少存在一个根级元素节点。
顺便说一句,SQL Server 的 XML 类型允许存储并非格式正确文档的内容,即没有单一根元素的 XML 片段。这就是我将该用例加入示例数据的原因。
此外,为了完整起见,我还添加了一条 XML 注释。
结果的含义:
- 2(或任何大于 1 的数字)- XML 片段
- 1 - 格式正确的 XML(注意:此方法只统计根级元素,不检测根级文本)
- 0 - 没有任何 XML 元素,只有文本或注释节点。
SQL
-- DDL and sample data population, start
DECLARE @TestTable TABLE
(
    ID        INT IDENTITY (1, 1) PRIMARY KEY,
    XmlColumn XML NOT NULL
);

INSERT INTO @TestTable (XmlColumn)
VALUES
    (N'<root><child /></root>'),
    (N'<city>Miami</city><city>Orlando</city>'),
    (N'Foo, this is not XML'),
    (N'<root><child /></root>Foo'),
    (N'<!-- -->'),
    (N'<root><parent><child /></parent></root>');
-- DDL and sample data population, end

-- Result is the number of element nodes found at the root level of each value.
SELECT
    ID,
    XmlColumn,
    XmlColumn.value('count(/*)', 'INT') AS Result
FROM @TestTable;
输出
+----+-----------------------------------------+-----------+
| ID | XmlColumn | Result |
+----+-----------------------------------------+-----------+
| 1 | <root><child /></root> | 1 |
| 2 | <city>Miami</city><city>Orlando</city> | 2 |
| 3 | Foo, this is not XML | 0 |
| 4 | <root><child /></root>Foo | 1 |
| 5 | <!-- --> | 0 |
| 6 | <root><parent><child /></parent></root> | 1 |
+----+-----------------------------------------+-----------+
方法#:2
使用 XQuery 的 instance of element() 构造。
-- Method 2: classify each value with XQuery.
--   Result        : number of root-level element nodes (one <r/> is emitted per element, then counted).
--   [well-formed] : label produced by comparing the whole instance with its first root-level element.
-- Fix: the "else" label previously read "not well- formed" (stray space), which did not match
-- the documented output "not well-formed".
-- NOTE(review): the `eq` comparison appears to work on atomized string values, so two sibling
-- root elements with identical text (e.g. <a/><b/>) would presumably be reported as
-- "well-formed" — confirm before relying on this column alone.
SELECT *
, XmlColumn.query('<root>{
for $x in /*
return
if ($x instance of element()) then <r/> else ()
}</root>').value('count(/root/r)', 'INT') AS Result
, XmlColumn.query('for $x in .
return if ($x eq /*[1]) then "well-formed" else "not well-formed"').value('.','VARCHAR(20)') AS [well-formed]
FROM @TestTable;
输出
+----+-----------------------------------------+--------+-----------------+
| ID | XmlColumn | Result | well-formed |
+----+-----------------------------------------+--------+-----------------+
| 1 | <root><child /></root> | 1 | well-formed |
| 2 | <city>Miami</city><city>Orlando</city> | 2 | not well-formed |
| 3 | Foo, this is not XML | 0 | not well-formed |
| 4 | <root><child /></root>Foo | 1 | not well-formed |
| 5 | <!-- --> | 0 | not well-formed |
| 6 | <root><parent><child /></parent></root> | 1 | well-formed |
+----+-----------------------------------------+--------+-----------------+
方法#: 3
一个完整的解决方案。该算法基于计数比较:任何类型节点的计数与仅元素的计数。此外,它在 NodeList 列中提供了节点类型的细分,以便于了解正在发生的事情。
-- Method 3: compare the count of all root-level nodes with the count of root-level elements.
--   NodeCount    : root-level nodes of any kind.
--   ElementCount : root-level element nodes only.
--   NodeList     : one label per root-level node, in document order (xml-typed result of query()).
-- Fix: NodeList is of type xml, and xml cannot be implicitly converted to a character type,
-- so it is cast explicitly before LEFT() is applied.
;WITH rs AS
(
SELECT *
, XmlColumn.value('count(/node())', 'INT') AS NodeCount -- any type of nodes
, XmlColumn.value('count(/*)', 'INT') AS ElementCount -- elements only
, XmlColumn.query('
for $x in /node()
return if ($x instance of element()) then text {"element()"}
else if ($x instance of text()) then text {"text()"}
else if ($x instance of comment()) then text {"comment()"}
else if ($x instance of processing-instruction()) then text {"processing-instruction()"}
else ()
') AS NodeList
FROM @TestTable
)
SELECT *
, CASE
-- exactly one root-level node and it is an element
WHEN NodeCount = 1 AND ElementCount = 1 THEN 'well-formed'
-- a leading processing instruction followed by the single root element
WHEN NodeCount = 2 AND ElementCount = 1
AND LEFT(CAST(NodeList AS VARCHAR(MAX)), 24) = 'processing-instruction()' THEN 'well-formed'
-- several root-level nodes, all of them elements
WHEN NodeCount > 1 AND (ElementCount = NodeCount) THEN 'XML fragment'
-- at least one root-level node is not an element
WHEN NodeCount > ElementCount THEN 'not well-formed'
ELSE '???'
END AS Result
FROM rs;
输出
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
| ID | XmlColumn | NodeCount | ElementCount | NodeList | Result |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
| 1 | <root><child /></root> | 1 | 1 | element() | well-formed |
| 2 | <city>Miami</city><city>Orlando</city> | 2 | 2 | element()element() | XML fragment |
| 3 | Foo, this is not XML | 1 | 0 | text() | not well-formed |
| 4 | <root><child /></root>Foo | 2 | 1 | element()text() | not well-formed |
| 5 | <!-- --> | 1 | 0 | comment() | not well-formed |
| 6 | <root><parent><child /></parent></root> | 1 | 1 | element() | well-formed |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
-- Work in tempdb and start from a clean slate.
use tempdb
go
drop table if exists TestTable;
drop table if exists TestTablewithcheck;
drop function if exists dbo.mywellformedxml
go
-- Unconstrained source table holding the sample values.
create table TestTable
(
    ID        int not null identity (1, 1),
    XmlColumn xml not null,
    constraint [PK_TestTable]
        primary key clustered (ID asc) on [PRIMARY]
) on [PRIMARY]
GO
-- Nine sample values, inserted one by one so the IDs follow this order.
insert into TestTable (XmlColumn)
values ('<root><child /></root>');

insert into TestTable (XmlColumn)
values ('Foo, this is not XML');

insert into TestTable (XmlColumn)
values ('<root><parent><child /></parent></root>');

insert into TestTable (XmlColumn)
values ('<root><child /></root>Foo, this is not XML');

insert into TestTable (XmlColumn)
values ('<root xmlns="test"><child /></root>');

insert into TestTable (XmlColumn)
values ('<!-- comment -->');

insert into TestTable (XmlColumn)
values ('<!-- comment --><root><child /></root>');

insert into TestTable (XmlColumn)
values ('<noroot><child /></noroot><noroot><child /></noroot>');

insert into TestTable (XmlColumn)
values ('<?pi my processing instruction?>');
GO
-- Returns 1 when @xml has exactly one root-level element and no root-level text node;
-- returns 0 otherwise (including when any of the checks evaluates to NULL).
create or alter function dbo.mywellformedxml(@xml xml)
returns bit
with schemabinding
as
begin
    return
        case
            when @xml.exist('/*[1]') = 1    -- a first root-level element exists
             and @xml.exist('/*[2]') = 0    -- and there is no second one
             and @xml.exist('text()') = 0   -- and no text node sits beside it
            then 1
            else 0
        end
end
go
-- Same shape as TestTable, but every XmlColumn value must pass dbo.mywellformedxml.
create table TestTablewithcheck
(
    ID        int not null identity (1, 1),
    XmlColumn xml not null,
    constraint [PK_TestTablewithcheck]
        primary key clustered (ID asc) on [PRIMARY],
    constraint chkwfxml
        check (dbo.mywellformedxml(XmlColumn) = 1)
) on [PRIMARY]
GO
-- Copy the sample rows (IDs 1 through 9) one INSERT at a time, so each row is
-- evaluated against chkwfxml in its own statement.
declare @i int = 1;

while @i <= 9
begin
    insert into TestTablewithcheck (XmlColumn)
    select src.XmlColumn
    from TestTable as src
    where src.id = @i;

    set @i += 1;
end
go
-- Rows that were accepted by the check constraint.
select
    ID,
    XmlColumn
from TestTablewithcheck;
go
-- Every sample row together with the function's verdict.
select
    ID,
    XmlColumn,
    dbo.mywellformedxml(XmlColumn) as wfxml
from TestTable
go
-- Cleanup.
drop table if exists TestTable;
drop table if exists TestTablewithcheck;
drop function if exists dbo.mywellformedxml
go