如何在 SQL Server 中检查字符串是否为格式正确的 XML?

How to check whether a string is well-formed XML in SQL Server?

我有一个数据库,其中某张表包含一个无架构(untyped)的 XML 列,而该列中存有任意的非 XML 数据(纯文本)。下面是用于创建并填充这样一张表的示例脚本:

-- Demo table with an untyped XML column; the inserts below show that the
-- column accepts a plain-text value as well as element markup.
CREATE TABLE TestTable
(
    ID        INT IDENTITY (1, 1) NOT NULL,
    XmlColumn XML                 NOT NULL,
    CONSTRAINT [PK_TestTable]
        PRIMARY KEY CLUSTERED (ID ASC)
        ON [PRIMARY]
)
ON [PRIMARY]
GO

-- A document with a single root element.
INSERT INTO TestTable (XmlColumn)
VALUES ('<root><child /></root>');

-- Plain text without any markup.
INSERT INTO TestTable (XmlColumn)
VALUES ('Foo, this is not XML');

-- A deeper document, still with a single root element.
INSERT INTO TestTable (XmlColumn)
VALUES ('<root><parent><child /></parent></root>');
GO

我该如何(首选方案)强制只允许写入格式正确的 XML?如果做不到,我该如何找出哪些条目格式不正确,并将它们置为 NULL?

我读过几篇帖子,建议将 CAST / CONVERT 与 TRY / CATCH 结合使用(例如 stackoverflow.com/questions/14753119),但我从未捕获到任何异常,CAST / CONVERT 总是成功:

-- Probe: does converting a non-XML string to xml raise a catchable error?
DECLARE @is_valid  AS BIT = 1;  -- only reset to 0 inside the CATCH block
DECLARE @converted AS XML;

BEGIN TRY
    SELECT @converted = CONVERT(xml, 'Foo');
END TRY
BEGIN CATCH
    SELECT @is_valid = 0;
END CATCH;

SELECT @is_valid; -- returns 1

有什么想法吗?

奇怪的是,SQL Server 可以将 'foo' 之类的值转换为 XML,因此仅仅尝试转换实际上并不起作用。不过,您可以检查该值是否以 '<' 开头并以 '>' 结尾(有效的 XML 都应如此),同时执行 TRY_CONVERT:

-- IsValid is 1 only when the string both looks like markup ('<' ... '>')
-- and can be converted to xml; every other case yields 0.
SELECT
    CASE
        WHEN V.XMLColumn NOT LIKE '<%>'               THEN 0  -- does not start with '<' and end with '>'
        WHEN TRY_CONVERT(xml, V.XMLColumn) IS NULL    THEN 0  -- conversion failed (or input is NULL)
        ELSE 1
    END AS IsValid
FROM (
    VALUES
        ('<root><child /></root>'),
        ('Foo, this is not XML'),
        ('<root><parent><child /></parent></root>')
) AS V (XMLColumn);

db<>fiddle

您可以尝试以下三种方法。

方法#:1

我们要找出是否存在根级元素节点。顺便说一句,SQL Server 允许格式不正确的 XML,即没有单一根元素的 XML 片段。这就是我把这种用例加入示例数据的原因。

此外,为了完整起见,我还添加了一条 XML 注释。

结果的含义:

  • 2(或任何大于 1 的数字)- XML 片段
  • 1 - 格式正确的 XML
  • 0 - 没有 XML 元素,只有文本或注释节点。

SQL

-- DDL and sample data population, start
DECLARE @TestTable TABLE
(
    ID        INT IDENTITY (1, 1) PRIMARY KEY,
    XmlColumn XML NOT NULL
);

INSERT INTO @TestTable (XmlColumn)
VALUES
    (N'<root><child /></root>'),                    -- single root element
    (N'<city>Miami</city><city>Orlando</city>'),    -- two top-level elements
    (N'Foo, this is not XML'),                      -- text only
    (N'<root><child /></root>Foo'),                 -- element followed by text
    (N'<!-- -->'),                                  -- comment only
    (N'<root><parent><child /></parent></root>');   -- single root element, nested
-- DDL and sample data population, end

-- Result is the number of top-level element nodes found in each value.
SELECT
    t.ID,
    t.XmlColumn,
    t.XmlColumn.value('count(/*)', 'INT') AS Result
FROM @TestTable AS t;

输出

+----+-----------------------------------------+-----------+
| ID |                XmlColumn                | Result    |
+----+-----------------------------------------+-----------+
|  1 | <root><child /></root>                  |         1 |
|  2 | <city>Miami</city><city>Orlando</city>  |         2 |
|  3 | Foo, this is not XML                    |         0 |
|  4 | <root><child /></root>Foo               |         1 |
|  5 | <!-- -->                                |         0 |
|  6 | <root><parent><child /></parent></root> |         1 |
+----+-----------------------------------------+-----------+

方法#:2

通过使用 instance of element() XQuery 构造

-- Method 2: classify each value with the XQuery "instance of element()" test.
--   Result        : number of top-level element nodes. One <r/> is emitted per
--                   top-level element and the <r/> elements are then counted.
--   [well-formed] : label produced by comparing the whole value with its first
--                   top-level element using the XQuery "eq" operator.
-- NOTE(review): "eq" compares atomized values, so this label presumably looks
-- at text content only - confirm before relying on it beyond the sample data.
-- Reads from @TestTable, which must be declared earlier in the same batch.
SELECT * 
    , XmlColumn.query('<root>{
        for $x in /*
        return
        if ($x instance of element()) then <r/> else ()
        }</root>').value('count(/root/r)', 'INT') AS Result
    -- The label text must read exactly "not well-formed" (no embedded space).
    , XmlColumn.query('for $x in .
        return if ($x eq /*[1]) then "well-formed" else "not well-formed"').value('.','VARCHAR(20)') AS [well-formed]
FROM @TestTable;

输出

+----+-----------------------------------------+--------+-----------------+
| ID |                XmlColumn                | Result |   well-formed   |
+----+-----------------------------------------+--------+-----------------+
|  1 | <root><child /></root>                  |      1 | well-formed     |
|  2 | <city>Miami</city><city>Orlando</city>  |      2 | not well-formed |
|  3 | Foo, this is not XML                    |      0 | not well-formed |
|  4 | <root><child /></root>Foo               |      1 | not well-formed |
|  5 | <!-- -->                                |      0 | not well-formed |
|  6 | <root><parent><child /></parent></root> |      1 | well-formed     |
+----+-----------------------------------------+--------+-----------------+

方法#: 3

一个完整的解决方案。该算法基于计数比较:任何类型节点的计数与仅元素的计数。此外,它在 NodeList 列中提供了节点类型的细分,以便于了解正在发生的事情。

-- Method 3: compare the number of top-level nodes of any kind with the number
-- of top-level elements, and list the kind of each top-level node.
-- Reads from @TestTable, which must be declared earlier in the same batch.
;WITH rs AS
(
SELECT * 
    , XmlColumn.value('count(/node())', 'INT') AS NodeCount -- any type of nodes
    , XmlColumn.value('count(/*)', 'INT') AS ElementCount -- elements only
    -- One text marker per top-level node, in document order.
    , XmlColumn.query('
    for $x in /node()
    return if ($x instance of element()) then text {"element()"}
    else if ($x instance of text()) then text {"text()"}
    else if ($x instance of comment()) then text {"comment()"}
    else if ($x instance of processing-instruction()) then text {"processing-instruction()"}
    else ()
    ') AS NodeList
FROM @TestTable
)
SELECT *
    , CASE
        -- Exactly one node and it is an element.
        WHEN NodeCount = 1 AND ElementCount = 1 THEN 'well-formed'
        -- A leading processing instruction followed by the single element.
        -- NodeList is of type xml (result of .query()), so it is cast to a
        -- character type explicitly before LEFT() is applied; 24 is the
        -- length of the marker 'processing-instruction()'.
        WHEN NodeCount = 2 AND ElementCount = 1 
           AND LEFT(CAST(NodeList AS NVARCHAR(MAX)), 24) = 'processing-instruction()' THEN 'well-formed'
        -- Several top-level nodes, all of them elements.
        WHEN NodeCount > 1 AND (ElementCount = NodeCount) THEN 'XML fragment'
        -- At least one top-level node that is not an element.
        WHEN NodeCount > ElementCount THEN 'not well-formed'
        ELSE '???'
       END AS Result 
FROM rs;

输出

+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
| ID |                XmlColumn                | NodeCount | ElementCount |      NodeList      |     Result      |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
|  1 | <root><child /></root>                  |         1 |            1 | element()          | well-formed     |
|  2 | <city>Miami</city><city>Orlando</city>  |         2 |            2 | element()element() | XML fragment    |
|  3 | Foo, this is not XML                    |         1 |            0 | text()             | not well-formed |
|  4 | <root><child /></root>Foo               |         2 |            1 | element()text()    | not well-formed |
|  5 | <!-- -->                                |         1 |            0 | comment()          | not well-formed |
|  6 | <root><parent><child /></parent></root> |         1 |            1 | element()          | well-formed     |
+----+-----------------------------------------+-----------+--------------+--------------------+-----------------+
use tempdb
go

-- Remove leftovers from a previous run. The tables go first, the function last.
drop table if exists TestTable, TestTablewithcheck;
drop function if exists dbo.mywellformedxml;
go

-- Source table: untyped xml column, no validation of the content.
create table TestTable
(
    ID        int identity (1, 1) not null,
    XmlColumn xml                 not null,
    constraint [PK_TestTable]
        primary key clustered (ID asc)
        on [PRIMARY]
)
on [PRIMARY]
go

-- Sample values, inserted one statement per row.
insert into TestTable (XmlColumn)
values ('<root><child /></root>');                                 -- single root element

insert into TestTable (XmlColumn)
values ('Foo, this is not XML');                                   -- text only

insert into TestTable (XmlColumn)
values ('<root><parent><child /></parent></root>');                -- single root element, nested

insert into TestTable (XmlColumn)
values ('<root><child /></root>Foo, this is not XML');             -- element followed by text

insert into TestTable (XmlColumn)
values ('<root xmlns="test"><child /></root>');                    -- root element with a default namespace

insert into TestTable (XmlColumn)
values ('<!-- comment -->');                                       -- comment only

insert into TestTable (XmlColumn)
values ('<!-- comment --><root><child /></root>');                 -- comment followed by a root element

insert into TestTable (XmlColumn)
values ('<noroot><child /></noroot><noroot><child /></noroot>');   -- two top-level elements

insert into TestTable (XmlColumn)
values ('<?pi my processing instruction?>');                       -- processing instruction only
go

-- Reports whether an xml value consists of exactly one top-level element
-- with no text node next to it.
--   @xml    : the value to inspect.
--   returns : 1 when all three checks below hold, otherwise 0
--             (also 0 when a check evaluates to NULL, e.g. for a NULL input).
create or alter function dbo.mywellformedxml(@xml xml)
returns bit
with schemabinding
as
begin
    return
        case
            when @xml.exist('/*[1]') = 1     -- a first top-level element exists
             and @xml.exist('/*[2]') = 0     -- there is no second top-level element
             and @xml.exist('text()') = 0    -- there is no text node at the top level
            then 1
            else 0
        end
end
go

-- Target table: its check constraint only admits values for which
-- dbo.mywellformedxml returns 1.
CREATE TABLE TestTablewithcheck (
  ID INT NOT NULL IDENTITY (1, 1), 
  XmlColumn XML NOT NULL, 
  CONSTRAINT [PK_TestTablewithcheck] PRIMARY KEY CLUSTERED (ID ASC) ON [PRIMARY],
  constraint chkwfxml check(dbo.mywellformedxml(XmlColumn) = 1)
) ON [PRIMARY]
GO

-- Copy the rows of TestTable one INSERT statement per ID.
-- NOTE(review): presumably done row by row so that a value rejected by the
-- check constraint does not stop the remaining rows from being attempted -
-- confirm against the session's XACT_ABORT setting.
declare @i int = 1;
-- Upper bound taken from the data instead of a literal row count, so the loop
-- keeps covering every row when sample rows are added or removed.
declare @last_id int = (select max(ID) from TestTable);

while @i <= @last_id
begin
    insert into TestTablewithcheck(XmlColumn)
    select XmlColumn
    from TestTable
    where ID = @i;  -- column name spelled as declared (ID)
    
    set @i = @i + 1;
end
go

-- Rows that passed the check constraint.
select *
from TestTablewithcheck;
go


-- Every source row together with the verdict of the function.
select *, dbo.mywellformedxml(XmlColumn) as wfxml
from TestTable
go

-- Cleanup: tables first, then the function used by the check constraint.
drop table if exists TestTable;
drop table if exists TestTablewithcheck;
drop function if exists dbo.mywellformedxml
go