提取 CDATA 中含有未配对标签的 XML 节点
Extract XML nodes in CDATA with missing tags
我有一段 XML,其中包含 HTML 标签,但这些标签没有闭合。所以我把内容嵌入了 CDATA,这样解析时就不会报错。我该如何提取其中的各个 XML 节点?
-- Sample table: one raw XML document (stored as plain text) per row.
CREATE TABLE dbo.temp (ID int, input varchar(max));

-- Load the sample document. The <HeadLine> element contains a stray </p> with no
-- opening <p>, which is why the payload is wrapped in CDATA.
-- The original statement omitted the VALUES keyword, which is a syntax error;
-- the explicit column list keeps the insert valid if columns are added later.
INSERT INTO dbo.temp (ID, input)
VALUES
(1,'<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE title [ <!ELEMENT title ANY > <!ENTITY xxe SYSTEM "https://grepular.com/xxe.txt" >]>
<customer>
<![CDATA[<TransmissionId>5555</TransmissionId>
<HeadLine>Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p></HeadLine>
]]></customer>');
正如我们所见,HeadLine 节点中有一个 </p>,却没有与之匹配的 <p>。我该如何从中提取节点?
谢谢
先生
您可以使用 LEFT、RIGHT 和 CHARINDEX 等字符串函数,但我觉得那样很繁琐。如果您愿意使用 TVF(表值函数),下面这个函数可以根据给定的起止模式提取值。我要补充一点,它并不限于 XML 标签。
由于它是 TVF,它可以返回一个或多个值。
例子
-- Demo: pull the text between <HeadLine> and </HeadLine> out of each stored document.
DECLARE @YourTable TABLE (ID int, input varchar(max));

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @YourTable (ID, input)
VALUES
(1,'<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE title [ <!ELEMENT title ANY > <!ENTITY xxe SYSTEM "https://grepular.com/xxe.txt" >]>
<customer>
<![CDATA[<TransmissionId>5555</TransmissionId>
<HeadLine>Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p></HeadLine>
]]></customer>');

-- One output row per match found by the function for each source row.
-- The column is referenced as declared ("input"); the original wrote "Input",
-- which does not bind under a case-sensitive collation.
SELECT src.ID
      ,hit.RetVal
FROM   @YourTable AS src
CROSS APPLY [dbo].[udf-Str-Extract](src.input, '<HeadLine>', '</HeadLine>') AS hit;
Returns
ID RetVal
1 Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p>
如果您感兴趣,以下是该 UDF 的定义
CREATE FUNCTION [dbo].[udf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
/*
  Inline table-valued function that returns every piece of @String found
  between an opening @Delimiter1 and the next closing @Delimiter2.

  Parameters
    @String     - text to scan. Only the first 1,000,000 positions are examined,
                  because the tally built in cte2 has at most 10^6 rows.
    @Delimiter1 - opening marker, e.g. '<HeadLine>' or '[['.
    @Delimiter2 - closing marker, e.g. '</HeadLine>' or ']]'.

  Result columns
    RetSeq - 1-based ordinal of the match, ordered by position.
    RetPos - start position of the candidate segment in @String
             (the position right after the opening delimiter).
    RetLen - length of RetVal.
    RetVal - text up to, but not including, the closing delimiter. The candidate
             segment is LTrim/RTrim'ed before the closing delimiter is located,
             so leading spaces of a value are removed.

  Notes
    * Candidates with no closing delimiter, and empty values, are filtered out
      (CharIndex(@Delimiter2, ...) > 1).
    * Position 1 is always seeded as a candidate start, so text that precedes
      the first @Delimiter1 is returned if it contains @Delimiter2.
    * Offsets use DataLength, i.e. bytes; NOTE(review): this presumably assumes a
      single-byte code page for @String - confirm before using with UTF-8/DBCS collations.
    * Fix: the last candidate used to be cut to a fixed 8000 characters, which
      silently dropped values whose closing delimiter was further away than
      that. It now extends to the end of @String.
*/
Return (
    with cte1(N)   As ( -- ten rows; building block for the tally
                        Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N) ),
         cte2(N)   As ( -- tally 1..DataLength(@String), capped at 10^6 rows
                        Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL))
                        From  (Select N=1
                               From cte1 N1 Cross Join cte1 N2 Cross Join cte1 N3
                                            Cross Join cte1 N4 Cross Join cte1 N5 Cross Join cte1 N6) A ),
         cte3(N)   As ( -- candidate starts: position 1 plus the position after each opening delimiter
                        Select 1
                        Union All
                        Select t.N+DataLength(@Delimiter1)
                        From   cte2 t
                        Where  Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1 ),
         cte4(N,L) As ( -- candidate length: up to the next opening delimiter, or to the end of the
                        -- string when there is none (previously a hard-coded 8000)
                        Select S.N
                              ,IsNull(NullIf(CharIndex(@Delimiter1,@String,S.N),0)-S.N,DataLength(@String))
                        From   cte3 S )
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetLen = charindex(@Delimiter2,RetVal)-1
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
    From  (Select A.N,RetVal = ltrim(rtrim(Substring(@String, A.N, A.L))) From cte4 A ) A
    Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters

Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[udf-Str-Extract] (@String,'[[',']]')
*/
我有一段 XML,其中包含 HTML 标签,但这些标签没有闭合。所以我把内容嵌入了 CDATA,这样解析时就不会报错。我该如何提取其中的各个 XML 节点?
-- Sample table: one raw XML document (stored as plain text) per row.
CREATE TABLE dbo.temp (ID int, input varchar(max));

-- Load the sample document. The <HeadLine> element contains a stray </p> with no
-- opening <p>, which is why the payload is wrapped in CDATA.
-- The original statement omitted the VALUES keyword, which is a syntax error;
-- the explicit column list keeps the insert valid if columns are added later.
INSERT INTO dbo.temp (ID, input)
VALUES
(1,'<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE title [ <!ELEMENT title ANY > <!ENTITY xxe SYSTEM "https://grepular.com/xxe.txt" >]>
<customer>
<![CDATA[<TransmissionId>5555</TransmissionId>
<HeadLine>Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p></HeadLine>
]]></customer>');
正如我们所见,HeadLine 节点中有一个 </p>,却没有与之匹配的 <p>。我该如何从中提取节点?
谢谢 先生
您可以使用 LEFT、RIGHT 和 CHARINDEX 等字符串函数,但我觉得那样很繁琐。如果您愿意使用 TVF(表值函数),下面这个函数可以根据给定的起止模式提取值。我要补充一点,它并不限于 XML 标签。
由于它是 TVF,它可以返回一个或多个值。
例子
-- Demo: pull the text between <HeadLine> and </HeadLine> out of each stored document.
DECLARE @YourTable TABLE (ID int, input varchar(max));

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @YourTable (ID, input)
VALUES
(1,'<?xml version="1.0" encoding="iso-8859-1"?> <!DOCTYPE title [ <!ELEMENT title ANY > <!ENTITY xxe SYSTEM "https://grepular.com/xxe.txt" >]>
<customer>
<![CDATA[<TransmissionId>5555</TransmissionId>
<HeadLine>Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p></HeadLine>
]]></customer>');

-- One output row per match found by the function for each source row.
-- The column is referenced as declared ("input"); the original wrote "Input",
-- which does not bind under a case-sensitive collation.
SELECT src.ID
      ,hit.RetVal
FROM   @YourTable AS src
CROSS APPLY [dbo].[udf-Str-Extract](src.input, '<HeadLine>', '</HeadLine>') AS hit;
Returns
ID RetVal
1 Hair Loss & Growth Treatments and Products Sales Market Research Report 2016-2021</p>
如果您感兴趣,以下是该 UDF 的定义
CREATE FUNCTION [dbo].[udf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
/*
  Inline table-valued function that returns every piece of @String found
  between an opening @Delimiter1 and the next closing @Delimiter2.

  Parameters
    @String     - text to scan. Only the first 1,000,000 positions are examined,
                  because the tally built in cte2 has at most 10^6 rows.
    @Delimiter1 - opening marker, e.g. '<HeadLine>' or '[['.
    @Delimiter2 - closing marker, e.g. '</HeadLine>' or ']]'.

  Result columns
    RetSeq - 1-based ordinal of the match, ordered by position.
    RetPos - start position of the candidate segment in @String
             (the position right after the opening delimiter).
    RetLen - length of RetVal.
    RetVal - text up to, but not including, the closing delimiter. The candidate
             segment is LTrim/RTrim'ed before the closing delimiter is located,
             so leading spaces of a value are removed.

  Notes
    * Candidates with no closing delimiter, and empty values, are filtered out
      (CharIndex(@Delimiter2, ...) > 1).
    * Position 1 is always seeded as a candidate start, so text that precedes
      the first @Delimiter1 is returned if it contains @Delimiter2.
    * Offsets use DataLength, i.e. bytes; NOTE(review): this presumably assumes a
      single-byte code page for @String - confirm before using with UTF-8/DBCS collations.
    * Fix: the last candidate used to be cut to a fixed 8000 characters, which
      silently dropped values whose closing delimiter was further away than
      that. It now extends to the end of @String.
*/
Return (
    with cte1(N)   As ( -- ten rows; building block for the tally
                        Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N) ),
         cte2(N)   As ( -- tally 1..DataLength(@String), capped at 10^6 rows
                        Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL))
                        From  (Select N=1
                               From cte1 N1 Cross Join cte1 N2 Cross Join cte1 N3
                                            Cross Join cte1 N4 Cross Join cte1 N5 Cross Join cte1 N6) A ),
         cte3(N)   As ( -- candidate starts: position 1 plus the position after each opening delimiter
                        Select 1
                        Union All
                        Select t.N+DataLength(@Delimiter1)
                        From   cte2 t
                        Where  Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1 ),
         cte4(N,L) As ( -- candidate length: up to the next opening delimiter, or to the end of the
                        -- string when there is none (previously a hard-coded 8000)
                        Select S.N
                              ,IsNull(NullIf(CharIndex(@Delimiter1,@String,S.N),0)-S.N,DataLength(@String))
                        From   cte3 S )
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetLen = charindex(@Delimiter2,RetVal)-1
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
    From  (Select A.N,RetVal = ltrim(rtrim(Substring(@String, A.N, A.L))) From cte4 A ) A
    Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters

Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[udf-Str-Extract] (@String,'[[',']]')
*/