有没有办法在 SQL Server 中进行 HTML 解码?
Is there any way to do HTML decode in SQL Server?
我的一张表中有以下记录:
CD&amp;amp;M Communications
auburndale oil &amp;amp; propane inc
C F La Fountaine #7561
Laramie County Fire District # 2
AmeriGas Propane LP #2250
有没有办法去掉 &amp;amp;、#7561、#2250 等字符?
"&amp;" 应按照 C# HtmlDecode 函数的行为被替换为 "&"。
以下 SQL 函数适用于您的情况,也可以作为您自行扩展的良好起点。但请注意,与在应用程序层进行字符串操作相比,在数据库(SQL Server)中进行字符串操作会更慢。
GO
IF OBJECT_ID('dbo.MyHTMLDecode') IS NOT NULL BEGIN DROP FUNCTION dbo.MyHTMLDecode END
GO
-- dbo.MyHTMLDecode
-- Purpose : Best-effort HTML decode of a VARCHAR value inside SQL Server.
--           * strips NUL characters and trims leading/trailing spaces
--           * decodes 3- and 4-digit decimal references (&#NNN; / &#NNNN;)
--           * removes bare "#NNNN" tokens (e.g. "#7561"), as the sample data requires
--           * turns non-breaking spaces (&nbsp;, &#160;, CHAR(160)) into plain spaces
--           * decodes &amp; (at any nesting depth), &quot;, &lt; and &gt;
-- Param   : @vcWhat - text to decode.
-- Returns : decoded text as VARCHAR(MAX); characters outside the database code page
--           are subject to the normal NVARCHAR -> VARCHAR conversion.
-- Notes   : &amp; is decoded BEFORE the other named entities, so input that was
--           encoded several times (&amp;amp;lt;) is fully unwound. This is deliberate
--           for the multiply-encoded sample data; it is not a strict single-pass decode.
--           Hexadecimal references (&#x..;) are not handled.
CREATE FUNCTION dbo.MyHTMLDecode (@vcWhat VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @vcResult VARCHAR(MAX)
    DECLARE @siPos INT
           ,@vcEncoded VARCHAR(7)
           ,@siChar INT

    SET @vcResult = RTRIM(LTRIM(CAST(REPLACE(@vcWhat COLLATE Latin1_General_BIN, CHAR(0), '') AS VARCHAR(MAX))))

    -- Encoded non-breaking spaces become ordinary spaces.
    SET @vcResult = REPLACE(REPLACE(@vcResult, '&#160;', ' '), '&nbsp;', ' ')

    IF @vcResult = ''
        RETURN @vcResult

    -- 3-digit decimal references. The binary collation keeps [0-9] restricted to
    -- ASCII digits so the CAST below cannot fail on look-alike characters.
    SET @siPos = PATINDEX('%&#[0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 6)
        SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, 3) AS INT)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, NCHAR(@siChar))
        SET @siPos     = PATINDEX('%&#[0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- 4-digit decimal references.
    SET @siPos = PATINDEX('%&#[0-9][0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 7)
        SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, 4) AS INT)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, NCHAR(@siChar))
        SET @siPos     = PATINDEX('%&#[0-9][0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- Remove bare "#NNNN" tokens such as "#7561".
    SET @siPos = PATINDEX('%#[0-9][0-9][0-9][0-9]%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 5)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, '')
        SET @siPos     = PATINDEX('%#[0-9][0-9][0-9][0-9]%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- Literal non-breaking spaces (including ones produced by &#160; above).
    SET @vcResult = REPLACE(REPLACE(@vcResult, NCHAR(160), ' '), CHAR(160), ' ')

    -- Unwind &amp; at any depth; every pass shortens the string, so the loop terminates.
    WHILE CHARINDEX('&amp;', @vcResult) > 0
        SET @vcResult = REPLACE(@vcResult, '&amp;', '&')

    SET @vcResult = REPLACE(REPLACE(REPLACE(@vcResult, '&quot;', '"'), '&lt;', '<'), '&gt;', '>')

    RETURN @vcResult
END
GO
示例:
-- Demo: run dbo.MyHTMLDecode over the multi-line sample data from the question.
-- The variable is declared and referenced with the same casing so the batch also
-- runs on instances with a case-sensitive collation.
DECLARE @s VARCHAR(MAX) = 'CD&amp;amp;M Communications
auburndale oil &amp;amp; propane inc
C F La Fountaine #7561
Laramie County Fire District # 2
AmeriGas Propane LP #2250';

SELECT dbo.MyHTMLDecode(@s);
输出:
CD&M Communications
auburndale oil & propane inc
C F La Fountaine
Laramie County Fire District # 2
AmeriGas Propane LP
之前的版本不适用于日文、韩文等字符……
以下是修正后的版本:
GO
IF OBJECT_ID('dbo.fn_HTMLDecode') IS NOT NULL BEGIN DROP FUNCTION dbo.fn_HTMLDecode END
GO
-- dbo.fn_HTMLDecode
-- Purpose : Unicode-aware, best-effort HTML decode inside SQL Server.
-- Params  : @vcWhat                 - text to decode.
--           @toDecodeMainISOSymbols - 1 = decode &amp; &quot; &lt; &gt; and common
--                                     punctuation entities (&rsquo;, &ndash;, ...).
--           @toDecodeISOChars       - 1 = decode accented-letter entities (&eacute;, ...).
-- Returns : decoded text as NVARCHAR(MAX).
-- Notes   : * Decimal references with 3 to 6 digits (&#NNN; .. &#NNNNNN;) are always
--             decoded; code points above 65535 are emitted as a UTF-16 surrogate pair
--             because NCHAR() returns NULL for them on non-SC collations.
--           * The terminating ';' is required, so "&#8217 x" is left untouched.
--           * All matching is done under a binary collation so entity names are
--             case-sensitive (&Agrave; vs &agrave;) and [0-9] means ASCII digits only.
--           * &amp; is unwound at any nesting depth before the other named entities,
--             so multiply-encoded input is fully decoded (deliberate, not single-pass).
--           * Hexadecimal references (&#x..;) are not handled.
CREATE FUNCTION dbo.fn_HTMLDecode(
     @vcWhat NVARCHAR(MAX)
    ,@toDecodeMainISOSymbols bit = 1
    ,@toDecodeISOChars bit = 1
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @vcResult NVARCHAR(MAX);
    DECLARE @siPos INT, @vcEncoded NVARCHAR(9), @siChar INT, @vcDecoded NVARCHAR(2);

    SET @vcResult = RTRIM(LTRIM(CAST(REPLACE(@vcWhat COLLATE Latin1_General_BIN, CHAR(0), '') AS NVARCHAR(MAX))));

    -- Encoded non-breaking spaces become ordinary spaces.
    SET @vcResult = REPLACE(REPLACE(@vcResult, N'&#160;', N' '), N'&nbsp;', N' ');

    IF @vcResult = '' RETURN @vcResult;

    -- Decimal character references, longest first (6 digits down to 3).
    DECLARE @s VARCHAR(35);   -- '%&#' + 6 x '[0-9]' + ';%' = 35 characters
    DECLARE @n INT;
    SET @n = 6;

    WHILE @n > 2
    BEGIN
        SET @s = '%&#' + REPLICATE('[0-9]', @n) + ';%';
        SET @siPos = PATINDEX(@s, @vcResult COLLATE Latin1_General_BIN);

        WHILE @siPos > 0
        BEGIN
            SET @vcEncoded = SUBSTRING(@vcResult, @siPos, @n + 3);          -- '&#' + digits + ';'
            SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, @n) AS INT);
            SET @vcDecoded = CASE
                                 WHEN @siChar > 65535
                                     THEN NCHAR(55296 + (@siChar - 65536) / 1024)
                                        + NCHAR(56320 + (@siChar - 65536) % 1024)
                                 ELSE NCHAR(@siChar)
                             END;
            -- The replacement is always shorter than the reference, so this terminates.
            SET @vcResult  = REPLACE(@vcResult COLLATE Latin1_General_BIN, @vcEncoded, @vcDecoded);
            SET @siPos     = PATINDEX(@s, @vcResult COLLATE Latin1_General_BIN);
        END

        SET @n = @n - 1;
    END

    IF @toDecodeMainISOSymbols = 1
    BEGIN
        SET @vcResult = REPLACE(REPLACE(@vcResult, NCHAR(160), ' '), CHAR(160), ' ');

        -- Unwind &amp; at any depth; each pass shortens the string.
        WHILE CHARINDEX(N'&amp;', @vcResult COLLATE Latin1_General_BIN) > 0
            SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&amp;', N'&');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&quot;',   N'"');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&lt;',     N'<');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&gt;',     N'>');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&rdquo;',  N'”');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&bdquo;',  N'„');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ndash;',  N'–');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&mdash;',  N'—');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&lsquo;',  N'‘');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&rsquo;',  N'’');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&bull;',   N'•');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&hellip;', N'…');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&permil;', N'‰');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&prime;',  N'′');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Prime;',  N'″');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&circ;',   N'ˆ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&tilde;',  N'˜');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&nbsp;',   N' ');
    END

    IF @toDecodeISOChars = 1
    BEGIN
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Scaron;', N'Š');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&scaron;', N'š');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ccedil;', N'Ç');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ccedil;', N'ç');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Agrave;', N'À');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&agrave;', N'à');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Aacute;', N'Á');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aacute;', N'á');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Acirc;',  N'Â');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&acirc;',  N'â');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Atilde;', N'Ã');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&atilde;', N'ã');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Auml;',   N'Ä');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&auml;',   N'ä');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Aring;',  N'Å');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aring;',  N'å');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&AElig;',  N'Æ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aelig;',  N'æ');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Egrave;', N'È');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&egrave;', N'è');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Eacute;', N'É');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&eacute;', N'é');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ecirc;',  N'Ê');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ecirc;',  N'ê');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Euml;',   N'Ë');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&euml;',   N'ë');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Igrave;', N'Ì');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&igrave;', N'ì');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Iacute;', N'Í');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&iacute;', N'í');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Icirc;',  N'Î');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&icirc;',  N'î');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Iuml;',   N'Ï');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&iuml;',   N'ï');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ograve;', N'Ò');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ograve;', N'ò');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Oacute;', N'Ó');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&oacute;', N'ó');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ocirc;',  N'Ô');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ocirc;',  N'ô');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Otilde;', N'Õ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&otilde;', N'õ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ouml;',   N'Ö');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ouml;',   N'ö');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Oslash;', N'Ø');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&oslash;', N'ø');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ugrave;', N'Ù');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ugrave;', N'ù');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Uacute;', N'Ú');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&uacute;', N'ú');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ucirc;',  N'Û');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ucirc;',  N'û');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Uuml;',   N'Ü');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&uuml;',   N'ü');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ETH;',    N'Ð');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&eth;',    N'ð');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ntilde;', N'Ñ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ntilde;', N'ñ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Yacute;', N'Ý');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&yacute;', N'ý');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&THORN;',  N'Þ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&thorn;',  N'þ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&szlig;',  N'ß');
    END

    RETURN @vcResult;
END
-- test:
-- select dbo.fn_HTMLDecode(N'A fine example of man and nature co-existing is Slovenia&rsquo;s ecological tourist farms.',1,1)
--   expected: ... Slovenia’s ecological tourist farms.
-- select dbo.fn_HTMLDecode(N'm0 &#50752;&#51064;분야에서 m1 가장 m2 영향력 m3있는m10',1,1)
--   expected: m0 와인분야에서 m1 가장 m2 영향력 m3있는m10
有一个更简单的解决方案...
SQL Server 支持 XML 数据类型,并且能够解码 XML/HTML 编码的实体。只需将字符串转换为 XML 数据类型,就可以利用其内置的解码功能。
看起来像这样:
-- Casting to XML and reading the text back decodes XML entities: returns N'Q & A'.
-- The input must be well-formed XML, so the ampersand has to arrive encoded as &amp;.
select cast('Q &amp; A' as XML).value('.[1]', 'nvarchar(max)');
把它变成一个函数以便于使用:
-- dbo.xmlDecode
-- Purpose : Decode XML entities (&amp; &lt; &gt; &quot; &apos; and numeric
--           references) by round-tripping the text through the XML data type.
-- Param   : @string - text that must already be well-formed XML content; a bare
--           '&' or '<' makes the cast raise an XML parsing error.
-- Returns : decoded text as NVARCHAR(MAX), so decoded characters outside the
--           database code page are not lost to a VARCHAR conversion.
-- Note    : on a case-insensitive database this name is the same object name as
--           [dbo].[XMLdecode]; only one of the two can exist at a time.
create function dbo.xmlDecode (@string nvarchar(max))
returns nvarchar(max)
as
begin
    return cast(@string as XML).value('.[1]', 'nvarchar(max)');
end;
请记住,在 OP 的示例中,字符串似乎被连续编码了 3 次:& 先变成 &amp;,再变成 &amp;amp;,最后变成 &amp;amp;amp;。因此,要还原“原始”字符串,必须调用 3 次 decode 函数。
使用 @Wouter 的转换方法可以处理十进制和十六进制实体,而被采纳的答案做不到。
但是,还需要先对输入中的 XML 特殊字符进行转义处理。
XML 特殊字符:<, >, ", ', &
-- [dbo].[XMLdecode]
-- Purpose : Decode XML character/entity references in arbitrary text by
--           (1) escaping everything that is NOT a valid XML reference and
--           (2) letting the XML data type perform the actual decoding.
-- Param   : @input - text that may mix references (&#233; &#xE9; &amp; ...) with
--           raw XML-special characters (& < > " ').
-- Returns : decoded text as NVARCHAR(MAX).
-- Details : * A reference is accepted only when it is one of the five predefined
--             XML entities or a decimal/hex reference to a code point allowed by
--             XML 1.0; its name and the hex 'x' are lower-cased for the parser.
--           * Anything else starting with '&' (e.g. "&;", "&eacute;", "&#0;",
--             "AT&T;") is kept as literal text instead of failing the XML cast.
--           * Comparisons use a binary collation so full-width look-alikes of
--             & ; < > " ' are never mistaken for the ASCII characters.
--           * Like the original, trailing spaces of @input are not processed
--             (LEN ignores them) and raw control characters that XML forbids
--             still make the final cast fail.
create function [dbo].[XMLdecode] (@input nvarchar(max))
returns nvarchar(max)
as
begin
    declare @output nvarchar(max) = N''
    declare @len int = len(@input)
    declare @idx int = 1
    declare @amp int
    declare @semi int
    declare @body nvarchar(10)
    declare @code int
    declare @isRef bit

    while @idx <= @len
    begin
        set @amp = charindex(N'&', @input collate Latin1_General_BIN, @idx)

        -- No more ampersands: copy the rest of the text and stop.
        if @amp = 0
        begin
            set @output += substring(@input, @idx, @len - @idx + 1)
            break
        end

        -- Copy the literal text in front of the ampersand in one piece.
        set @output += substring(@input, @idx, @amp - @idx)

        set @isRef = 0
        set @code = null
        set @semi = charindex(N';', @input collate Latin1_General_BIN, @amp + 1)

        -- Candidate body is 2..10 characters: "lt" up to "#" + 9 digits / "#x" + 8 hex digits.
        if @semi - @amp between 3 and 11
        begin
            set @body = lower(substring(@input, @amp + 1, @semi - @amp - 1))

            if @body collate Latin1_General_BIN in (N'amp', N'lt', N'gt', N'quot', N'apos')
                set @isRef = 1
            else if @body collate Latin1_General_BIN like N'#[0-9]%'
                and @body collate Latin1_General_BIN not like N'#%[^0-9]%'
                set @code = cast(substring(@body, 2, 9) as int)
            else if @body collate Latin1_General_BIN like N'#x[0-9a-f]%'
                and @body collate Latin1_General_BIN not like N'#x%[^0-9a-f]%'
                -- Left-pad to 8 hex digits and reinterpret the 4 bytes as an INT;
                -- values with the top bit set turn negative and fail the range check.
                set @code = convert(int, convert(varbinary(4),
                                right('00000000' + cast(substring(@body, 3, 8) as varchar(8)), 8), 2))

            -- XML 1.0 "Char" production: only these code points may be referenced.
            if @code in (9, 10, 13)
               or @code between 32 and 55295
               or @code between 57344 and 65533
               or @code between 65536 and 1114111
                set @isRef = 1
        end

        if @isRef = 1
        begin
            set @output += N'&' + @body + N';'
            set @idx = @semi + 1
        end
        else
        begin
            set @output += N'&amp;'
            set @idx = @amp + 1
        end
    end

    -- Escape the remaining XML-special characters; accepted references contain none of them.
    set @output = replace(@output collate Latin1_General_BIN, N'<', N'&lt;')
    set @output = replace(@output collate Latin1_General_BIN, N'>', N'&gt;')
    set @output = replace(@output collate Latin1_General_BIN, N'"', N'&quot;')
    set @output = replace(@output collate Latin1_General_BIN, N'''', N'&apos;')

    return cast(@output as XML).value('.[1]', 'nvarchar(max)')
end;
用法示例:
-- Mixed hex (&#xC9;) and decimal (&#186;, &#8364;, &#9745;) references, a named
-- reference (&amp;) and raw '&' / '<' characters; N'' keeps the literal Unicode.
select dbo.XMLdecode(N'this is a t&#xC9;st &#186;f HEX &amp; DECIMAL &#8364;ntities & <<<< non-entities too! &#9745;')
------------------------------------------------------------------------------
returns: 'this is a tÉst ºf HEX & DECIMAL €ntities & <<<< non-entities too! ☑'
它不是防弹的,但处理了我所有的案例。
看起来像 HTML 实体但实际上不是的东西仍然会造成麻烦。例如:
甚至 &;
它也不处理非 XML 的命名实体,例如 &check;(✓)或 &hearts;(♥)。
我的一张表中有以下记录:
CD&amp;amp;M Communications
auburndale oil &amp;amp; propane inc
C F La Fountaine #7561
Laramie County Fire District # 2
AmeriGas Propane LP #2250
有没有办法去掉 &amp;amp;、#7561、#2250 等字符?
"&amp;" 应按照 C# HtmlDecode 函数的行为被替换为 "&"。
以下 SQL 函数适用于您的情况,也可以作为您自行扩展的良好起点。但请注意,与在应用程序层进行字符串操作相比,在数据库(SQL Server)中进行字符串操作会更慢。
GO
IF OBJECT_ID('dbo.MyHTMLDecode') IS NOT NULL BEGIN DROP FUNCTION dbo.MyHTMLDecode END
GO
-- dbo.MyHTMLDecode
-- Purpose : Best-effort HTML decode of a VARCHAR value inside SQL Server.
--           * strips NUL characters and trims leading/trailing spaces
--           * decodes 3- and 4-digit decimal references (&#NNN; / &#NNNN;)
--           * removes bare "#NNNN" tokens (e.g. "#7561"), as the sample data requires
--           * turns non-breaking spaces (&nbsp;, &#160;, CHAR(160)) into plain spaces
--           * decodes &amp; (at any nesting depth), &quot;, &lt; and &gt;
-- Param   : @vcWhat - text to decode.
-- Returns : decoded text as VARCHAR(MAX); characters outside the database code page
--           are subject to the normal NVARCHAR -> VARCHAR conversion.
-- Notes   : &amp; is decoded BEFORE the other named entities, so input that was
--           encoded several times (&amp;amp;lt;) is fully unwound. This is deliberate
--           for the multiply-encoded sample data; it is not a strict single-pass decode.
--           Hexadecimal references (&#x..;) are not handled.
CREATE FUNCTION dbo.MyHTMLDecode (@vcWhat VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @vcResult VARCHAR(MAX)
    DECLARE @siPos INT
           ,@vcEncoded VARCHAR(7)
           ,@siChar INT

    SET @vcResult = RTRIM(LTRIM(CAST(REPLACE(@vcWhat COLLATE Latin1_General_BIN, CHAR(0), '') AS VARCHAR(MAX))))

    -- Encoded non-breaking spaces become ordinary spaces.
    SET @vcResult = REPLACE(REPLACE(@vcResult, '&#160;', ' '), '&nbsp;', ' ')

    IF @vcResult = ''
        RETURN @vcResult

    -- 3-digit decimal references. The binary collation keeps [0-9] restricted to
    -- ASCII digits so the CAST below cannot fail on look-alike characters.
    SET @siPos = PATINDEX('%&#[0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 6)
        SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, 3) AS INT)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, NCHAR(@siChar))
        SET @siPos     = PATINDEX('%&#[0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- 4-digit decimal references.
    SET @siPos = PATINDEX('%&#[0-9][0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 7)
        SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, 4) AS INT)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, NCHAR(@siChar))
        SET @siPos     = PATINDEX('%&#[0-9][0-9][0-9][0-9];%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- Remove bare "#NNNN" tokens such as "#7561".
    SET @siPos = PATINDEX('%#[0-9][0-9][0-9][0-9]%', @vcResult COLLATE Latin1_General_BIN)
    WHILE @siPos > 0
    BEGIN
        SET @vcEncoded = SUBSTRING(@vcResult, @siPos, 5)
        SET @vcResult  = REPLACE(@vcResult, @vcEncoded, '')
        SET @siPos     = PATINDEX('%#[0-9][0-9][0-9][0-9]%', @vcResult COLLATE Latin1_General_BIN)
    END

    -- Literal non-breaking spaces (including ones produced by &#160; above).
    SET @vcResult = REPLACE(REPLACE(@vcResult, NCHAR(160), ' '), CHAR(160), ' ')

    -- Unwind &amp; at any depth; every pass shortens the string, so the loop terminates.
    WHILE CHARINDEX('&amp;', @vcResult) > 0
        SET @vcResult = REPLACE(@vcResult, '&amp;', '&')

    SET @vcResult = REPLACE(REPLACE(REPLACE(@vcResult, '&quot;', '"'), '&lt;', '<'), '&gt;', '>')

    RETURN @vcResult
END
GO
示例:
-- Demo: run dbo.MyHTMLDecode over the multi-line sample data from the question.
-- The variable is declared and referenced with the same casing so the batch also
-- runs on instances with a case-sensitive collation.
DECLARE @s VARCHAR(MAX) = 'CD&amp;amp;M Communications
auburndale oil &amp;amp; propane inc
C F La Fountaine #7561
Laramie County Fire District # 2
AmeriGas Propane LP #2250';

SELECT dbo.MyHTMLDecode(@s);
输出:
CD&M Communications
auburndale oil & propane inc
C F La Fountaine
Laramie County Fire District # 2
AmeriGas Propane LP
之前的版本不适用于日文、韩文等字符……以下是修正后的版本:
GO
IF OBJECT_ID('dbo.fn_HTMLDecode') IS NOT NULL BEGIN DROP FUNCTION dbo.fn_HTMLDecode END
GO
-- dbo.fn_HTMLDecode
-- Purpose : Unicode-aware, best-effort HTML decode inside SQL Server.
-- Params  : @vcWhat                 - text to decode.
--           @toDecodeMainISOSymbols - 1 = decode &amp; &quot; &lt; &gt; and common
--                                     punctuation entities (&rsquo;, &ndash;, ...).
--           @toDecodeISOChars       - 1 = decode accented-letter entities (&eacute;, ...).
-- Returns : decoded text as NVARCHAR(MAX).
-- Notes   : * Decimal references with 3 to 6 digits (&#NNN; .. &#NNNNNN;) are always
--             decoded; code points above 65535 are emitted as a UTF-16 surrogate pair
--             because NCHAR() returns NULL for them on non-SC collations.
--           * The terminating ';' is required, so "&#8217 x" is left untouched.
--           * All matching is done under a binary collation so entity names are
--             case-sensitive (&Agrave; vs &agrave;) and [0-9] means ASCII digits only.
--           * &amp; is unwound at any nesting depth before the other named entities,
--             so multiply-encoded input is fully decoded (deliberate, not single-pass).
--           * Hexadecimal references (&#x..;) are not handled.
CREATE FUNCTION dbo.fn_HTMLDecode(
     @vcWhat NVARCHAR(MAX)
    ,@toDecodeMainISOSymbols bit = 1
    ,@toDecodeISOChars bit = 1
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    DECLARE @vcResult NVARCHAR(MAX);
    DECLARE @siPos INT, @vcEncoded NVARCHAR(9), @siChar INT, @vcDecoded NVARCHAR(2);

    SET @vcResult = RTRIM(LTRIM(CAST(REPLACE(@vcWhat COLLATE Latin1_General_BIN, CHAR(0), '') AS NVARCHAR(MAX))));

    -- Encoded non-breaking spaces become ordinary spaces.
    SET @vcResult = REPLACE(REPLACE(@vcResult, N'&#160;', N' '), N'&nbsp;', N' ');

    IF @vcResult = '' RETURN @vcResult;

    -- Decimal character references, longest first (6 digits down to 3).
    DECLARE @s VARCHAR(35);   -- '%&#' + 6 x '[0-9]' + ';%' = 35 characters
    DECLARE @n INT;
    SET @n = 6;

    WHILE @n > 2
    BEGIN
        SET @s = '%&#' + REPLICATE('[0-9]', @n) + ';%';
        SET @siPos = PATINDEX(@s, @vcResult COLLATE Latin1_General_BIN);

        WHILE @siPos > 0
        BEGIN
            SET @vcEncoded = SUBSTRING(@vcResult, @siPos, @n + 3);          -- '&#' + digits + ';'
            SET @siChar    = CAST(SUBSTRING(@vcEncoded, 3, @n) AS INT);
            SET @vcDecoded = CASE
                                 WHEN @siChar > 65535
                                     THEN NCHAR(55296 + (@siChar - 65536) / 1024)
                                        + NCHAR(56320 + (@siChar - 65536) % 1024)
                                 ELSE NCHAR(@siChar)
                             END;
            -- The replacement is always shorter than the reference, so this terminates.
            SET @vcResult  = REPLACE(@vcResult COLLATE Latin1_General_BIN, @vcEncoded, @vcDecoded);
            SET @siPos     = PATINDEX(@s, @vcResult COLLATE Latin1_General_BIN);
        END

        SET @n = @n - 1;
    END

    IF @toDecodeMainISOSymbols = 1
    BEGIN
        SET @vcResult = REPLACE(REPLACE(@vcResult, NCHAR(160), ' '), CHAR(160), ' ');

        -- Unwind &amp; at any depth; each pass shortens the string.
        WHILE CHARINDEX(N'&amp;', @vcResult COLLATE Latin1_General_BIN) > 0
            SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&amp;', N'&');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&quot;',   N'"');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&lt;',     N'<');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&gt;',     N'>');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&rdquo;',  N'”');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&bdquo;',  N'„');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ndash;',  N'–');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&mdash;',  N'—');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&lsquo;',  N'‘');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&rsquo;',  N'’');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&bull;',   N'•');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&hellip;', N'…');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&permil;', N'‰');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&prime;',  N'′');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Prime;',  N'″');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&circ;',   N'ˆ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&tilde;',  N'˜');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&nbsp;',   N' ');
    END

    IF @toDecodeISOChars = 1
    BEGIN
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Scaron;', N'Š');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&scaron;', N'š');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ccedil;', N'Ç');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ccedil;', N'ç');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Agrave;', N'À');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&agrave;', N'à');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Aacute;', N'Á');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aacute;', N'á');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Acirc;',  N'Â');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&acirc;',  N'â');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Atilde;', N'Ã');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&atilde;', N'ã');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Auml;',   N'Ä');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&auml;',   N'ä');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Aring;',  N'Å');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aring;',  N'å');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&AElig;',  N'Æ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&aelig;',  N'æ');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Egrave;', N'È');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&egrave;', N'è');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Eacute;', N'É');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&eacute;', N'é');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ecirc;',  N'Ê');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ecirc;',  N'ê');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Euml;',   N'Ë');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&euml;',   N'ë');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Igrave;', N'Ì');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&igrave;', N'ì');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Iacute;', N'Í');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&iacute;', N'í');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Icirc;',  N'Î');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&icirc;',  N'î');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Iuml;',   N'Ï');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&iuml;',   N'ï');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ograve;', N'Ò');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ograve;', N'ò');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Oacute;', N'Ó');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&oacute;', N'ó');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ocirc;',  N'Ô');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ocirc;',  N'ô');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Otilde;', N'Õ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&otilde;', N'õ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ouml;',   N'Ö');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ouml;',   N'ö');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Oslash;', N'Ø');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&oslash;', N'ø');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ugrave;', N'Ù');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ugrave;', N'ù');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Uacute;', N'Ú');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&uacute;', N'ú');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ucirc;',  N'Û');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ucirc;',  N'û');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Uuml;',   N'Ü');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&uuml;',   N'ü');

        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ETH;',    N'Ð');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&eth;',    N'ð');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Ntilde;', N'Ñ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&ntilde;', N'ñ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&Yacute;', N'Ý');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&yacute;', N'ý');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&THORN;',  N'Þ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&thorn;',  N'þ');
        SET @vcResult = REPLACE(@vcResult COLLATE Latin1_General_BIN, N'&szlig;',  N'ß');
    END

    RETURN @vcResult;
END
-- test:
-- select dbo.fn_HTMLDecode(N'A fine example of man and nature co-existing is Slovenia&rsquo;s ecological tourist farms.',1,1)
--   expected: ... Slovenia’s ecological tourist farms.
-- select dbo.fn_HTMLDecode(N'm0 &#50752;&#51064;분야에서 m1 가장 m2 영향력 m3있는m10',1,1)
--   expected: m0 와인분야에서 m1 가장 m2 영향력 m3있는m10
有一个更简单的解决方案...
SQL Server 支持 XML 数据类型,并且能够解码 XML/HTML 编码的实体。只需将字符串转换为 XML 数据类型,就可以利用其内置的解码功能。
看起来像这样:
-- Casting to XML and reading the text back decodes XML entities: returns N'Q & A'.
-- The input must be well-formed XML, so the ampersand has to arrive encoded as &amp;.
select cast('Q &amp; A' as XML).value('.[1]', 'nvarchar(max)');
把它变成一个函数以便于使用:
-- dbo.xmlDecode
-- Purpose : Decode XML entities (&amp; &lt; &gt; &quot; &apos; and numeric
--           references) by round-tripping the text through the XML data type.
-- Param   : @string - text that must already be well-formed XML content; a bare
--           '&' or '<' makes the cast raise an XML parsing error.
-- Returns : decoded text as NVARCHAR(MAX), so decoded characters outside the
--           database code page are not lost to a VARCHAR conversion.
-- Note    : on a case-insensitive database this name is the same object name as
--           [dbo].[XMLdecode]; only one of the two can exist at a time.
create function dbo.xmlDecode (@string nvarchar(max))
returns nvarchar(max)
as
begin
    return cast(@string as XML).value('.[1]', 'nvarchar(max)');
end;
请记住,在 OP 的示例中,字符串似乎被连续编码了 3 次:& 先变成 &amp;,再变成 &amp;amp;,最后变成 &amp;amp;amp;。因此,要还原“原始”字符串,必须调用 3 次 decode 函数。
使用 @Wouter 的转换方法可以处理十进制和十六进制实体,而被采纳的答案做不到。
但是,还需要先对输入中的 XML 特殊字符进行转义处理。
XML 特殊字符:<, >, ", ', &
-- [dbo].[XMLdecode]
-- Purpose : Decode XML character/entity references in arbitrary text by
--           (1) escaping everything that is NOT a valid XML reference and
--           (2) letting the XML data type perform the actual decoding.
-- Param   : @input - text that may mix references (&#233; &#xE9; &amp; ...) with
--           raw XML-special characters (& < > " ').
-- Returns : decoded text as NVARCHAR(MAX).
-- Details : * A reference is accepted only when it is one of the five predefined
--             XML entities or a decimal/hex reference to a code point allowed by
--             XML 1.0; its name and the hex 'x' are lower-cased for the parser.
--           * Anything else starting with '&' (e.g. "&;", "&eacute;", "&#0;",
--             "AT&T;") is kept as literal text instead of failing the XML cast.
--           * Comparisons use a binary collation so full-width look-alikes of
--             & ; < > " ' are never mistaken for the ASCII characters.
--           * Like the original, trailing spaces of @input are not processed
--             (LEN ignores them) and raw control characters that XML forbids
--             still make the final cast fail.
create function [dbo].[XMLdecode] (@input nvarchar(max))
returns nvarchar(max)
as
begin
    declare @output nvarchar(max) = N''
    declare @len int = len(@input)
    declare @idx int = 1
    declare @amp int
    declare @semi int
    declare @body nvarchar(10)
    declare @code int
    declare @isRef bit

    while @idx <= @len
    begin
        set @amp = charindex(N'&', @input collate Latin1_General_BIN, @idx)

        -- No more ampersands: copy the rest of the text and stop.
        if @amp = 0
        begin
            set @output += substring(@input, @idx, @len - @idx + 1)
            break
        end

        -- Copy the literal text in front of the ampersand in one piece.
        set @output += substring(@input, @idx, @amp - @idx)

        set @isRef = 0
        set @code = null
        set @semi = charindex(N';', @input collate Latin1_General_BIN, @amp + 1)

        -- Candidate body is 2..10 characters: "lt" up to "#" + 9 digits / "#x" + 8 hex digits.
        if @semi - @amp between 3 and 11
        begin
            set @body = lower(substring(@input, @amp + 1, @semi - @amp - 1))

            if @body collate Latin1_General_BIN in (N'amp', N'lt', N'gt', N'quot', N'apos')
                set @isRef = 1
            else if @body collate Latin1_General_BIN like N'#[0-9]%'
                and @body collate Latin1_General_BIN not like N'#%[^0-9]%'
                set @code = cast(substring(@body, 2, 9) as int)
            else if @body collate Latin1_General_BIN like N'#x[0-9a-f]%'
                and @body collate Latin1_General_BIN not like N'#x%[^0-9a-f]%'
                -- Left-pad to 8 hex digits and reinterpret the 4 bytes as an INT;
                -- values with the top bit set turn negative and fail the range check.
                set @code = convert(int, convert(varbinary(4),
                                right('00000000' + cast(substring(@body, 3, 8) as varchar(8)), 8), 2))

            -- XML 1.0 "Char" production: only these code points may be referenced.
            if @code in (9, 10, 13)
               or @code between 32 and 55295
               or @code between 57344 and 65533
               or @code between 65536 and 1114111
                set @isRef = 1
        end

        if @isRef = 1
        begin
            set @output += N'&' + @body + N';'
            set @idx = @semi + 1
        end
        else
        begin
            set @output += N'&amp;'
            set @idx = @amp + 1
        end
    end

    -- Escape the remaining XML-special characters; accepted references contain none of them.
    set @output = replace(@output collate Latin1_General_BIN, N'<', N'&lt;')
    set @output = replace(@output collate Latin1_General_BIN, N'>', N'&gt;')
    set @output = replace(@output collate Latin1_General_BIN, N'"', N'&quot;')
    set @output = replace(@output collate Latin1_General_BIN, N'''', N'&apos;')

    return cast(@output as XML).value('.[1]', 'nvarchar(max)')
end;
用法示例:
-- Mixed hex (&#xC9;) and decimal (&#186;, &#8364;, &#9745;) references, a named
-- reference (&amp;) and raw '&' / '<' characters; N'' keeps the literal Unicode.
select dbo.XMLdecode(N'this is a t&#xC9;st &#186;f HEX &amp; DECIMAL &#8364;ntities & <<<< non-entities too! &#9745;')
------------------------------------------------------------------------------
returns: 'this is a tÉst ºf HEX & DECIMAL €ntities & <<<< non-entities too! ☑'
它不是防弹的,但处理了我所有的案例。
看起来像 HTML 实体但实际上不是的东西仍然会造成麻烦。例如:
甚至 &;
它也不处理非 XML 的命名实体,例如 &check;(✓)或 &hearts;(♥)。