使用 SQL Server 输出 UTF-8 编码的 XML 文件
Output XML Files with encoding UTF-8 using SQL Server
我有一个查询,它会生成 XML 文件(文件头为 <?xml version="1.0"?>)
并将它们上传到 FTP。
我需要将编码切换为 UTF-8,如下所示:
<?xml version="1.0" encoding="utf-8"?>
我可以在文本编辑器中手动完成,但无法在 SQL Server 中实现。
我也阅读了下面这篇文章,但它没有帮我找到解决方案。
https://docs.microsoft.com/en-us/sql/relational-databases/xml/create-instances-of-xml-data
我的代码:
-- Batch preamble: select the database and set the session options in effect when the procedure is created.
USE [Audit_DBA]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- to call the procedure use the code below but assign your own path:
-- EXEC [Audit_DBA].[dbo].[StateAndLocalAward_ToXML] 'C:\NLP\Test\NewFolder'
-- NOTE(review): the example call above names StateAndLocalAward_ToXML, while the procedure created below is StateAndLocalAward_ToXML_VC.
-- Purpose: stage award rows in InputTemp.dbo.StateAndLocalAward_output, then iterate over them
-- with a cursor and export one XML file per stateContractId through bcp / xp_cmdshell.
-- @FileDestinationDir: directory prefix used to build each output file path.
CREATE PROCEDURE [dbo].[StateAndLocalAward_ToXML_VC]
@FileDestinationDir VARCHAR(2000)
AS
-- Working variables: output file path, per-row export query text, shell command line, xp_cmdshell return code.
-- NOTE(review): @Cmd is VARCHAR(1000) but embeds @SQLStr (up to 8000 chars) plus the path; a long command would be truncated.
DECLARE @FilePath VARCHAR(4000)
DECLARE @SQLStr VARCHAR(8000),
@Cmd VARCHAR(1000),
@Ret INT
-- Drop the staging table if it already exists so SELECT ... INTO below can recreate it.
IF EXISTS (SELECT * FROM InputTemp.SYS.TABLES WHERE NAME = 'StateAndLocalAward_output')
DROP TABLE InputTemp.dbo.StateAndLocalAward_output;
-- Collect award rows together with vendor, offering and NIGP details.
-- NOTE(review): TOP 200 has no ORDER BY, so which 200 rows are kept is not defined.
;WITH CTE_StateAndLocalAward_output AS
(
SELECT TOP 200
sc.stateContractId,
ca.OnviaId AS [Reference], --AS OnviaID,
ca.contractTitle AS [Title],
nigp.NIGPCodeTx AS [NIPGCode],
nigp.NIGPDescriptionLongTx AS [NIPGDescription],
ca.Description,
vm.parentName AS [VendorName],
offering.offeringTx AS [PrimaryOffering],
ca.StartDate,
ca.AwardDt
FROM
ISCore.dbo.StateContract sc WITH (NOLOCK)
INNER JOIN
ISCore.dbo.ContractAward ca WITH (NOLOCK) ON sc.contractAwardId = ca.contractAwardID
INNER JOIN
ISLibrary.dbo.VendorMaster vm WITH (NOLOCK) ON ca.vendorId = vm.vendorID
-- NOTE(review): no column of cp is selected or filtered on; this join can only multiply rows.
LEFT OUTER JOIN
ISCore.dbo.CompanyProfile cp WITH (NOLOCK) ON ca.vendorId = cp.vendorId
LEFT OUTER JOIN
ISCore.dbo.Offering offering WITH (NOLOCK) ON ca.offeringID = offering.offeringID
LEFT OUTER JOIN
ISCore.dbo.NIGPSrvc nigp WITH (NOLOCK) ON ca.NIGPCode = nigp.NIGPCodeTx
WHERE
-- NOTE(review): the value compared with sc.stateContractId is commented out, leaving "= AND ...";
-- the predicate is incomplete as written and the statement will not parse until it is restored or removed.
vm.showUnverified = 1 AND sc.stateContractId = -- 464482 stateContractId
AND StartDate >= DATEADD(month, -12, GETDATE())
)
-- Materialise the CTE into the staging table read by the cursor and by the bcp queries.
SELECT
*
INTO
InputTemp.dbo.StateAndLocalAward_output
FROM
CTE_StateAndLocalAward_output;
--select * from InputTemp.dbo.StateAndLocalAward_output
-- Cursor over every staged stateContractId; one export is attempted per fetched row.
DECLARE @StateContractId INT;
--DECLARE @xml XML;
DECLARE Cur_StateAndLocalAward_StateContractId CURSOR FOR
SELECT StateContractId
FROM inputtemp.dbo.StateAndLocalAward_output t1 WITH (NOLOCK)
OPEN Cur_StateAndLocalAward_StateContractId;
FETCH NEXT FROM Cur_StateAndLocalAward_StateContractId INTO @StateContractId -- @xml;
WHILE @@FETCH_STATUS = 0
BEGIN
-- Build the text of the query that bcp will run for the current contract.
-- STR() returns a space-padded string, so the quoted value relies on implicit conversion back to INT.
SELECT
@SQLStr = 'SELECT Body.[Reference], Body.[Title], Body.[NIPGCode], Body.[NIPGDescription], Body.[Description], Body.[VendorName], Body.[PrimaryOffering] FROM InputTemp.dbo.StateAndLocalAward_output AS Body where StateContractId = ''' + str(@StateContractId) + ''' FOR XML AUTO, ELEMENTS;'
--SELECT N'<?xml version="1.0" encoding="UTF-8"?>'
-- NOTE(review): this only returns a result set in which the XML declaration is prepended to the
-- query TEXT held in @SQLStr; @SQLStr itself is not modified, so the declaration never reaches the file.
SELECT
CAST((SELECT N'<?xml version="1.0" encoding="UTF-8"?>' + (@SQLSTr)) as varchar(8000) /*as XML*/) as SQLStr
-- select CAST((SELECT N'<?xml version="1.0" encoding="UTF-8"?>' +
-- (@SQLSTr)) as XML) as SQLStr
如果我不运行上面的代码,而是运行相同但转换为 XML 类型的代码,
我会得到一个错误:
Msg 9402, Level 16, State 1, Procedure StateAndLocalAward_ToXML, Line 86 [Batch Start Line 10]
XML parsing: line 1, character 38, unable to switch the encoding
-- Output path: <dir>\NewFolder<id>.xml; STR() pads with spaces, hence the LTRIM/RTRIM.
select @FilePath = @FileDestinationDir+'\NewFolder'+ltrim(rtrim(str(@StateContractId)))+'.xml'
-- bcp command line: run @SQLStr and write its result to @FilePath.
-- -w writes wide (Unicode) characters, which is not UTF-8; -T uses a trusted connection; -S names the server.
-- NOTE(review): @FilePath is not quoted, so a directory containing spaces would split the argument.
select @Cmd = ' bcp " ' + @SQLStr + '" queryout '+@FilePath+' -w -r "" -T -S ' +@@ServerName
-- NOTE(review): the return code stored in @Ret is never inspected afterwards.
exec @Ret = master.dbo.xp_cmdshell @Cmd
-- Advance to the next staged contract.
FETCH NEXT FROM Cur_StateAndLocalAward_StateContractId INTO @StateContractId -- @xml;
END
-- Release the cursor once every row has been processed.
CLOSE Cur_StateAndLocalAward_StateContractId;
DEALLOCATE Cur_StateAndLocalAward_StateContractId;
GO
有些事情要知道:
- 在 2016 版(以及带 SP2 的 2014 版)之前,SQL Server 不支持通过 BCP 导出 UTF-8。
- 不能将 xml-declaration (
<?xml blah ?>
) 添加到 SQL Server 原生 XML 类型的变量或列中:要么失败(“……无法切换编码”),要么 xml-declaration 会被丢弃。
- 您可以在字符串层面把 xml-declaration 加到已转换为
NVARCHAR(MAX)
的 xml 前面。但是您无法再把它重新转换(re-cast)回 XML:要么失败,要么丢失该声明。
- 在内部,SQL Server 在任何情况下都以
UCS-2
(非常接近 UTF-16
)保存所有 XML。
- SQL Server 的
VARCHAR
(CHAR
) 类型不是 utf-8
,而是扩展 ASCII(具体取决于 COLLATION
)。
- 在字符串层面,您可以在 xml-declaration 中写入任何内容(正如您可以构造出看起来像 XML、但并非 well-formed 的东西)。它只是一个未经检查的字符串。
- xml-declaration 中声明的编码只有在数据写入磁盘或作为字节流处理时才重要,它用于标明文件的实际编码。
- 您可以编写
encoding="x"
并使用编码 y 存储文件 - 但您不应该这样做。
- 当字符串是 VARCHAR 时,SQL Server 可以把带有 utf-8 声明的字符串转换为 XML;当字符串是 NVARCHAR 时,可以转换带有 utf-16 声明的字符串,但两者不能交叉使用(请阅读这个相关回答)。其他编码很可能导致“无法切换编码”错误。
关于您的代码
- 您应该将
@SQLStr
和 @cmd
更改为 NVARCHAR(MAX)
,否则遇到非纯拉丁字符时可能会出问题。
- 当您使用
CURSOR
时,您应该用语句的结果填充一个 XML-typed 变量,将其转换为 NVARCHAR(MAX)
并将声明添加到该字符串。不要将结果转换回 XML.
- 请阅读
BCP
文档。其中说明 -w
会以 unicode
(wide)格式写出,这不是 utf-8
(您在声明中写的内容在这里不起作用)。
提示:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Demo: export one row of a scratch table as an XML file carrying a UTF-8 declaration.
--
-- The XML declaration is concatenated on string level (the result of FOR XML is cast to
-- NVARCHAR(MAX) first) and the file is written by bcp with -c -C65001, i.e. character
-- mode with code page 65001 (UTF-8).
--
-- @FileDestinationDir: directory prefix for the output file; the file is written to
--                      <dir>\NewFolder<Id>.xml. Must not be NULL.
-- Result sets: the generated query text (column SQLStr), followed by the bcp console
--              output returned by xp_cmdshell.
-- Errors: raises severity-16 errors when the directory is NULL or when xp_cmdshell
--         reports a failure.
--
-- to call procedure specify your own file path
-- EXEC [Audit_DBA].[dbo].[MyXMLTest] 'E:\NLP\GovwinIQ_Ontology\NewFolder'
CREATE PROCEDURE [dbo].[MyXMLTest]
    @FileDestinationDir VARCHAR(2000)
AS
BEGIN
    SET QUOTED_IDENTIFIER ON

    -- A NULL directory would turn the whole command string into NULL; fail early instead.
    IF @FileDestinationDir IS NULL
    BEGIN
        RAISERROR(N'@FileDestinationDir must not be NULL.', 16, 1);
        RETURN 1;
    END

    -- (Re)create the scratch table holding the sample row.
    IF OBJECT_ID(N'InputTemp.dbo.XMLTest', N'U') IS NOT NULL
        DROP TABLE InputTemp.dbo.XMLTest;

    CREATE TABLE InputTemp.dbo.XMLTest
    (
        [Id]        INT          NOT NULL,
        [FirstName] VARCHAR(100) NOT NULL,
        [LastName]  VARCHAR(100) NOT NULL,
        [Address]   VARCHAR(100) NOT NULL
    );

    INSERT INTO InputTemp.dbo.XMLTest ([Id], [FirstName], [LastName], [Address])
    VALUES (12, 'Zhuk', 'Termik', '123 Gam Str, Boston, NY');
    --SELECT * FROM InputTemp.dbo.XMLTest

    -- xp_cmdshell accepts at most NVARCHAR(4000), so the command variables stay bounded.
    DECLARE @FilePath VARCHAR(4000),
            @SQLStr   NVARCHAR(4000),
            @Cmd      NVARCHAR(4000),
            @Ret      INT,
            @Id       INT,
            @IdText   VARCHAR(11);

    SET @Id = 12;
    -- CAST yields the plain digits. STR() would return a space-padded string of width 10
    -- (and asterisks for values that do not fit), which then had to be trimmed and was
    -- compared to the INT column as a quoted string.
    SET @IdText = CAST(@Id AS VARCHAR(11));

    -- Query executed by bcp. Single quotes are used inside the XML declaration because
    -- the whole query is wrapped in double quotes on the bcp command line.
    SET @SQLStr =
        N'SELECT N''<?xml version=''''1.0'''' encoding=''''UTF-8''''?>'' + (SELECT CAST((SELECT [Id], [FirstName], [LastName], [Address] FROM InputTemp.dbo.XMLTest AS Body WHERE Id = '
        + @IdText
        + N' FOR XML AUTO, ELEMENTS) AS NVARCHAR(MAX)))';

    -- Echo the generated query for inspection.
    SELECT @SQLStr AS SQLStr;

    SET @FilePath = @FileDestinationDir + '\NewFolder' + @IdText + '.xml';

    -- NOTE: the path is passed unquoted, so a directory containing spaces will not work.
    SET @Cmd = N' bcp " ' + @SQLStr + N'" queryout ' + @FilePath
             + N' -c -C65001 -r "" -T -S ' + @@SERVERNAME;

    EXEC @Ret = master.dbo.xp_cmdshell @Cmd;

    -- Always remove the scratch table, even when the export failed.
    IF OBJECT_ID(N'InputTemp.dbo.XMLTest', N'U') IS NOT NULL
        DROP TABLE InputTemp.dbo.XMLTest;

    -- xp_cmdshell returns 0 on success and 1 on failure; do not let a failed export pass silently.
    IF @Ret <> 0
    BEGIN
        RAISERROR(N'bcp export to %s failed (xp_cmdshell returned %d).', 16, 1, @FilePath, @Ret);
        RETURN @Ret;
    END
END
GO
我有一个查询,它会生成 XML 文件(文件头为 <?xml version="1.0"?>)
并将它们上传到 FTP。
我需要将编码切换为 UTF-8,如下所示:
<?xml version="1.0" encoding="utf-8"?>
我可以在文本编辑器中手动完成,但无法在 SQL Server 中实现。
我也阅读了下面这篇文章,但它没有帮我找到解决方案。
https://docs.microsoft.com/en-us/sql/relational-databases/xml/create-instances-of-xml-data
我的代码:
-- Batch preamble: select the database and set the session options in effect when the procedure is created.
USE [Audit_DBA]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- to call the procedure use the code below but assign your own path:
-- EXEC [Audit_DBA].[dbo].[StateAndLocalAward_ToXML] 'C:\NLP\Test\NewFolder'
-- NOTE(review): the example call above names StateAndLocalAward_ToXML, while the procedure created below is StateAndLocalAward_ToXML_VC.
-- Purpose: stage award rows in InputTemp.dbo.StateAndLocalAward_output, then iterate over them
-- with a cursor and export one XML file per stateContractId through bcp / xp_cmdshell.
-- @FileDestinationDir: directory prefix used to build each output file path.
CREATE PROCEDURE [dbo].[StateAndLocalAward_ToXML_VC]
@FileDestinationDir VARCHAR(2000)
AS
-- Working variables: output file path, per-row export query text, shell command line, xp_cmdshell return code.
-- NOTE(review): @Cmd is VARCHAR(1000) but embeds @SQLStr (up to 8000 chars) plus the path; a long command would be truncated.
DECLARE @FilePath VARCHAR(4000)
DECLARE @SQLStr VARCHAR(8000),
@Cmd VARCHAR(1000),
@Ret INT
-- Drop the staging table if it already exists so SELECT ... INTO below can recreate it.
IF EXISTS (SELECT * FROM InputTemp.SYS.TABLES WHERE NAME = 'StateAndLocalAward_output')
DROP TABLE InputTemp.dbo.StateAndLocalAward_output;
-- Collect award rows together with vendor, offering and NIGP details.
-- NOTE(review): TOP 200 has no ORDER BY, so which 200 rows are kept is not defined.
;WITH CTE_StateAndLocalAward_output AS
(
SELECT TOP 200
sc.stateContractId,
ca.OnviaId AS [Reference], --AS OnviaID,
ca.contractTitle AS [Title],
nigp.NIGPCodeTx AS [NIPGCode],
nigp.NIGPDescriptionLongTx AS [NIPGDescription],
ca.Description,
vm.parentName AS [VendorName],
offering.offeringTx AS [PrimaryOffering],
ca.StartDate,
ca.AwardDt
FROM
ISCore.dbo.StateContract sc WITH (NOLOCK)
INNER JOIN
ISCore.dbo.ContractAward ca WITH (NOLOCK) ON sc.contractAwardId = ca.contractAwardID
INNER JOIN
ISLibrary.dbo.VendorMaster vm WITH (NOLOCK) ON ca.vendorId = vm.vendorID
-- NOTE(review): no column of cp is selected or filtered on; this join can only multiply rows.
LEFT OUTER JOIN
ISCore.dbo.CompanyProfile cp WITH (NOLOCK) ON ca.vendorId = cp.vendorId
LEFT OUTER JOIN
ISCore.dbo.Offering offering WITH (NOLOCK) ON ca.offeringID = offering.offeringID
LEFT OUTER JOIN
ISCore.dbo.NIGPSrvc nigp WITH (NOLOCK) ON ca.NIGPCode = nigp.NIGPCodeTx
WHERE
-- NOTE(review): the value compared with sc.stateContractId is commented out, leaving "= AND ...";
-- the predicate is incomplete as written and the statement will not parse until it is restored or removed.
vm.showUnverified = 1 AND sc.stateContractId = -- 464482 stateContractId
AND StartDate >= DATEADD(month, -12, GETDATE())
)
-- Materialise the CTE into the staging table read by the cursor and by the bcp queries.
SELECT
*
INTO
InputTemp.dbo.StateAndLocalAward_output
FROM
CTE_StateAndLocalAward_output;
--select * from InputTemp.dbo.StateAndLocalAward_output
-- Cursor over every staged stateContractId; one export is attempted per fetched row.
DECLARE @StateContractId INT;
--DECLARE @xml XML;
DECLARE Cur_StateAndLocalAward_StateContractId CURSOR FOR
SELECT StateContractId
FROM inputtemp.dbo.StateAndLocalAward_output t1 WITH (NOLOCK)
OPEN Cur_StateAndLocalAward_StateContractId;
FETCH NEXT FROM Cur_StateAndLocalAward_StateContractId INTO @StateContractId -- @xml;
WHILE @@FETCH_STATUS = 0
BEGIN
-- Build the text of the query that bcp will run for the current contract.
-- STR() returns a space-padded string, so the quoted value relies on implicit conversion back to INT.
SELECT
@SQLStr = 'SELECT Body.[Reference], Body.[Title], Body.[NIPGCode], Body.[NIPGDescription], Body.[Description], Body.[VendorName], Body.[PrimaryOffering] FROM InputTemp.dbo.StateAndLocalAward_output AS Body where StateContractId = ''' + str(@StateContractId) + ''' FOR XML AUTO, ELEMENTS;'
--SELECT N'<?xml version="1.0" encoding="UTF-8"?>'
-- NOTE(review): this only returns a result set in which the XML declaration is prepended to the
-- query TEXT held in @SQLStr; @SQLStr itself is not modified, so the declaration never reaches the file.
SELECT
CAST((SELECT N'<?xml version="1.0" encoding="UTF-8"?>' + (@SQLSTr)) as varchar(8000) /*as XML*/) as SQLStr
-- select CAST((SELECT N'<?xml version="1.0" encoding="UTF-8"?>' +
-- (@SQLSTr)) as XML) as SQLStr
如果我不运行上面的代码,而是运行相同但转换为 XML 类型的代码,
我会得到一个错误:
Msg 9402, Level 16, State 1, Procedure StateAndLocalAward_ToXML, Line 86 [Batch Start Line 10]
XML parsing: line 1, character 38, unable to switch the encoding
-- Output path: <dir>\NewFolder<id>.xml; STR() pads with spaces, hence the LTRIM/RTRIM.
select @FilePath = @FileDestinationDir+'\NewFolder'+ltrim(rtrim(str(@StateContractId)))+'.xml'
-- bcp command line: run @SQLStr and write its result to @FilePath.
-- -w writes wide (Unicode) characters, which is not UTF-8; -T uses a trusted connection; -S names the server.
-- NOTE(review): @FilePath is not quoted, so a directory containing spaces would split the argument.
select @Cmd = ' bcp " ' + @SQLStr + '" queryout '+@FilePath+' -w -r "" -T -S ' +@@ServerName
-- NOTE(review): the return code stored in @Ret is never inspected afterwards.
exec @Ret = master.dbo.xp_cmdshell @Cmd
-- Advance to the next staged contract.
FETCH NEXT FROM Cur_StateAndLocalAward_StateContractId INTO @StateContractId -- @xml;
END
-- Release the cursor once every row has been processed.
CLOSE Cur_StateAndLocalAward_StateContractId;
DEALLOCATE Cur_StateAndLocalAward_StateContractId;
GO
有些事情要知道:
- 在 2016 版(以及带 SP2 的 2014 版)之前,SQL Server 不支持通过 BCP 导出 UTF-8。
- 不能将 xml-declaration(<?xml blah ?>)添加到 SQL Server 原生 XML 类型的变量或列中:要么失败(“……无法切换编码”),要么 xml-declaration 会被丢弃。
- 您可以在字符串层面把 xml-declaration 加到已转换为 NVARCHAR(MAX) 的 xml 前面。但是您无法再把它重新转换(re-cast)回 XML:要么失败,要么丢失该声明。
- 在内部,SQL Server 在任何情况下都以 UCS-2(非常接近 UTF-16)保存所有 XML。
- SQL Server 的 VARCHAR(CHAR)类型不是 utf-8,而是扩展 ASCII(具体取决于 COLLATION)。
- 在字符串层面,您可以在 xml-declaration 中写入任何内容(正如您可以构造出看起来像 XML、但并非 well-formed 的东西)。它只是一个未经检查的字符串。
- xml-declaration 中声明的编码只有在数据写入磁盘或作为字节流处理时才重要,它用于标明文件的实际编码。
- 您可以写 encoding="x" 却用编码 y 存储文件——但您不应该这样做。
- 当字符串是 VARCHAR 时,SQL Server 可以把带有 utf-8 声明的字符串转换为 XML;当字符串是 NVARCHAR 时,可以转换带有 utf-16 声明的字符串,但两者不能交叉使用(请阅读这个相关回答)。其他编码很可能导致“无法切换编码”错误。
关于您的代码
- 您应该将 @SQLStr 和 @cmd 更改为 NVARCHAR(MAX),否则遇到非纯拉丁字符时可能会出问题。
- 当您使用 CURSOR 时,应当用语句的结果填充一个 XML 类型的变量,将其转换为 NVARCHAR(MAX),再把声明加到该字符串前面。不要把结果再转换回 XML。
- 请阅读 BCP 文档。其中说明 -w 会以 unicode(wide)格式写出,这不是 utf-8(您在声明中写的内容在这里不起作用)。
提示:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Demo: export one row of a scratch table as an XML file carrying a UTF-8 declaration.
--
-- The XML declaration is concatenated on string level (the result of FOR XML is cast to
-- NVARCHAR(MAX) first) and the file is written by bcp with -c -C65001, i.e. character
-- mode with code page 65001 (UTF-8).
--
-- @FileDestinationDir: directory prefix for the output file; the file is written to
--                      <dir>\NewFolder<Id>.xml. Must not be NULL.
-- Result sets: the generated query text (column SQLStr), followed by the bcp console
--              output returned by xp_cmdshell.
-- Errors: raises severity-16 errors when the directory is NULL or when xp_cmdshell
--         reports a failure.
--
-- to call procedure specify your own file path
-- EXEC [Audit_DBA].[dbo].[MyXMLTest] 'E:\NLP\GovwinIQ_Ontology\NewFolder'
CREATE PROCEDURE [dbo].[MyXMLTest]
    @FileDestinationDir VARCHAR(2000)
AS
BEGIN
    SET QUOTED_IDENTIFIER ON

    -- A NULL directory would turn the whole command string into NULL; fail early instead.
    IF @FileDestinationDir IS NULL
    BEGIN
        RAISERROR(N'@FileDestinationDir must not be NULL.', 16, 1);
        RETURN 1;
    END

    -- (Re)create the scratch table holding the sample row.
    IF OBJECT_ID(N'InputTemp.dbo.XMLTest', N'U') IS NOT NULL
        DROP TABLE InputTemp.dbo.XMLTest;

    CREATE TABLE InputTemp.dbo.XMLTest
    (
        [Id]        INT          NOT NULL,
        [FirstName] VARCHAR(100) NOT NULL,
        [LastName]  VARCHAR(100) NOT NULL,
        [Address]   VARCHAR(100) NOT NULL
    );

    INSERT INTO InputTemp.dbo.XMLTest ([Id], [FirstName], [LastName], [Address])
    VALUES (12, 'Zhuk', 'Termik', '123 Gam Str, Boston, NY');
    --SELECT * FROM InputTemp.dbo.XMLTest

    -- xp_cmdshell accepts at most NVARCHAR(4000), so the command variables stay bounded.
    DECLARE @FilePath VARCHAR(4000),
            @SQLStr   NVARCHAR(4000),
            @Cmd      NVARCHAR(4000),
            @Ret      INT,
            @Id       INT,
            @IdText   VARCHAR(11);

    SET @Id = 12;
    -- CAST yields the plain digits. STR() would return a space-padded string of width 10
    -- (and asterisks for values that do not fit), which then had to be trimmed and was
    -- compared to the INT column as a quoted string.
    SET @IdText = CAST(@Id AS VARCHAR(11));

    -- Query executed by bcp. Single quotes are used inside the XML declaration because
    -- the whole query is wrapped in double quotes on the bcp command line.
    SET @SQLStr =
        N'SELECT N''<?xml version=''''1.0'''' encoding=''''UTF-8''''?>'' + (SELECT CAST((SELECT [Id], [FirstName], [LastName], [Address] FROM InputTemp.dbo.XMLTest AS Body WHERE Id = '
        + @IdText
        + N' FOR XML AUTO, ELEMENTS) AS NVARCHAR(MAX)))';

    -- Echo the generated query for inspection.
    SELECT @SQLStr AS SQLStr;

    SET @FilePath = @FileDestinationDir + '\NewFolder' + @IdText + '.xml';

    -- NOTE: the path is passed unquoted, so a directory containing spaces will not work.
    SET @Cmd = N' bcp " ' + @SQLStr + N'" queryout ' + @FilePath
             + N' -c -C65001 -r "" -T -S ' + @@SERVERNAME;

    EXEC @Ret = master.dbo.xp_cmdshell @Cmd;

    -- Always remove the scratch table, even when the export failed.
    IF OBJECT_ID(N'InputTemp.dbo.XMLTest', N'U') IS NOT NULL
        DROP TABLE InputTemp.dbo.XMLTest;

    -- xp_cmdshell returns 0 on success and 1 on failure; do not let a failed export pass silently.
    IF @Ret <> 0
    BEGIN
        RAISERROR(N'bcp export to %s failed (xp_cmdshell returned %d).', 16, 1, @FilePath, @Ret);
        RETURN @Ret;
    END
END
GO