在 Microsoft SQL Server 中一次性对大量 XML 文件使用 CONVERT 样式选项 2
Using Convert Style Option 2 for a large number of XML files at once in Microsoft SQL Server
我是这方面的新手,所以请多多包涵。我试图一次性将许多 XML 文件导入 Microsoft SQL Server。不幸的是,我收到一条错误消息,提示我需要使用带样式选项 2 的 CONVERT。我不知道如何对这么多文件执行此操作。
这是我的查询:
-- Staging table: one row per /rollcall-vote/vote-metadata node found in a loaded file.
-- The scalar columns hold the child element values of that node; sourceXML keeps the
-- whole parsed document the row came from.
CREATE TABLE [dbo].[staagingTable]
(
-- Loop counter value of the file the row was read from (inserted as a literal).
[Counter] INT NOT NULL,
[majority] [nvarchar](max) NULL,
[congress] [int] NULL,
[session] [nvarchar](max) NULL,
[chamber] [nvarchar](max) NULL,
[rollcall-num] [int] NULL,
[legis-num] [nvarchar](max) NULL,
[vote-question] [nvarchar](max) NULL,
[vote-type] [nvarchar](max) NULL,
[vote-result] [nvarchar](max) NULL,
[action-date] [nvarchar](max) NULL,
[action-time] [nvarchar](max) NULL,
[vote-desc] [nvarchar](max) NULL,
[sourceXML] [XML] NULL
);
GO
-- Try file numbers 1 through 799. For each number a dynamic batch is built and executed,
-- because the file path is spliced into the OPENROWSET(BULK ...) call as text.
DECLARE @Counter INT=1;
DECLARE @command VARCHAR(MAX);
WHILE @Counter<800
BEGIN
-- Build the per-file batch. STR(@Counter,3) right-aligns the number in 3 characters and
-- REPLACE turns the padding spaces into zeros, giving names such as roll001.xml.
-- The batch loads the file into an XML variable, then inserts one row per
-- vote-metadata node, with the counter value embedded as a literal.
-- NOTE(review): the assignment of BulkColumn to the XML variable is presumably where the
-- "use CONVERT with style option 2" error described in the question is raised, since the
-- sample file carries a DOCTYPE - confirm against the actual error output.
SET @command=
'
DECLARE @xml XML=
(
SELECT BulkColumn
FROM OPENROWSET (BULK ''C:\Users\Owner\Documents\congress\House votes4 congress 2015\Passage\roll' + REPLACE(STR(@Counter,3),' ','0') + '.xml'', SINGLE_BLOB) AS c
);
INSERT INTO dbo.staagingTable(Counter, majority, congress,[session], chamber, [rollcall-num], [legis-num], [vote-question], [vote-type], [vote-result], [action-date], [action-time], [vote-desc], [sourceXML])
SELECT
' + CAST(@Counter AS VARCHAR(10)) + ',
v.value(N''majority[1]'', N''nvarchar(max)''),
v.value(N''congress[1]'', N''int''),
v.value(N''session[1]'', N''nvarchar(max)''),
v.value(N''chamber[1]'', N''nvarchar(max)''),
v.value(N''rollcall-num[1]'', N''int''),
v.value(N''legis-num[1]'', N''nvarchar(max)''),
v.value(N''vote-question[1]'', N''nvarchar(max)''),
v.value(N''vote-type[1]'', N''nvarchar(max)''),
v.value(N''vote-result[1]'', N''nvarchar(max)''),
v.value(N''action-date[1]'', N''nvarchar(max)''),
v.value(N''action-time[1]'', N''nvarchar(max)''),
v.value(N''vote-desc[1]'', N''nvarchar(max)''),
@xml
FROM
@xml.nodes(N''/rollcall-vote/vote-metadata'') AS A(v);
';
-- Run the batch; on any error print the message and carry on with the next number,
-- so one failing file does not stop the loop.
BEGIN TRY
EXEC(@command);
END TRY
BEGIN CATCH
PRINT ERROR_MESSAGE()
END CATCH;
SET @Counter = @Counter + 1;
END
-- Show everything that was loaded, then remove the staging table in a separate batch.
SELECT * FROM dbo.staagingTable;
GO
DROP TABLE dbo.staagingTable;
我需要的是一种能够一次处理所有文件的解决方法。因文件不存在而无法批量加载的错误不是问题,因为那些文件确实不存在。在下图中,重要的错误旁边标有紫色符号。这类错误不止图中这些,总共有数百个,图中只是我滚动时看到的一部分。下面看到的两种错误是执行我的查询时仅有的两类错误。
这些文件都包含相同的列和相同的数据类型,只是列的值不同。下面是这类 XML 文档的一个精简版本。
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rollcall-vote PUBLIC "-//US Congress//DTDs/vote v1.0 20031119 //EN" "http://clerk.house.gov/evs/vote.dtd">
<?xml-stylesheet type="text/xsl" href="http://clerk.house.gov/evs/vote.xsl"?>
<rollcall-vote>
<vote-metadata>
<majority>R</majority>
<congress>114</congress>
<session>1st</session>
<chamber>U.S. House of Representatives</chamber>
<rollcall-num>6</rollcall-num>
<legis-num>H RES 5</legis-num>
<vote-question>On Agreeing to the Resolution</vote-question>
<vote-type>YEA-AND-NAY</vote-type>
<vote-result>Passed</vote-result>
<action-date>6-Jan-2015</action-date>
<action-time time-etz="17:30">5:30 PM</action-time>
<vote-desc>Adopting rules for the One Hundred Fourteenth Congress</vote-desc>
<vote-totals>
<totals-by-party-header>
<party-header>Party</party-header>
<yea-header>Yeas</yea-header>
<nay-header>Nays</nay-header>
<present-header>Answered “Present”</present-header>
<not-voting-header>Not Voting</not-voting-header>
</totals-by-party-header>
<totals-by-party>
<party>Republican</party>
<yea-total>234</yea-total>
<nay-total>4</nay-total>
<present-total>1</present-total>
<not-voting-total>3</not-voting-total>
</totals-by-party>
<totals-by-party>
<party>Democratic</party>
<yea-total>0</yea-total>
<nay-total>168</nay-total>
<present-total>0</present-total>
<not-voting-total>5</not-voting-total>
</totals-by-party>
<totals-by-party>
<party>Independent</party>
<yea-total>0</yea-total>
<nay-total>0</nay-total>
<present-total>0</present-total>
<not-voting-total>0</not-voting-total>
</totals-by-party>
<totals-by-vote>
<total-stub>Totals</total-stub>
<yea-total>234</yea-total>
<nay-total>172</nay-total>
<present-total>1</present-total>
<not-voting-total>8</not-voting-total>
</totals-by-vote>
</vote-totals>
</vote-metadata>
<vote-data>
<recorded-vote>
<legislator name-id="A000374" sort-field="Abraham" unaccented-name="Abraham" party="R" state="LA" role="legislator">Abraham</legislator>
<vote>Yea</vote>
</recorded-vote>
<recorded-vote>
<legislator name-id="A000370" sort-field="Adams" unaccented-name="Adams" party="D" state="NC" role="legislator">Adams</legislator>
<vote>Nay</vote>
</recorded-vote>
<recorded-vote>
<legislator name-id="A000055" sort-field="Aderholt" unaccented-name="Aderholt" party="R" state="AL" role="legislator">Aderholt</legislator>
<vote>Yea</vote>
</recorded-vote>
</vote-data>
</rollcall-vote>
只需将读取文件的部分改为:
-- Load the raw file into a plain string first so that everything in front of the root
-- element (XML declaration, DOCTYPE, stylesheet instruction) can be cut off before the
-- cast to XML; presumably the DOCTYPE is what the direct cast rejects.
-- NOTE(review): the bytes are read as VARCHAR, so non-ASCII characters in a UTF-8 file
-- may come out garbled - confirm with real data. CONVERT(XML, BulkColumn, 2) on the raw
-- bytes is the alternative the error message itself points to.
DECLARE @xmlString VARCHAR(MAX)=
(
SELECT BulkColumn
FROM OPENROWSET (BULK ''C:\Whosebug\xml' + REPLACE(STR(@Counter,3),' ','0') + '.xml'', SINGLE_BLOB) AS c
);
-- Position of the root start tag; 0 when the tag is not present in the file.
DECLARE @rootPos BIGINT=CHARINDEX(''<rollcall-vote>'',@xmlString);
-- Keep everything from the root tag to the end of the string. The full data length is
-- used as the length argument instead of a fixed number, so large files are not cut short.
-- When the tag is missing the string is left untouched and the cast below reports the error.
IF @rootPos>0
SET @xmlString=SUBSTRING(@xmlString,@rootPos,DATALENGTH(@xmlString));
DECLARE @xml XML=CAST(@xmlString AS XML);
这会先将文件读入一个普通的字符串变量,然后截掉 <rollcall-vote> 之前的所有内容。清理后的字符串即可转换为 XML。其余部分应该和之前一样工作。
我是这方面的新手,所以请多多包涵。我试图一次性将许多 XML 文件导入 Microsoft SQL Server。不幸的是,我收到一条错误消息,提示我需要使用带样式选项 2 的 CONVERT。我不知道如何对这么多文件执行此操作。
这是我的查询:
-- Staging table: one row per /rollcall-vote/vote-metadata node found in a loaded file.
-- The scalar columns hold the child element values of that node; sourceXML keeps the
-- whole parsed document the row came from.
CREATE TABLE [dbo].[staagingTable]
(
-- Loop counter value of the file the row was read from (inserted as a literal).
[Counter] INT NOT NULL,
[majority] [nvarchar](max) NULL,
[congress] [int] NULL,
[session] [nvarchar](max) NULL,
[chamber] [nvarchar](max) NULL,
[rollcall-num] [int] NULL,
[legis-num] [nvarchar](max) NULL,
[vote-question] [nvarchar](max) NULL,
[vote-type] [nvarchar](max) NULL,
[vote-result] [nvarchar](max) NULL,
[action-date] [nvarchar](max) NULL,
[action-time] [nvarchar](max) NULL,
[vote-desc] [nvarchar](max) NULL,
[sourceXML] [XML] NULL
);
GO
-- Try file numbers 1 through 799. For each number a dynamic batch is built and executed,
-- because the file path is spliced into the OPENROWSET(BULK ...) call as text.
DECLARE @Counter INT=1;
DECLARE @command VARCHAR(MAX);
WHILE @Counter<800
BEGIN
-- Build the per-file batch. STR(@Counter,3) right-aligns the number in 3 characters and
-- REPLACE turns the padding spaces into zeros, giving names such as roll001.xml.
-- The batch loads the file into an XML variable, then inserts one row per
-- vote-metadata node, with the counter value embedded as a literal.
-- NOTE(review): the assignment of BulkColumn to the XML variable is presumably where the
-- "use CONVERT with style option 2" error described in the question is raised, since the
-- sample file carries a DOCTYPE - confirm against the actual error output.
SET @command=
'
DECLARE @xml XML=
(
SELECT BulkColumn
FROM OPENROWSET (BULK ''C:\Users\Owner\Documents\congress\House votes4 congress 2015\Passage\roll' + REPLACE(STR(@Counter,3),' ','0') + '.xml'', SINGLE_BLOB) AS c
);
INSERT INTO dbo.staagingTable(Counter, majority, congress,[session], chamber, [rollcall-num], [legis-num], [vote-question], [vote-type], [vote-result], [action-date], [action-time], [vote-desc], [sourceXML])
SELECT
' + CAST(@Counter AS VARCHAR(10)) + ',
v.value(N''majority[1]'', N''nvarchar(max)''),
v.value(N''congress[1]'', N''int''),
v.value(N''session[1]'', N''nvarchar(max)''),
v.value(N''chamber[1]'', N''nvarchar(max)''),
v.value(N''rollcall-num[1]'', N''int''),
v.value(N''legis-num[1]'', N''nvarchar(max)''),
v.value(N''vote-question[1]'', N''nvarchar(max)''),
v.value(N''vote-type[1]'', N''nvarchar(max)''),
v.value(N''vote-result[1]'', N''nvarchar(max)''),
v.value(N''action-date[1]'', N''nvarchar(max)''),
v.value(N''action-time[1]'', N''nvarchar(max)''),
v.value(N''vote-desc[1]'', N''nvarchar(max)''),
@xml
FROM
@xml.nodes(N''/rollcall-vote/vote-metadata'') AS A(v);
';
-- Run the batch; on any error print the message and carry on with the next number,
-- so one failing file does not stop the loop.
BEGIN TRY
EXEC(@command);
END TRY
BEGIN CATCH
PRINT ERROR_MESSAGE()
END CATCH;
SET @Counter = @Counter + 1;
END
-- Show everything that was loaded, then remove the staging table in a separate batch.
SELECT * FROM dbo.staagingTable;
GO
DROP TABLE dbo.staagingTable;
我需要的是一种能够一次处理所有文件的解决方法。因文件不存在而无法批量加载的错误不是问题,因为那些文件确实不存在。在下图中,重要的错误旁边标有紫色符号。这类错误不止图中这些,总共有数百个,图中只是我滚动时看到的一部分。下面看到的两种错误是执行我的查询时仅有的两类错误。
这些文件都包含相同的列和相同的数据类型,只是列的值不同。下面是这类 XML 文档的一个精简版本。
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rollcall-vote PUBLIC "-//US Congress//DTDs/vote v1.0 20031119 //EN" "http://clerk.house.gov/evs/vote.dtd">
<?xml-stylesheet type="text/xsl" href="http://clerk.house.gov/evs/vote.xsl"?>
<rollcall-vote>
<vote-metadata>
<majority>R</majority>
<congress>114</congress>
<session>1st</session>
<chamber>U.S. House of Representatives</chamber>
<rollcall-num>6</rollcall-num>
<legis-num>H RES 5</legis-num>
<vote-question>On Agreeing to the Resolution</vote-question>
<vote-type>YEA-AND-NAY</vote-type>
<vote-result>Passed</vote-result>
<action-date>6-Jan-2015</action-date>
<action-time time-etz="17:30">5:30 PM</action-time>
<vote-desc>Adopting rules for the One Hundred Fourteenth Congress</vote-desc>
<vote-totals>
<totals-by-party-header>
<party-header>Party</party-header>
<yea-header>Yeas</yea-header>
<nay-header>Nays</nay-header>
<present-header>Answered “Present”</present-header>
<not-voting-header>Not Voting</not-voting-header>
</totals-by-party-header>
<totals-by-party>
<party>Republican</party>
<yea-total>234</yea-total>
<nay-total>4</nay-total>
<present-total>1</present-total>
<not-voting-total>3</not-voting-total>
</totals-by-party>
<totals-by-party>
<party>Democratic</party>
<yea-total>0</yea-total>
<nay-total>168</nay-total>
<present-total>0</present-total>
<not-voting-total>5</not-voting-total>
</totals-by-party>
<totals-by-party>
<party>Independent</party>
<yea-total>0</yea-total>
<nay-total>0</nay-total>
<present-total>0</present-total>
<not-voting-total>0</not-voting-total>
</totals-by-party>
<totals-by-vote>
<total-stub>Totals</total-stub>
<yea-total>234</yea-total>
<nay-total>172</nay-total>
<present-total>1</present-total>
<not-voting-total>8</not-voting-total>
</totals-by-vote>
</vote-totals>
</vote-metadata>
<vote-data>
<recorded-vote>
<legislator name-id="A000374" sort-field="Abraham" unaccented-name="Abraham" party="R" state="LA" role="legislator">Abraham</legislator>
<vote>Yea</vote>
</recorded-vote>
<recorded-vote>
<legislator name-id="A000370" sort-field="Adams" unaccented-name="Adams" party="D" state="NC" role="legislator">Adams</legislator>
<vote>Nay</vote>
</recorded-vote>
<recorded-vote>
<legislator name-id="A000055" sort-field="Aderholt" unaccented-name="Aderholt" party="R" state="AL" role="legislator">Aderholt</legislator>
<vote>Yea</vote>
</recorded-vote>
</vote-data>
</rollcall-vote>
只需将读取文件的部分改为:
-- Load the raw file into a plain string first so that everything in front of the root
-- element (XML declaration, DOCTYPE, stylesheet instruction) can be cut off before the
-- cast to XML; presumably the DOCTYPE is what the direct cast rejects.
-- NOTE(review): the bytes are read as VARCHAR, so non-ASCII characters in a UTF-8 file
-- may come out garbled - confirm with real data. CONVERT(XML, BulkColumn, 2) on the raw
-- bytes is the alternative the error message itself points to.
DECLARE @xmlString VARCHAR(MAX)=
(
SELECT BulkColumn
FROM OPENROWSET (BULK ''C:\Whosebug\xml' + REPLACE(STR(@Counter,3),' ','0') + '.xml'', SINGLE_BLOB) AS c
);
-- Position of the root start tag; 0 when the tag is not present in the file.
DECLARE @rootPos BIGINT=CHARINDEX(''<rollcall-vote>'',@xmlString);
-- Keep everything from the root tag to the end of the string. The full data length is
-- used as the length argument instead of a fixed number, so large files are not cut short.
-- When the tag is missing the string is left untouched and the cast below reports the error.
IF @rootPos>0
SET @xmlString=SUBSTRING(@xmlString,@rootPos,DATALENGTH(@xmlString));
DECLARE @xml XML=CAST(@xmlString AS XML);
这会先将文件读入一个普通的字符串变量,然后截掉 <rollcall-vote> 之前的所有内容。清理后的字符串即可转换为 XML。其余部分应该和之前一样工作。