在将 XML 数据存储到 SQL 的过程中,在 SQL 中使用 OPENXML 查询 XML 文件
Querying XML file with OPENXML in SQL in the process of storing XML data to SQL
我正在处理 IRS Form 990 税务文件 https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml ,想使用 SQL 的 OPENXML 创建一张包含所有元素、属性及其对应值的表。我先写了下面这个查询,只是想看看能否得到预期的结果,但只得到了一张空表。
我还尝试用在线工具生成 XPath 引用或文档的 XML 树,以便识别这个很长的 XML 文件中的元素和属性。请推荐一个简单的工具,能够方便地列出所有元素和属性,因为我认为问题出在 XPath 引用上。
这是我的代码
-- Asker's script: stage the XML file in a table, then try to shred it with OPENXML.
-- Step 1: create a table in SQL Server to hold the XML document.
-- Sample XML: https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
USE [IRS-900]
GO
CREATE TABLE [200931393493000150_public] (
Id INT IDENTITY PRIMARY KEY
,XMLData XML
,LoadedDateTime DATETIME
)
-- Step 2: read the file as one binary value, convert it to XML and record the load time.
-- NOTE(review): the BULK path below looks truncated (the file name used elsewhere
-- starts with "2009..."); confirm the real path before running.
INSERT INTO [200931393493000150_public] (
XMLData
,LoadedDateTime
)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn
,GETDATE()
FROM OPENROWSET(BULK 'C:\Users0931393493000150_public.xml', SINGLE_BLOB) AS x;
-- Quick check of the staged row (left disabled by the asker):
--select * from [dbo].[200931393493000150_public]
-- Step 3: load the same file a second time into an XML variable for OPENXML.
DECLARE @x XML -- holds the file content as an XML value
SELECT @x = p
FROM
-- OPENROWSET exposes the file content as a single column, aliased here as p
openrowset(BULK 'C:\Users0931393493000150_public.xml', single_blob) AS [200931393493000150_public](p)
DECLARE @hdoc INT -- document handle, filled in through the OUTPUT parameter below
-- NOTE(review): no namespace declaration is passed as a third argument; if the
-- document uses a default namespace, un-prefixed XPath steps will not match -- confirm.
EXEC sp_xml_preparedocument @hDoc OUTPUT
,@x
SELECT *
-- adding "INTO <table>" here would store the result in a new table on the fly
-- OPENXML arguments: handle, row pattern (XPath), flag (1 = attribute-centric, 2 = element-centric)
-- Earlier row patterns the asker tried:
--from openxml(@hdoc, '//*',1)
--from openxml(@hdoc, '/root//*',1)
--from openxml(@hdoc, '/root//*',1)
-- NOTE(review): XPath is case-sensitive; '/root/returnheader/...' probably matches no
-- nodes if the real elements are /Return/ReturnHeader/..., which would explain the
-- empty result -- confirm against the document.
FROM openxml(@hdoc, '/root/returnheader/Filer/USAddress', 2)
-- flag 1: map columns to attributes
-- flag 2: map columns to child elements
-- The WITH clause lists the columns to be retrieved.
WITH (
EIN CHAR(10) '/../../@EIN'
,-- explicit column pattern; '@EIN' addresses an attribute.
-- NOTE(review): the leading '/' makes this pattern absolute, and EIN may be an
-- element rather than an attribute in this document -- confirm.
AddressLine1 VARCHAR(50)
,AddressLine2 INT
,City VARCHAR(50)
,STATE VARCHAR(50)
,ZIPCode CHAR(5)
)
-- Release the document handle obtained from sp_xml_preparedocument.
EXEC sp_xml_removedocument @hDoc
GO
-- Observed result: an empty result set with the corresponding 6 columns.
Microsoft 专有的 OPENXML 及其配套的 sp_xml_preparedocument 和 sp_xml_removedocument 之所以保留,主要只是为了向后兼容已经过时的 SQL Server 2000。
从 SQL Server 2005 开始,最好使用 XQuery 方法 .nodes() 和 .value() 来实现您的需求。
SQL
-- Shred the Filer name/address block of an IRS Form 990 XML file into a table
-- variable using the XQuery methods .nodes() and .value().
DECLARE @targetTable TABLE
(
   ID INT IDENTITY PRIMARY KEY,
   EIN CHAR(10),
   AddressLine1 VARCHAR(50),
   AddressLine2 VARCHAR(50),
   City VARCHAR(50),
   State CHAR(2),
   ZIPCode CHAR(5)
);

-- Read the XML file straight from the file system as a one-row, one-column rowset.
-- The document's elements live in the IRS e-file namespace, so it is declared as the
-- default namespace; without it the un-prefixed XPath steps below would match nothing.
;WITH XMLNAMESPACES (DEFAULT 'http://www.irs.gov/efile')
, rs (xmldata) AS
(
   -- TRY_CAST yields NULL instead of raising an error when the file is not well-formed XML.
   SELECT TRY_CAST(BulkColumn AS XML) AS xmldata
   FROM OPENROWSET(BULK 'e:\Temp\200931393493000150_public.xml', SINGLE_BLOB) AS x
)
INSERT INTO @targetTable (EIN, AddressLine1, AddressLine2, City, State, ZIPCode)
SELECT c.value('(EIN/text())[1]', 'CHAR(10)') AS EIN
   , c.value('(USAddress/AddressLine1/text())[1]', 'VARCHAR(50)') AS AddressLine1
   , c.value('(USAddress/AddressLine2/text())[1]', 'VARCHAR(50)') AS AddressLine2
   -- extracted type matches the VARCHAR(50) target column
   , c.value('(USAddress/City/text())[1]', 'VARCHAR(50)') AS City
   , c.value('(USAddress/State/text())[1]', 'CHAR(2)') AS State
   , c.value('(USAddress/ZIPCode/text())[1]', 'CHAR(5)') AS ZIPCode
FROM rs AS tbl
   -- one output row per <Filer> element; XPath is case-sensitive
   CROSS APPLY tbl.xmldata.nodes('/Return/ReturnHeader/Filer') AS t(c);

-- test
SELECT ID, EIN, AddressLine1, AddressLine2, City, State, ZIPCode
FROM @targetTable;
输出
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| ID | EIN | AddressLine1 | AddressLine2 | City | State | ZIPCode |
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| 1 | 541599550 | c/o Piascik Assoc PC 4470 Cox Rd No | 250 | Glen Allen | VA | 23060 |
+----+------------+-------------------------------------+--------------+------------+-------+---------+
我正在处理 IRS Form 990 税务文件 https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml ,想使用 SQL 的 OPENXML 创建一张包含所有元素、属性及其对应值的表。我先写了下面这个查询,只是想看看能否得到预期的结果,但只得到了一张空表。
我还尝试用在线工具生成 XPath 引用或文档的 XML 树,以便识别这个很长的 XML 文件中的元素和属性。请推荐一个简单的工具,能够方便地列出所有元素和属性,因为我认为问题出在 XPath 引用上。
这是我的代码
-- Asker's script: stage the XML file in a table, then try to shred it with OPENXML.
-- Step 1: create a table in SQL Server to hold the XML document.
-- Sample XML: https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml
USE [IRS-900]
GO
CREATE TABLE [200931393493000150_public] (
Id INT IDENTITY PRIMARY KEY
,XMLData XML
,LoadedDateTime DATETIME
)
-- Step 2: read the file as one binary value, convert it to XML and record the load time.
-- NOTE(review): the BULK path below looks truncated (the file name used elsewhere
-- starts with "2009..."); confirm the real path before running.
INSERT INTO [200931393493000150_public] (
XMLData
,LoadedDateTime
)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn
,GETDATE()
FROM OPENROWSET(BULK 'C:\Users0931393493000150_public.xml', SINGLE_BLOB) AS x;
-- Quick check of the staged row (left disabled by the asker):
--select * from [dbo].[200931393493000150_public]
-- Step 3: load the same file a second time into an XML variable for OPENXML.
DECLARE @x XML -- holds the file content as an XML value
SELECT @x = p
FROM
-- OPENROWSET exposes the file content as a single column, aliased here as p
openrowset(BULK 'C:\Users0931393493000150_public.xml', single_blob) AS [200931393493000150_public](p)
DECLARE @hdoc INT -- document handle, filled in through the OUTPUT parameter below
-- NOTE(review): no namespace declaration is passed as a third argument; if the
-- document uses a default namespace, un-prefixed XPath steps will not match -- confirm.
EXEC sp_xml_preparedocument @hDoc OUTPUT
,@x
SELECT *
-- adding "INTO <table>" here would store the result in a new table on the fly
-- OPENXML arguments: handle, row pattern (XPath), flag (1 = attribute-centric, 2 = element-centric)
-- Earlier row patterns the asker tried:
--from openxml(@hdoc, '//*',1)
--from openxml(@hdoc, '/root//*',1)
--from openxml(@hdoc, '/root//*',1)
-- NOTE(review): XPath is case-sensitive; '/root/returnheader/...' probably matches no
-- nodes if the real elements are /Return/ReturnHeader/..., which would explain the
-- empty result -- confirm against the document.
FROM openxml(@hdoc, '/root/returnheader/Filer/USAddress', 2)
-- flag 1: map columns to attributes
-- flag 2: map columns to child elements
-- The WITH clause lists the columns to be retrieved.
WITH (
EIN CHAR(10) '/../../@EIN'
,-- explicit column pattern; '@EIN' addresses an attribute.
-- NOTE(review): the leading '/' makes this pattern absolute, and EIN may be an
-- element rather than an attribute in this document -- confirm.
AddressLine1 VARCHAR(50)
,AddressLine2 INT
,City VARCHAR(50)
,STATE VARCHAR(50)
,ZIPCode CHAR(5)
)
-- Release the document handle obtained from sp_xml_preparedocument.
EXEC sp_xml_removedocument @hDoc
GO
-- Observed result: an empty result set with the corresponding 6 columns.
Microsoft 专有的 OPENXML 及其配套的 sp_xml_preparedocument 和 sp_xml_removedocument 之所以保留,主要只是为了向后兼容已经过时的 SQL Server 2000。
从 SQL Server 2005 开始,最好使用 XQuery 方法 .nodes() 和 .value() 来实现您的需求。
SQL
-- Shred the Filer name/address block of an IRS Form 990 XML file into a table
-- variable using the XQuery methods .nodes() and .value().
DECLARE @targetTable TABLE
(
   ID INT IDENTITY PRIMARY KEY,
   EIN CHAR(10),
   AddressLine1 VARCHAR(50),
   AddressLine2 VARCHAR(50),
   City VARCHAR(50),
   State CHAR(2),
   ZIPCode CHAR(5)
);

-- Read the XML file straight from the file system as a one-row, one-column rowset.
-- The document's elements live in the IRS e-file namespace, so it is declared as the
-- default namespace; without it the un-prefixed XPath steps below would match nothing.
;WITH XMLNAMESPACES (DEFAULT 'http://www.irs.gov/efile')
, rs (xmldata) AS
(
   -- TRY_CAST yields NULL instead of raising an error when the file is not well-formed XML.
   SELECT TRY_CAST(BulkColumn AS XML) AS xmldata
   FROM OPENROWSET(BULK 'e:\Temp\200931393493000150_public.xml', SINGLE_BLOB) AS x
)
INSERT INTO @targetTable (EIN, AddressLine1, AddressLine2, City, State, ZIPCode)
SELECT c.value('(EIN/text())[1]', 'CHAR(10)') AS EIN
   , c.value('(USAddress/AddressLine1/text())[1]', 'VARCHAR(50)') AS AddressLine1
   , c.value('(USAddress/AddressLine2/text())[1]', 'VARCHAR(50)') AS AddressLine2
   -- extracted type matches the VARCHAR(50) target column
   , c.value('(USAddress/City/text())[1]', 'VARCHAR(50)') AS City
   , c.value('(USAddress/State/text())[1]', 'CHAR(2)') AS State
   , c.value('(USAddress/ZIPCode/text())[1]', 'CHAR(5)') AS ZIPCode
FROM rs AS tbl
   -- one output row per <Filer> element; XPath is case-sensitive
   CROSS APPLY tbl.xmldata.nodes('/Return/ReturnHeader/Filer') AS t(c);

-- test
SELECT ID, EIN, AddressLine1, AddressLine2, City, State, ZIPCode
FROM @targetTable;
输出
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| ID | EIN | AddressLine1 | AddressLine2 | City | State | ZIPCode |
+----+------------+-------------------------------------+--------------+------------+-------+---------+
| 1 | 541599550 | c/o Piascik Assoc PC 4470 Cox Rd No | 250 | Glen Allen | VA | 23060 |
+----+------------+-------------------------------------+--------------+------------+-------+---------+