解析 python 中的 XML 并将其添加到数据库
Parse XML in python and add it to database
我正在尝试编写一个 Python 脚本来遍历行并将它们放入我的数据库中
这是我的xml的结构:
<Root>
-<SvcNf>
-<PersonNf>
-<PersonList>
-<Row>
<SysName>MI6</SysName>
<ServerDt>2016-10-28 03:00:12 +03:00</ServerDt>
<UID>9457A55E17341AA7ASDEDS057A8BFFF3</UID>
<PersID>007</PersID>
<Emp_name>James Bond</Emp_name>
<EventID>25</EventID>
<EventTXT>Drinking alcohol</EventTXT>
<CauseEventID>03</CauseEventID>
<CauseEventTXT>Martini with vodka</CauseEventTXT>
<EventBegda>2017-10-18</EventBegda>
<EventEndda>2017-10-18</EventEndda>
<AccrualsSum>171.0</AccrualsSum>
<AccrualsProz>0.0</AccrualsProz>
<AccrualsName>Chinees_</AccrualsName>
<OrderNum>P-336</OrderNum>
<Perg>0</Perg>
<Perk>15</Perk>
<Awart/>
</Row>
-<Row>
.....
</Row>
<Row/>
</PersonList>
</PersonNf>
</SvcNf>
</Root>
所以,当我使用这段代码解析 XML:
# Parse the XML file and locate every <Row> element, at any depth.
root = ET.parse(events)
nodes = root.findall('.//Row')

# Collect the text of every child of every <Row> into ONE flat list;
# the boundaries between rows are not kept.
ResultSet_Py_List = []
for row in nodes:
    ResultSet_Py_List.extend(field.text for field in row)
我希望每一行(每个 <Row>)的值各自组成一个列表,像这样:
['MI6', '2016-10-28 03:00:12 +03:00', '9457A55E17341AA7ASDEDS057A8BFFF3' 等]
[Row2]
[Row3]
现在,它是这样的:
[Row1,Row2,Row3]
[编辑]
所有非 ASCII 字符都被输出成 &#1060; 这样的数字字符引用。
解决办法:
tostring(doc.getroot(), 'unicode')
[编辑]
我使用了第二个示例,但现在出现 Oracle 错误:ORA-01704:字符串文字太长。
考虑使用 Oracle 的 XML 处理程序并避免任何嵌套循环:
-- Shred every <Row> of the XML document into one relational row.
-- NOTE: <AccrualsName> exists in the XML but is not projected below.
SELECT e.SysName, e.ServerDt, e."UID", e.PersID, e.Emp_name, e.EventID, e.EventTXT,
e.CauseEventID, e.CauseEventTXT, e.EventBegda, e.EventEndda,
e.AccrualsSum, e.AccrualsProz, e.OrderNum, e.Perg, e.Perk, e.Awart
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE('<Root>
<SvcNf>
<PersonNf>
<PersonList>
<Row>
<SysName>MI6</SysName>
<ServerDt>2016-10-28 03:00:12 +03:00</ServerDt>
<UID>9457A55E17341AA7ASDEDS057A8BFFF3</UID>
<PersID>007</PersID>
<Emp_name>James Bond</Emp_name>
<EventID>25</EventID>
<EventTXT>Drinking alcohol</EventTXT>
<CauseEventID>03</CauseEventID>
<CauseEventTXT>Martini with vodka</CauseEventTXT>
<EventBegda>2017-10-18</EventBegda>
<EventEndda>2017-10-18</EventEndda>
<AccrualsSum>171.0</AccrualsSum>
<AccrualsProz>0.0</AccrualsProz>
<AccrualsName>Chinees_</AccrualsName>
<OrderNum>P-336</OrderNum>
<Perg>0</Perg>
<Perk>15</Perk>
<Awart/>
</Row>
</PersonList>
</PersonNf>
</SvcNf>
</Root>')
COLUMNS
SysName VARCHAR2(25) PATH 'SysName',
-- Widened: the sample timestamp with its UTC offset is 26 characters long.
ServerDt VARCHAR2(30) PATH 'ServerDt',
-- Widened: the sample UID is 32 characters long.
"UID" VARCHAR2(32) PATH 'UID',
PersID VARCHAR2(25) PATH 'PersID',
Emp_name VARCHAR2(25) PATH 'Emp_name',
EventID NUMBER PATH 'EventID',
EventTXT VARCHAR2(25) PATH 'EventTXT',
CauseEventID VARCHAR2(25) PATH 'CauseEventID',
CauseEventTXT VARCHAR2(25) PATH 'CauseEventTXT',
EventBegda VARCHAR2(25) PATH 'EventBegda',
EventEndda VARCHAR2(25) PATH 'EventEndda',
AccrualsSum VARCHAR2(25) PATH 'AccrualsSum',
AccrualsProz VARCHAR2(25) PATH 'AccrualsProz',
OrderNum VARCHAR2(25) PATH 'OrderNum',
Perg NUMBER PATH 'Perg',
Perk NUMBER PATH 'Perk',
Awart VARCHAR2(25) PATH 'Awart') AS e;
Rextester 演示
要在 Python 数据库游标中实施,例如使用 cx_Oracle 进行追加查询:
# Append query: Oracle shreds the XML document that is passed in through
# the :i_param bind variable and inserts one table row per <Row> element.
sql = """INSERT INTO mytable (Col1, Col2, Col3, ...)
SELECT ...same as above...
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE(:i_param)
COLUMNS
...same as above...) AS e
"""
# PARSE XML FILE
doc = ET.parse(events)
# encoding='unicode' returns a str directly and keeps non-ASCII text as real
# characters; the default (us-ascii) rewrites them as &#NNNN; references.
xmlstr = ET.tostring(doc.getroot(), encoding='unicode')
# PASS XML STRING AS PARAMETER
cur.execute(sql, {'i_param': xmlstr})
dbconn.commit()
对于超过 Oracle 在 SQL 中的 4,000 字节的非常大的 XML 内容,请使用 PL/SQL,varchar2 限制为 32,767 字节,如 @NickS 所示:
# Anonymous PL/SQL block: the XML is first copied into a varchar2(32767)
# variable and then shredded by XMLTABLE. Every PL/SQL statement must be
# terminated with ';', including the INSERT that precedes the commit.
sql = """
DECLARE
xml_value varchar2(32767);
BEGIN
xml_value := :i_param;
INSERT INTO mytable (Col1, Col2, Col3, ...)
SELECT ...same as above...
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE(xml_value)
COLUMNS
...same as above...) AS e;
commit;
END;
"""
# PARSE XML FILE
doc = ET.parse(events)
# encoding='unicode' returns a str directly and keeps non-ASCII text as real
# characters; the default (us-ascii) rewrites them as &#NNNN; references.
xmlstr = ET.tostring(doc.getroot(), encoding='unicode')
# PASS XML STRING AS PARAMETER
cur.execute(sql, {'i_param': xmlstr})
dbconn.commit()
要使用原始设置,只需添加一个 inner 列表,然后附加到更大的 ResultSet_Py_List,其中每个 <Row>
保存在嵌套列表中用于数据库插入。
import xml.etree.ElementTree as et
doc = et.parse('Source.xml')
nodes = doc.findall('.//Row')

# Build one inner list per <Row>, holding the text of each child element
# (None for an empty element), so every inner list maps to one table row.
ResultSet_Py_List = []
for node in nodes:
    if len(node) == 0:
        # An empty <Row/> has no values to bind to the INSERT placeholders.
        continue
    inner = []
    for child in node:
        inner.append(child.text)
    ResultSet_Py_List.append(inner)

# ALTERNATIVELY WITH NESTED LIST COMPREHENSION
ResultSet_Py_List = [
    [child.text for child in node]
    for node in nodes
    if len(node)
]
然后运行 cx_Oracle 的 executemany,其中参数占位符的数量要与每个内层列表的长度相等:
# Prepare the INSERT once, then execute it for every inner list of
# ResultSet_Py_List (None = reuse the statement that was just prepared).
insert_sql = "INSERT INTO myTable (Col1, Col2, Col3, ...) VALUES (:1, :2, :3, ...)"
cursor.prepare(insert_sql)
cursor.executemany(None, ResultSet_Py_List)
db.commit()
也许这对某些人有帮助。这是一个初学者的回答 :)
我处理的数据包含 Unicode(俄文字符),但不知道为什么 XML 无法正常解析(原文此处缺少所用的工具/函数名)。
当我尝试另一种做法时(原文此处缺少具体操作),我的内核和 Python 都崩溃了。
所以,我先把 XML 转成 CSV,再用 cx_Oracle 把它插入我的数据库:
import pandas as pd
import xml.etree.ElementTree as ET
import cx_Oracle
import csv
# Parse the XML file and locate every <Row> element, at any depth.
doc = ET.parse(events)
nodes = doc.findall('.//Row')
# One list per column; the number passed to range() is the column count.
# The names on the left must match the names appended to in the loop below.
(SysName, ServerDt, UIDS, PersID, ....T) = (list() for i in range(18))
for node in nodes:
    for elem in node.findall("*"):
        try:
            # A tag matches at most one column, hence elif.
            if elem.tag == "SysName":
                SysName.append(elem.text)
            elif elem.tag == "UID":
                UIDS.append(elem.text)
            elif elem.tag == "ServerDt":
                ServerDt.append(elem.text)
            elif elem.tag == "PersID":
                PersID.append(elem.text)
            # Remaining columns are elided here and follow the same pattern.
            ...
        except AttributeError:
            print(elem.tag)
# Error seen during development: 'NoneType' object has no attribute 'text'
#ER arrays must all be same length
s1 = pd.Series(SysName)
s2 = pd.Series(ServerDt)
s3 = pd.Series(UIDS)
.....
#TEMPORY CSV. DONT KNOW WHY, BUT COLUMNS ARE MIXED WITH THIS ITERATION
df= pd.DataFrame({"SysName": s1,
"ServerDt": s2,
"UIDS": s3,
"PersID": s4,
"Emp_name": s5,
"EVENTID": s6,
...})
file_name = 'events.csv'
df.to_csv(file_name, sep='\t')
print("File name: ", my_file.name, "created")
ResultSet_Py_List = []
ora_conn = cx_Oracle.connect('login/pass@TNSNAME')
ora_cursor = ora_conn.cursor()
# Read the temporary CSV back with the csv module. The author noted that
# pandas failed here with: 'utf8' codec can't decode byte 0xe9 in position 10.
# to_csv writes UTF-8 by default, so read it back as UTF-8 explicitly instead
# of relying on the locale; the with-block closes the file.
with open(file_name, 'r', newline='', encoding='utf-8') as my_file:
    reader = csv.reader(my_file, dialect='excel', delimiter='\t')
    # Skip the header line; every remaining line becomes one row to insert.
    next(reader, None)
    ResultSet_Py_List.extend(reader)
print(str(len(ResultSet_Py_List)) + ' Records from Source')
# Empty the target table and commit through the connection object.
sql_del = """delete from HR.EVENTS_TST"""
ora_cursor.execute(sql_del)
ora_conn.commit()
print("Table is clean")
# The author noted that the CSV columns are not in the XML order; the
# column list below presumably follows the CSV order -- verify before use.
sql_insert = """
INSERT INTO HR.EVENTS_TST (ROW_NUM
,ACCRUALSNAME
, ACCRUALSPROZ
, ACCRUALSSUM
, AWART
, CAUSEEVENTID
...)
VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14
,:15,:16,:17,:18, :19)
"""
# Prepare once, then bind every CSV row (None = reuse prepared statement).
ora_cursor.prepare(sql_insert)
ora_cursor.executemany(None, ResultSet_Py_List)
# A single commit is enough to make the inserted rows permanent.
ora_conn.commit()
print("Data imported")
我正在尝试编写一个 Python 脚本来遍历行并将它们放入我的数据库中
这是我的xml的结构:
<Root>
-<SvcNf>
-<PersonNf>
-<PersonList>
-<Row>
<SysName>MI6</SysName>
<ServerDt>2016-10-28 03:00:12 +03:00</ServerDt>
<UID>9457A55E17341AA7ASDEDS057A8BFFF3</UID>
<PersID>007</PersID>
<Emp_name>James Bond</Emp_name>
<EventID>25</EventID>
<EventTXT>Drinking alcohol</EventTXT>
<CauseEventID>03</CauseEventID>
<CauseEventTXT>Martini with vodka</CauseEventTXT>
<EventBegda>2017-10-18</EventBegda>
<EventEndda>2017-10-18</EventEndda>
<AccrualsSum>171.0</AccrualsSum>
<AccrualsProz>0.0</AccrualsProz>
<AccrualsName>Chinees_</AccrualsName>
<OrderNum>P-336</OrderNum>
<Perg>0</Perg>
<Perk>15</Perk>
<Awart/>
</Row>
-<Row>
.....
</Row>
<Row/>
</PersonList>
</PersonNf>
</SvcNf>
</Root>
所以,当我使用这段代码解析 XML:
# Parse the XML file and locate every <Row> element, at any depth.
root = ET.parse(events)
nodes = root.findall('.//Row')

# Collect the text of every child of every <Row> into ONE flat list;
# the boundaries between rows are not kept.
ResultSet_Py_List = []
for row in nodes:
    ResultSet_Py_List.extend(field.text for field in row)
我希望每一行(每个 <Row>)的值各自组成一个列表,像这样:
['MI6', '2016-10-28 03:00:12 +03:00', '9457A55E17341AA7ASDEDS057A8BFFF3' 等]
[Row2]
[Row3]
现在,它是这样的: [Row1,Row2,Row3]
[编辑]
所有非 ASCII 字符都被输出成 &#1060; 这样的数字字符引用。
解决办法:
tostring(doc.getroot(), 'unicode')
[编辑]
我使用了第二个示例,但现在出现 Oracle 错误:ORA-01704:字符串文字太长。
考虑使用 Oracle 的 XML 处理程序并避免任何嵌套循环:
-- Shred every <Row> of the XML document into one relational row.
-- NOTE: <AccrualsName> exists in the XML but is not projected below.
SELECT e.SysName, e.ServerDt, e."UID", e.PersID, e.Emp_name, e.EventID, e.EventTXT,
e.CauseEventID, e.CauseEventTXT, e.EventBegda, e.EventEndda,
e.AccrualsSum, e.AccrualsProz, e.OrderNum, e.Perg, e.Perk, e.Awart
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE('<Root>
<SvcNf>
<PersonNf>
<PersonList>
<Row>
<SysName>MI6</SysName>
<ServerDt>2016-10-28 03:00:12 +03:00</ServerDt>
<UID>9457A55E17341AA7ASDEDS057A8BFFF3</UID>
<PersID>007</PersID>
<Emp_name>James Bond</Emp_name>
<EventID>25</EventID>
<EventTXT>Drinking alcohol</EventTXT>
<CauseEventID>03</CauseEventID>
<CauseEventTXT>Martini with vodka</CauseEventTXT>
<EventBegda>2017-10-18</EventBegda>
<EventEndda>2017-10-18</EventEndda>
<AccrualsSum>171.0</AccrualsSum>
<AccrualsProz>0.0</AccrualsProz>
<AccrualsName>Chinees_</AccrualsName>
<OrderNum>P-336</OrderNum>
<Perg>0</Perg>
<Perk>15</Perk>
<Awart/>
</Row>
</PersonList>
</PersonNf>
</SvcNf>
</Root>')
COLUMNS
SysName VARCHAR2(25) PATH 'SysName',
-- Widened: the sample timestamp with its UTC offset is 26 characters long.
ServerDt VARCHAR2(30) PATH 'ServerDt',
-- Widened: the sample UID is 32 characters long.
"UID" VARCHAR2(32) PATH 'UID',
PersID VARCHAR2(25) PATH 'PersID',
Emp_name VARCHAR2(25) PATH 'Emp_name',
EventID NUMBER PATH 'EventID',
EventTXT VARCHAR2(25) PATH 'EventTXT',
CauseEventID VARCHAR2(25) PATH 'CauseEventID',
CauseEventTXT VARCHAR2(25) PATH 'CauseEventTXT',
EventBegda VARCHAR2(25) PATH 'EventBegda',
EventEndda VARCHAR2(25) PATH 'EventEndda',
AccrualsSum VARCHAR2(25) PATH 'AccrualsSum',
AccrualsProz VARCHAR2(25) PATH 'AccrualsProz',
OrderNum VARCHAR2(25) PATH 'OrderNum',
Perg NUMBER PATH 'Perg',
Perk NUMBER PATH 'Perk',
Awart VARCHAR2(25) PATH 'Awart') AS e;
Rextester 演示
要在 Python 数据库游标中实施,例如使用 cx_Oracle 进行追加查询:
# Append query: Oracle shreds the XML document that is passed in through
# the :i_param bind variable and inserts one table row per <Row> element.
sql = """INSERT INTO mytable (Col1, Col2, Col3, ...)
SELECT ...same as above...
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE(:i_param)
COLUMNS
...same as above...) AS e
"""
# PARSE XML FILE
doc = ET.parse(events)
# encoding='unicode' returns a str directly and keeps non-ASCII text as real
# characters; the default (us-ascii) rewrites them as &#NNNN; references.
xmlstr = ET.tostring(doc.getroot(), encoding='unicode')
# PASS XML STRING AS PARAMETER
cur.execute(sql, {'i_param': xmlstr})
dbconn.commit()
对于超过 Oracle 在 SQL 中 4,000 字节限制的非常大的 XML 内容,请使用 PL/SQL,其 varchar2 上限为 32,767 字节,如下所示:
# Anonymous PL/SQL block: the XML is first copied into a varchar2(32767)
# variable and then shredded by XMLTABLE. Every PL/SQL statement must be
# terminated with ';', including the INSERT that precedes the commit.
sql = """
DECLARE
xml_value varchar2(32767);
BEGIN
xml_value := :i_param;
INSERT INTO mytable (Col1, Col2, Col3, ...)
SELECT ...same as above...
FROM XMLTABLE('/Root/SvcNf/PersonNf/PersonList/Row'
PASSING XMLTYPE(xml_value)
COLUMNS
...same as above...) AS e;
commit;
END;
"""
# PARSE XML FILE
doc = ET.parse(events)
# encoding='unicode' returns a str directly and keeps non-ASCII text as real
# characters; the default (us-ascii) rewrites them as &#NNNN; references.
xmlstr = ET.tostring(doc.getroot(), encoding='unicode')
# PASS XML STRING AS PARAMETER
cur.execute(sql, {'i_param': xmlstr})
dbconn.commit()
要使用原始设置,只需添加一个 inner 列表,然后附加到更大的 ResultSet_Py_List,其中每个 <Row>
保存在嵌套列表中用于数据库插入。
import xml.etree.ElementTree as et
doc = et.parse('Source.xml')
nodes = doc.findall('.//Row')

# Build one inner list per <Row>, holding the text of each child element
# (None for an empty element), so every inner list maps to one table row.
ResultSet_Py_List = []
for node in nodes:
    if len(node) == 0:
        # An empty <Row/> has no values to bind to the INSERT placeholders.
        continue
    inner = []
    for child in node:
        inner.append(child.text)
    ResultSet_Py_List.append(inner)

# ALTERNATIVELY WITH NESTED LIST COMPREHENSION
ResultSet_Py_List = [
    [child.text for child in node]
    for node in nodes
    if len(node)
]
然后运行 cx_Oracle 的 executemany,其中参数占位符的数量要与每个内层列表的长度相等:
# Prepare the INSERT once, then execute it for every inner list of
# ResultSet_Py_List (None = reuse the statement that was just prepared).
insert_sql = "INSERT INTO myTable (Col1, Col2, Col3, ...) VALUES (:1, :2, :3, ...)"
cursor.prepare(insert_sql)
cursor.executemany(None, ResultSet_Py_List)
db.commit()
也许这对某些人有帮助。这是一个初学者的回答 :)
我处理的数据包含 Unicode(俄文字符),但不知道为什么 XML 无法正常解析(原文此处缺少所用的工具/函数名)。
所以,我先把 XML 转成 CSV,再用 cx_Oracle 把它插入我的数据库:
import pandas as pd
import xml.etree.ElementTree as ET
import cx_Oracle
import csv
# Parse the XML file and locate every <Row> element, at any depth.
doc = ET.parse(events)
nodes = doc.findall('.//Row')
# One list per column; the number passed to range() is the column count.
# The names on the left must match the names appended to in the loop below.
(SysName, ServerDt, UIDS, PersID, ....T) = (list() for i in range(18))
for node in nodes:
    for elem in node.findall("*"):
        try:
            # A tag matches at most one column, hence elif.
            if elem.tag == "SysName":
                SysName.append(elem.text)
            elif elem.tag == "UID":
                UIDS.append(elem.text)
            elif elem.tag == "ServerDt":
                ServerDt.append(elem.text)
            elif elem.tag == "PersID":
                PersID.append(elem.text)
            # Remaining columns are elided here and follow the same pattern.
            ...
        except AttributeError:
            print(elem.tag)
# Error seen during development: 'NoneType' object has no attribute 'text'
#ER arrays must all be same length
s1 = pd.Series(SysName)
s2 = pd.Series(ServerDt)
s3 = pd.Series(UIDS)
.....
#TEMPORY CSV. DONT KNOW WHY, BUT COLUMNS ARE MIXED WITH THIS ITERATION
df= pd.DataFrame({"SysName": s1,
"ServerDt": s2,
"UIDS": s3,
"PersID": s4,
"Emp_name": s5,
"EVENTID": s6,
...})
file_name = 'events.csv'
df.to_csv(file_name, sep='\t')
print("File name: ", my_file.name, "created")
ResultSet_Py_List = []
ora_conn = cx_Oracle.connect('login/pass@TNSNAME')
ora_cursor = ora_conn.cursor()
# Read the temporary CSV back with the csv module. The author noted that
# pandas failed here with: 'utf8' codec can't decode byte 0xe9 in position 10.
# to_csv writes UTF-8 by default, so read it back as UTF-8 explicitly instead
# of relying on the locale; the with-block closes the file.
with open(file_name, 'r', newline='', encoding='utf-8') as my_file:
    reader = csv.reader(my_file, dialect='excel', delimiter='\t')
    # Skip the header line; every remaining line becomes one row to insert.
    next(reader, None)
    ResultSet_Py_List.extend(reader)
print(str(len(ResultSet_Py_List)) + ' Records from Source')
# Empty the target table and commit through the connection object.
sql_del = """delete from HR.EVENTS_TST"""
ora_cursor.execute(sql_del)
ora_conn.commit()
print("Table is clean")
# The author noted that the CSV columns are not in the XML order; the
# column list below presumably follows the CSV order -- verify before use.
sql_insert = """
INSERT INTO HR.EVENTS_TST (ROW_NUM
,ACCRUALSNAME
, ACCRUALSPROZ
, ACCRUALSSUM
, AWART
, CAUSEEVENTID
...)
VALUES (:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14
,:15,:16,:17,:18, :19)
"""
# Prepare once, then bind every CSV row (None = reuse prepared statement).
ora_cursor.prepare(sql_insert)
ora_cursor.executemany(None, ResultSet_Py_List)
# A single commit is enough to make the inserted rows permanent.
ora_conn.commit()
print("Data imported")