使用 ElementTree 解析非分块的 XML
Parsing non-blocked XML with ElementTree
我正在尝试解析一些 XML 数据,并将其放入嵌套字典中供以后使用。然而,据我所知,这些 XML 数据并不是按子类型格式或分块数据格式组织的,因此我不确定最佳方法是什么。我目前正在尝试使用 XPath,并认为它最适合这种数据结构,但我不确定应该如何解析,才能把结果正确地添加到嵌套字典中。我试过下面的方法,但你可能已经猜到,它是不正确的。有人可以就解析此数据的最佳方法提出建议吗?
要解析的数据
<address addr="192.168.1.74" addrtype="ipv4"/>
<address addr="FC:75:16:03:D0:2A" addrtype="mac" vendor="D-Link International"/>
<hostnames>
</hostnames>
<ports><extraports state="closed" count="994">
<extrareasons reason="resets" count="994"/>
</extraports>
<port protocol="tcp" portid="80"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="443"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" tunnel="ssl" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="515"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="printer" product="LPRng" extrainfo="Not authorized" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="mysql" product="MySQL" extrainfo="unauthorized" method="probed" conf="10"><cpe>cpe:/a:mysql:mysql</cpe></service></port>
</ports>
<times srtt="16241" rttvar="1850" to="100000"/>
</host>
<host starttime="1443920156" endtime="1443920210"><status state="up" reason="arp-response" reason_ttl="0"/>
<address addr="192.168.1.126" addrtype="ipv4"/>
<address addr="00:0C:29:30:A1:C9" addrtype="mac" vendor="VMware"/>
<hostnames>
</hostnames>
<ports><extraports state="filtered" count="984">
<extrareasons reason="no-responses" count="984"/>
</extraports>
<port protocol="tcp" portid="53"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="domain" product="Microsoft DNS" version="6.1.7601" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="88"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kerberos-sec" product="Windows 2003 Kerberos" extrainfo="server time: 2015-10-04 00:56:07Z" ostype="Windows" method="probed" conf="10"><cpe>cpe:/a:microsoft:kerberos</cpe><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="135"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="389"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="464"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kpasswd5" method="table" conf="3"/></port>
<port protocol="tcp" portid="593"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="636"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="3268"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3269"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="49154"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49155"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49157"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49158"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49161"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
</ports>
<times srtt="5951" rttvar="5849" to="100000"/>
</host>
<runstats><finished time="1443920210" timestr="Sun Oct 4 01:56:50 2015" elapsed="53.38" summary="Nmap done at Sun Oct 4 01:56:50 2015; 2 IP addresses (2 hosts up) scanned in 53.38 seconds" exit="success"/><hosts up="2" down="0" total="2"/>
</runstats>
</nmaprun>
当前尝试
from xml.etree import ElementTree
import os
# Asker's attempt: read an nmap XML report and try to store
# host -> port -> service product in one nested dictionary.
# NOTE(review): this listing has lost its indentation, so the original
# nesting of the loops below cannot be recovered from the text -- TODO confirm.
nmap_file = 'Test.xml'
# Pre-build the intended three-level shape under fixed keys.
dictionary = {}
dictionary['host'] = {}
dictionary['host']['port'] = {}
dictionary['host']['port']['service'] = {}
with open(nmap_file, 'rt') as f:
tree = ElementTree.parse(f)
# './/address' matches every <address> element anywhere in the document.
for node in tree.findall('.//address'):
# Only addresses whose addrtype is 'ipv4' are used as the host value.
if (node.attrib.get('addrtype') == 'ipv4'):
host = node.attrib.get('addr')
# Replaces the nested dict stored under 'host' with the address string;
# the fixed key also means each match overwrites the previous one.
dictionary['host'] = host
# './/port' searches the whole tree, so it is not limited to one host's ports.
for node in tree.findall('.//port'):
port = node.attrib.get('portid')
# Once 'host' holds the address string (see above), this subscript
# assignment targets a str rather than a dict.
dictionary['host']['port'] = port
# './/service' likewise searches the whole tree.
for node in tree.findall('.//service'):
# attrib.get returns None for <service> elements without a 'product' attribute.
product = node.attrib.get('product')
dictionary['host']['port']['service'] = product
# Python 2 print statement.
print dictionary
可以考虑一种略有不同的方法:使用 lxml 模块和 XPath,先把数据收集到列表中,再由这些列表构建嵌套字典。
有关创建嵌套字典(nested dictionaries)的更多信息,请参阅相关教程。
import lxml.etree as et
import os
# Build {ipv4 address: {port id: service product}} from an nmap XML report.
# Only ports whose <service> carries a non-empty "product" attribute are kept.
nmap_file = 'Test.xml'

# Read bytes so the XML parser applies the encoding declared in the document
# itself instead of a text-mode decoding chosen by the platform.
with open(nmap_file, 'rb') as f:
    tree = et.parse(f)

dictionary = {}

# Iterate over the <host> elements directly and query relative to each one;
# this avoids re-scanning the whole document with //host[i] for every host.
for host in tree.xpath('//host'):
    addrs = host.xpath("address[@addrtype='ipv4']/@addr")
    if not addrs:
        # A host without an IPv4 address has no key to file its ports under;
        # skip it instead of failing with IndexError on addrs[0].
        continue

    ports = dictionary[addrs[0]] = {}

    # Read the port id and the product from the same <port> element, so the
    # two values can never get out of step (pairing two independently
    # queried lists by position would).
    for port in host.xpath("ports/port[string-length(service/@product)>0]"):
        ports[port.get('portid')] = port.xpath('string(service/@product)')

print(dictionary)
输出(请注意:只有列出服务产品的端口才会出现在字典中)
{'192.168.1.74': {'80': 'lighttpd',
'445': 'Samba smbd',
'139': 'Samba smbd',
'443': 'lighttpd',
'515': 'LPRng',
'3306': 'MySQL'}}
{'192.168.1.126': {'49161': 'Microsoft Windows RPC',
'135': 'Microsoft Windows RPC',
'53': 'Microsoft DNS',
'49157': 'Microsoft Windows RPC over HTTP',
'593': 'Microsoft Windows RPC over HTTP',
'49155': 'Microsoft Windows RPC',
'49158': 'Microsoft Windows RPC',
'88': 'Windows 2003 Kerberos',
'49154': 'Microsoft Windows RPC'}}
我正在尝试解析一些 XML 数据,并将其放入嵌套字典中供以后使用。然而,据我所知,这些 XML 数据并不是按子类型格式或分块数据格式组织的,因此我不确定最佳方法是什么。我目前正在尝试使用 XPath,并认为它最适合这种数据结构,但我不确定应该如何解析,才能把结果正确地添加到嵌套字典中。我试过下面的方法,但你可能已经猜到,它是不正确的。有人可以就解析此数据的最佳方法提出建议吗?
要解析的数据
<address addr="192.168.1.74" addrtype="ipv4"/>
<address addr="FC:75:16:03:D0:2A" addrtype="mac" vendor="D-Link International"/>
<hostnames>
</hostnames>
<ports><extraports state="closed" count="994">
<extrareasons reason="resets" count="994"/>
</extraports>
<port protocol="tcp" portid="80"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="443"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="http" product="lighttpd" tunnel="ssl" method="probed" conf="10"><cpe>cpe:/a:lighttpd:lighttpd</cpe></service></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="netbios-ssn" product="Samba smbd" version="3.X" extrainfo="workgroup: WORKGROUP" method="probed" conf="10"/></port>
<port protocol="tcp" portid="515"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="printer" product="LPRng" extrainfo="Not authorized" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3306"><state state="open" reason="syn-ack" reason_ttl="64"/><service name="mysql" product="MySQL" extrainfo="unauthorized" method="probed" conf="10"><cpe>cpe:/a:mysql:mysql</cpe></service></port>
</ports>
<times srtt="16241" rttvar="1850" to="100000"/>
</host>
<host starttime="1443920156" endtime="1443920210"><status state="up" reason="arp-response" reason_ttl="0"/>
<address addr="192.168.1.126" addrtype="ipv4"/>
<address addr="00:0C:29:30:A1:C9" addrtype="mac" vendor="VMware"/>
<hostnames>
</hostnames>
<ports><extraports state="filtered" count="984">
<extrareasons reason="no-responses" count="984"/>
</extraports>
<port protocol="tcp" portid="53"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="domain" product="Microsoft DNS" version="6.1.7601" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="88"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kerberos-sec" product="Windows 2003 Kerberos" extrainfo="server time: 2015-10-04 00:56:07Z" ostype="Windows" method="probed" conf="10"><cpe>cpe:/a:microsoft:kerberos</cpe><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="135"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="139"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="389"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="445"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="netbios-ssn" method="probed" conf="10"/></port>
<port protocol="tcp" portid="464"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="kpasswd5" method="table" conf="3"/></port>
<port protocol="tcp" portid="593"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="636"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="3268"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ldap" method="probed" conf="10"/></port>
<port protocol="tcp" portid="3269"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="tcpwrapped" method="probed" conf="8"/></port>
<port protocol="tcp" portid="49154"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49155"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49157"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="ncacn_http" product="Microsoft Windows RPC over HTTP" version="1.0" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49158"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
<port protocol="tcp" portid="49161"><state state="open" reason="syn-ack" reason_ttl="128"/><service name="msrpc" product="Microsoft Windows RPC" ostype="Windows" method="probed" conf="10"><cpe>cpe:/o:microsoft:windows</cpe></service></port>
</ports>
<times srtt="5951" rttvar="5849" to="100000"/>
</host>
<runstats><finished time="1443920210" timestr="Sun Oct 4 01:56:50 2015" elapsed="53.38" summary="Nmap done at Sun Oct 4 01:56:50 2015; 2 IP addresses (2 hosts up) scanned in 53.38 seconds" exit="success"/><hosts up="2" down="0" total="2"/>
</runstats>
</nmaprun>
当前尝试
from xml.etree import ElementTree
import os
# Asker's attempt: read an nmap XML report and try to store
# host -> port -> service product in one nested dictionary.
# NOTE(review): this listing has lost its indentation, so the original
# nesting of the loops below cannot be recovered from the text -- TODO confirm.
nmap_file = 'Test.xml'
# Pre-build the intended three-level shape under fixed keys.
dictionary = {}
dictionary['host'] = {}
dictionary['host']['port'] = {}
dictionary['host']['port']['service'] = {}
with open(nmap_file, 'rt') as f:
tree = ElementTree.parse(f)
# './/address' matches every <address> element anywhere in the document.
for node in tree.findall('.//address'):
# Only addresses whose addrtype is 'ipv4' are used as the host value.
if (node.attrib.get('addrtype') == 'ipv4'):
host = node.attrib.get('addr')
# Replaces the nested dict stored under 'host' with the address string;
# the fixed key also means each match overwrites the previous one.
dictionary['host'] = host
# './/port' searches the whole tree, so it is not limited to one host's ports.
for node in tree.findall('.//port'):
port = node.attrib.get('portid')
# Once 'host' holds the address string (see above), this subscript
# assignment targets a str rather than a dict.
dictionary['host']['port'] = port
# './/service' likewise searches the whole tree.
for node in tree.findall('.//service'):
# attrib.get returns None for <service> elements without a 'product' attribute.
product = node.attrib.get('product')
dictionary['host']['port']['service'] = product
# Python 2 print statement.
print dictionary
可以考虑一种略有不同的方法:使用 lxml 模块和 XPath,先把数据收集到列表中,再由这些列表构建嵌套字典。
有关创建嵌套字典(nested dictionaries)的更多信息,请参阅相关教程。
import lxml.etree as et
import os
# Build {ipv4 address: {port id: service product}} from an nmap XML report.
# Only ports whose <service> carries a non-empty "product" attribute are kept.
nmap_file = 'Test.xml'

# Read bytes so the XML parser applies the encoding declared in the document
# itself instead of a text-mode decoding chosen by the platform.
with open(nmap_file, 'rb') as f:
    tree = et.parse(f)

dictionary = {}

# Iterate over the <host> elements directly and query relative to each one;
# this avoids re-scanning the whole document with //host[i] for every host.
for host in tree.xpath('//host'):
    addrs = host.xpath("address[@addrtype='ipv4']/@addr")
    if not addrs:
        # A host without an IPv4 address has no key to file its ports under;
        # skip it instead of failing with IndexError on addrs[0].
        continue

    ports = dictionary[addrs[0]] = {}

    # Read the port id and the product from the same <port> element, so the
    # two values can never get out of step (pairing two independently
    # queried lists by position would).
    for port in host.xpath("ports/port[string-length(service/@product)>0]"):
        ports[port.get('portid')] = port.xpath('string(service/@product)')

print(dictionary)
输出(请注意:只有列出服务产品的端口才会出现在字典中)
{'192.168.1.74': {'80': 'lighttpd',
'445': 'Samba smbd',
'139': 'Samba smbd',
'443': 'lighttpd',
'515': 'LPRng',
'3306': 'MySQL'}}
{'192.168.1.126': {'49161': 'Microsoft Windows RPC',
'135': 'Microsoft Windows RPC',
'53': 'Microsoft DNS',
'49157': 'Microsoft Windows RPC over HTTP',
'593': 'Microsoft Windows RPC over HTTP',
'49155': 'Microsoft Windows RPC',
'49158': 'Microsoft Windows RPC',
'88': 'Windows 2003 Kerberos',
'49154': 'Microsoft Windows RPC'}}