python ElementTree 查找包含字符串的元素
python ElementTree find elements with string
我有一个很大的 XML 文件(约 2GB),下面只是该文件的一部分。我想在这个文件中查找所有以下列标签开头的元素:
<ManagedElement sourceType = "SIU">
和 <ManagedElement sourceType = "CELLO">
然后从中获取一些数据。下面是这些 ManagedElement 下的示例。
这是我的代码:
#!/usr/bin/python
from xml.etree import ElementTree as ElementTree
# Parse the whole model file into an element tree.
with open('/var/opt/arne/OSSRC_Model_2014_11_24_3_19_36_PM.xml', 'rt') as f:
    tree = ElementTree.parse(f)

# Append one "name,ip,site" line per ManagedElement to the output file.
with open('/home/gf', 'a') as f1:
    # NOTE: this matches every ManagedElement, whatever its sourceType.
    for item in tree.findall('.//ManagedElement'):
        # Each value is read from the 'string' attribute of the first
        # matching descendant element.
        rank = item.find('.//ipAddress')
        rank1 = rank.get('string')
        name = item.find('.//ManagedElementId')
        name1 = name.get('string')
        site = item.find('.//associatedSite')
        site1 = site.get('string')
        f1.write((name1 + ',' + rank1 + ',' + site1 + '\n'))
问题是文件中还有其他 ManagedElement,而我只想要 sourceType 为 SIU 或 CELLO 的那些。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE Model SYSTEM "/opt/arne/etc/arne12_2.dtd">
<Model version = "1" importVersion = "12.2">
<!--Validate: /opt/arne/bin/import.sh -f 4_siu_create.xml \ -val:rall -->
<Create>
<SubNetwork userLabel = "ZLNOUR_SIU" networkType = "IPRAN">
<ManagedElement sourceType = "SIU">
<ManagedElementId string = "siu009722"/>
<primaryType type = "STN"/>
<managedElementType types = ""/>
<associatedSite string = "Site=site00972"/>
<nodeVersion string = "T11A"/>
<platformVersion string = ""/>
<swVersion string = ""/>
<vendorName string = ""/>
<userDefinedState string = ""/>
<managedServiceAvailability int = "1"/>
<isManaged boolean = "true"/>
<connectionStatus string = "OFF"/>
<Connectivity>
<DEFAULT>
<emUrl url = "http://10.131.203.117:80/"/>
<ipAddress string = "10.131.203.117"/>
<oldIpAddress string = "int dummy=0"/>
<hostname string = ""/>
<nodeSecurityState state = "ON"/>
<boardId string = ""/>
<Protocol number = "0">
<protocolType string = "SNMP"/>
<port int = "161"/>
<protocolVersion string = "v2c"/>
<securityName string = ""/>
<authenticationMethod string = ""/>
<encryptionMethod string = ""/>
<communityString string = "public"/>
<context string = ""/>
<namingUrl string = ""/>
<namingPort int = ""/>
<notificationIRPAgentVersion string = ""/>
<alarmIRPAgentVersion string = ""/>
<notificationIRPNamingContext context = ""/>
<alarmIRPNamingContext context = ""/>
</Protocol>
<Protocol number = "1">
<protocolType string = "SSH"/>
<port int = "22"/>
<protocolVersion string = ""/>
<securityName string = ""/>
<authenticationMethod string = ""/>
<encryptionMethod string = ""/>
<communityString string = ""/>
<context string = ""/>
<namingUrl string = ""/>
<namingPort int = ""/>
<notificationIRPAgentVersion string = ""/>
<alarmIRPAgentVersion string = ""/>
<notificationIRPNamingContext context = ""/>
<alarmIRPNamingContext context = ""/>
</Protocol>
<Browser>
<browser string = ""/>
<browserURL string = ""/>
<bookname string = ""/>
</Browser>
</DEFAULT>
</Connectivity>
<Tss>
<Entry>
<System string = "siu009722"/>
<Type string = "NORMAL"/>
<User string = "admin"/>
<Password string = "siu009722"/>
</Entry>
<Entry>
<System string = "siu009722"/>
<Type string = "SECURE"/>
<User string = "admin"/>
<Password string = "siu009722"/>
</Entry>
</Tss>
<Relationship>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=swstore-rtwaned1o" AssociationType = "ManagedElement_to_ftpSwStore"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=cmdown-rtwaned1o" AssociationType = "ManagedElement_to_neTransientCmDown"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=cmup-rtwaned1o" AssociationType = "ManagedElement_to_neTransientCmUp"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=pmup-rtwaned1o" AssociationType = "ManagedElement_to_neTransientPm"/>
<AssociableNode TO_FDN = "ManagementNode=ONRM" AssociationType = "MgmtAssociation"/>
<AssociableNode TO_FDN = "SubNetwork=ZLNOUR3,MeContext=rbs009721,ManagedElement=1,NodeBFunction=1" FROM_FDN = "SubNetwork=ZLNOUR_SIU,ManagedElement=siu009722,StnFunction=STN_ManagedFunction" AssociationType = "StnFunction_to_NodeBFunction"/>
</Relationship>
</ManagedElement>
<ManagedElement sourceType="CELLO">
<ManagedElementId string="3GMUP"/>
<primaryType type="RBS"/>
<managedElementType types=""/>
<associatedSite string="Site=Podgorica"/>
<nodeVersion string=""/>
<platformVersion string=""/>
<swVersion string=""/>
<vendorName string="Ericsson"/>
<userDefinedState string=""/>
<managedServiceAvailability int="1"/>
<isManaged boolean="true"/>
<neMIMVersion string="T.1.201"/>
<connectionStatus string="ON"/>
<ManagedFunction>
<functionType string="NodeB"/>
<supportSystemControl boolean="false"/>
</ManagedFunction>
<Connectivity>
<DEFAULT>
<emUrl url="http://10.132.11.226:80/em/index.html"/>
<ipAddress string="10.132.11.226"/>
<oldIpAddress string=""/>
<hostname string=""/>
<nodeSecurityState state="ON"/>
<boardId string=""/>
<Protocol number="0">
<protocolType string="TELNET"/>
<port int="23"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="1">
<protocolType string="CORBA"/>
<port int="0"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string="http://10.132.11.226:80/cello/ior_files/nameroot.ior"/>
<namingPort int="0"/>
<notificationIRPAgentVersion string="3.2"/>
<alarmIRPAgentVersion string="3.2"/>
<notificationIRPNamingContext context="NOTIFICATION_IRP_VERSION_1_1"/>
<alarmIRPNamingContext context="ALARM_IRP_VERSION_1_1"/>
</Protocol>
<Protocol number="2">
<protocolType string="SFTP"/>
<port int="22"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="3">
<protocolType string="FTP"/>
<port int="21"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="4">
<protocolType string="SSH"/>
<port int="22"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Browser>
<browser string=""/>
<browserURL string=""/>
<bookname string=""/>
</Browser>
</DEFAULT>
</Connectivity>
<Tss>
<Entry>
<System string="3GMUP"/>
<Type string="SECURE"/>
<User string="rbs"/>
<Password string="rbs"/>
</Entry>
<Entry>
<System string="3GMUP"/>
<Type string="NORMAL"/>
<User string="rbs"/>
<Password string="rbs"/>
</Entry>
</Tss>
<Relationship>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-backup-oss1" AssociationType="ManagedElement_to_ftpBackupStore"/>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-keystore-oss1" AssociationType="ManagedElement_to_ftpLicenseKeyStore"/>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-swstore-oss1" AssociationType="ManagedElement_to_ftpSwStore"/>
<AssociableNode TO_FDN="Group=CD34082729" AssociationType="Group_to_MeContext"/>
<AssociableNode TO_FDN="ManagementNode=ONRM" AssociationType="MgmtAssociation"/>
<AssociableNode TO_FDN="SubNetwork=IPRAN,ManagedElement=MUP_BD,StnFunction=STN_ManagedFunction" FROM_FDN="SubNetwork=RNC111,MeContext=3GMUP,ManagedElement=1,NodeBFunction=1" AssociationType="StnFunction_to_NodeBFunction"/>
<AssociableNode TO_FDN="SubNetwork=RNC111,Group=NodeB" AssociationType="Group_to_MeContext"/>
</Relationship>
</ManagedElement>
</SubNetwork>
</Create>
</Model>
只选取 sourceType 属性等于 "CELLO" 或 "SIU" 的 ManagedElement,正确的 XPath 如下:
.//ManagedElement[@sourceType="CELLO" or @sourceType="SIU"]
不幸的是,xml.etree.ElementTree 似乎不支持 XPath 的 or 运算符,因此直接把上面的 XPath 传给 findall() 方法是行不通的。现在,您可以选择使用 lxml 库,它具有更好的 XPath 支持(特别是如果您计划在代码中更广泛地使用复杂的 XPath);或者您可以创建 lambda 表达式,手动按 sourceType 属性值过滤 ManagedElement。例如,不要只写:
tree.findall('.//ManagedElement')
...您可以尝试这样的操作:
# Keep only the ManagedElement nodes whose sourceType attribute is 'CELLO' or 'SIU'.
# On Python 3, filter() returns a lazy iterator; wrap it in list() if a list is needed.
filter(lambda x: x.get('sourceType') in ['CELLO','SIU'], tree.findall('.//ManagedElement'))
我有一个很大的 XML 文件(约 2GB),下面只是该文件的一部分。我想在这个文件中查找所有以下列标签开头的元素:
<ManagedElement sourceType = "SIU">
和 <ManagedElement sourceType = "CELLO">
然后从中获取一些数据。下面是这些 ManagedElement 下的示例。
这是我的代码:
#!/usr/bin/python
from xml.etree import ElementTree as ElementTree
# Parse the whole model file into an element tree.
with open('/var/opt/arne/OSSRC_Model_2014_11_24_3_19_36_PM.xml', 'rt') as f:
    tree = ElementTree.parse(f)

# Append one "name,ip,site" line per ManagedElement to the output file.
with open('/home/gf', 'a') as f1:
    # NOTE: this matches every ManagedElement, whatever its sourceType.
    for item in tree.findall('.//ManagedElement'):
        # Each value is read from the 'string' attribute of the first
        # matching descendant element.
        rank = item.find('.//ipAddress')
        rank1 = rank.get('string')
        name = item.find('.//ManagedElementId')
        name1 = name.get('string')
        site = item.find('.//associatedSite')
        site1 = site.get('string')
        f1.write((name1 + ',' + rank1 + ',' + site1 + '\n'))
问题是文件中还有其他 ManagedElement,而我只想要 sourceType 为 SIU 或 CELLO 的那些。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE Model SYSTEM "/opt/arne/etc/arne12_2.dtd">
<Model version = "1" importVersion = "12.2">
<!--Validate: /opt/arne/bin/import.sh -f 4_siu_create.xml \ -val:rall -->
<Create>
<SubNetwork userLabel = "ZLNOUR_SIU" networkType = "IPRAN">
<ManagedElement sourceType = "SIU">
<ManagedElementId string = "siu009722"/>
<primaryType type = "STN"/>
<managedElementType types = ""/>
<associatedSite string = "Site=site00972"/>
<nodeVersion string = "T11A"/>
<platformVersion string = ""/>
<swVersion string = ""/>
<vendorName string = ""/>
<userDefinedState string = ""/>
<managedServiceAvailability int = "1"/>
<isManaged boolean = "true"/>
<connectionStatus string = "OFF"/>
<Connectivity>
<DEFAULT>
<emUrl url = "http://10.131.203.117:80/"/>
<ipAddress string = "10.131.203.117"/>
<oldIpAddress string = "int dummy=0"/>
<hostname string = ""/>
<nodeSecurityState state = "ON"/>
<boardId string = ""/>
<Protocol number = "0">
<protocolType string = "SNMP"/>
<port int = "161"/>
<protocolVersion string = "v2c"/>
<securityName string = ""/>
<authenticationMethod string = ""/>
<encryptionMethod string = ""/>
<communityString string = "public"/>
<context string = ""/>
<namingUrl string = ""/>
<namingPort int = ""/>
<notificationIRPAgentVersion string = ""/>
<alarmIRPAgentVersion string = ""/>
<notificationIRPNamingContext context = ""/>
<alarmIRPNamingContext context = ""/>
</Protocol>
<Protocol number = "1">
<protocolType string = "SSH"/>
<port int = "22"/>
<protocolVersion string = ""/>
<securityName string = ""/>
<authenticationMethod string = ""/>
<encryptionMethod string = ""/>
<communityString string = ""/>
<context string = ""/>
<namingUrl string = ""/>
<namingPort int = ""/>
<notificationIRPAgentVersion string = ""/>
<alarmIRPAgentVersion string = ""/>
<notificationIRPNamingContext context = ""/>
<alarmIRPNamingContext context = ""/>
</Protocol>
<Browser>
<browser string = ""/>
<browserURL string = ""/>
<bookname string = ""/>
</Browser>
</DEFAULT>
</Connectivity>
<Tss>
<Entry>
<System string = "siu009722"/>
<Type string = "NORMAL"/>
<User string = "admin"/>
<Password string = "siu009722"/>
</Entry>
<Entry>
<System string = "siu009722"/>
<Type string = "SECURE"/>
<User string = "admin"/>
<Password string = "siu009722"/>
</Entry>
</Tss>
<Relationship>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=swstore-rtwaned1o" AssociationType = "ManagedElement_to_ftpSwStore"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=cmdown-rtwaned1o" AssociationType = "ManagedElement_to_neTransientCmDown"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=cmup-rtwaned1o" AssociationType = "ManagedElement_to_neTransientCmUp"/>
<AssociableNode TO_FDN = "FtpServer=SMRSSLAVE-rtwaned1o,FtpService=pmup-rtwaned1o" AssociationType = "ManagedElement_to_neTransientPm"/>
<AssociableNode TO_FDN = "ManagementNode=ONRM" AssociationType = "MgmtAssociation"/>
<AssociableNode TO_FDN = "SubNetwork=ZLNOUR3,MeContext=rbs009721,ManagedElement=1,NodeBFunction=1" FROM_FDN = "SubNetwork=ZLNOUR_SIU,ManagedElement=siu009722,StnFunction=STN_ManagedFunction" AssociationType = "StnFunction_to_NodeBFunction"/>
</Relationship>
</ManagedElement>
<ManagedElement sourceType="CELLO">
<ManagedElementId string="3GMUP"/>
<primaryType type="RBS"/>
<managedElementType types=""/>
<associatedSite string="Site=Podgorica"/>
<nodeVersion string=""/>
<platformVersion string=""/>
<swVersion string=""/>
<vendorName string="Ericsson"/>
<userDefinedState string=""/>
<managedServiceAvailability int="1"/>
<isManaged boolean="true"/>
<neMIMVersion string="T.1.201"/>
<connectionStatus string="ON"/>
<ManagedFunction>
<functionType string="NodeB"/>
<supportSystemControl boolean="false"/>
</ManagedFunction>
<Connectivity>
<DEFAULT>
<emUrl url="http://10.132.11.226:80/em/index.html"/>
<ipAddress string="10.132.11.226"/>
<oldIpAddress string=""/>
<hostname string=""/>
<nodeSecurityState state="ON"/>
<boardId string=""/>
<Protocol number="0">
<protocolType string="TELNET"/>
<port int="23"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="1">
<protocolType string="CORBA"/>
<port int="0"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string="http://10.132.11.226:80/cello/ior_files/nameroot.ior"/>
<namingPort int="0"/>
<notificationIRPAgentVersion string="3.2"/>
<alarmIRPAgentVersion string="3.2"/>
<notificationIRPNamingContext context="NOTIFICATION_IRP_VERSION_1_1"/>
<alarmIRPNamingContext context="ALARM_IRP_VERSION_1_1"/>
</Protocol>
<Protocol number="2">
<protocolType string="SFTP"/>
<port int="22"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="3">
<protocolType string="FTP"/>
<port int="21"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Protocol number="4">
<protocolType string="SSH"/>
<port int="22"/>
<protocolVersion string=""/>
<securityName string=""/>
<authenticationMethod string=""/>
<encryptionMethod string=""/>
<communityString string=""/>
<context string=""/>
<namingUrl string=""/>
<namingPort int=""/>
<notificationIRPAgentVersion string=""/>
<alarmIRPAgentVersion string=""/>
<notificationIRPNamingContext context=""/>
<alarmIRPNamingContext context=""/>
</Protocol>
<Browser>
<browser string=""/>
<browserURL string=""/>
<bookname string=""/>
</Browser>
</DEFAULT>
</Connectivity>
<Tss>
<Entry>
<System string="3GMUP"/>
<Type string="SECURE"/>
<User string="rbs"/>
<Password string="rbs"/>
</Entry>
<Entry>
<System string="3GMUP"/>
<Type string="NORMAL"/>
<User string="rbs"/>
<Password string="rbs"/>
</Entry>
</Tss>
<Relationship>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-backup-oss1" AssociationType="ManagedElement_to_ftpBackupStore"/>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-keystore-oss1" AssociationType="ManagedElement_to_ftpLicenseKeyStore"/>
<AssociableNode TO_FDN="FtpServer=SMRSMASTERWRAN-oss1,FtpService=smo-swstore-oss1" AssociationType="ManagedElement_to_ftpSwStore"/>
<AssociableNode TO_FDN="Group=CD34082729" AssociationType="Group_to_MeContext"/>
<AssociableNode TO_FDN="ManagementNode=ONRM" AssociationType="MgmtAssociation"/>
<AssociableNode TO_FDN="SubNetwork=IPRAN,ManagedElement=MUP_BD,StnFunction=STN_ManagedFunction" FROM_FDN="SubNetwork=RNC111,MeContext=3GMUP,ManagedElement=1,NodeBFunction=1" AssociationType="StnFunction_to_NodeBFunction"/>
<AssociableNode TO_FDN="SubNetwork=RNC111,Group=NodeB" AssociationType="Group_to_MeContext"/>
</Relationship>
</ManagedElement>
</SubNetwork>
</Create>
</Model>
只选取 sourceType 属性等于 "CELLO" 或 "SIU" 的 ManagedElement,正确的 XPath 如下:
.//ManagedElement[@sourceType="CELLO" or @sourceType="SIU"]
不幸的是,xml.etree.ElementTree 似乎不支持 XPath 的 or 运算符,因此直接把上面的 XPath 传给 findall() 方法是行不通的。现在,您可以选择使用 lxml 库,它具有更好的 XPath 支持(特别是如果您计划在代码中更广泛地使用复杂的 XPath);或者您可以创建 lambda 表达式,手动按 sourceType 属性值过滤 ManagedElement。例如,不要只写:
tree.findall('.//ManagedElement')
...您可以尝试这样的操作:
# Keep only the ManagedElement nodes whose sourceType attribute is 'CELLO' or 'SIU'.
# On Python 3, filter() returns a lazy iterator; wrap it in list() if a list is needed.
filter(lambda x: x.get('sourceType') in ['CELLO','SIU'], tree.findall('.//ManagedElement'))