从特定 xml 节点提取值
Extract value from a particular xml node
我如何从下面的 XML 源中提取与 "main.mp3" 挂载点相关的源 IP?
我当前的代码只会取到列表中的第一个 source_ip(恰好属于 listen.mp3 挂载点),但我希望提取的是绑定到特定挂载点的源 IP。
提取源IP的代码:
# Fetch the stats document over HTTP basic auth and print one source IP.
# URL of the stats document.
SERVER = 'http://localhost:8382/admin/stats.xml'
# Register the credentials for that URL with a password manager.
authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
authinfo.add_password(None, SERVER, 'xxxxxx', 'xxxxxxx')
# Same URL as SERVER, held under a second name for the request below.
page = 'http://localhost:8382/admin/stats.xml'
# Build an opener around a basic-auth handler and install it before
# calling urllib2.urlopen().
handler = urllib2.HTTPBasicAuthHandler(authinfo)
myopener = urllib2.build_opener(handler)
opened = urllib2.install_opener(myopener)
output = urllib2.urlopen(page)
# Parse the response body with the "lxml" parser.
soup = BeautifulSoup(output.read(), "lxml")
# NOTE: [0] picks the first <source_ip> match, whichever mount point it
# belongs to -- this is the behaviour the question is about.
print "source ip: ",soup.select('source_ip')[0].text
xml 来源 link:
<icestats>
<admin>ossama@hotmail.com</admin>
<client_connections>580473</client_connections>
<clients>32</clients>
<connections>1217611</connections>
<file_connections>220</file_connections>
<host>localhost</host>
<listener_connections>374451</listener_connections>
<listeners>29</listeners>
<location>Australia</location>
<server_id>Icecast 2.4.2</server_id>
<server_start>Thu, 15 Feb 2018 21:17:23 +1100</server_start>
<server_start_iso8601>2018-02-15T21:17:23+1100</server_start_iso8601>
<source_client_connections>99</source_client_connections>
<source_relay_connections>0</source_relay_connections>
<source_total_connections>99</source_total_connections>
<sources>2</sources>
<stats>0</stats>
<stats_connections>0</stats_connections>
<source mount="/listen.mp3">
<audio_info>channels=2;samplerate=44100;bitrate=64</audio_info>
<channels>2</channels>
<genre>Islamic Talk</genre>
<listener_peak>52</listener_peak>
<listeners>29</listeners>
<listenurl>http://localhost:8382/listen.mp3</listenurl>
<max_listeners>unlimited</max_listeners>
<public>1</public>
<samplerate>44100</samplerate>
<server_description>Qkradio Station Australia</server_description>
<server_name>listen.mp3</server_name>
<server_type>audio/mpeg</server_type>
<slow_listeners>220</slow_listeners>
<source_ip>127.0.0.1</source_ip>
<stream_start>Mon, 19 Feb 2018 23:08:01 +1100</stream_start>
<stream_start_iso8601>2018-02-19T23:08:01+1100</stream_start_iso8601>
<title>ibtihal.mp3 - 1</title>
<total_bytes_read>634036021</total_bytes_read>
<total_bytes_sent>13637049457</total_bytes_sent>
<user_agent>Liquidsoap/1.3.3 (Unix; OCaml 4.02.3)</user_agent>
</source>
<source mount="/main.mp3">
<audio_info>bitrate=170</audio_info>
<bitrate>170</bitrate>
<genre>Islam</genre>
<listener_peak>2</listener_peak>
<listeners>1</listeners>
<listenurl>http://localhost:8382/main.mp3</listenurl>
<max_listeners>unlimited</max_listeners>
<public>1</public>
<server_description>Quran Kareem Radio</server_description>
<server_name>Quran Kareem Radio</server_name>
<server_type>audio/mpeg</server_type>
<server_url>http://qkradio.com.au</server_url>
<slow_listeners>1</slow_listeners>
<source_ip>60.241.175.9</source_ip>
<stream_start>Tue, 20 Feb 2018 00:56:23 +1100</stream_start>
<stream_start_iso8601>2018-02-20T00:56:23+1100</stream_start_iso8601>
<total_bytes_read>582030204</total_bytes_read>
<total_bytes_sent>588819584</total_bytes_sent>
<user_agent>instreamer</user_agent>
</source>
</icestats>
您可以先选取作为容器的 'source' 标签,再在每个容器内取出 'source_ip' 标签,这样每个 IP 就与它所属的挂载点对应起来。
# Report every source IP together with the mount point it belongs to.
# `xml` is presumably the stats document text fetched earlier -- it is
# not defined in this snippet.
soup = BeautifulSoup(xml, "lxml")
# Select only <source> elements that carry a mount attribute, then look
# up source_ip inside each one so the IP stays tied to its mount point.
for mount in soup.select('source[mount]'):
    print("mount point: ", mount['mount'])
    print("source ip: ", mount.select_one('source_ip').text)
或者,您可以创建一个以挂载点为键的字典,再按挂载点查找对应的 IP,例如:
# Map each mount point to its source IP so an IP can be looked up by
# mount name. Relies on a `soup` object parsed beforehand.
data = {
    mount['mount']: mount.select_one('source_ip').text
    for mount in soup.select('source[mount]')
}
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(data['/listen.mp3'])
如果您只想获取某个特定挂载点的 IP,只需选取该挂载点节点下的 'source_ip' 标签。
# Fetch the source IP of one specific mount point in a single selector.
# The attribute value is quoted because "/main.mp3" is not a valid CSS
# identifier; strict selector parsers reject the unquoted form.
# Relies on a `soup` object parsed beforehand.
ip = soup.select_one('source[mount="/main.mp3"] source_ip').text
print("source ip:", ip)
使用 Python 自带的 ElementTree。
import xml.etree.ElementTree as ET

# Parse the document from a string; `xml` is presumably the XML text and
# is not defined in this snippet. Note that ET.fromstring() returns the
# root element, even though the variable is called `tree`.
tree = ET.fromstring(xml)
# Visit every <source> element and show the ways to read its attributes.
for s in tree.iter('source'):
    print(s.attrib)  # dictionary with all attributes
    print(s.text)  # text directly inside the element, if any
    print(s.get('mount'))  # the mount point; None when the attribute is absent
    print(s.attrib['mount'])  # same value, but raises KeyError when absent
我已将您的 XML 数据粘贴到 excercise.xml 文件中,下面的代码应该可以在 Python 3.x 下运行:
import os
import xml.etree.ElementTree as ET

# Print the source IP of the "/main.mp3" mount point from a local copy of
# the stats document.
filename = 'excercise.xml'
cwd = os.getcwd()
fullFile = os.path.abspath(os.path.join(cwd, filename))
tree = ET.parse(fullFile)
root = tree.getroot()
# Look only at direct children of the root: pick the <source> element
# whose mount attribute matches, then print its <source_ip> child.
for child in root:
    # .get() returns None for a <source> without a mount attribute
    # instead of raising KeyError.
    if child.tag == "source" and child.get('mount') == "/main.mp3":
        for element in child:
            if element.tag == "source_ip":
                print(element.text)
我如何从下面的 XML 源中提取与 "main.mp3" 挂载点相关的源 IP?
我当前的代码只会取到列表中的第一个 source_ip(恰好属于 listen.mp3 挂载点),但我希望提取的是绑定到特定挂载点的源 IP。
提取源IP的代码:
# Fetch the stats document over HTTP basic auth and print one source IP.
# URL of the stats document.
SERVER = 'http://localhost:8382/admin/stats.xml'
# Register the credentials for that URL with a password manager.
authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
authinfo.add_password(None, SERVER, 'xxxxxx', 'xxxxxxx')
# Same URL as SERVER, held under a second name for the request below.
page = 'http://localhost:8382/admin/stats.xml'
# Build an opener around a basic-auth handler and install it before
# calling urllib2.urlopen().
handler = urllib2.HTTPBasicAuthHandler(authinfo)
myopener = urllib2.build_opener(handler)
opened = urllib2.install_opener(myopener)
output = urllib2.urlopen(page)
# Parse the response body with the "lxml" parser.
soup = BeautifulSoup(output.read(), "lxml")
# NOTE: [0] picks the first <source_ip> match, whichever mount point it
# belongs to -- this is the behaviour the question is about.
print "source ip: ",soup.select('source_ip')[0].text
xml 来源 link:
<icestats> <admin>ossama@hotmail.com</admin> <client_connections>580473</client_connections> <clients>32</clients> <connections>1217611</connections> <file_connections>220</file_connections> <host>localhost</host> <listener_connections>374451</listener_connections> <listeners>29</listeners> <location>Australia</location> <server_id>Icecast 2.4.2</server_id> <server_start>Thu, 15 Feb 2018 21:17:23 +1100</server_start> <server_start_iso8601>2018-02-15T21:17:23+1100</server_start_iso8601> <source_client_connections>99</source_client_connections> <source_relay_connections>0</source_relay_connections> <source_total_connections>99</source_total_connections> <sources>2</sources> <stats>0</stats> <stats_connections>0</stats_connections> <source mount="/listen.mp3"> <audio_info>channels=2;samplerate=44100;bitrate=64</audio_info> <channels>2</channels> <genre>Islamic Talk</genre> <listener_peak>52</listener_peak> <listeners>29</listeners> <listenurl>http://localhost:8382/listen.mp3</listenurl> <max_listeners>unlimited</max_listeners> <public>1</public> <samplerate>44100</samplerate> <server_description>Qkradio Station Australia</server_description> <server_name>listen.mp3</server_name> <server_type>audio/mpeg</server_type> <slow_listeners>220</slow_listeners> <source_ip>127.0.0.1</source_ip> <stream_start>Mon, 19 Feb 2018 23:08:01 +1100</stream_start> <stream_start_iso8601>2018-02-19T23:08:01+1100</stream_start_iso8601> <title>ibtihal.mp3 - 1</title> <total_bytes_read>634036021</total_bytes_read> <total_bytes_sent>13637049457</total_bytes_sent> <user_agent>Liquidsoap/1.3.3 (Unix; OCaml 4.02.3)</user_agent> </source> <source mount="/main.mp3"> <audio_info>bitrate=170</audio_info> <bitrate>170</bitrate> <genre>Islam</genre> <listener_peak>2</listener_peak> <listeners>1</listeners> <listenurl>http://localhost:8382/main.mp3</listenurl> <max_listeners>unlimited</max_listeners> <public>1</public> <server_description>Quran Kareem Radio</server_description> <server_name>Quran Kareem 
Radio</server_name> <server_type>audio/mpeg</server_type> <server_url>http://qkradio.com.au</server_url> <slow_listeners>1</slow_listeners> <source_ip>60.241.175.9</source_ip> <stream_start>Tue, 20 Feb 2018 00:56:23 +1100</stream_start> <stream_start_iso8601>2018-02-20T00:56:23+1100</stream_start_iso8601> <total_bytes_read>582030204</total_bytes_read> <total_bytes_sent>588819584</total_bytes_sent> <user_agent>instreamer</user_agent> </source> </icestats>
您可以先选取作为容器的 'source' 标签,再在每个容器内取出 'source_ip' 标签,这样每个 IP 就与它所属的挂载点对应起来。
# Report every source IP together with the mount point it belongs to.
# `xml` is presumably the stats document text fetched earlier -- it is
# not defined in this snippet.
soup = BeautifulSoup(xml, "lxml")
# Select only <source> elements that carry a mount attribute, then look
# up source_ip inside each one so the IP stays tied to its mount point.
for mount in soup.select('source[mount]'):
    print("mount point: ", mount['mount'])
    print("source ip: ", mount.select_one('source_ip').text)
或者,您可以创建一个以挂载点为键的字典,再按挂载点查找对应的 IP,例如:
# Map each mount point to its source IP so an IP can be looked up by
# mount name. Relies on a `soup` object parsed beforehand.
data = {
    mount['mount']: mount.select_one('source_ip').text
    for mount in soup.select('source[mount]')
}
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(data['/listen.mp3'])
如果您只想获取某个特定挂载点的 IP,只需选取该挂载点节点下的 'source_ip' 标签。
# Fetch the source IP of one specific mount point in a single selector.
# The attribute value is quoted because "/main.mp3" is not a valid CSS
# identifier; strict selector parsers reject the unquoted form.
# Relies on a `soup` object parsed beforehand.
ip = soup.select_one('source[mount="/main.mp3"] source_ip').text
print("source ip:", ip)
使用 Python 自带的 ElementTree。
import xml.etree.ElementTree as ET

# Parse the document from a string; `xml` is presumably the XML text and
# is not defined in this snippet. Note that ET.fromstring() returns the
# root element, even though the variable is called `tree`.
tree = ET.fromstring(xml)
# Visit every <source> element and show the ways to read its attributes.
for s in tree.iter('source'):
    print(s.attrib)  # dictionary with all attributes
    print(s.text)  # text directly inside the element, if any
    print(s.get('mount'))  # the mount point; None when the attribute is absent
    print(s.attrib['mount'])  # same value, but raises KeyError when absent
我已将您的 XML 数据粘贴到 excercise.xml 文件中,下面的代码应该可以在 Python 3.x 下运行:
import os
import xml.etree.ElementTree as ET

# Print the source IP of the "/main.mp3" mount point from a local copy of
# the stats document.
filename = 'excercise.xml'
cwd = os.getcwd()
fullFile = os.path.abspath(os.path.join(cwd, filename))
tree = ET.parse(fullFile)
root = tree.getroot()
# Look only at direct children of the root: pick the <source> element
# whose mount attribute matches, then print its <source_ip> child.
for child in root:
    # .get() returns None for a <source> without a mount attribute
    # instead of raising KeyError.
    if child.tag == "source" and child.get('mount') == "/main.mp3":
        for element in child:
            if element.tag == "source_ip":
                print(element.text)