如何使用 ElementTree 访问 iTunes xml 中的值元素?
How to access value element in iTunes xml using ElementTree?
我正在尝试把 iTunes 的 XML 播放列表“导出”为 HTML 表格以便分享。但是 iTunes 库文件使用键/值对,而不是更有意义的 XML 标签。有没有一种简单的方法,可以同时取得这些键/值对中的值(<value>)?
下面的代码可以让我得到 <key> 的文本,即 Track ID、Name、Artist、Album Artist 等,但我似乎找不到办法获取紧跟在键后面的值元素,例如 <integer>49924</integer> 或 <string>Ep. 35 | What Do Your Morals Taste Like? ...</string>。我可以(或者说应该)用 ElementTree 来做这件事,还是应该改用正则表达式或其他库?谢谢!
from xml.etree import ElementTree as ET

# Excerpt of an iTunes library property list used as sample input.
# The ampersand in the Genre value is written as the character reference
# "&#38;": a bare "&" is not well-formed XML and makes ET.fromstring() raise
# xml.etree.ElementTree.ParseError.
data = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Major Version</key><integer>1</integer>
<key>Minor Version</key><integer>1</integer>
<key>Date</key><date>2019-01-21T07:31:15Z</date>
<key>Application Version</key><string>12.8.0.150</string>
<key>Features</key><integer>5</integer>
<key>Show Content Ratings</key><true/>
<key>Music Folder</key><string>file:///Users/Music/iTunes/iTunes%20Media/</string>
<key>Library Persistent ID</key><string>75E62CF156F5AE1B</string>
<key>Tracks</key>
<dict>
<key>49924</key>
<dict>
<key>Track ID</key><integer>49924</integer>
<key>Name</key><string>Ep. 35 | What Do Your Morals Taste Like? | Guest: Jonathan Haidt</string>
<key>Artist</key><string>Blaze Podcast Network</string>
<key>Album Artist</key><string>Blaze Podcast Network</string>
<key>Album</key><string>Something's Off with Andrew Heaton</string>
<key>Genre</key><string>News &#38; Politics</string>
<key>Kind</key><string>MPEG audio file</string>
<key>Size</key><integer>48123940</integer>
<key>Total Time</key><integer>3004133</integer>
<key>Year</key><integer>2019</integer>
<key>Date Modified</key><date>2019-01-13T01:10:30Z</date>
<key>Date Added</key><date>2019-01-13T01:10:30Z</date>
<key>Bit Rate</key><integer>128</integer>
<key>Sample Rate</key><integer>44100</integer>
<key>Release Date</key><date>2019-01-11T12:00:00Z</date>
<key>Artwork Count</key><integer>1</integer>
<key>Persistent ID</key><string>5FAE7186A09E5D3E</string>
<key>Disabled</key><true/>
<key>Track Type</key><string>File</string>
<key>Purchased</key><true/>
<key>Podcast</key><true/>
<key>Unplayed</key><true/>
<key>Location</key><string>file:///Users/Music/iTunes/iTunes%20Media/Podcasts/Something's%20Off%20with%20Andrew%20Heaton/Ep.%2035%20_%20What%20Do%20Your%20Morals%20Taste%20Like_%20_%20Guest_%20Jonathan%20Haidt.mp3</string>
<key>File Folder Count</key><integer>4</integer>
<key>Library Folder Count</key><integer>1</integer>
</dict>
</dict>
</dict>
</plist>'''

# fromstring() returns the root <plist> element.
xml = ET.fromstring(data)

# Path is relative to <plist>: top-level dict -> "Tracks" dict -> one track
# dict -> every <key> child of that track.
lst = xml.findall('dict/dict/dict/key')
for item in lst:
    # Prints only the key names; the value elements are siblings of the keys.
    print(item.text)
Question: How to access value element in iTunes xml
以下解决方案使用 lxml.etree.iterparse,把每个 <key> 标签与紧随其后的值标签配对,构建 Python dict {key: value}。
用到的模块和内置函数:
from lxml import etree
import io
class Playlist:
    """Iterate over the top-level ``<dict>`` elements of a plist XML stream.

    Every ``<key>`` is paired with the value element that follows it and the
    pairs are collected into a Python ``dict``.  Nested ``<dict>`` values
    become nested dictionaries, scalar values are stored as their element
    text, and ``<true/>`` / ``<false/>`` are stored as ``bool``.

    NOTE(review): ``<array>`` values are not supported.  The array's end tag
    is reported as an unknown tag and dictionaries nested inside an array are
    not parsed correctly.
    """

    # Value tags whose element text is stored unchanged.
    _TEXT_TAGS = ('integer', 'string', 'date', 'real', 'data')
    # Value tags that carry their value in the tag name itself.
    _BOOL_TAGS = ('true', 'false')

    def __init__(self, fh):
        """
        Initialize 'iterparse' to generate 'start' and 'end' events on all tags

        :param fh: File Handle from the XML File to parse
        """
        self.context = etree.iterparse(fh, events=("start", "end",))

    def _parse(self):
        """
        Yield only at 'end' event, except 'start' from tag 'dict'

        Events for the 'plist' tag are skipped entirely.  Every call creates
        a new generator over the same shared ``self.context``, so nested
        callers continue where the previous one stopped.

        :return: yield current Element
        """
        for event, elem in self.context:
            if elem.tag == 'plist' or (event == 'start' and elem.tag != 'dict'):
                continue
            yield elem

    def _parse_key_value(self, key=None):
        """
        Read key/value pairs until the enclosing 'dict' is closed.

        A 'dict' element seen while a key is pending is the start of a nested
        dictionary; a 'dict' element seen with no pending key is the end tag
        of the dictionary being read.  The pending key is therefore cleared
        after every value, whatever its type.

        :param key: optional key that is already pending when reading starts
        :return: tuple ``(elem, dict)``; ``elem`` is the closing 'dict'
                 element, or None if the events ran out before it was seen
        """
        _dict = {}
        for elem in self._parse():
            tag = elem.tag
            if tag == 'key':
                key = elem.text
                continue

            if tag == 'dict':
                if key is None:
                    # End tag of the dictionary we are currently reading.
                    return elem, _dict
                _dict[key] = self._parse_dict(key)
            elif tag in self._TEXT_TAGS:
                if key is None:
                    print('Missing key for value {}'.format(elem.text))
                else:
                    _dict[key] = elem.text
            elif tag in self._BOOL_TAGS:
                if key is None:
                    print('Missing key for value {}'.format(tag))
                else:
                    _dict[key] = tag == 'true'
            else:
                print('Unknow tag {}'.format(tag))

            # The pending key has been consumed (or was missing); without this
            # reset a following end-of-dict would be mistaken for a nested dict.
            key = None

        # Events exhausted without a closing 'dict': hand back what was read.
        return None, _dict

    def _parse_dict(self, key=None):
        """
        Parse the dictionary whose 'start' event has just been consumed.

        :param key: unused; kept for backward compatibility
        :return: dict with the key/value pairs; empty for an empty 'dict'
        """
        _, _dict = self._parse_key_value()
        return _dict

    def __iter__(self):
        """
        Yield one Python dict per 'dict' element found directly below 'plist'.
        """
        for elem in self._parse():
            if elem.tag == 'dict':
                yield self._parse_dict()
            else:
                print('Unknow tag {}'.format(elem.tag))
if __name__ == "__main__":
    # Abbreviated placeholder for the raw bytes of the plist XML document.
    data = b'''<?xml...'''

    with io.BytesIO(data) as in_xml:
        playlist = Playlist(in_xml)
        for entry in playlist:
            # First the whole record, then each of its top-level pairs.
            print("record:{}".format(entry))
            for name in entry:
                print("{}:{}".format(name, entry[name]))
Output:
record:{'Major Version': '1', 'Minor Version': '1'... (omitted for brevity)
Major Version:1
Minor Version:1
Date:2019-01-24T10:31:15Z
Tracks:{'99244': {'Track ID': '99244', 'Artist': 'Blaze Podcast Network', ... (omitted for brevity)}}
已在 Python 3.5 上测试
我正在尝试把 iTunes 的 XML 播放列表“导出”为 HTML 表格以便分享。但是 iTunes 库文件使用键/值对,而不是更有意义的 XML 标签。有没有一种简单的方法,可以同时取得这些键/值对中的值(<value>)?
下面的代码可以让我得到 <key> 的文本,即 Track ID、Name、Artist、Album Artist 等,但我似乎找不到办法获取紧跟在键后面的值元素,例如 <integer>49924</integer> 或 <string>Ep. 35 | What Do Your Morals Taste Like? ...</string>。我可以(或者说应该)用 ElementTree 来做这件事,还是应该改用正则表达式或其他库?谢谢!
from xml.etree import ElementTree as ET

# Excerpt of an iTunes library property list used as sample input.
# The ampersand in the Genre value is written as the character reference
# "&#38;": a bare "&" is not well-formed XML and makes ET.fromstring() raise
# xml.etree.ElementTree.ParseError.
data = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Major Version</key><integer>1</integer>
<key>Minor Version</key><integer>1</integer>
<key>Date</key><date>2019-01-21T07:31:15Z</date>
<key>Application Version</key><string>12.8.0.150</string>
<key>Features</key><integer>5</integer>
<key>Show Content Ratings</key><true/>
<key>Music Folder</key><string>file:///Users/Music/iTunes/iTunes%20Media/</string>
<key>Library Persistent ID</key><string>75E62CF156F5AE1B</string>
<key>Tracks</key>
<dict>
<key>49924</key>
<dict>
<key>Track ID</key><integer>49924</integer>
<key>Name</key><string>Ep. 35 | What Do Your Morals Taste Like? | Guest: Jonathan Haidt</string>
<key>Artist</key><string>Blaze Podcast Network</string>
<key>Album Artist</key><string>Blaze Podcast Network</string>
<key>Album</key><string>Something's Off with Andrew Heaton</string>
<key>Genre</key><string>News &#38; Politics</string>
<key>Kind</key><string>MPEG audio file</string>
<key>Size</key><integer>48123940</integer>
<key>Total Time</key><integer>3004133</integer>
<key>Year</key><integer>2019</integer>
<key>Date Modified</key><date>2019-01-13T01:10:30Z</date>
<key>Date Added</key><date>2019-01-13T01:10:30Z</date>
<key>Bit Rate</key><integer>128</integer>
<key>Sample Rate</key><integer>44100</integer>
<key>Release Date</key><date>2019-01-11T12:00:00Z</date>
<key>Artwork Count</key><integer>1</integer>
<key>Persistent ID</key><string>5FAE7186A09E5D3E</string>
<key>Disabled</key><true/>
<key>Track Type</key><string>File</string>
<key>Purchased</key><true/>
<key>Podcast</key><true/>
<key>Unplayed</key><true/>
<key>Location</key><string>file:///Users/Music/iTunes/iTunes%20Media/Podcasts/Something's%20Off%20with%20Andrew%20Heaton/Ep.%2035%20_%20What%20Do%20Your%20Morals%20Taste%20Like_%20_%20Guest_%20Jonathan%20Haidt.mp3</string>
<key>File Folder Count</key><integer>4</integer>
<key>Library Folder Count</key><integer>1</integer>
</dict>
</dict>
</dict>
</plist>'''

# fromstring() returns the root <plist> element.
xml = ET.fromstring(data)

# Path is relative to <plist>: top-level dict -> "Tracks" dict -> one track
# dict -> every <key> child of that track.
lst = xml.findall('dict/dict/dict/key')
for item in lst:
    # Prints only the key names; the value elements are siblings of the keys.
    print(item.text)
Question: How to access value element in iTunes xml
以下解决方案使用 lxml.etree.iterparse,把每个 <key> 标签与紧随其后的值标签配对,构建 Python dict {key: value}。
用到的模块和内置函数:
from lxml import etree
import io
class Playlist:
    """Iterate over the top-level ``<dict>`` elements of a plist XML stream.

    Every ``<key>`` is paired with the value element that follows it and the
    pairs are collected into a Python ``dict``.  Nested ``<dict>`` values
    become nested dictionaries, scalar values are stored as their element
    text, and ``<true/>`` / ``<false/>`` are stored as ``bool``.

    NOTE(review): ``<array>`` values are not supported.  The array's end tag
    is reported as an unknown tag and dictionaries nested inside an array are
    not parsed correctly.
    """

    # Value tags whose element text is stored unchanged.
    _TEXT_TAGS = ('integer', 'string', 'date', 'real', 'data')
    # Value tags that carry their value in the tag name itself.
    _BOOL_TAGS = ('true', 'false')

    def __init__(self, fh):
        """
        Initialize 'iterparse' to generate 'start' and 'end' events on all tags

        :param fh: File Handle from the XML File to parse
        """
        self.context = etree.iterparse(fh, events=("start", "end",))

    def _parse(self):
        """
        Yield only at 'end' event, except 'start' from tag 'dict'

        Events for the 'plist' tag are skipped entirely.  Every call creates
        a new generator over the same shared ``self.context``, so nested
        callers continue where the previous one stopped.

        :return: yield current Element
        """
        for event, elem in self.context:
            if elem.tag == 'plist' or (event == 'start' and elem.tag != 'dict'):
                continue
            yield elem

    def _parse_key_value(self, key=None):
        """
        Read key/value pairs until the enclosing 'dict' is closed.

        A 'dict' element seen while a key is pending is the start of a nested
        dictionary; a 'dict' element seen with no pending key is the end tag
        of the dictionary being read.  The pending key is therefore cleared
        after every value, whatever its type.

        :param key: optional key that is already pending when reading starts
        :return: tuple ``(elem, dict)``; ``elem`` is the closing 'dict'
                 element, or None if the events ran out before it was seen
        """
        _dict = {}
        for elem in self._parse():
            tag = elem.tag
            if tag == 'key':
                key = elem.text
                continue

            if tag == 'dict':
                if key is None:
                    # End tag of the dictionary we are currently reading.
                    return elem, _dict
                _dict[key] = self._parse_dict(key)
            elif tag in self._TEXT_TAGS:
                if key is None:
                    print('Missing key for value {}'.format(elem.text))
                else:
                    _dict[key] = elem.text
            elif tag in self._BOOL_TAGS:
                if key is None:
                    print('Missing key for value {}'.format(tag))
                else:
                    _dict[key] = tag == 'true'
            else:
                print('Unknow tag {}'.format(tag))

            # The pending key has been consumed (or was missing); without this
            # reset a following end-of-dict would be mistaken for a nested dict.
            key = None

        # Events exhausted without a closing 'dict': hand back what was read.
        return None, _dict

    def _parse_dict(self, key=None):
        """
        Parse the dictionary whose 'start' event has just been consumed.

        :param key: unused; kept for backward compatibility
        :return: dict with the key/value pairs; empty for an empty 'dict'
        """
        _, _dict = self._parse_key_value()
        return _dict

    def __iter__(self):
        """
        Yield one Python dict per 'dict' element found directly below 'plist'.
        """
        for elem in self._parse():
            if elem.tag == 'dict':
                yield self._parse_dict()
            else:
                print('Unknow tag {}'.format(elem.tag))
if __name__ == "__main__":
    # Abbreviated placeholder for the raw bytes of the plist XML document.
    data = b'''<?xml...'''

    with io.BytesIO(data) as in_xml:
        playlist = Playlist(in_xml)
        for entry in playlist:
            # First the whole record, then each of its top-level pairs.
            print("record:{}".format(entry))
            for name in entry:
                print("{}:{}".format(name, entry[name]))
Output:
record:{'Major Version': '1', 'Minor Version': '1'... (omitted for brevity)
Major Version:1
Minor Version:1
Date:2019-01-24T10:31:15Z
Tracks:{'99244': {'Track ID': '99244', 'Artist': 'Blaze Podcast Network', ... (omitted for brevity)}}
已在 Python 3.5 上测试