Python 写入文件时将 unicode 转换为 ASCII
Python unicode to ASCII conversion in write to file
我正在编写一个脚本来遍历音乐库,并输出一个 .txt 文件,其中依次包含专辑名称、发行日期……以及曲目编号。在遇到以 unicode 形式导入、含有类似 (–) 这种字符的标签之前,它都能完美运行。一旦遇到这样的标签,我就会得到如下错误:
File "C:/Users/Brian/Python files/CDinfoRF2.py", line 51, in music_album_info
mfile.write(header)
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 18: ordinal not in range(128).
代码:
#!usr/bin/env python
__author__ = 'Brian Kane'
"""This scripts takes a path argument to the root directory of the music files (mp3 here) and
writes various information about the disc to a text file which is named by the artist"""
import io
import os
from os.path import *
import string
from mutagen.mp3 import MP3
from mutagen.easyid3 import EasyID3
import unicodedata
def music_album_info(path):
    """Walk a music tree and append album/track listings to per-artist text files.

    For every ``.mp3`` found under ``path`` the ID3 tags are read with
    ``EasyID3``. A header (album title and release date) is written once per
    directory, followed by one ``<track number>. <title>`` line per track.
    Output is appended to ``<artist>.txt``.

    NOTE(review): this listing had lost its indentation; the nesting below is
    a best-effort reconstruction -- confirm against the original post. The
    code is otherwise kept as posted, including the two lines marked
    "subject of the question", which are what raise the UnicodeEncodeError
    shown in the traceback.

    :param path: root directory of the music files.
    """
    count = 0
    for root, dirs, files in os.walk(path):  # walks through the data tree to get files
        for name in files:
            # The separator must be an escaped backslash; a bare '\' is an
            # unterminated string literal.
            path_name = root + '\\' + name
            extension = os.path.splitext(name)[1][1:].strip().lower()  # gets the file extension
            if extension == 'mp3':
                artist = EasyID3(path_name)['artist'][0]  # gets unicode artist name
                # print artist
                track_num = EasyID3(path_name)['tracknumber']  # gets unicode track number
                album = EasyID3(path_name)['album'][0]
                # print album
                # print type(album)
                # Subject of the question: the encoded byte string returned
                # here is discarded, so ``album`` is still unicode afterwards.
                album.encode('utf-8')
                # length = EasyID3(path_name)['length'][0]
                print(album)  # parenthesised so the listing parses on Python 2 and 3
                date = EasyID3(path_name)['date'][0]
                # print date
                track_name = EasyID3(path_name)['title']
                # print track_name
                header = '\n' + 'Title: ' + album + ' Released ' + date + '\n\n'
                # if EasyID3(path_name)['discnumber'] != []:
                #     disc = EasyID3(path_name)['discnumber']
                #     header = '\n' + 'Title: ' + album + 'Disc: ' + disc + ' Released ' + date + '\n\n'
                file_name = artist + '.txt'  # used to name file to be written to = artist
                mp3info = EasyID3(path_name)
                # print mp3info.items()
            # print 'header in',header
            if count < 1:  # used to not write header over each track
                # print 'header = ', header
                # Subject of the question: a file opened with plain open()
                # encodes unicode with the ASCII codec on write (Python 2).
                mfile = open(file_name, 'a')
                mfile.write(header)
                count += 1
            if extension == 'mp3':  # avoid album art and errors which occur
                track_info = '\t' + track_num[0] + '. ' + track_name[0] + '\n'
                # if EasyID3(path_name)'length' in mp3info.items():
                #     length = EasyID3(path_name)['length'][0]
                #     print length
                #     track_info = '\t' + track_num[0] + '. ' + track_name[0] + ' ' + length[0] + '\n'
                # print 'track_info = ',track_info
                mfile.write(track_info)
        count = 0  # reset for next artist - new .txt file
    mfile.close()
# Test path. The backslash is escaped so that '\t' is not read as a TAB.
path = 'C:\\test'  # this is a test path
# path = raw_input('Enter the path to the music file:\n>')
music_album_info(path)
您丢弃了专辑名(album)的 UTF-8 编码结果:
album.encode('utf-8')
字符串是不可变的;encode() 返回的是一个新的字节串,而您实际上丢弃了这个返回值。请把它保存下来:
album = album.encode('utf-8')
您也可以改用 io.open() 函数打开一个文件对象,该文件对象会自动把写入其中的所有 Unicode 文本编码为 UTF-8:
# io.open() with an explicit encoding encodes unicode text on write,
# so no manual .encode() call is needed.
with io.open(file_name, 'a', encoding='utf-8') as mfile:
    mfile.write(header)
在这种情况下,您根本不需要对 album 进行编码。
我正在编写一个脚本来遍历音乐库,并输出一个 .txt 文件,其中依次包含专辑名称、发行日期……以及曲目编号。在遇到以 unicode 形式导入、含有类似 (–) 这种字符的标签之前,它都能完美运行。一旦遇到这样的标签,我就会得到如下错误:
File "C:/Users/Brian/Python files/CDinfoRF2.py", line 51, in music_album_info
mfile.write(header)
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 18: ordinal not in range(128).
代码:
#!usr/bin/env python
__author__ = 'Brian Kane'
"""This scripts takes a path argument to the root directory of the music files (mp3 here) and
writes various information about the disc to a text file which is named by the artist"""
import io
import os
from os.path import *
import string
from mutagen.mp3 import MP3
from mutagen.easyid3 import EasyID3
import unicodedata
def music_album_info(path):
    """Walk a music tree and append album/track listings to per-artist text files.

    For every ``.mp3`` found under ``path`` the ID3 tags are read with
    ``EasyID3``. A header (album title and release date) is written once per
    directory, followed by one ``<track number>. <title>`` line per track.
    Output is appended to ``<artist>.txt``.

    NOTE(review): this listing had lost its indentation; the nesting below is
    a best-effort reconstruction -- confirm against the original post. The
    code is otherwise kept as posted, including the two lines marked
    "subject of the question", which are what raise the UnicodeEncodeError
    shown in the traceback.

    :param path: root directory of the music files.
    """
    count = 0
    for root, dirs, files in os.walk(path):  # walks through the data tree to get files
        for name in files:
            # The separator must be an escaped backslash; a bare '\' is an
            # unterminated string literal.
            path_name = root + '\\' + name
            extension = os.path.splitext(name)[1][1:].strip().lower()  # gets the file extension
            if extension == 'mp3':
                artist = EasyID3(path_name)['artist'][0]  # gets unicode artist name
                # print artist
                track_num = EasyID3(path_name)['tracknumber']  # gets unicode track number
                album = EasyID3(path_name)['album'][0]
                # print album
                # print type(album)
                # Subject of the question: the encoded byte string returned
                # here is discarded, so ``album`` is still unicode afterwards.
                album.encode('utf-8')
                # length = EasyID3(path_name)['length'][0]
                print(album)  # parenthesised so the listing parses on Python 2 and 3
                date = EasyID3(path_name)['date'][0]
                # print date
                track_name = EasyID3(path_name)['title']
                # print track_name
                header = '\n' + 'Title: ' + album + ' Released ' + date + '\n\n'
                # if EasyID3(path_name)['discnumber'] != []:
                #     disc = EasyID3(path_name)['discnumber']
                #     header = '\n' + 'Title: ' + album + 'Disc: ' + disc + ' Released ' + date + '\n\n'
                file_name = artist + '.txt'  # used to name file to be written to = artist
                mp3info = EasyID3(path_name)
                # print mp3info.items()
            # print 'header in',header
            if count < 1:  # used to not write header over each track
                # print 'header = ', header
                # Subject of the question: a file opened with plain open()
                # encodes unicode with the ASCII codec on write (Python 2).
                mfile = open(file_name, 'a')
                mfile.write(header)
                count += 1
            if extension == 'mp3':  # avoid album art and errors which occur
                track_info = '\t' + track_num[0] + '. ' + track_name[0] + '\n'
                # if EasyID3(path_name)'length' in mp3info.items():
                #     length = EasyID3(path_name)['length'][0]
                #     print length
                #     track_info = '\t' + track_num[0] + '. ' + track_name[0] + ' ' + length[0] + '\n'
                # print 'track_info = ',track_info
                mfile.write(track_info)
        count = 0  # reset for next artist - new .txt file
    mfile.close()
# Test path. The backslash is escaped so that '\t' is not read as a TAB.
path = 'C:\\test'  # this is a test path
# path = raw_input('Enter the path to the music file:\n>')
music_album_info(path)
您丢弃了专辑名(album)的 UTF-8 编码结果:
album.encode('utf-8')
字符串是不可变的;encode() 返回的是一个新的字节串,而您实际上丢弃了这个返回值。请把它保存下来:
album = album.encode('utf-8')
您也可以改用 io.open() 函数打开一个文件对象,该文件对象会自动把写入其中的所有 Unicode 文本编码为 UTF-8:
# io.open() with an explicit encoding encodes unicode text on write,
# so no manual .encode() call is needed.
with io.open(file_name, 'a', encoding='utf-8') as mfile:
    mfile.write(header)
在这种情况下,您根本不需要对 album 进行编码。