获取 UnicodeDecodeError

Getting UnicodeDecodeError

我收到这个奇怪的 UnicodeDecodeError 和 我不知道为什么会导致此错误,但如果有人可以帮助我解决此问题,那就太好了:)

错误信息:

UnicodeDecodeError: 'charmap' 编解码器无法解码位置 6456:character 中的字节 0x81 映射到

完整的错误消息截图

screenshot of the Error message

我的代码:

import os
import json
import random
import csv
from pydub import AudioSegment


file_path = '/path/to/file/.tsv '
save_json_path = '/path/where/you/want/the/jsons/saved' 

def main(args):
    data = []
    directory = file_path.rpartition('/')[0]
    percent = int(100)
    
    with open(file_path) as f:
        lenght = sum(1 for ine in f)
    
    
    
    
    with open(file_path, newline='') as csvfile: 
        reader = csv.DictReader(csvfile, delimiter='\t')
        index = 1
        if(args.convert):
            print(str(lenght) + "files found")
        for row in reader:  
            file_name = row['path']
            filename = file_name.rpartition('.')[0] + ".wav"
            text = row['sentence']
            if(args.convert):
                data.append({
                "key": directory + "/clips/" + filename,
                "text": text
                })
                print("converting file " + str(index) + "/" + str(lenght) + " to wav", end="\r")
                src = directory + "/clips/" + file_name
                dst = directory + "/clips/" + filename
                sound = AudioSegment.from_mp3(src)
                sound.export(dst, format="wav")
                index = index + 1
            else:
                data.append({
                "key": directory + "/clips/" + file_name,
                "text": text
                })
                
    random.shuffle(data)

    print("creating JSON's")
    f = open(save_json_path +"/"+ "train.json", "w")
    
    with open(save_json_path +"/"+ 'train.json','w') as f:
        d = len(data)
        i=0
        while(i<int(d-d/percent)):
            r=data[i]
            line = json.dumps(r)
            f.write(line + "\n")
            i = i+1
    
    f = open(save_json_path +"/"+ "test.json", "w")

    with open(save_json_path +"/"+ 'test.json','w') as f:
        d = len(data)
        i=int(d-d/percent)
        while(i<d):
            r=data[i]
            line = json.dumps(r)
            f.write(line + "\n")
            i = i+1
    print("Done!")

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="""
    Utility script to convert commonvoice into wav and create the training and test json files for speechrecognition. """
    )  
    parser.add_argument('--convert', default=True, action='store_true',
                        help='says that the script should convert mp3 to wav')

    
    args = parser.parse_known_args()
    main(args)

看起来你在这个块中遇到了这个错误

with open(file_path) as f:
    length = sum(1 for line in f)

在另一个 post 中,虽然没有公认的答案,但显示这可能是因为您的文件编码。

尝试添加 encoding kwarg 以打开

with open(file_path, encoding="latin-1") as f:
   length = sum(1 for line in f)