将 mp4 转换为 wav 后使用 python 语音识别获取文件读取错误
Getting File Read Error using python Speech Recognition after converting mp4 to wav
我一直在使用下面的第一个脚本将 mp4 文件批量转换为 wav,再用第二个脚本将语音转写为文本。
脚本成功运行了一段时间,已转换了大约 2000 个音频文件。所有文件的长度都小于 60 秒。
然后突然 speech_recognition 停止工作,每个文件都会出现以下错误。
"文件读取错误:无法将音频文件读取为 PCM WAV、AIFF/AIFF-C 或 Native FLAC;检查文件是否已损坏或为其他格式"
如有任何帮助,我们将不胜感激。
这是我将 mp4 转换为 wav 的代码:
#!/usr/bin/env python3
#convert mp4 to wav
import os
import sys
import glob
from pydub import AudioSegment
# Batch-convert every .mp4 under a user-supplied folder (recursively) to a
# .wav file written next to its source.
folder_path = input("Enter the path for the folder/directory : ")
print("\n Processing...")
#Remove quotes from string
# strip() copes with empty input and with paths pasted in double quotes.
folder_path = folder_path.strip().strip('"')
if not os.path.isdir(folder_path):
    sys.exit("Not a directory: " + folder_path)

#Count files
# Only the top-level folder is counted here; the conversion loop below also
# descends into sub-folders. glob.escape keeps characters such as "[" in the
# folder name from being read as wildcards.
pattern_root = glob.escape(folder_path)
wavList = glob.glob(os.path.join(pattern_root, "*.wav"))
mp4List = glob.glob(os.path.join(pattern_root, "*.mp4"))
prompt = "{0} '.mp4' files & {1} '.wav' files found. Continue (y/n) : ".format(
    len(mp4List), len(wavList)
)
if input(prompt) != "y":
    print('canceled by user')
    sys.exit()

# Passing "a" as the first command-line argument re-converts files whose .wav
# already exists. sys.argv[0] is the script name, so the flag is sys.argv[1].
overwrite = len(sys.argv) > 1 and sys.argv[1] == "a"

#loop through files
print(folder_path)
for subdir, dirs, files in os.walk(folder_path):
    for srcfile in files:
        print(srcfile)
        if not srcfile.endswith(".mp4"):
            continue
        wavfile = srcfile[:-3] + "wav"
        print(wavfile)
        # Build both paths from the directory currently being walked so that
        # files in sub-folders are read from, and written to, the right place.
        infile = os.path.join(subdir, srcfile)
        wavpath = os.path.join(subdir, wavfile)
        if overwrite and os.path.isfile(wavpath):
            os.remove(wavpath)
        if not os.path.isfile(wavpath):
            print(infile)
            print(wavpath)
            audio = AudioSegment.from_file(infile, format = "mp4")
            audio.export(wavpath, format = "WAV")
这是我的音频转文本功能。
我把它截断(truncated)了:函数里还有很多其他语音识别引擎的选项,但程序还没有执行到那一步就出错了。
#!/usr/bin/env python3
import speech_recognition as sr
import os
import json
import atexit
# Module-level tallies, all starting at zero.
# NOTE(review): nothing in the visible code updates these; presumably the
# calling code (not shown) counts transcribed, failed and skipped files - confirm.
text_count = 0
fail_count = 0
skip_count = 0
def get_audio_text(audio_file, TRANSLATE_OPTION):
    """Transcribe an audio file with the selected speech-recognition engine.

    Args:
        audio_file: Path of the audio file handed to ``sr.AudioFile``.
        TRANSLATE_OPTION: Engine selector; ``"s"``/``"sphinx"`` uses Sphinx,
            ``"g"``/``"google"`` uses Google Speech Recognition.

    Returns:
        The recognized text on success. On failure a descriptive message
        string is returned instead of raising. For any other selector the
        ``audio_file`` argument itself is returned unchanged.
    """
    recognizer = sr.Recognizer()

    # Load the file as the audio source; this is the step where the reported
    # "File Read Error" surfaces.
    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
    except Exception as e:
        errStr = "File Read Error: " + str(e)
        print(errStr)
        return errStr

    if TRANSLATE_OPTION in ("s", "sphinx"):
        # recognize speech using Sphinx
        try:
            return recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            return "Sphinx could not understand audio"
        except sr.RequestError as e:
            return "Sphinx error; {0}".format(e)

    if TRANSLATE_OPTION in ("g", "google"):
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            return "Google Speech Recognition could not understand audio"
        except sr.RequestError as e:
            return "Could not request results from Google Speech Recognition service; {0}".format(e)

    # No engine matched: fall back to the input path.
    return audio_file
运行环境为 Windows 10,已尝试过 Python 3.10 和 Python 3.9。
我发现了我的问题。它根本不在这个函数中。我以某种方式开始将 mp4 而不是 .wav 提供给函数。
这是一个经典案例:我想当然地认为调用代码没有问题,结果发现问题恰恰出在调用代码上。(是的,我当初应该把全部代码都贴出来。)
正确的调用代码:
# Walk the folder tree and transcribe every file whose name ends in ".wav".
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        #print(file, file[-4:]) #for debugging
        if(file[-4:]==".wav"):
            # NOTE(review): full_path and trans_optn are assigned in code that
            # is not part of this excerpt.
            txt = get_audio_text(full_path, trans_optn)
不正确的调用代码:
# Faulty variant kept for illustration: the extension filter selects the
# ".mp4" sources, so get_audio_text receives files it cannot read as audio.
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        #print(file, file[-4:]) #for debugging
        if(file[-4:]==".mp4"):
            # NOTE(review): full_path and trans_optn are assigned in code that
            # is not part of this excerpt.
            txt = get_audio_text(full_path, trans_optn)
老实说,我真的不知道我是怎么搞砸的。
我一直在使用下面的第一个脚本将 mp4 文件批量转换为 wav,再用第二个脚本将语音转写为文本。脚本成功运行了一段时间,已转换了大约 2000 个音频文件。所有文件的长度都小于 60 秒。然后 speech_recognition 突然停止工作,每个文件都会出现以下错误。
"文件读取错误:无法将音频文件读取为 PCM WAV、AIFF/AIFF-C 或 Native FLAC;检查文件是否已损坏或为其他格式"
如有任何帮助,我们将不胜感激。
这是我将 mp4 转换为 wav 的代码:
#!/usr/bin/env python3
#convert mp4 to wav
import os
import sys
import glob
from pydub import AudioSegment
# Batch-convert every .mp4 under a user-supplied folder (recursively) to a
# .wav file written next to its source.
folder_path = input("Enter the path for the folder/directory : ")
print("\n Processing...")
#Remove quotes from string
# strip() copes with empty input and with paths pasted in double quotes.
folder_path = folder_path.strip().strip('"')
if not os.path.isdir(folder_path):
    sys.exit("Not a directory: " + folder_path)

#Count files
# Only the top-level folder is counted here; the conversion loop below also
# descends into sub-folders. glob.escape keeps characters such as "[" in the
# folder name from being read as wildcards.
pattern_root = glob.escape(folder_path)
wavList = glob.glob(os.path.join(pattern_root, "*.wav"))
mp4List = glob.glob(os.path.join(pattern_root, "*.mp4"))
prompt = "{0} '.mp4' files & {1} '.wav' files found. Continue (y/n) : ".format(
    len(mp4List), len(wavList)
)
if input(prompt) != "y":
    print('canceled by user')
    sys.exit()

# Passing "a" as the first command-line argument re-converts files whose .wav
# already exists. sys.argv[0] is the script name, so the flag is sys.argv[1].
overwrite = len(sys.argv) > 1 and sys.argv[1] == "a"

#loop through files
print(folder_path)
for subdir, dirs, files in os.walk(folder_path):
    for srcfile in files:
        print(srcfile)
        if not srcfile.endswith(".mp4"):
            continue
        wavfile = srcfile[:-3] + "wav"
        print(wavfile)
        # Build both paths from the directory currently being walked so that
        # files in sub-folders are read from, and written to, the right place.
        infile = os.path.join(subdir, srcfile)
        wavpath = os.path.join(subdir, wavfile)
        if overwrite and os.path.isfile(wavpath):
            os.remove(wavpath)
        if not os.path.isfile(wavpath):
            print(infile)
            print(wavpath)
            audio = AudioSegment.from_file(infile, format = "mp4")
            audio.export(wavpath, format = "WAV")
这是我的音频转文本函数。我把它截断(truncated)了:函数里还有很多其他语音识别引擎的选项,但程序还没有执行到那一步就出错了。
#!/usr/bin/env python3
import speech_recognition as sr
import os
import json
import atexit
# Module-level tallies, all starting at zero.
# NOTE(review): nothing in the visible code updates these; presumably the
# calling code (not shown) counts transcribed, failed and skipped files - confirm.
text_count = 0
fail_count = 0
skip_count = 0
def get_audio_text(audio_file, TRANSLATE_OPTION):
    """Transcribe an audio file with the selected speech-recognition engine.

    Args:
        audio_file: Path of the audio file handed to ``sr.AudioFile``.
        TRANSLATE_OPTION: Engine selector; ``"s"``/``"sphinx"`` uses Sphinx,
            ``"g"``/``"google"`` uses Google Speech Recognition.

    Returns:
        The recognized text on success. On failure a descriptive message
        string is returned instead of raising. For any other selector the
        ``audio_file`` argument itself is returned unchanged.
    """
    recognizer = sr.Recognizer()

    # Load the file as the audio source; this is the step where the reported
    # "File Read Error" surfaces.
    try:
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
    except Exception as e:
        errStr = "File Read Error: " + str(e)
        print(errStr)
        return errStr

    if TRANSLATE_OPTION in ("s", "sphinx"):
        # recognize speech using Sphinx
        try:
            return recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            return "Sphinx could not understand audio"
        except sr.RequestError as e:
            return "Sphinx error; {0}".format(e)

    if TRANSLATE_OPTION in ("g", "google"):
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            return "Google Speech Recognition could not understand audio"
        except sr.RequestError as e:
            return "Could not request results from Google Speech Recognition service; {0}".format(e)

    # No engine matched: fall back to the input path.
    return audio_file
运行环境为 Windows 10,已尝试过 Python 3.10 和 Python 3.9。
我发现了我的问题。它根本不在这个函数中。我以某种方式开始将 mp4 而不是 .wav 提供给函数。
这是一个经典案例:我想当然地认为调用代码没有问题,结果发现问题恰恰出在调用代码上。(是的,我当初应该把全部代码都贴出来。)
正确的调用代码:
# Walk the folder tree and transcribe every file whose name ends in ".wav".
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        #print(file, file[-4:]) #for debugging
        if(file[-4:]==".wav"):
            # NOTE(review): full_path and trans_optn are assigned in code that
            # is not part of this excerpt.
            txt = get_audio_text(full_path, trans_optn)
不正确的调用代码:
# Faulty variant kept for illustration: the extension filter selects the
# ".mp4" sources, so get_audio_text receives files it cannot read as audio.
for subdir, dirs, files in os.walk(folder_path):
    os.chdir(subdir)
    for file in files:
        #print(file, file[-4:]) #for debugging
        if(file[-4:]==".mp4"):
            # NOTE(review): full_path and trans_optn are assigned in code that
            # is not part of this excerpt.
            txt = get_audio_text(full_path, trans_optn)
老实说,我真的不知道我是怎么搞砸的。