如何将 PyTTSx 的输出保存到 wav 文件
How to save the output of PyTTSx to wav file
我正在尝试找出我的代码无法正常工作的原因。我参考了“Recording synthesized text-to-speech to a file in Python”这个问题中的解决方案,但它在我这里不太奏效。我的问题是:为什么 text_to_wav 和 all_texts_to_files 这两个方法在我这里不起作用?
import json
import pyttsx
from openpyxl import load_workbook
import subprocess
class Ver2ProjectWithTTS(object):
    """Collect sentences from a JSON and an XML file and synthesize them.

    Every entry appended to ``list_merge`` is a dict with the keys
    ``'text'``, ``'speed'`` (int) and ``'volume'`` (float).
    """

    def __init__(self, espeak_path="espeak"):
        """Start with an empty sentence list.

        espeak_path -- name or full path of the eSpeak executable. The bare
            name "espeak" only works when the executable is on PATH; on
            Windows pass the full path instead, e.g.
            "C:/Program Files/eSpeak/command_line/espeak.exe", otherwise
            subprocess fails with "[Error 2] The system cannot find the
            file specified".
        """
        self.list_merge = []
        self.espeak_path = espeak_path

    def do_the_job(self):
        """Load both input files, then write one wav file per sentence."""
        self.read_json_file()
        self.read_xml_file()
        # say_something() can be called here instead to speak the
        # sentences aloud rather than writing them to files.
        self.all_texts_to_files()

    def read_json_file(self):
        """Append the entries of json-example.json to ``list_merge``.

        The file must hold an object with a 'sentences' list whose items
        carry 'text', 'speed' and 'volume'.
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open("json-example.json", 'r') as df:
            json_data = json.load(df)
        for entry in json_data['sentences']:
            self.list_merge.append({
                'text': entry['text'],
                'speed': int(entry['speed']),
                'volume': float(entry['volume']),
            })

    def read_xml_file(self):
        """Append the entries of xml-example.xml to ``list_merge``.

        Each child of the root element is read positionally: sub-element 0
        is the text, 1 the speed and 2 the volume.
        """
        # Imported locally because the visible module imports do not define
        # ``et``. NOTE(review): assumes ``et`` was meant to be the standard
        # library ElementTree -- confirm.
        import xml.etree.ElementTree as et

        root = et.parse('xml-example.xml').getroot()
        for sentence in root:
            self.list_merge.append({
                'text': sentence[0].text,
                'speed': int(sentence[1].text),
                'volume': float(sentence[2].text),
            })

    def say_something(self):
        """Speak every collected sentence aloud through pyttsx."""
        for item in self.list_merge:
            # Previously ``cleared_text`` was used without being assigned,
            # which raised NameError; derive it the same way
            # all_texts_to_files() does.
            # NOTE(review): clear_text_from_underscores() is not part of
            # this snippet and must be defined elsewhere on the class.
            cleared_text = self.clear_text_from_underscores(item['text'])
            engine = pyttsx.init()
            engine.setProperty('rate', item['speed'])
            engine.setProperty('volume', item['volume'])
            engine.say(cleared_text)
            engine.runAndWait()

    def text_to_wav(self, text, file_name):
        """Run eSpeak to render ``text`` into ``<file_name>.wav``.

        Returns the exit status of the eSpeak process.
        """
        wav_option = "-w" + file_name + ".wav"
        return subprocess.call([self.espeak_path, wav_option, text])

    def all_texts_to_files(self):
        """Write one wav file per collected sentence.

        The raw sentence text is used as the file name, while the text
        passed to eSpeak has been run through clear_text_from_underscores().
        """
        for item in self.list_merge:
            cleared_text = self.clear_text_from_underscores(item['text'])
            self.text_to_wav(cleared_text, item['text'])
if __name__ == '__main__':
    # Script entry point: build the project object and run the whole
    # read-files-then-synthesize pipeline.
    Ver2ProjectWithTTS().do_the_job()
错误信息(回溯)如下:
#In my project:
line 91, in <module> a.do_the_job()
line 21, in do_the_job self.all_texts_to_files()
line 85, in all_texts_to_files self.text_to_wav(cleared_text, item['text'])
line 80, in text_to_wav subprocess.call(["espeak", "-w"+file_name+".wav", text])
#in subprocess:
line 523, in call return Popen(*popenargs, **kwargs).wait()
line 711, in __init__ errread, errwrite)
line 959, in _execute_child startupinfo)
WindowsError: [Error 2] The system cannot find the file specified
假设您是在 Windows 操作系统上使用 Python,
您需要指定所调用的可执行文件(espeak)的完整路径,
当然还要指定输出文件的完整路径,
例如:
# Full path of the eSpeak command-line executable on Windows.
espeak_path = "C:/Program Files/eSpeak/command_line/espeak.exe"
# Output path without extension; ".wav" is appended when the option is built.
file_name = "C:/temp/test"
# eSpeak's -w option names the wav file to write; ``text`` is the sentence
# to synthesize and must already be defined by the surrounding code.
wav_option = "-w" + file_name + ".wav"
command = [espeak_path, wav_option, text]
subprocess.call(command)
from gtts import gTTS
import os
# gTTS returns MP3-encoded audio, so the file is saved with an .mp3
# extension; naming it .wav would not turn it into a WAV file. If a real WAV
# file is required, the MP3 has to be converted with a separate tool.
tts = gTTS(text='hi how r u', lang='en')
tts.save("good.mp3")
# mpg321 is a command-line MP3 player; it plays the file that was just saved.
os.system("mpg321 good.mp3")
此代码会将输出文件保存在您安装 Python 的文件夹中。
如需使用其他音频格式,只需更改文件的扩展名。(请注意:gTTS 生成的始终是 MP3 数据,仅更改扩展名并不会转换音频的实际格式。)
以防万一有人想知道如何在 linux 而不是 windows 上执行此操作:
# eSpeak writes the wav data to stdout (--stdout) and the shell redirects it
# into myfile.wav in the current directory.
os.system('espeak "example text to speech" --stdout > myfile.wav')
我正在尝试找出我的代码无法正常工作的原因。我参考了“Recording synthesized text-to-speech to a file in Python”这个问题中的解决方案,但它在我这里不太奏效。我的问题是:为什么 text_to_wav 和 all_texts_to_files 这两个方法在我这里不起作用?
import json
import pyttsx
from openpyxl import load_workbook
import subprocess
class Ver2ProjectWithTTS(object):
    """Collect sentences from a JSON and an XML file and synthesize them.

    Every entry appended to ``list_merge`` is a dict with the keys
    ``'text'``, ``'speed'`` (int) and ``'volume'`` (float).
    """

    def __init__(self, espeak_path="espeak"):
        """Start with an empty sentence list.

        espeak_path -- name or full path of the eSpeak executable. The bare
            name "espeak" only works when the executable is on PATH; on
            Windows pass the full path instead, e.g.
            "C:/Program Files/eSpeak/command_line/espeak.exe", otherwise
            subprocess fails with "[Error 2] The system cannot find the
            file specified".
        """
        self.list_merge = []
        self.espeak_path = espeak_path

    def do_the_job(self):
        """Load both input files, then write one wav file per sentence."""
        self.read_json_file()
        self.read_xml_file()
        # say_something() can be called here instead to speak the
        # sentences aloud rather than writing them to files.
        self.all_texts_to_files()

    def read_json_file(self):
        """Append the entries of json-example.json to ``list_merge``.

        The file must hold an object with a 'sentences' list whose items
        carry 'text', 'speed' and 'volume'.
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open("json-example.json", 'r') as df:
            json_data = json.load(df)
        for entry in json_data['sentences']:
            self.list_merge.append({
                'text': entry['text'],
                'speed': int(entry['speed']),
                'volume': float(entry['volume']),
            })

    def read_xml_file(self):
        """Append the entries of xml-example.xml to ``list_merge``.

        Each child of the root element is read positionally: sub-element 0
        is the text, 1 the speed and 2 the volume.
        """
        # Imported locally because the visible module imports do not define
        # ``et``. NOTE(review): assumes ``et`` was meant to be the standard
        # library ElementTree -- confirm.
        import xml.etree.ElementTree as et

        root = et.parse('xml-example.xml').getroot()
        for sentence in root:
            self.list_merge.append({
                'text': sentence[0].text,
                'speed': int(sentence[1].text),
                'volume': float(sentence[2].text),
            })

    def say_something(self):
        """Speak every collected sentence aloud through pyttsx."""
        for item in self.list_merge:
            # Previously ``cleared_text`` was used without being assigned,
            # which raised NameError; derive it the same way
            # all_texts_to_files() does.
            # NOTE(review): clear_text_from_underscores() is not part of
            # this snippet and must be defined elsewhere on the class.
            cleared_text = self.clear_text_from_underscores(item['text'])
            engine = pyttsx.init()
            engine.setProperty('rate', item['speed'])
            engine.setProperty('volume', item['volume'])
            engine.say(cleared_text)
            engine.runAndWait()

    def text_to_wav(self, text, file_name):
        """Run eSpeak to render ``text`` into ``<file_name>.wav``.

        Returns the exit status of the eSpeak process.
        """
        wav_option = "-w" + file_name + ".wav"
        return subprocess.call([self.espeak_path, wav_option, text])

    def all_texts_to_files(self):
        """Write one wav file per collected sentence.

        The raw sentence text is used as the file name, while the text
        passed to eSpeak has been run through clear_text_from_underscores().
        """
        for item in self.list_merge:
            cleared_text = self.clear_text_from_underscores(item['text'])
            self.text_to_wav(cleared_text, item['text'])
if __name__ == '__main__':
    # Script entry point: build the project object and run the whole
    # read-files-then-synthesize pipeline.
    Ver2ProjectWithTTS().do_the_job()
错误信息(回溯)如下:
#In my project:
line 91, in <module> a.do_the_job()
line 21, in do_the_job self.all_texts_to_files()
line 85, in all_texts_to_files self.text_to_wav(cleared_text, item['text'])
line 80, in text_to_wav subprocess.call(["espeak", "-w"+file_name+".wav", text])
#in subprocess:
line 523, in call return Popen(*popenargs, **kwargs).wait()
line 711, in __init__ errread, errwrite)
line 959, in _execute_child startupinfo)
WindowsError: [Error 2] The system cannot find the file specified
假设您是在 Windows 操作系统上使用 Python,您需要指定所调用的可执行文件(espeak)的完整路径,当然还要指定输出文件的完整路径,例如:
# Full path of the eSpeak command-line executable on Windows.
espeak_path = "C:/Program Files/eSpeak/command_line/espeak.exe"
# Output path without extension; ".wav" is appended when the option is built.
file_name = "C:/temp/test"
# eSpeak's -w option names the wav file to write; ``text`` is the sentence
# to synthesize and must already be defined by the surrounding code.
wav_option = "-w" + file_name + ".wav"
command = [espeak_path, wav_option, text]
subprocess.call(command)
from gtts import gTTS
import os
# gTTS returns MP3-encoded audio, so the file is saved with an .mp3
# extension; naming it .wav would not turn it into a WAV file. If a real WAV
# file is required, the MP3 has to be converted with a separate tool.
tts = gTTS(text='hi how r u', lang='en')
tts.save("good.mp3")
# mpg321 is a command-line MP3 player; it plays the file that was just saved.
os.system("mpg321 good.mp3")
此代码会将输出文件保存在您安装 Python 的文件夹中。如需使用其他音频格式,只需更改文件的扩展名。(请注意:gTTS 生成的始终是 MP3 数据,仅更改扩展名并不会转换音频的实际格式。)
以防万一有人想知道如何在 linux 而不是 windows 上执行此操作:
# eSpeak writes the wav data to stdout (--stdout) and the shell redirects it
# into myfile.wav in the current directory.
os.system('espeak "example text to speech" --stdout > myfile.wav')