Audio profanity filter using python
I am trying to write a Python program where the user supplies a video file and the program mutes/beeps the curse/bad words in it and outputs a filtered video file, i.e. a profanity filter.
My plan is to first convert the video file to .wav, apply the audio profanity filter to the .wav file, and then write that .wav file back into the video.
So far I can split the audio file into chunks and extract text from each 5-second chunk with the speech_recognition library. But I cannot detect words that overlap chunk boundaries, and I have not managed to add the check that, when a chunk's text contains a word from my curse-word list, lowers that chunk's dB so it is effectively muted (suggestions for beeping instead of muting are welcome).
I am not sure whether my approach is even the right one.
All I want is an audio profanity filter in Python; so far I can only make the chunks and extract the text.
import speech_recognition as sr
import os
import codecs
from pydub.utils import make_chunks
from pydub import AudioSegment
from pydub.playback import play

# Curse words list
curse_words = []
with codecs.open("words_final.txt", "r") as f0:
    for line in f0.read().splitlines():
        curse_words.append(line)
# print(curse_words)

# create a speech recognition object
r = sr.Recognizer()

# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path):
    """
    Split the large audio file into chunks
    and apply speech recognition on each of these chunks.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    chunk_length_ms = 5000  # pydub works in milliseconds
    chunks = make_chunks(sound, chunk_length_ms)  # make 5-second chunks
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk and save it in the `folder_name` directory
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting it to text
        try:
            text = r.recognize_google(audio_listened, language="en-US")
            wav_file = AudioSegment.from_file(chunk_filename, format="wav")
            # TODO: check `text` against curse_words and mute/beep this chunk;
            # for now this only creates and plays an 8-second silent segment
            silent_wav_file = AudioSegment.silent(duration=8000)
            play(silent_wav_file)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            whole_text += text
    # return the text for all chunks detected
    return whole_text

path = "Welcome.wav"
print("\nFull text:", get_large_audio_transcription(path))
# Will implement a loop to sum all chunks and make a final filtered .wav file
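What I am roughly aiming for inside that loop is something like the sketch below (whole-chunk granularity, with a placeholder 1 kHz beep and a -60 dB drop for muting); words that straddle a chunk boundary are still missed:

from pydub import AudioSegment
from pydub.generators import Sine

def censor_chunk(chunk, text, curse_words, use_beep=True):
    # Return the chunk unchanged, or beeped/muted if its transcript
    # contains any word from the curse list (whole-chunk granularity)
    if not any(w in curse_words for w in text.lower().split()):
        return chunk
    if use_beep:
        # replace the whole chunk with a 1 kHz beep of the same length
        return Sine(1000, sample_rate=chunk.frame_rate).to_audio_segment(duration=len(chunk))
    return chunk - 60  # drop the volume far enough to be inaudible

# inside get_large_audio_transcription, after recognize_google succeeds:
#     audio_chunk = censor_chunk(audio_chunk, text, curse_words)
#     filtered += audio_chunk        # filtered = AudioSegment.empty() before the loop
# and after the loop:
#     filtered.export("filtered.wav", format="wav")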
First extract audio.wav from the video file. Then get the timestamp of each word in that audio, using a speech-to-text engine that returns word-level timestamps. Then you can mute/beep the curse words in the audio and merge it back into the video.
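A rough sketch of that pipeline, assuming ffmpeg is on the PATH and using Vosk as one example of a recognizer that returns word-level timestamps (the model path, beep frequency, and file names below are placeholders):

import json
import subprocess
import wave

from pydub import AudioSegment
from pydub.generators import Sine
from vosk import Model, KaldiRecognizer

def extract_audio(video_path, wav_path="audio.wav"):
    # Vosk wants mono 16-bit PCM; 16 kHz matches the small English models
    subprocess.run(["ffmpeg", "-y", "-i", video_path,
                    "-ac", "1", "-ar", "16000", wav_path], check=True)
    return wav_path

def curse_word_spans(wav_path, curse_words, model_path="model"):
    # Return (start, end) times in seconds for every recognized curse word
    wf = wave.open(wav_path, "rb")
    rec = KaldiRecognizer(Model(model_path), wf.getframerate())
    rec.SetWords(True)  # ask Vosk for word-level timestamps
    spans = []
    def collect(result):
        for w in result.get("result", []):
            if w["word"].lower() in curse_words:
                spans.append((w["start"], w["end"]))
    while True:
        data = wf.readframes(4000)
        if not data:
            break
        if rec.AcceptWaveform(data):
            collect(json.loads(rec.Result()))
    collect(json.loads(rec.FinalResult()))
    return spans

def beep_spans(wav_path, spans, out_path="filtered.wav"):
    # Replace each offending span with a 1 kHz beep
    # (use `audio[s:e] - 60` instead of the beep to mute)
    audio = AudioSegment.from_wav(wav_path)
    for start, end in spans:
        s, e = int(start * 1000), int(end * 1000)
        beep = Sine(1000, sample_rate=audio.frame_rate).to_audio_segment(duration=e - s)
        audio = audio[:s] + beep + audio[e:]
    audio.export(out_path, format="wav")
    return out_path

def replace_audio(video_path, wav_path, out_path="filtered.mp4"):
    # Copy the video stream untouched and swap in the filtered audio track
    subprocess.run(["ffmpeg", "-y", "-i", video_path, "-i", wav_path,
                    "-map", "0:v:0", "-map", "1:a:0", "-c:v", "copy", out_path],
                   check=True)

curse_words = set(line.strip().lower() for line in open("words_final.txt"))
wav = extract_audio("input.mp4")
replace_audio("input.mp4", beep_spans(wav, curse_word_spans(wav, curse_words)))

Because the timestamps are per word, this avoids the chunk-boundary problem entirely and only silences the offending words rather than whole 5-second blocks.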