使用 Resemblyzer 对电话交谈进行说话人分离(speaker diarization)
speaker diarization for telephone conversations using Resemblyzer
我有一些电话交谈的录音。
我使用了 Resemblyzer,它会按说话人对音频进行聚类。输出是 labelling,
它基本上相当于一个字典(实际是元组列表),其中每一项 (speaker_label, start_time, end_time)
记录了哪位说话人在哪个时间段讲话。
我需要根据 labelling 中的时间,把音频按说话人切分开来。我已经为此折腾了一个星期。
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
import pickle
import scipy.io.wavfile
from spectralcluster import SpectralClusterer
# Path of the recording to diarize (placeholder value; point it at a real file).
audio_file_path = 'C:/Users/...'
wav_fpath = Path(audio_file_path)
# Load the recording through Resemblyzer's preprocess_wav helper.
wav = preprocess_wav(wav_fpath)
# NOTE(review): the "cpu" argument presumably selects the device the encoder
# runs on — confirm against the Resemblyzer VoiceEncoder documentation.
encoder = VoiceEncoder("cpu")
# With return_partials=True the call yields three values: the first is
# discarded here, the second holds the embeddings that get clustered and the
# third holds the windows of `wav` that create_labelling reads .start/.stop from.
# NOTE(review): rate=16 presumably means 16 partial embeddings per second —
# confirm against the embed_utterance documentation.
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
# Cluster the embeddings; each resulting cluster label is treated as a speaker.
# NOTE(review): a two-party phone call should need far fewer than 100
# clusters — confirm that this upper bound is intended.
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
# presumably one label per row of cont_embeds; create_labelling indexes
# `labels` in step with `wav_splits`.
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits, sampling_rate=None):
    """Merge per-window speaker labels into contiguous speaker segments.

    Args:
        labels: Sequence with one speaker label per entry of ``wav_splits``.
        wav_splits: Sequence of slice-like objects whose ``start`` and
            ``stop`` are sample offsets of each analysed window.
        sampling_rate: Samples per second used to convert sample offsets to
            seconds. Defaults to ``resemblyzer.audio.sampling_rate``.

    Returns:
        A list of ``(speaker_label, start_time, end_time)`` tuples, with the
        label converted to ``str`` and the times in seconds. The first
        segment starts at 0, a speaker change is placed at the midpoint of
        the first window carrying the new label, and the last segment ends
        at the end of the last window. Empty input gives an empty list.
    """
    if sampling_rate is None:
        # Imported lazily so callers that pass an explicit rate do not need
        # Resemblyzer to be importable.
        from resemblyzer.audio import sampling_rate as default_rate
        sampling_rate = default_rate

    if len(wav_splits) == 0:
        return []

    # Midpoint of every window, in seconds.
    times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]

    labelling = []
    start_time = 0
    for i in range(1, len(times)):
        if labels[i] != labels[i - 1]:
            labelling.append((str(labels[i - 1]), start_time, times[i]))
            start_time = times[i]

    # Close the last segment at the end of the final window rather than at
    # its midpoint: the midpoint produced a zero-length segment whenever the
    # speaker changed on the last window and left the tail uncovered.
    audio_end = wav_splits[-1].stop / sampling_rate
    labelling.append((str(labels[len(times) - 1]), start_time, audio_end))
    return labelling
# Turn the per-window labels into (speaker_label, start_time, end_time) segments.
labelling = create_labelling(labels, wav_splits)
这段代码很有帮助:
首先准备一个 time_stamps.txt 文件,里面写上要裁剪音频的各个时间戳(文件内容应以逗号分隔)。
然后填入音频文件名及其格式,代码就能完成裁剪。我是在 GitHub 上找到它的:https://github.com/raotnameh/Trim_audio
import numpy as np
from pydub import AudioSegment
def trim(start, end, file_name, format_, i):
    """Cut one time span out of an audio file and save it as a separate file.

    Args:
        start: Start of the span, in seconds.
        end: End of the span, in seconds.
        file_name: Path of the input file without its extension; the input
            is read from ``file_name + "." + format_``.
        format_: Audio format, used both as the file extension and as the
            decode/encode format (e.g. ``"wav"``).
        i: Index appended to the output name, which is
            ``file_name + "_" + str(i) + "." + format_``.
    """
    # pydub slices audio in milliseconds.
    start_ms = start * 1000
    end_ms = end * 1000
    # from_file honours format_; from_wav always decoded the input as WAV,
    # whatever format the caller asked for.
    audio = AudioSegment.from_file(file_name + "." + format_, format=format_)
    segment = audio[start_ms:end_ms]
    # Name the output after the format actually written instead of a fixed
    # ".wav" (identical result for format_ == "wav").
    segment.export(file_name + "_" + str(i) + "." + format_, format=format_)
def parse_time_stamps(text):
    """Parse comma-separated seconds into a list of floats, skipping blank entries."""
    return [float(token) for token in text.split(",") if token.strip()]


if __name__ == '__main__':
    import sys

    # Read the cut points (in seconds) from a comma-separated text file.
    # The original called .strip() on the list returned by split(), which
    # raised AttributeError before any audio was processed.
    with open("time_stamps.txt", encoding="utf-8") as file:
        contents = parse_time_stamps(file.read())
    file_name = "male"
    format_ = "wav"
    # Each pair of consecutive time stamps delimits one segment; pairing them
    # explicitly avoids running past the end of the list.
    for i, (start, end) in enumerate(zip(contents, contents[1:])):
        try:
            trim(start, end, file_name, format_, i)
        except Exception as exc:
            # Stay best-effort (one bad segment does not stop the others) but
            # report the failure instead of swallowing it silently.
            print("Could not trim segment {}: {}".format(i, exc), file=sys.stderr)
我有一些电话交谈的录音。
我使用了 Resemblyzer,它会按说话人对音频进行聚类。输出是 labelling,
它基本上相当于一个字典(实际是元组列表),其中每一项 (speaker_label, start_time, end_time)
记录了哪位说话人在哪个时间段讲话。
我需要根据 labelling 中的时间,把音频按说话人切分开来。我已经为此折腾了一个星期。
from resemblyzer import preprocess_wav, VoiceEncoder
from pathlib import Path
import pickle
import scipy.io.wavfile
from spectralcluster import SpectralClusterer
# Path of the recording to diarize (placeholder value; point it at a real file).
audio_file_path = 'C:/Users/...'
wav_fpath = Path(audio_file_path)
# Load the recording through Resemblyzer's preprocess_wav helper.
wav = preprocess_wav(wav_fpath)
# NOTE(review): the "cpu" argument presumably selects the device the encoder
# runs on — confirm against the Resemblyzer VoiceEncoder documentation.
encoder = VoiceEncoder("cpu")
# With return_partials=True the call yields three values: the first is
# discarded here, the second holds the embeddings that get clustered and the
# third holds the windows of `wav` that create_labelling reads .start/.stop from.
# NOTE(review): rate=16 presumably means 16 partial embeddings per second —
# confirm against the embed_utterance documentation.
_, cont_embeds, wav_splits = encoder.embed_utterance(wav, return_partials=True, rate=16)
print(cont_embeds.shape)
# Cluster the embeddings; each resulting cluster label is treated as a speaker.
# NOTE(review): a two-party phone call should need far fewer than 100
# clusters — confirm that this upper bound is intended.
clusterer = SpectralClusterer(
min_clusters=2,
max_clusters=100,
p_percentile=0.90,
gaussian_blur_sigma=1)
# presumably one label per row of cont_embeds; create_labelling indexes
# `labels` in step with `wav_splits`.
labels = clusterer.predict(cont_embeds)
def create_labelling(labels, wav_splits, sampling_rate=None):
    """Merge per-window speaker labels into contiguous speaker segments.

    Args:
        labels: Sequence with one speaker label per entry of ``wav_splits``.
        wav_splits: Sequence of slice-like objects whose ``start`` and
            ``stop`` are sample offsets of each analysed window.
        sampling_rate: Samples per second used to convert sample offsets to
            seconds. Defaults to ``resemblyzer.audio.sampling_rate``.

    Returns:
        A list of ``(speaker_label, start_time, end_time)`` tuples, with the
        label converted to ``str`` and the times in seconds. The first
        segment starts at 0, a speaker change is placed at the midpoint of
        the first window carrying the new label, and the last segment ends
        at the end of the last window. Empty input gives an empty list.
    """
    if sampling_rate is None:
        # Imported lazily so callers that pass an explicit rate do not need
        # Resemblyzer to be importable.
        from resemblyzer.audio import sampling_rate as default_rate
        sampling_rate = default_rate

    if len(wav_splits) == 0:
        return []

    # Midpoint of every window, in seconds.
    times = [((s.start + s.stop) / 2) / sampling_rate for s in wav_splits]

    labelling = []
    start_time = 0
    for i in range(1, len(times)):
        if labels[i] != labels[i - 1]:
            labelling.append((str(labels[i - 1]), start_time, times[i]))
            start_time = times[i]

    # Close the last segment at the end of the final window rather than at
    # its midpoint: the midpoint produced a zero-length segment whenever the
    # speaker changed on the last window and left the tail uncovered.
    audio_end = wav_splits[-1].stop / sampling_rate
    labelling.append((str(labels[len(times) - 1]), start_time, audio_end))
    return labelling
# Turn the per-window labels into (speaker_label, start_time, end_time) segments.
labelling = create_labelling(labels, wav_splits)
这段代码很有帮助:
首先准备一个 time_stamps.txt 文件,里面写上要裁剪音频的各个时间戳(文件内容应以逗号分隔)。
然后填入音频文件名及其格式,代码就能完成裁剪。我是在 GitHub 上找到它的:https://github.com/raotnameh/Trim_audio
import numpy as np
from pydub import AudioSegment
def trim(start, end, file_name, format_, i):
    """Cut one time span out of an audio file and save it as a separate file.

    Args:
        start: Start of the span, in seconds.
        end: End of the span, in seconds.
        file_name: Path of the input file without its extension; the input
            is read from ``file_name + "." + format_``.
        format_: Audio format, used both as the file extension and as the
            decode/encode format (e.g. ``"wav"``).
        i: Index appended to the output name, which is
            ``file_name + "_" + str(i) + "." + format_``.
    """
    # pydub slices audio in milliseconds.
    start_ms = start * 1000
    end_ms = end * 1000
    # from_file honours format_; from_wav always decoded the input as WAV,
    # whatever format the caller asked for.
    audio = AudioSegment.from_file(file_name + "." + format_, format=format_)
    segment = audio[start_ms:end_ms]
    # Name the output after the format actually written instead of a fixed
    # ".wav" (identical result for format_ == "wav").
    segment.export(file_name + "_" + str(i) + "." + format_, format=format_)
def parse_time_stamps(text):
    """Parse comma-separated seconds into a list of floats, skipping blank entries."""
    return [float(token) for token in text.split(",") if token.strip()]


if __name__ == '__main__':
    import sys

    # Read the cut points (in seconds) from a comma-separated text file.
    # The original called .strip() on the list returned by split(), which
    # raised AttributeError before any audio was processed.
    with open("time_stamps.txt", encoding="utf-8") as file:
        contents = parse_time_stamps(file.read())
    file_name = "male"
    format_ = "wav"
    # Each pair of consecutive time stamps delimits one segment; pairing them
    # explicitly avoids running past the end of the list.
    for i, (start, end) in enumerate(zip(contents, contents[1:])):
        try:
            trim(start, end, file_name, format_, i)
        except Exception as exc:
            # Stay best-effort (one bad segment does not stop the others) but
            # report the failure instead of swallowing it silently.
            print("Could not trim segment {}: {}".format(i, exc), file=sys.stderr)