ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)
ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)
目前我正在 python 上使用 pyannote 进行嵌入。
我的嵌入函数如下所示:
import torch
import librosa
from pyannote.core import Segment
def embeddings_(audio_path, resegmented, range):
    """Return one pyannote embedding per entry of ``resegmented``.

    Args:
        audio_path: Path of the audio file; passed to the model and to
            ``librosa.load``.
        resegmented: Iterable of items whose first element (``item[0]``) is
            the start of the excerpt to embed (presumably in seconds --
            confirm against the caller).
        range: Length added to each start to obtain the excerpt end. The
            name shadows the builtin ``range`` inside this function; it is
            kept so that existing keyword callers keep working.

    Returns:
        ``numpy.ndarray`` of shape ``(len(resegmented), 512)``.
    """
    # Local import: the snippet uses ``np`` but never imports numpy, which
    # would otherwise raise NameError at the first ``np.`` reference.
    import numpy as np

    model_emb = torch.hub.load('pyannote/pyannote-audio', 'emb')

    # Walk the whole-file embedding once and check the type of every
    # (window, embedding) pair it yields.
    for window, emb in model_emb({'audio': audio_path}):
        assert isinstance(window, Segment)
        assert isinstance(emb, np.ndarray)

    # The file duration (number of samples / sample rate) is passed to
    # ``crop`` together with the path.
    y, sr = librosa.load(audio_path)
    current_file = {'audio': audio_path, 'duration': len(y) / sr}

    data = np.asarray([
        model_emb.crop(
            current_file, Segment(start=item[0], end=item[0] + range)
        ).T
        for item in resegmented
    ])
    # Fixed width of 512 is kept on purpose: it also makes an empty
    # ``resegmented`` return a well-formed (0, 512) array.
    return data.reshape(len(data), 512)
当我运行
# Example call: embed every entry of `resegmented`, using excerpts of length 2.
embeddings = embeddings_(audiofile, resegmented, 2)
我收到这个错误:
ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)
我也遇到了同样的错误,但我找到了解决方法。对我来说,错误是在 “pyannote/audio/features/utils.py” 中触发的:它在执行下面这行代码对音频重新采样时出错:
y = librosa.core.resample(y.T, sample_rate, self.sample_rate).T
这是我的解决方法
def get_features(self, y, sample_rate):
    """Convert a waveform to mono, resample and augment it, then validate it.

    Args:
        y: Waveform indexed as ``(samples, channels)`` (the mono
            conversion averages over axis 1).
        sample_rate: Sample rate of ``y``.

    Returns:
        The processed waveform; a 1-D result is reshaped to
        ``(samples, 1)`` before validation.

    Raises:
        ValueError: If ``valid_audio`` rejects the first channel of the
            processed waveform (chained to the original ``ParameterError``).
    """
    # convert to mono
    if self.mono:
        y = np.mean(y, axis=1, keepdims=True)

    # Workaround: hand a 1-D array to librosa, which rejects the
    # (1, samples) array produced by ``y.T`` with "Mono data must have
    # shape (samples,)". Only a length-1 *channel* axis is squeezed; an
    # axis-less np.squeeze would also collapse the sample axis of a
    # one-sample clip.
    if y.ndim == 2 and y.shape[1] == 1:
        y = np.squeeze(y, axis=1)

    # resample if sample rates mismatch
    if (self.sample_rate is not None) and (self.sample_rate != sample_rate):
        # Keyword arguments work on both old and new librosa releases
        # (the sample rates are keyword-only from librosa 0.10 on).
        y = librosa.core.resample(
            y.T, orig_sr=sample_rate, target_sr=self.sample_rate
        ).T
        sample_rate = self.sample_rate

    # augment data
    if self.augmentation is not None:
        y = self.augmentation(y, sample_rate)

    # TODO: how time consuming is this thing (needs profiling...)
    # Workaround: restore the (samples, 1) layout expected below.
    if y.ndim == 1:
        y = y[:, np.newaxis]

    try:
        valid_audio(y[:, 0], mono=True)
    except ParameterError as e:
        msg = "Something went wrong when augmenting waveform."
        # Chain the cause so the underlying librosa error stays visible.
        raise ValueError(msg) from e

    return y
先对 y 使用 np.squeeze 去掉长度为 1 的维度,以满足 librosa.core.resample 的输入要求;然后使用 y[:, np.newaxis] 将其形状改回 (samples, 1),以便执行 valid = valid_audio(y[:, 0], mono=True)。
目前我正在 python 上使用 pyannote 进行嵌入。 我的嵌入函数如下所示:
import torch
import librosa
from pyannote.core import Segment
def embeddings_(audio_path, resegmented, range):
    """Return one pyannote embedding per entry of ``resegmented``.

    Args:
        audio_path: Path of the audio file; passed to the model and to
            ``librosa.load``.
        resegmented: Iterable of items whose first element (``item[0]``) is
            the start of the excerpt to embed (presumably in seconds --
            confirm against the caller).
        range: Length added to each start to obtain the excerpt end. The
            name shadows the builtin ``range`` inside this function; it is
            kept so that existing keyword callers keep working.

    Returns:
        ``numpy.ndarray`` of shape ``(len(resegmented), 512)``.
    """
    # Local import: the snippet uses ``np`` but never imports numpy, which
    # would otherwise raise NameError at the first ``np.`` reference.
    import numpy as np

    model_emb = torch.hub.load('pyannote/pyannote-audio', 'emb')

    # Walk the whole-file embedding once and check the type of every
    # (window, embedding) pair it yields.
    for window, emb in model_emb({'audio': audio_path}):
        assert isinstance(window, Segment)
        assert isinstance(emb, np.ndarray)

    # The file duration (number of samples / sample rate) is passed to
    # ``crop`` together with the path.
    y, sr = librosa.load(audio_path)
    current_file = {'audio': audio_path, 'duration': len(y) / sr}

    data = np.asarray([
        model_emb.crop(
            current_file, Segment(start=item[0], end=item[0] + range)
        ).T
        for item in resegmented
    ])
    # Fixed width of 512 is kept on purpose: it also makes an empty
    # ``resegmented`` return a well-formed (0, 512) array.
    return data.reshape(len(data), 512)
当我运行
# Example call: embed every entry of `resegmented`, using excerpts of length 2.
embeddings = embeddings_(audiofile, resegmented, 2)
我收到这个错误:
ParameterError: Mono data must have shape (samples,). Received shape=(1, 87488721)
我也遇到了同样的错误,但我找到了解决方法。对我来说,错误是在 “pyannote/audio/features/utils.py” 中触发的:它在执行下面这行代码对音频重新采样时出错:y = librosa.core.resample(y.T, sample_rate, self.sample_rate).T
这是我的解决方法
def get_features(self, y, sample_rate):
    """Convert a waveform to mono, resample and augment it, then validate it.

    Args:
        y: Waveform indexed as ``(samples, channels)`` (the mono
            conversion averages over axis 1).
        sample_rate: Sample rate of ``y``.

    Returns:
        The processed waveform; a 1-D result is reshaped to
        ``(samples, 1)`` before validation.

    Raises:
        ValueError: If ``valid_audio`` rejects the first channel of the
            processed waveform (chained to the original ``ParameterError``).
    """
    # convert to mono
    if self.mono:
        y = np.mean(y, axis=1, keepdims=True)

    # Workaround: hand a 1-D array to librosa, which rejects the
    # (1, samples) array produced by ``y.T`` with "Mono data must have
    # shape (samples,)". Only a length-1 *channel* axis is squeezed; an
    # axis-less np.squeeze would also collapse the sample axis of a
    # one-sample clip.
    if y.ndim == 2 and y.shape[1] == 1:
        y = np.squeeze(y, axis=1)

    # resample if sample rates mismatch
    if (self.sample_rate is not None) and (self.sample_rate != sample_rate):
        # Keyword arguments work on both old and new librosa releases
        # (the sample rates are keyword-only from librosa 0.10 on).
        y = librosa.core.resample(
            y.T, orig_sr=sample_rate, target_sr=self.sample_rate
        ).T
        sample_rate = self.sample_rate

    # augment data
    if self.augmentation is not None:
        y = self.augmentation(y, sample_rate)

    # TODO: how time consuming is this thing (needs profiling...)
    # Workaround: restore the (samples, 1) layout expected below.
    if y.ndim == 1:
        y = y[:, np.newaxis]

    try:
        valid_audio(y[:, 0], mono=True)
    except ParameterError as e:
        msg = "Something went wrong when augmenting waveform."
        # Chain the cause so the underlying librosa error stays visible.
        raise ValueError(msg) from e

    return y
先对 y 使用 np.squeeze 去掉长度为 1 的维度,以满足 librosa.core.resample 的输入要求;然后使用 y[:, np.newaxis] 将其形状改回 (samples, 1),以便执行 valid = valid_audio(y[:, 0], mono=True)。