使用 MFCC 进行特征提取
Feature Extraction using MFCC
我想知道如何读取音频(x.wav)信号,并使用 MFCC 进行特征提取。我了解使用 MFCC 提取音频特征的步骤,
想知道在 Python(使用 Django 框架)中具体应如何编写代码。
这是构建语音识别器最重要的一步,因为在将语音信号转换为频域后,我们必须将其转换为可用的特征向量形式。
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
# Read the WAV file; the call yields the sampling rate and the sample array.
# (The original split this assignment across two lines, which is a
# SyntaxError; the call is wrapped in parentheses instead.)
frequency_sampling, audio_signal = wavfile.read(
    "/home/user/Downloads/OSR_us_000_0010_8k.wav"
)

# Restrict the analysis to the first 15000 samples.
audio_signal = audio_signal[:15000]

# MFCC features: one row per analysis window, one column per coefficient.
features_mfcc = mfcc(audio_signal, frequency_sampling)
print('\nMFCC:\nNumber of windows =', features_mfcc.shape[0])
print('Length of each feature =', features_mfcc.shape[1])

# Transpose so that windows run along the horizontal axis of the plot.
features_mfcc = features_mfcc.T
plt.matshow(features_mfcc)
plt.title('MFCC')

# Log filter-bank features for the same signal, laid out the same way.
filterbank_features = logfbank(audio_signal, frequency_sampling)
print('\nFilter bank:\nNumber of windows =', filterbank_features.shape[0])
print('Length of each feature =', filterbank_features.shape[1])

filterbank_features = filterbank_features.T
plt.matshow(filterbank_features)
plt.title('Filter bank')

# Render both figures.
plt.show()
或者,您也可以使用以下代码提取特征:
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
def extract_features(audio, rate):
    """Extract scaled MFCC features and append their delta features.

    Computes 20 cepstral coefficients per frame (25 ms window, 10 ms step,
    1200-point FFT, energy appended), normalizes them with
    ``sklearn.preprocessing.scale`` (the original author describes this step
    as CMS), and stacks the result side by side with its deltas. Per the
    original description this yields a 40-dimensional vector per frame.

    Args:
        audio: Audio samples, passed straight to ``python_speech_features.mfcc``.
        rate: Sampling rate of ``audio``.

    Returns:
        2-D array whose rows hold the scaled MFCCs followed by their deltas.
    """
    # ``mfcc`` here is the python_speech_features module (imported under that
    # alias), so ``mfcc.mfcc`` is the feature-extraction function itself.
    mfcc_feature = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    mfcc_feature = preprocessing.scale(mfcc_feature)
    # NOTE(review): calculate_delta is not defined in the visible snippet; it
    # must be provided elsewhere in the module — confirm before running.
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined
您可以通过以下代码,使用 librosa 包(易于安装和使用)提取音频文件的 MFCC 特征:
import librosa
import librosa.display
audio_path = 'my_audio_file.wav'

# Load the waveform together with the sampling rate it was loaded at.
x, sr = librosa.load(audio_path)

# The signal must be passed as ``y=``: current librosa releases make it a
# keyword-only argument, so the positional form raises a TypeError.
mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40)
print(mfccs.shape)
您也可以使用以下代码显示 MFCC:
import matplotlib.pyplot as plt

# Draw the MFCC matrix with a time-labelled horizontal axis.
librosa.display.specshow(mfccs, sr=sr, x_axis='time')
# specshow only draws onto the current axes; when run as a script the figure
# is not rendered until show() is called.
plt.show()
我想知道如何读取音频(x.wav)信号,并使用 MFCC 进行特征提取。我了解使用 MFCC 提取音频特征的步骤,想知道在 Python(使用 Django 框架)中具体应如何编写代码。
这是构建语音识别器最重要的一步,因为在将语音信号转换为频域后,我们必须将其转换为可用的特征向量形式。
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
# Read the WAV file; the call yields the sampling rate and the sample array.
# (The original split this assignment across two lines, which is a
# SyntaxError; the call is wrapped in parentheses instead.)
frequency_sampling, audio_signal = wavfile.read(
    "/home/user/Downloads/OSR_us_000_0010_8k.wav"
)

# Restrict the analysis to the first 15000 samples.
audio_signal = audio_signal[:15000]

# MFCC features: one row per analysis window, one column per coefficient.
features_mfcc = mfcc(audio_signal, frequency_sampling)
print('\nMFCC:\nNumber of windows =', features_mfcc.shape[0])
print('Length of each feature =', features_mfcc.shape[1])

# Transpose so that windows run along the horizontal axis of the plot.
features_mfcc = features_mfcc.T
plt.matshow(features_mfcc)
plt.title('MFCC')

# Log filter-bank features for the same signal, laid out the same way.
filterbank_features = logfbank(audio_signal, frequency_sampling)
print('\nFilter bank:\nNumber of windows =', filterbank_features.shape[0])
print('Length of each feature =', filterbank_features.shape[1])

filterbank_features = filterbank_features.T
plt.matshow(filterbank_features)
plt.title('Filter bank')

# Render both figures.
plt.show()
或者,您也可以使用以下代码提取特征:
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
def extract_features(audio, rate):
    """Extract scaled MFCC features and append their delta features.

    Computes 20 cepstral coefficients per frame (25 ms window, 10 ms step,
    1200-point FFT, energy appended), normalizes them with
    ``sklearn.preprocessing.scale`` (the original author describes this step
    as CMS), and stacks the result side by side with its deltas. Per the
    original description this yields a 40-dimensional vector per frame.

    Args:
        audio: Audio samples, passed straight to ``python_speech_features.mfcc``.
        rate: Sampling rate of ``audio``.

    Returns:
        2-D array whose rows hold the scaled MFCCs followed by their deltas.
    """
    # ``mfcc`` here is the python_speech_features module (imported under that
    # alias), so ``mfcc.mfcc`` is the feature-extraction function itself.
    mfcc_feature = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    mfcc_feature = preprocessing.scale(mfcc_feature)
    # NOTE(review): calculate_delta is not defined in the visible snippet; it
    # must be provided elsewhere in the module — confirm before running.
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined
您可以通过以下代码,使用 librosa 包(易于安装和使用)提取音频文件的 MFCC 特征:
import librosa
import librosa.display
audio_path = 'my_audio_file.wav'

# Load the waveform together with the sampling rate it was loaded at.
x, sr = librosa.load(audio_path)

# The signal must be passed as ``y=``: current librosa releases make it a
# keyword-only argument, so the positional form raises a TypeError.
mfccs = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40)
print(mfccs.shape)
您也可以使用以下代码显示 MFCC:
import matplotlib.pyplot as plt

# Draw the MFCC matrix with a time-labelled horizontal axis.
librosa.display.specshow(mfccs, sr=sr, x_axis='time')
# specshow only draws onto the current axes; when run as a script the figure
# is not rendered until show() is called.
plt.show()