wav文件的特征提取
Feature extraction of wav file
我们正在尝试从 .wav 文件中提取特征,但总是遇到同样的错误。
我们试过 python 3.6.6 和 3.7.4 版本,但错误是一样的。
import csv
import glob
import os
import librosa
import numpy as np
if __name__ == '__main__':
def extract_feature(file_name):
x, sample_rate = librosa.load(file_name)
stft = np.abs(librosa.stft(x))
mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
mel = np.mean(librosa.feature.melspectrogram(x, sr=sample_rate).T, axis=0)
contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(x),
sr=sample_rate).T, axis=0)
return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(parent_dir, sub_dirs, file_ext="*.wav"):
full_list = []
features, labels = np.empty((0, 193)), np.empty(0)
for label, sub_dir in enumerate(sub_dirs):
for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
varim = fn.split('/')[2]
# print(varim)
try:
mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
except Exception as e:
print("Error encountered while parsing file: ", fn)
continue
ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
features = np.vstack([features, ext_features])
labels = np.append(labels, fn.split('/')[2])
# print(var)
# print(features)
new_dict = {varim: ext_features}
print(new_dict)
full_list.append(new_dict)
# value = np.array(features, dtype=np.int), np.array(labels, dtype=np.int)
with open('dog_cat.csv', 'w') as f:
wr = csv.writer(f)
wr.writerow(full_list)
return features, labels
def one_hot_encode(labels):
n_labels = len(labels)
n_unique_labels = len(np.unique(labels))
one_hot_encode = np.zeros((n_labels, n_unique_labels))
# one_hot_encode[np.arange(n_labels), labels] = 1
return one_hot_encode
parent_dir = 'cats_dogs'
tr_sub_dirs = ["fold1"]
file_ext1 = "*.wav"
tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)
tr_labels = one_hot_encode(tr_labels)
这是我们得到的错误
Traceback (most recent call last):
File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 61, in
<module>
tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)
File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 27, in
parse_audio_files
varim = fn.split('/')[2]
IndexError: list index out of range
我们应该得到代表 .wav 文件的数字,这样我们就可以将它们分类为猫还是狗。
varim = fn.split('/')2
fn.split('/') 对您不起作用,因为无法进一步拆分,如下所示
enter image description here
enter image description here
如果您 运行 的目录结构正确,请进行更正。
我们正在尝试从 .wav 文件中提取特征,但总是遇到同样的错误。
我们试过 python 3.6.6 和 3.7.4 版本,但错误是一样的。
import csv
import glob
import os
import librosa
import numpy as np
if __name__ == '__main__':
def extract_feature(file_name):
x, sample_rate = librosa.load(file_name)
stft = np.abs(librosa.stft(x))
mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
mel = np.mean(librosa.feature.melspectrogram(x, sr=sample_rate).T, axis=0)
contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(x),
sr=sample_rate).T, axis=0)
return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(parent_dir, sub_dirs, file_ext="*.wav"):
full_list = []
features, labels = np.empty((0, 193)), np.empty(0)
for label, sub_dir in enumerate(sub_dirs):
for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
varim = fn.split('/')[2]
# print(varim)
try:
mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
except Exception as e:
print("Error encountered while parsing file: ", fn)
continue
ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
features = np.vstack([features, ext_features])
labels = np.append(labels, fn.split('/')[2])
# print(var)
# print(features)
new_dict = {varim: ext_features}
print(new_dict)
full_list.append(new_dict)
# value = np.array(features, dtype=np.int), np.array(labels, dtype=np.int)
with open('dog_cat.csv', 'w') as f:
wr = csv.writer(f)
wr.writerow(full_list)
return features, labels
def one_hot_encode(labels):
n_labels = len(labels)
n_unique_labels = len(np.unique(labels))
one_hot_encode = np.zeros((n_labels, n_unique_labels))
# one_hot_encode[np.arange(n_labels), labels] = 1
return one_hot_encode
parent_dir = 'cats_dogs'
tr_sub_dirs = ["fold1"]
file_ext1 = "*.wav"
tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)
tr_labels = one_hot_encode(tr_labels)
这是我们得到的错误
Traceback (most recent call last):
File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 61, in
<module>
tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)
File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 27, in
parse_audio_files
varim = fn.split('/')[2]
IndexError: list index out of range
我们应该得到代表 .wav 文件的数字,这样我们就可以将它们分类为猫还是狗。
varim = fn.split('/')2
fn.split('/') 对您不起作用,因为无法进一步拆分,如下所示
enter image description here
enter image description here
如果您 运行 的目录结构正确,请进行更正。