keras 自定义生成器 categorical_crossentropy 修复输出形状问题
keras custom generator categorical_crossentropy fix output shape issue
我对批次标签有疑问。
如何将标签正确传递给 fit_generator?
我假设它应该是这样的:
如果 #0 = 爵士乐,#1 = 摇滚乐,#2 = 电子乐。
因此,如果批次为 4...
batch_features batch_labels
array_with_rock 1
array_with_rock 1
array_with_electro 2
array_with_jazz 0
其中 array_with_tag 是内存中的梅尔频谱图。
我的任务是:训练NN_Music标注器
https://github.com/Vital95/NN_Music/blob/master/music_tagger_crnn.py
在此数据集上:
https://github.com/mdeff/fma
目前的进展:我能够训练 binary_crossentropy 仅用于岩石的模型。
问题: Error when checking target: expected output to have shape (None, 3) but got array with shape (12, 1)
还有一些主要代码:
主要
if __name__ == '__main__':
batch_features, batch_labels = getFeturesAndLabelsFromTarget(target)
model = getModel()
my_generator = MelGenerator(features = batch_features, labels = batch_labels, batch_size = 12)
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
model.fit_generator(my_generator, samples_per_epoch = 10, nb_epoch = 10, verbose=2, callbacks=None, validation_data=None, class_weight=None, nb_worker=1)
model.save_weights('test012.h5')
print("done")
发电机
def MelGenerator(features, labels, batch_size):
# Create empty arrays to contain batch of features and labels#
batch_features = np.zeros((batch_size, 96, 1366, 1))
batch_labels = np.zeros((batch_size,1))
while True:
for i in range(batch_size):
# choose random index in features
#to do review this line
#index = random.choice(np.size(batch_features,1))
index = randint(0, np.size(batch_features,0))
batch_features[i] = features[index]
batch_labels[i] = labels[index]
yield batch_features, batch_labels
将 Mel Spectro 从 .wav 文件加载到内存(这是其他情况)
def getFeturesAndLabelsFromTarget(targetFolder):
wavFiles = WTI.GetListOfFilesByExt(targetFolder, extention = '.wav')
upperFolder = WTI.getUpperFolders(wavFiles)
n = len(wavFiles)
if len(set(upperFolder)) == 1:
batch_features = np.zeros((n, 96, 1366, 1))
batch_labels = np.zeros((n,1),dtype=np.int8)
for i in range(0,n):
batch_features[i] = preprocess_input(wavFiles[i])
#correct
# print(str(np.size(batch_features,0)))
batch_labels[i] = 0
return batch_features, batch_labels
else:
s = set(upperFolder)
theList = list(s)
batch_features = np.zeros((n, 96, 1366, 1))
batch_labels = np.zeros((n,1),dtype=np.int8)
for i in range(0,n):
batch_features[i] = preprocess_input(wavFiles[i])
if(theList[0] in wavFiles[i]):
#0 = jazz
batch_labels[i] = 0
if(theList[1] in wavFiles[i]):
#1 = rock
batch_labels[i] = 1
if(theList[2] in wavFiles[i]):
#2 = electronic
batch_labels[i] = 2
return batch_features, batch_labels
型号
def getModel(input_tensor=None):
if K.image_dim_ordering() == 'th':
input_shape = (1, 96, 1366)
else:
input_shape = (96, 1366, 1)
if input_tensor is None:
melgram_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
melgram_input = Input(tensor=input_tensor, shape=input_shape)
else:
melgram_input = input_tensor
# Determine input axis
if K.image_dim_ordering() == 'th':
channel_axis = 1
freq_axis = 2
time_axis = 3
else:
channel_axis = 3
freq_axis = 1
time_axis = 2
# Input block
x = ZeroPadding2D(padding=(0, 37))(melgram_input)
x = BatchNormalization(axis=time_axis, name='bn_0_freq')(x)
# Conv block 1
x = Convolution2D(64, 3, 3, border_mode='same', name='conv1')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
# Conv block 2
x = Convolution2D(128, 3, 3, border_mode='same', name='conv2')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
# Conv block 3
x = Convolution2D(128, 3, 3, border_mode='same', name='conv3')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
# Conv block 4
x = Convolution2D(128, 3, 3, border_mode='same', name='conv4')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
# reshaping
if K.image_dim_ordering() == 'th':
x = Permute((3, 1, 2))(x)
x = Reshape((15, 128))(x)
# GRU block 1, 2, output
x = GRU(32, return_sequences=True, name='gru1')(x)
x = GRU(32, return_sequences=False, name='gru2')(x)
x = Dense(3, activation='sigmoid', name='output')(x)
# Create model
model = Model(melgram_input, x)
return model
发生这种情况是因为您没有使用 categorical_crossentropy
损失所需的单热编码标签。
您可以使用 keras.utils.np_utils.to_categorical
转换您的标签,它应该有效。
我对批次标签有疑问。 如何将标签正确传递给 fit_generator?
我假设它应该是这样的: 如果 #0 = 爵士乐,#1 = 摇滚乐,#2 = 电子乐。
因此,如果批次为 4...
batch_features batch_labels
array_with_rock 1
array_with_rock 1
array_with_electro 2
array_with_jazz 0
其中 array_with_tag 是内存中的梅尔频谱图。
我的任务是:训练NN_Music标注器 https://github.com/Vital95/NN_Music/blob/master/music_tagger_crnn.py
在此数据集上: https://github.com/mdeff/fma
目前的进展:我能够训练 binary_crossentropy 仅用于岩石的模型。
问题: Error when checking target: expected output to have shape (None, 3) but got array with shape (12, 1)
还有一些主要代码:
主要
if __name__ == '__main__':
batch_features, batch_labels = getFeturesAndLabelsFromTarget(target)
model = getModel()
my_generator = MelGenerator(features = batch_features, labels = batch_labels, batch_size = 12)
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
model.fit_generator(my_generator, samples_per_epoch = 10, nb_epoch = 10, verbose=2, callbacks=None, validation_data=None, class_weight=None, nb_worker=1)
model.save_weights('test012.h5')
print("done")
发电机
def MelGenerator(features, labels, batch_size):
# Create empty arrays to contain batch of features and labels#
batch_features = np.zeros((batch_size, 96, 1366, 1))
batch_labels = np.zeros((batch_size,1))
while True:
for i in range(batch_size):
# choose random index in features
#to do review this line
#index = random.choice(np.size(batch_features,1))
index = randint(0, np.size(batch_features,0))
batch_features[i] = features[index]
batch_labels[i] = labels[index]
yield batch_features, batch_labels
将 Mel Spectro 从 .wav 文件加载到内存(这是其他情况)
def getFeturesAndLabelsFromTarget(targetFolder):
wavFiles = WTI.GetListOfFilesByExt(targetFolder, extention = '.wav')
upperFolder = WTI.getUpperFolders(wavFiles)
n = len(wavFiles)
if len(set(upperFolder)) == 1:
batch_features = np.zeros((n, 96, 1366, 1))
batch_labels = np.zeros((n,1),dtype=np.int8)
for i in range(0,n):
batch_features[i] = preprocess_input(wavFiles[i])
#correct
# print(str(np.size(batch_features,0)))
batch_labels[i] = 0
return batch_features, batch_labels
else:
s = set(upperFolder)
theList = list(s)
batch_features = np.zeros((n, 96, 1366, 1))
batch_labels = np.zeros((n,1),dtype=np.int8)
for i in range(0,n):
batch_features[i] = preprocess_input(wavFiles[i])
if(theList[0] in wavFiles[i]):
#0 = jazz
batch_labels[i] = 0
if(theList[1] in wavFiles[i]):
#1 = rock
batch_labels[i] = 1
if(theList[2] in wavFiles[i]):
#2 = electronic
batch_labels[i] = 2
return batch_features, batch_labels
型号
def getModel(input_tensor=None):
if K.image_dim_ordering() == 'th':
input_shape = (1, 96, 1366)
else:
input_shape = (96, 1366, 1)
if input_tensor is None:
melgram_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
melgram_input = Input(tensor=input_tensor, shape=input_shape)
else:
melgram_input = input_tensor
# Determine input axis
if K.image_dim_ordering() == 'th':
channel_axis = 1
freq_axis = 2
time_axis = 3
else:
channel_axis = 3
freq_axis = 1
time_axis = 2
# Input block
x = ZeroPadding2D(padding=(0, 37))(melgram_input)
x = BatchNormalization(axis=time_axis, name='bn_0_freq')(x)
# Conv block 1
x = Convolution2D(64, 3, 3, border_mode='same', name='conv1')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
# Conv block 2
x = Convolution2D(128, 3, 3, border_mode='same', name='conv2')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
# Conv block 3
x = Convolution2D(128, 3, 3, border_mode='same', name='conv3')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
# Conv block 4
x = Convolution2D(128, 3, 3, border_mode='same', name='conv4')(x)
x = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(x)
x = ELU()(x)
x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
# reshaping
if K.image_dim_ordering() == 'th':
x = Permute((3, 1, 2))(x)
x = Reshape((15, 128))(x)
# GRU block 1, 2, output
x = GRU(32, return_sequences=True, name='gru1')(x)
x = GRU(32, return_sequences=False, name='gru2')(x)
x = Dense(3, activation='sigmoid', name='output')(x)
# Create model
model = Model(melgram_input, x)
return model
发生这种情况是因为您没有使用 categorical_crossentropy
损失所需的单热编码标签。
您可以使用 keras.utils.np_utils.to_categorical
转换您的标签,它应该有效。