IndexError: boolean index vs indexed array mismatch in numpy ndarray
IndexError: boolean index vs indexed array mismatch in numpy ndarray
我正在处理一个多class 问题(5-classes,高度不平衡的数据集)。我想实现一个卷积自动编码器的集成,其中每个自动编码器都在单个 class 上进行训练,然后集成以获得最终的 class 化结果。
然而,我被困在一个点上,要根据 class 训练每个编码器。我收到的错误我认为与我处理 class 标签数组的逻辑有关:
IndexError: boolean index did not match indexed array along dimension 1; dimension is 1 but corresponding boolean dimension is 5
我正在处理非常庞大的数据集,但我为 3-class 问题提供了一个 MWE 以重现以下类似情况:
#..scikitlearn, keras, numpy ....libraries import
class SingleAED:
def __init__(self, train, test):
self.x_train = train
self.x_test = test
def setSingleModel(self):
autoencoder = Sequential()
activ = 'relu'
autoencoder.add(Conv2D(32, (1, 3), strides=(1, 1), padding='same', activation=activ, input_shape=(1, Threshold, 4)))
autoencoder.add(BatchNormalization(axis = 3))
autoencoder.add(Conv2D(32, (1, 3), strides=(1, 1), padding='same', activation=activ ))
autoencoder.add(BatchNormalization(axis = 3))
autoencoder.add(MaxPooling2D(pool_size=(1, 2) ))
autoencoder.compile(optimizer='adam', loss='mae', metrics=['mean_squared_error'])
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
autoencoder.fit(self.x_train, self.x_train, epochs=250, batch_size=256, shuffle=True,callbacks=callbacks_list)
return autoencoder
#generate dummy data
X = np.random.randn(20, 1, 5, 4)
a,b,c = np.repeat(0, 7), np.repeat(1, 7), np.repeat(2, 6)
y = np.hstack((a,b,c))
LABELS= list(set(np.ndarray.flatten(y)))
Threshold = len(X[0, 0, :, 0])
NoClass = len(LABELS)
#train-test split
x_train, x_test, y_train, y_test = train_test_split(X, y,
test_size=0.20, random_state=7)
#...to categorical
y_train = keras.utils.to_categorical(y_train, num_classes=NoClass)
y_test = keras.utils.to_categorical(y_test, num_classes=NoClass)
#train an auto-encoder per class
ensemble = []
for i in range(len(LABELS)):
print(LABELS[i])
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleAED(sub_train, sub_test)
autoencoder = autoencoder.setSingleModel()
ensemble.append(autoencoder)
错误:
0
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-98-e00f5454d8b5> in <module>()
2 for i in range(len(LABELS)):
3 print(LABELS[i])
----> 4 sub_train = x_train[y_train == i]
5 sub_test = x_test[y_test == i]
6
IndexError: boolean index did not match indexed array along dimension 1; dimension is 1 but corresponding boolean dimension is 3
在这种情况下,我想遍历 classes 0..2 以根据 class 训练一个编码器。我不确定为什么会出现此错误,有人可以帮助解决这个问题吗?
您想在 x_train
数组 y_train
转换为分类之前索引 x_train
数组。
x_train, x_test, y_train, y_test = train_test_split(X, y,
test_size=0.20,
random_state=7)
# EDIT: DO NOT OVERRIDE!
y_train_cat = keras.utils.to_categorical(y_train, num_classes=NoClass)
y_test_cat = keras.utils.to_categorical(y_test, num_classes=NoClass)
#train an auto-encoder per class
ensemble = []
for i in range(len(LABELS)):
print(LABELS[I])
# EDIT: USE NON-CATEGORICAL
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleAED(sub_train, sub_test)
autoencoder = autoencoder.setSingleModel()
ensemble.append(autoencoder)
我正在处理一个多class 问题(5-classes,高度不平衡的数据集)。我想实现一个卷积自动编码器的集成,其中每个自动编码器都在单个 class 上进行训练,然后集成以获得最终的 class 化结果。
然而,我被困在一个点上,要根据 class 训练每个编码器。我收到的错误我认为与我处理 class 标签数组的逻辑有关:
IndexError: boolean index did not match indexed array along dimension 1; dimension is 1 but corresponding boolean dimension is 5
我正在处理非常庞大的数据集,但我为 3-class 问题提供了一个 MWE 以重现以下类似情况:
#..scikitlearn, keras, numpy ....libraries import
class SingleAED:
def __init__(self, train, test):
self.x_train = train
self.x_test = test
def setSingleModel(self):
autoencoder = Sequential()
activ = 'relu'
autoencoder.add(Conv2D(32, (1, 3), strides=(1, 1), padding='same', activation=activ, input_shape=(1, Threshold, 4)))
autoencoder.add(BatchNormalization(axis = 3))
autoencoder.add(Conv2D(32, (1, 3), strides=(1, 1), padding='same', activation=activ ))
autoencoder.add(BatchNormalization(axis = 3))
autoencoder.add(MaxPooling2D(pool_size=(1, 2) ))
autoencoder.compile(optimizer='adam', loss='mae', metrics=['mean_squared_error'])
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
autoencoder.fit(self.x_train, self.x_train, epochs=250, batch_size=256, shuffle=True,callbacks=callbacks_list)
return autoencoder
#generate dummy data
X = np.random.randn(20, 1, 5, 4)
a,b,c = np.repeat(0, 7), np.repeat(1, 7), np.repeat(2, 6)
y = np.hstack((a,b,c))
LABELS= list(set(np.ndarray.flatten(y)))
Threshold = len(X[0, 0, :, 0])
NoClass = len(LABELS)
#train-test split
x_train, x_test, y_train, y_test = train_test_split(X, y,
test_size=0.20, random_state=7)
#...to categorical
y_train = keras.utils.to_categorical(y_train, num_classes=NoClass)
y_test = keras.utils.to_categorical(y_test, num_classes=NoClass)
#train an auto-encoder per class
ensemble = []
for i in range(len(LABELS)):
print(LABELS[i])
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleAED(sub_train, sub_test)
autoencoder = autoencoder.setSingleModel()
ensemble.append(autoencoder)
错误:
0
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-98-e00f5454d8b5> in <module>()
2 for i in range(len(LABELS)):
3 print(LABELS[i])
----> 4 sub_train = x_train[y_train == i]
5 sub_test = x_test[y_test == i]
6
IndexError: boolean index did not match indexed array along dimension 1; dimension is 1 but corresponding boolean dimension is 3
在这种情况下,我想遍历 classes 0..2 以根据 class 训练一个编码器。我不确定为什么会出现此错误,有人可以帮助解决这个问题吗?
您想在 x_train
数组 y_train
转换为分类之前索引 x_train
数组。
x_train, x_test, y_train, y_test = train_test_split(X, y,
test_size=0.20,
random_state=7)
# EDIT: DO NOT OVERRIDE!
y_train_cat = keras.utils.to_categorical(y_train, num_classes=NoClass)
y_test_cat = keras.utils.to_categorical(y_test, num_classes=NoClass)
#train an auto-encoder per class
ensemble = []
for i in range(len(LABELS)):
print(LABELS[I])
# EDIT: USE NON-CATEGORICAL
sub_train = x_train[y_train == i]
sub_test = x_test[y_test == i]
autoencoder = SingleAED(sub_train, sub_test)
autoencoder = autoencoder.setSingleModel()
ensemble.append(autoencoder)