Single class classification using ResNet50 error: ValueError: logits and labels must have the same shape ((None, 7, 7, 2) vs (None, None))

Single class classification using ResNet50 error: ValueError: logits and labels must have the same shape ((None, 7, 7, 2) vs (None, None))

我正在尝试进行图像分类以区分猫和狗,想通过训练单类神经网络(one-class neural network)来实现。我已经在 VGG16 模型上尝试过这种方法,现在想改用 ResNet50,因为它更小,有望更快。以下方法对 VGG16 有效,但对 ResNet50 无效。

import os
import random
import itertools
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D,MaxPool2D
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.optimizers import Adam
import keras
import random
import requests
import keras
from keras.models import Model
from keras.layers import Dense
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.applications.vgg16 import decode_predictions
from keras import applications
from keras.optimizers import RMSprop

import pickle

import cv2

# Root directory of the dataset; the training-set path is built from it below.
base_path = "datafolder/"
# Shape given to both the ResNet50 backbone and the model Input layer; the
# first two entries are also used as the generator's target_size.
# Presumably (height, width, channels) -- confirm against the Keras image
# data format in use.
SHAPE = (224,224,3)
# Number of images the directory generator yields per batch.
batch_size = 10

def to_grayscale_then_rgb(image):
    """Convert an RGB image to grayscale and back to a 3-channel image.

    The result keeps the RGB layout produced by ``grayscale_to_rgb`` while
    carrying only the grayscale information of the input.
    """
    return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))

def wrap_generator(generator):
    """Yield batches from ``generator`` with labels extended for noise samples.

    For each ``(x, y)`` batch pulled from ``generator`` the labels are
    one-hot encoded, and an equally sized block of ``[1., 0.]`` rows is
    appended along the batch axis. The inputs are passed through untouched,
    so the yielded label tensor has twice as many rows as ``y``. This lines
    up with ``get_model(train=True)``, which concatenates noise features to
    the real features along axis 0.

    The loop never terminates on its own; it runs for as long as
    ``next(generator)`` keeps producing batches.
    """
    while True:
        images, labels = next(generator)
        one_hot = tf.keras.utils.to_categorical(labels)
        # Same shape as the real labels, every row set to [1., 0.].
        noise_labels = tf.zeros_like(one_hot) + tf.constant([1.,0.])
        yield images, tf.concat([one_hot, noise_labels], axis=0)

def set_seed(seed):
    """Seed every random source this script uses with the same value.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow's global
    generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


def get_model(train=True):
    """Build and compile the one-class classifier on a frozen ResNet50.

    Args:
        train: When True, zero-centred Gaussian-noise tensors shaped like the
            backbone output are concatenated to the real backbone output
            along axis 0 before the dense head (the labels produced by
            ``wrap_generator`` are extended the same way). When False, the
            head is applied to the backbone output only.

    Returns:
        A compiled Keras ``Model`` mapping an input of shape ``SHAPE`` to a
        2-unit softmax output.
    """

    # Fixed seed so repeated calls start from the same random state.
    set_seed(33)

    pre_process = Lambda(tf.keras.applications.resnet50.preprocess_input)
    # The ``vgg*`` names appear to be carried over from an earlier VGG16
    # version of this script; the network built here is ResNet50.
    vgg = applications.ResNet50(weights = 'imagenet', include_top = True, input_shape = SHAPE)
    # NOTE(review): for ResNet50 the model summary produced by this code shows
    # this truncated backbone emitting (None, 7, 7, 2048), so the Dense head
    # below ends in (None, 7, 7, 2) and fit() fails with
    # "logits and labels must have the same shape". Taking layers[-2] (the
    # global pooling layer) instead yields the 2-D features the head expects.
    vgg = Model(vgg.input, vgg.layers[-3].output)
    # Freeze the pretrained backbone; only the dense head is trained.
    vgg.trainable = False

    inp = Input(SHAPE)
    # Noise is added to the raw input before the ResNet50 preprocessing.
    vgg_16_process = pre_process(GaussianNoise(0.1)(inp))
    vgg_out = vgg(vgg_16_process)

    # Start from zeros shaped like the features, then add Gaussian noise.
    noise = Lambda(tf.zeros_like)(vgg_out)
    noise = GaussianNoise(0.1)(noise)

    if train:
        # Stack real features and noise along axis 0.
        x = Lambda(lambda z: tf.concat(z, axis=0))([vgg_out,noise])
        x = Activation('relu')(x)
    else:
        x = vgg_out
    
    x = Dense(512, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(2, activation='softmax')(x)
    model = Model(inp, out)
    model.compile(Adam(lr=1e-4), loss='binary_crossentropy')

    return model

### FLOW GENERATORS ###

# NOTE(review): train_datagen is not defined anywhere in this snippet --
# presumably an ImageDataGenerator created elsewhere; confirm before running.
train_generator = train_datagen.flow_from_directory(
            base_path + 'training_set/training_set/',
            target_size = (SHAPE[0], SHAPE[1]),
            batch_size = batch_size,
            class_mode = 'categorical',
            shuffle = True,
            seed = 33,
            # Only the 'dogs' class is listed, so training sees one class.
            classes = ['dogs']
    )

# Build the training-mode model (train defaults to True).
model = get_model()

model.summary()

# wrap_generator extends each label batch with the noise-sample labels.
# NOTE(review): steps_per_epoch is computed with true division, so it is a
# float whenever this expression is evaluated -- confirm that is acceptable
# for the Keras version in use.
model.fit(wrap_generator(train_generator), steps_per_epoch=train_generator.samples/train_generator.batch_size, epochs=30)

对于 VGG16 模型,此方法有效,但是在尝试实施 ResNet50 模型时出现以下错误:

ValueError: logits and labels must have the same shape ((None, 7, 7, 2) vs (None, None))

我怀疑这与神经网络的结构有关,如下所示:

Model: "model_57"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_59 (InputLayer)           [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
gaussian_noise_56 (GaussianNois (None, 224, 224, 3)  0           input_59[0][0]                   
__________________________________________________________________________________________________
lambda_98 (Lambda)              (None, 224, 224, 3)  0           gaussian_noise_56[0][0]          
__________________________________________________________________________________________________
model_56 (Functional)           (None, 7, 7, 2048)   23587712    lambda_98[0][0]                  
__________________________________________________________________________________________________
lambda_99 (Lambda)              (None, 7, 7, 2048)   0           model_56[0][0]                   
__________________________________________________________________________________________________
gaussian_noise_57 (GaussianNois (None, 7, 7, 2048)   0           lambda_99[0][0]                  
__________________________________________________________________________________________________
lambda_100 (Lambda)             (None, 7, 7, 2048)   0           model_56[0][0]                   
                                                                 gaussian_noise_57[0][0]          
__________________________________________________________________________________________________
activation_686 (Activation)     (None, 7, 7, 2048)   0           lambda_100[0][0]                 
__________________________________________________________________________________________________
dense_84 (Dense)                (None, 7, 7, 512)    1049088     activation_686[0][0]             
__________________________________________________________________________________________________
dense_85 (Dense)                (None, 7, 7, 128)    65664       dense_84[0][0]                   
__________________________________________________________________________________________________
dense_86 (Dense)                (None, 7, 7, 2)      258         dense_85[0][0]                   
==================================================================================================
Total params: 24,702,722
Trainable params: 1,115,010
Non-trainable params: 23,587,712

最后一层的输出形状 (None, 7, 7, 2) 中多了两个大小为 7 的维度,而使用 VGG16 时没有。我尝试更改了代码中的多处内容,但问题依然存在。我怀疑 get_model 函数有问题。是什么导致了这个问题?

只需将:

vgg = Model(vgg.input, vgg.layers[-3].output)

改为:

vgg = Model(vgg.input, vgg.layers[-2].output)

即可。

这是因为我们需要一个输出二维张量的层。使用 VGG16 时,[-3] 位置的层是 Flatten 层,所以没有问题。使用 ResNet50 时,需要使用 [-2] 位置的层(GlobalPooling),才能得到 2D 输出。