GAN结果图像在训练过程中是一样的

GAN result images are the same during the training process

我正在尝试在 MNIST 数据集上训练 GAN。目前训练的效果好坏参半,问题似乎是生成的图像实际上都是一样的:

您可以在下面找到我的完整代码。我尝试四处查找解决方案,唯一找到的建议是使用 randn 而不是 rand,但我现在并没有使用 rand。

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle

# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))

# Import databases
# Prepend this file's directory to sys.path so the local db_utils module
# (which lives next to this script) can be imported below.
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset

def choose_dataset(dataset_type):
    """Select dataset based on string variable."""
    if dataset_type == "nlp":
        return get_imdb_dataset(dir=DB_DIR)

    if dataset_type == "computer_vision":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
    elif dataset_type == "speech_recognition":
        # (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
        (X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)
    else:
        raise ValueError("Couldn't find dataset.")

    # Normalize the features, then one-hot encode the labels.
    X_train, X_test = normalize_dataset(dataset_type, X_train, X_test)
    return reshape_dataset(X_train, y_train, X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize computer vision (pixel) or speech datasets.

    Args:
        string: Dataset type identifier (callers pass "computer_vision").
        X_train: Raw training feature array.
        X_test: Raw test feature array.

    Returns:
        Tuple (X_train, X_test) of normalized arrays: pixels scaled to
        [0, 1] for image data, z-score standardized otherwise.
    """
    # Bug fix: callers pass "computer_vision" (underscore) but this
    # compared against "computer vision" (space), so MNIST silently fell
    # through to the speech branch. Accept both spellings.
    if string in ("computer_vision", "computer vision"):
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        mean = np.mean(X_train)
        std = np.std(X_train)
        # Bug fix: z-score standardization is (x - mean) / std; the
        # original computed (x - std) / mean.
        X_train = (X_train - mean) / std
        X_test = (X_test - mean) / std

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """One-hot encode the label arrays; feature arrays pass through unchanged."""
    return (X_train, to_categorical(y_train)), (X_test, to_categorical(y_test))


class GAN:
    """Generative Adversarial Network for digit generation (MNIST).

    Holds three compiled Keras models: the generator, the discriminator,
    and a combined model (generator -> frozen discriminator) used to
    train the generator against the discriminator's judgment.
    """
    def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
        """Build and compile the generator, discriminator and combined GAN.

        Args:
            input_shape: Shape of real/generated images.
            rand_vector_shape: Shape of the latent noise vector fed to the generator.
            lr: Adam learning rate.
            beta: Adam beta_1 (first-moment decay) parameter.
        """
        # Input sizes
        self.img_shape = input_shape
        self.input_size = rand_vector_shape
        
        # optimizer
        # NOTE(review): this single Adam instance is shared by all three
        # compile() calls below; Adam keeps per-variable state, so separate
        # optimizer instances per model are the usual practice — confirm
        # the sharing is intended.
        self.opt = tf.keras.optimizers.Adam(lr, beta)

        # Create Generator model
        self.generator = self.generator_model()
        self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Create Discriminator model
        # (compiled while still trainable, so train_on_batch updates it)
        self.discriminator = self.discriminator_model()
        self.discriminator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Set the Discriminator as non trainable in the combined GAN model
        self.discriminator.trainable = False
        
        # Define model input and output
        input = tf.keras.Input(self.input_size)
        generated_img = self.generator(input)
        output = self.discriminator(generated_img)
        
        # Define and compile combined GAN model
        self.GAN = tf.keras.Model(input, output, name="GAN")
        self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])

        return None
        
    def discriminator_model(self):
        """Create discriminator model.

        Returns an uncompiled MLP mapping a flattened image to a single
        real/fake probability (sigmoid output).
        """
        model = tf.keras.models.Sequential(name='Discriminator')
        model.add(layers.Flatten())
        model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

        return model

    def generator_model(self):
        """Create generator model.

        Returns an uncompiled MLP mapping a latent vector to a 28x28 image.
        """
        model = tf.keras.models.Sequential(name='Generator')
        model.add(layers.Dense(units=256, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=512, kernel_initializer='normal', activation='relu'))
        model.add(layers.Dense(units=1024, kernel_initializer='normal', activation='relu'))
        # NOTE(review): a 'relu' output clips all negative values to 0 and is
        # unbounded above; GAN generators conventionally end in 'tanh' with
        # training images scaled to [-1, 1].
        model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='relu'))
        model.add(layers.Reshape((28,28)))
        
        return model
    
    def plot_imgs(self, epoch):
        """Save a 4x4 grid of generator samples to img/img_epoch_<epoch>.png."""
        r,c = 4,4

        fig, axs = plt.subplots(r, c)
        count = 0  # NOTE(review): incremented but never read
        for i in range(r):
            for j in range(c):
                    noise = np.random.normal(0, 1, (1, self.input_size[0]))
                    img = self.generator.predict(noise)[0, :]
                    axs[i,j].imshow(img, cmap='gray')
                    axs[i,j].axis('off')
                    count += 1

        fig.savefig("img/img_epoch_{0}.png".format(epoch))
        plt.title("Epoch " + str(epoch))
        # plt.show()
        return None

    def train(self, X_train, batch_size=128, epochs=2000, save_interval=200):
        """Alternate discriminator and generator updates for `epochs` batches.

        Args:
            X_train: Array of real training images.
            batch_size: Combined-model batch size; the discriminator sees
                half real and half fake images per step.
            epochs: Number of training iterations (one batch each).
            save_interval: How often to print losses and save sample images.
        """
        half_batch = batch_size//2
        y_pos_train_dis = np.ones((half_batch, 1))   # labels for real images
        y_neg_train_dis = np.zeros((half_batch, 1))  # labels for fake images
        y_train_GAN = np.ones((batch_size, 1))       # generator wants fakes called real
        
        for epoch in range(epochs):
            # Generate training data for Discriminator

            #   random half_batch amount of real images
            X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
            
            #   random half_batch amount of generated fake images
            X_neg_train_dis = self.generator.predict(np.random.normal(0, 1, (half_batch, self.input_size[0])))

            #   Shuffle and append data using sklearn shuffle function
            # X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
            # BUG(review): the two tf.random.shuffle calls below draw
            # independent permutations, so the labels in y_train_dis no
            # longer line up with the images in X_train_dis — the
            # discriminator is trained on effectively random labels.
            # Shuffle X and y with one shared permutation, or skip
            # shuffling entirely (a single batch needs none). This likely
            # explains the ~50% discriminator accuracy.
            X_train_dis, y_train_dis = tf.random.shuffle(tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0)), tf.random.shuffle(tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0))

            # Generate training data for combined GAN model
            X_train_GAN = np.random.normal(0, 1, (batch_size, self.input_size[0]))
            
            # Train Discriminator
            loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)
            
            # Train Generator
            loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

            # Print results
            if epoch%save_interval == 0:
                print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
                print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
                self.plot_imgs(epoch)
                
        return 0

def main():
    """Build a GAN and train it on the normalized MNIST training images."""
    gan_model = GAN()
    (X_train, _), (_, _) = choose_dataset("computer_vision")

    # Switch from grayscale to (-1,1)
    # X_train = X_train/127.5 - 1.0

    gan_model.train(X_train)


if __name__ == '__main__':
    main()

以下是可能提供问题线索的准确度和损失:

Discriminator loss: 1.0392613410949707, Generator loss: 0.7247573137283325
Discriminator acc.: 0.5078125, Generator acc.: 0.125
Discriminator loss: 0.7155331969261169, Generator loss: 0.7227296829223633
Discriminator acc.: 0.484375, Generator acc.: 0.0
Discriminator loss: 0.7079681158065796, Generator loss: 0.6722699403762817
Discriminator acc.: 0.4609375, Generator acc.: 1.0
Discriminator loss: 0.6883177757263184, Generator loss: 0.7037044763565063
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.7039847373962402, Generator loss: 0.6718121767044067
Discriminator acc.: 0.453125, Generator acc.: 1.0
Discriminator loss: 0.7004268169403076, Generator loss: 0.6409173607826233
Discriminator acc.: 0.4765625, Generator acc.: 1.0
Discriminator loss: 0.6883779168128967, Generator loss: 0.7788660526275635
Discriminator acc.: 0.5390625, Generator acc.: 0.0
Discriminator loss: 0.6933140754699707, Generator loss: 0.6169038414955139
Discriminator acc.: 0.53125, Generator acc.: 1.0
Discriminator loss: 0.6910691261291504, Generator loss: 0.6194907426834106
Discriminator acc.: 0.5625, Generator acc.: 1.0
Discriminator loss: 0.692711353302002, Generator loss: 0.6367968320846558
Discriminator acc.: 0.5078125, Generator acc.: 1.0

我认为如果您简单地更改模型中的激活函数并添加一些 dropout 层,您会得到更好的结果:

def discriminator_model(self):
  """Create discriminator model.

  Returns an uncompiled MLP mapping a flattened image to a real/fake
  probability. LeakyReLU avoids dead units; Dropout regularizes the
  discriminator so it does not overpower the generator.
  """
  model = tf.keras.models.Sequential(name='Discriminator')
  model.add(layers.Flatten())
  model.add(layers.Dense(units=1024, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=512, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=256, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dropout(0.3))
  model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

  return model

def generator_model(self):
  """Create generator model.

  Returns an uncompiled MLP mapping a latent vector to a 28x28 image.
  The tanh output bounds pixel values to [-1, 1]; real images should be
  scaled to the same range for the discriminator's task to be well-posed.
  """
  model = tf.keras.models.Sequential(name='Generator')
  model.add(layers.Dense(units=256, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=512, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=1024, kernel_initializer='normal'))
  model.add(layers.LeakyReLU(alpha=0.02))
  model.add(layers.Dense(units=np.prod(self.img_shape), 
  kernel_initializer='normal', activation='tanh'))
  model.add(layers.Reshape((28,28)))
        
  return model

同时删除 tf.random.shuffle,因为它会让您的生成器很难学习任何合理的东西:

# Concatenate the fake and real halves in the same order for X and y,
# so every label stays aligned with its image; a single batch needs no shuffle.
X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)

结果还可以,但如果改用 CNN 网络(即 DCGAN 结构)效果会更好。

更新:确保在训练鉴别器本身时将其设为 trainable=True,而在训练生成器(组合模型)之前相应地设回 trainable=False:

# Unfreeze the discriminator for its own update step...
self.discriminator.trainable = True

loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)

# ...then freeze it so the combined-model step updates only the generator.
self.discriminator.trainable = False
            # Train Generator
loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

这里是整个模型:

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt
import random
from sklearn.utils import shuffle


def choose_dataset(dataset_type):
    """Select dataset based on string variable."""
    # Load MNIST, normalize the features, then one-hot encode the labels.
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train, X_test = normalize_dataset(dataset_type, X_train, X_test)
    return reshape_dataset(X_train, y_train, X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize computer vision (pixel) or speech datasets.

    Args:
        string: Dataset type identifier (callers pass "computer_vision").
        X_train: Raw training feature array.
        X_test: Raw test feature array.

    Returns:
        Tuple (X_train, X_test) of normalized arrays: pixels scaled to
        [0, 1] for image data, z-score standardized otherwise.
    """
    # Bug fix: callers pass "computer_vision" (underscore) but this
    # compared against "computer vision" (space), so MNIST silently fell
    # through to the speech branch. Accept both spellings.
    if string in ("computer_vision", "computer vision"):
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        mean = np.mean(X_train)
        std = np.std(X_train)
        # Bug fix: z-score standardization is (x - mean) / std; the
        # original computed (x - std) / mean.
        X_train = (X_train - mean) / std
        X_test = (X_test - mean) / std

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """One-hot encode the label arrays; feature arrays pass through unchanged."""
    return (X_train, to_categorical(y_train)), (X_test, to_categorical(y_test))


class GAN:
    """Generative Adversarial Network for digit generation (MNIST).

    Holds three compiled Keras models: the generator, the discriminator,
    and a combined model (generator -> frozen discriminator) used to
    train the generator against the discriminator's judgment.
    """
    def __init__(self, input_shape=(28,28,1), rand_vector_shape=(100,), lr=0.0002, beta=0.5):
        """Build and compile the generator, discriminator and combined GAN.

        Args:
            input_shape: Shape of real/generated images.
            rand_vector_shape: Shape of the latent noise vector.
            lr: Adam learning rate for the generator / combined model.
            beta: Adam beta_1 (first-moment decay) parameter.
        """
        # Input sizes
        self.img_shape = input_shape
        self.input_size = rand_vector_shape
        
        # optimizer
        self.opt = tf.keras.optimizers.Adam(lr, beta)

        # Create Generator model
        self.generator = self.generator_model()
        self.generator.compile(loss='binary_crossentropy', optimizer = self.opt, metrics = ['accuracy'])
        
        # Create Discriminator model
        # The discriminator gets its own Adam instance (higher 0.001
        # learning rate) so its optimizer state is independent of the
        # combined model's.
        self.discriminator = self.discriminator_model()
        self.discriminator.compile(loss='binary_crossentropy', optimizer = tf.keras.optimizers.Adam(0.001, beta), metrics = ['accuracy'])
        
        # Set the Discriminator as non trainable in the combined GAN model
        self.discriminator.trainable = False
        
        # Define model input and output
        input = tf.keras.Input(self.input_size)
        generated_img = self.generator(input)
        output = self.discriminator(generated_img)
        
        # Define and compile combined GAN model
        self.GAN = tf.keras.Model(input, output, name="GAN")
        self.GAN.compile(loss='binary_crossentropy', optimizer = self.opt, metrics=['accuracy'])

        return None
        
    def discriminator_model(self):
        """Create discriminator model (flattened image -> real/fake probability).

        LeakyReLU avoids dead units; Dropout regularizes the discriminator
        so it does not overpower the generator.
        """
        model = tf.keras.models.Sequential(name='Discriminator')
        model.add(layers.Flatten())
        model.add(layers.Dense(units=1024, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=512, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=256, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=1, kernel_initializer='normal', activation='sigmoid'))

        return model

    def generator_model(self):
        """Create generator model (latent vector -> 28x28 image in [-1, 1])."""
        model = tf.keras.models.Sequential(name='Generator')
        model.add(layers.Dense(units=256, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=512, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(units=1024, kernel_initializer='normal'))
        model.add(layers.LeakyReLU(alpha=0.02))
        model.add(layers.Dropout(0.3))
        # tanh bounds outputs to [-1, 1].
        # NOTE(review): real training images should be scaled to the same
        # [-1, 1] range — confirm normalize_dataset's branch actually taken
        # produces that range.
        model.add(layers.Dense(units=np.prod(self.img_shape), kernel_initializer='normal', activation='tanh'))
        model.add(layers.Reshape((28,28)))
        
        return model
    
    def plot_imgs(self, epoch):
        """Save a 4x4 grid of generator samples to img/img_epoch_<epoch>.png."""
        seed = tf.random.normal((16, 100))
        # training=False disables dropout for clean inference-time samples.
        predictions = self.generator(seed, training=False)

        fig = plt.figure(figsize=(4, 4))
        for i in range(predictions.shape[0]):
            plt.subplot(4, 4, i+1)
            # Map tanh output [-1, 1] back to grayscale [0, 255] for display.
            plt.imshow(predictions[i, :, :] * 127.5 + 127.5, cmap='gray')
            plt.axis('off')

        fig.savefig("img/img_epoch_{0}.png".format(epoch))
        plt.title("Epoch " + str(epoch))
        # plt.show()
        return None

    def train(self, X_train, batch_size=128, epochs=4000, save_interval=200):
        """Alternate discriminator and generator updates for `epochs` batches.

        Args:
            X_train: Array of real training images.
            batch_size: Combined-model batch size; the discriminator sees
                half real and half fake images per step.
            epochs: Number of training iterations (one batch each).
            save_interval: How often to print losses and save sample images.
        """
        half_batch = batch_size//2
        y_pos_train_dis = tf.ones((half_batch, 1))   # labels for real images
        y_neg_train_dis = tf.zeros((half_batch, 1))  # labels for fake images
        y_train_GAN = tf.ones((batch_size, 1))       # generator wants fakes called real

        for epoch in range(epochs):
            # Generate training data for Discriminator

            #   random half_batch amount of real images
            X_pos_train_dis = X_train[np.random.randint(0, X_train.shape[0], half_batch)]
            
            #   random half_batch amount of generated fake images
            X_neg_train_dis = self.generator.predict(tf.random.normal((half_batch, self.input_size[0])))

            #   Shuffle and append data using sklearn shuffle function
            # X_train_dis, y_train_dis = tf.concat(shuffle(X_neg_train_dis, X_pos_train_dis, random_state=0), axis=0), tf.concat(shuffle(y_neg_train_dis, y_pos_train_dis, random_state=0), axis=0)
            # Fake then real halves are concatenated in the same order for
            # X and y, so every label stays aligned with its image (no
            # shuffle needed within a single batch).
            X_train_dis, y_train_dis = tf.concat([X_neg_train_dis, X_pos_train_dis], axis=0), tf.concat([y_neg_train_dis, y_pos_train_dis], axis=0)

            # Generate training data for combined GAN model
            X_train_GAN = tf.random.normal((batch_size, self.input_size[0]))
            
            # Train Discriminator
            # Unfreeze for the discriminator's own update step...
            self.discriminator.trainable = True

            loss_dis = self.discriminator.train_on_batch(X_train_dis, y_train_dis)

            # ...then freeze so the combined-model step updates only the generator.
            self.discriminator.trainable = False
            # Train Generator
            loss_gen = self.GAN.train_on_batch(X_train_GAN, y_train_GAN)

            # Print results
            if epoch%save_interval == 0:
                print("Discriminator loss: {0}, Generator loss: {1}".format(loss_dis[0], loss_gen[0]))
                print("Discriminator acc.: {0}, Generator acc.: {1}".format(loss_dis[1], loss_gen[1]))
                self.plot_imgs(epoch)
                
        return 0


# Build the GAN, load the normalized MNIST training images, and train.
gan_model = GAN()
(X_train, _), (_, _) = choose_dataset("computer_vision")

gan_model.train(X_train)