I need some help setting up Keras-Tuner with Celeb_a dataset using Spyder

I'm trying to work out a way to use Keras-Tuner to automatically identify the best hyperparameters for my CNN. I'm using the Celeb_a dataset.

I tried a similar project where I used fashion_mnist, and that worked fine, but my Python experience isn't enough to achieve what I'm aiming for here. When I tried it with fashion_mnist I managed to produce this table of results.

My code is here.

I'm hoping to produce a similar table using the Celeb_a dataset. This is for a report I'm doing for university. In the report, my university used AWS Rekognition to produce the table below.

I'd like to be able to train on the data so that I can save the model to a pickle and generate similar table results to compare them.

Any suggestions on how to tackle this? My current questions are:

  1. How do I load the dataset correctly?
  2. How do I train the model to give accuracy on 'Mustache', 'Beard', 'Emotion' (like the table results above)?

I tried loading the data with:

(x_train, y_train), (x_test, y_test) = tfds.load('celeb_a')

but this gives me the following error:

AttributeError: Failed to construct dataset celeb_a: module 'tensorflow_datasets.core.utils' has no attribute 'version'
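
From what I've read, this AttributeError usually comes from a version mismatch between tensorflow-datasets and TensorFlow (upgrading tensorflow-datasets seems to be the usual fix). I also gather that even then, tfds.load('celeb_a') returns tf.data.Dataset objects that yield feature dictionaries, not (x, y) numpy tuples like fashion_mnist, so I assume the loading would need to look more like this (untested sketch):

import tensorflow_datasets as tfds

# celeb_a examples are feature dicts ('image', 'attributes', 'landmarks'),
# not (image, label) pairs, so fashion_mnist-style tuple unpacking fails
ds_train, ds_test = tfds.load('celeb_a', split=['train', 'test'])

for example in ds_train.take(1):
    print(example['image'].shape)             # (218, 178, 3)
    print(example['attributes']['Mustache'])  # scalar boolean tensor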

I'm using:

Conda: TensorFlow (Python 3.8.5)
Windows 10 Pro
Intel(R) Core(TM) i3-4170 CPU @ 3.7GHz
64-bit

This is the script I'm using to get started, the same as the one in my bitbucket. Any help would be greatly appreciated. Thank you in advance.

# -*- coding: utf-8 -*-
import tensorflow_datasets as tfds
#from tensorflow.keras.datasets import fashion_mnist
#import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.layers import  Conv2D, MaxPooling2D, Dense, Flatten, Activation

from kerastuner.tuners import RandomSearch
#from kerastuner.engine.hyperparameters import HyperParameter
import time
import os

LOG_DIR = f"{int(time.time())}"


(x_train, y_train), (x_test, y_test) = tfds.load('celeb_a')  # this is the call that raises the AttributeError above

x_train = x_train.reshape(-1,28,28,1)  # carried over from fashion_mnist; CelebA images are 218x178x3
x_test = x_test.reshape(-1,28,28,1)

def build_model(hp):   # RandomSearch passes this HyperParameters object
    model = keras.models.Sequential()
    
    
    #model.add(Conv2D(32, (3, 3), input_shape=x_train.shape[1:]))
    model.add(Conv2D(hp.Int("input_units", min_value=32, max_value=256, step=32), (3,3), input_shape = x_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    
    for i in range(hp.Int("n_layers",min_value = 1, max_value = 4, step=1)):
        #model.add(Conv2D(32, (3, 3)))                
        model.add(Conv2D(hp.Int(f"conv_{i}_units", min_value=32, max_value=256, step=32), (3,3)))
        model.add(Activation('relu'))
        #model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    
    model.add(Dense(10))
    model.add(Activation("softmax"))
    
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

tuner = RandomSearch(build_model,
                     objective = "val_accuracy",
                     max_trials = 1,
                     executions_per_trial=1, # BEST PERFORMANCE: SET TO 3+
                     directory= os.path.normpath('C:/'),# there is a limit of characters keep path short
                     overwrite=True #need this to override model when testing
                     )

tuner.search(x=x_train,
             y=y_train, 
             epochs=1,
             batch_size=64,
             validation_data=(x_test,y_test),)

I managed to do this by creating a function to collect all the annotations:

import os
import numpy as np
import pandas as pd


def get_annotation(fnmtxt, verbose=True):
    if verbose:
        print("_" * 70)
        print(fnmtxt)

    with open(fnmtxt, 'r') as rfile:
        texts = rfile.read().split("\n")

    # the second line of the file holds the attribute names
    columns = np.array(texts[1].split(" "))
    columns = columns[columns != ""]

    rows = []
    for txt in texts[2:]:
        txt = np.array(txt.split(" "))
        txt = txt[txt != ""]
        rows.append(txt)

    df = pd.DataFrame(rows)

    # each data row has one extra leading field: the image id
    if df.shape[1] == len(columns) + 1:
        columns = ["image_id"] + list(columns)
    df.columns = columns
    df = df.dropna()
    if verbose:
        print(" Total number of annotations {}\n".format(df.shape))
        print(df.head())

    # cast the attribute columns to numeric (they are read in as strings)
    for nm in df.columns:
        if nm != "image_id":
            df[nm] = pd.to_numeric(df[nm], downcast="float")
    return df
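
For example, assuming the annotation file sits at data/list_attr_celeba.txt (adjust to your local path), this gives one row per image:

attrs = get_annotation('data/list_attr_celeba.txt')
# attribute values are still -1/1 floats at this point; the CelebA class
# below maps them to 0/1
print(attrs[['image_id', 'Mustache', 'No_Beard', 'Smiling']].head())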

I also created a class to do the rest of the work:

class CelebA():
    '''Wraps the CelebA dataset, allowing an easy way to:
         - Select the features of interest,
         - Split the dataset into 'training', 'test' or 'validation' partition.
    '''

    def __init__(self, main_folder='data/', selected_features=None, drop_features=[]):
        self.main_folder = main_folder
        self.images_folder = os.path.join(main_folder, 'img_align_celeba/')
        self.attributes_path = os.path.join(main_folder, 'list_attr_celeba.txt')
        self.partition_path = os.path.join(main_folder, 'list_eval_partition.txt')
        self.selected_features = selected_features
        self.features_name = []
        self.__prepare(drop_features)

    def __prepare(self, drop_features):
        '''do some preprocessing before using the data: e.g. feature selection'''
        # attributes:
        if self.selected_features is None:
            self.attributes = get_annotation(self.attributes_path)
            self.num_features = 40
        else:
            self.num_features = len(self.selected_features)
            self.selected_features = self.selected_features.copy()
            self.selected_features.append('image_id')
            self.attributes = get_annotation(self.attributes_path)[self.selected_features]

        # remove unwanted features:
        for feature in drop_features:
            if feature in self.attributes:
                self.attributes = self.attributes.drop(feature, axis=1)
                self.num_features -= 1

        self.attributes.set_index('image_id', inplace=True)
        self.attributes.replace(to_replace=-1, value=0, inplace=True)  # map -1/1 labels to 0/1
        self.attributes['image_id'] = list(self.attributes.index)
        # self.attributes.drop(self.attributes.columns[-1], axis=1, inplace=True)

        self.features_name = list(self.attributes.columns)[:-1]

        # load ideal partitioning:
        self.partition = pd.read_csv(self.partition_path, sep=" ")
        self.partition.set_index('image_id', inplace=True)

    def split(self, name='0', drop_zero=False):
        '''Returns the [0 'training', 1 'validation', 2 'test'] split of the dataset'''
        # select partition split (== rather than `is`: identity comparison
        # against string literals is unreliable):
        if name == '0':
            to_drop = self.partition.where(lambda x: x != 0).dropna()
        elif name == '1':
            to_drop = self.partition.where(lambda x: x != 1).dropna()
        elif name == '2':  # test
            to_drop = self.partition.where(lambda x: x != 2).dropna()
        else:
            raise ValueError('CelebA.split() => `name` must be one of [0-training, 1-validation, 2-test]')

        partition = self.partition.drop(index=to_drop.index)

        # join attributes with selected partition:
        joint = partition.join(self.attributes, how='inner').drop('partition', axis=1)

        if drop_zero is True:
            # keep only rows where at least one selected feature is set
            return joint.loc[(joint[self.features_name] == 1).any(axis=1)]
        elif 0 <= drop_zero <= 1:
            # drop a fraction `drop_zero` of the all-zero rows
            zero = joint.loc[(joint[self.features_name] == 0).all(axis=1)]
            zero = zero.sample(frac=drop_zero)
            return joint.drop(index=zero.index)

        return joint
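
From there, wiring this into the Keras-Tuner script above is mostly a matter of feeding the splits through an image generator and switching the model head to multi-label outputs. A rough sketch of what I mean (the attribute choices, image size and batch size are just my own; CelebA has no 'Beard' or 'Emotion' attribute, so 'No_Beard' and 'Smiling' stand in for them):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

celeba = CelebA(main_folder='data/',
                selected_features=['Mustache', 'No_Beard', 'Smiling'])
train_df = celeba.split('0')  # training partition
val_df = celeba.split('1')    # validation partition

datagen = ImageDataGenerator(rescale=1. / 255)
train_gen = datagen.flow_from_dataframe(
    dataframe=train_df, directory=celeba.images_folder,
    x_col='image_id', y_col=celeba.features_name,
    class_mode='raw',  # yields the 0/1 attribute vector unchanged
    target_size=(112, 112), batch_size=64)
val_gen = datagen.flow_from_dataframe(
    dataframe=val_df, directory=celeba.images_folder,
    x_col='image_id', y_col=celeba.features_name,
    class_mode='raw', target_size=(112, 112), batch_size=64)

# in build_model the 10-way softmax head then becomes one sigmoid
# unit per attribute:
#     model.add(Dense(len(celeba.features_name)))
#     model.add(Activation('sigmoid'))
#     model.compile(optimizer='adam',
#                   loss='binary_crossentropy',
#                   metrics=['accuracy'])

tuner.search(train_gen, validation_data=val_gen, epochs=1)

Since each attribute is an independent 0/1 label, sigmoid plus binary_crossentropy replaces the 10-way softmax from the fashion_mnist version. Also note that Keras models are normally saved with model.save() rather than pickle.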