I need some help setting up Keras-Tuner with Celeb_a dataset using Spyder
I am trying to come up with a way to use Keras-Tuner to automatically identify the best hyperparameters for my CNN, using the Celeb_a dataset.

I tried a similar project using fashion_mnist and it worked fine, but my experience with Python isn't enough to achieve what I want here. With fashion_mnist I managed to produce this table of results.

My code is here.

I am hoping to generate a similar table using the Celeb_a dataset, for a report I am doing for university. In the report, my university used AWS Rekognition to generate the table below.

I would like to be able to train on the data so that I can save the model to a pickle and generate a similar table of results to compare the two.

Any suggestions on how to approach this? My current questions are:

- How do I load the dataset correctly?
- How do I train the model to report accuracy on "Mustache", "Beard" and "Emotion" (as in the table of results above)?

I tried loading the data with:

(x_train, y_train), (x_test, y_test) = tfds.load('celeb_a')

but this gave me the following error:
AttributeError: Failed to construct dataset celeb_a: module 'tensorflow_datasets.core.utils' has no attribute 'version'
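For reference, `tfds.load` returns `tf.data.Dataset` objects keyed by split name rather than NumPy tuples, so the tuple unpacking above would fail even on a working install. Below is a minimal sketch of the usual access pattern, assuming a matched tensorflow / tensorflow-datasets pair and a successful `celeb_a` download:

import tensorflow_datasets as tfds

# Assumes compatible tensorflow / tensorflow-datasets versions; the
# AttributeError above is typically caused by a mismatch between the two.
ds_train = tfds.load('celeb_a', split='train')

for example in ds_train.take(1):
    image = example['image']        # uint8 tensor, shape (218, 178, 3)
    attrs = example['attributes']   # dict of 40 boolean attribute tensors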
I am using:

- Conda: TensorFlow (Python 3.8.5)
- Windows 10 Pro
- Intel(R) Core(TM) i3-4170 CPU @ 3.7GHz
- 64-bit

Here is the script I used to get started (the same as the one in my bitbucket). Any help would be greatly appreciated.

Thanks in advance.
# -*- coding: utf-8 -*-
import tensorflow_datasets as tfds
#from tensorflow.keras.datasets import fashion_mnist
#import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Activation
from kerastuner.tuners import RandomSearch
#from kerastuner.engine.hyperparameters import HyperParameter
import time
import os

LOG_DIR = f"{int(time.time())}"

# this line raises the AttributeError shown above
(x_train, y_train), (x_test, y_test) = tfds.load('celeb_a')

x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

def build_model(hp):  # RandomSearch passes this HyperParameters object
    model = keras.models.Sequential()

    #model.add(Conv2D(32, (3, 3), input_shape=x_train.shape[1:]))
    model.add(Conv2D(hp.Int("input_units", min_value=32, max_value=256, step=32),
                     (3, 3), input_shape=x_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    for i in range(hp.Int("n_layers", min_value=1, max_value=4, step=1)):
        #model.add(Conv2D(32, (3, 3)))
        model.add(Conv2D(hp.Int(f"conv_{i}_units", min_value=32, max_value=256, step=32), (3, 3)))
        model.add(Activation('relu'))
        #model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(10))
    model.add(Activation("softmax"))

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

tuner = RandomSearch(build_model,
                     objective="val_accuracy",
                     max_trials=1,
                     executions_per_trial=1,  # for best performance set to 3+
                     directory=os.path.normpath('C:/'),  # there is a character limit, keep the path short
                     overwrite=True)  # needed to overwrite previous results when testing

tuner.search(x=x_train,
             y=y_train,
             epochs=1,
             batch_size=64,
             validation_data=(x_test, y_test))
I managed to do this by creating a function that collects all the annotations:
import numpy as np
import pandas as pd

def get_annotation(fnmtxt, verbose=True):
    if verbose:
        print("_" * 70)
        print(fnmtxt)

    with open(fnmtxt, 'r') as rfile:
        texts = rfile.read().split("\n")

    # the second line of the file holds the attribute names
    columns = np.array(texts[1].split(" "))
    columns = columns[columns != ""]

    df = []
    for txt in texts[2:]:
        txt = np.array(txt.split(" "))
        txt = txt[txt != ""]
        df.append(txt)
    df = pd.DataFrame(df)

    if df.shape[1] == len(columns) + 1:
        columns = ["image_id"] + list(columns)
    df.columns = columns
    df = df.dropna()

    if verbose:
        print(" Total number of annotations {}\n".format(df.shape))
        print(df.head())

    # cast the attribute columns to numeric (float)
    for nm in df.columns:
        if nm != "image_id":
            df[nm] = pd.to_numeric(df[nm], downcast="float")
    return df
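As a quick sanity check of the parser (the path here is hypothetical; adjust it to wherever the standard CelebA annotation files were extracted; "Mustache" and "No_Beard" are attribute names from list_attr_celeba.txt):

# Hypothetical path to the standard CelebA attribute file.
attributes = get_annotation('data/list_attr_celeba.txt')
print(attributes[['image_id', 'Mustache', 'No_Beard']].head())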
I also created a class to do the rest of the work:
class CelebA():
    '''Wraps the CelebA dataset, allowing an easy way to:
       - select the features of interest,
       - split the dataset into 'training', 'validation' or 'test' partitions.
    '''

    def __init__(self, main_folder='data/', selected_features=None, drop_features=[]):
        self.main_folder = main_folder
        self.images_folder = os.path.join(main_folder, 'img_align_celeba/')
        self.attributes_path = os.path.join(main_folder, 'list_attr_celeba.txt')
        self.partition_path = os.path.join(main_folder, 'list_eval_partition.txt')
        self.selected_features = selected_features
        self.features_name = []
        self.__prepare(drop_features)

    def __prepare(self, drop_features):
        '''Do some preprocessing before using the data, e.g. feature selection.'''
        # attributes:
        if self.selected_features is None:
            self.attributes = get_annotation(self.attributes_path)
            self.num_features = 40
        else:
            self.num_features = len(self.selected_features)
            self.selected_features = self.selected_features.copy()
            self.selected_features.append('image_id')
            self.attributes = get_annotation(self.attributes_path)[self.selected_features]

        # remove unwanted features:
        for feature in drop_features:
            if feature in self.attributes:
                self.attributes = self.attributes.drop(feature, axis=1)
                self.num_features -= 1

        self.attributes.set_index('image_id', inplace=True)
        self.attributes.replace(to_replace=-1, value=0, inplace=True)
        self.attributes['image_id'] = list(self.attributes.index)
        # self.attributes.drop(self.attributes.columns[-1], axis=1, inplace=True)

        self.features_name = list(self.attributes.columns)[:-1]

        # load the ideal partitioning:
        self.partition = pd.read_csv(self.partition_path, sep=" ")
        self.partition.set_index('image_id', inplace=True)

    def split(self, name='0', drop_zero=False):
        '''Returns the [0 'training', 1 'validation', 2 'test'] split of the dataset.'''
        # select partition split (note: `==`, not `is`, for string comparison):
        if name == '0':    # training
            to_drop = self.partition.where(lambda x: x != 0).dropna()
        elif name == '1':  # validation
            to_drop = self.partition.where(lambda x: x != 1).dropna()
        elif name == '2':  # test
            to_drop = self.partition.where(lambda x: x != 2).dropna()
        else:
            raise ValueError('CelebA.split() => `name` must be one of [0-training, 1-validation, 2-test]')

        partition = self.partition.drop(index=to_drop.index)

        # join attributes with the selected partition:
        joint = partition.join(self.attributes, how='inner').drop('partition', axis=1)

        if drop_zero is True:
            # keep only rows with at least one non-zero attribute
            return joint.loc[(joint[self.features_name] == 1).any(axis=1)]
        elif 0 <= drop_zero <= 1:
            # drop a fraction `drop_zero` of the rows whose attributes are all zero
            zero = joint.loc[(joint[self.features_name] == 0).all(axis=1)]
            zero = zero.sample(frac=drop_zero)
            return joint.drop(index=zero.index)
        return joint
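To tie this back to the original questions, here is a minimal sketch of how the class might feed a Keras model. The folder layout is the assumed standard CelebA download, the attribute names come from list_attr_celeba.txt, and ImageDataGenerator.flow_from_dataframe is just one straightforward way to stream the images:

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Assumed standard CelebA folder layout under 'data/'.
celeba = CelebA(main_folder='data/',
                selected_features=['Mustache', 'No_Beard', 'Smiling'])

train_df = celeba.split('0')  # training partition
valid_df = celeba.split('1')  # validation partition

datagen = ImageDataGenerator(rescale=1. / 255)

train_gen = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=celeba.images_folder,
    x_col='image_id',
    y_col=celeba.features_name,
    class_mode='raw',          # multi-label: yields the raw 0/1 attribute vector
    target_size=(218, 178),
    batch_size=64)

Note that a multi-label attribute model would need a final Dense(len(celeba.features_name)) layer with a sigmoid activation and binary_crossentropy loss, rather than the softmax / sparse_categorical_crossentropy setup in the script above; the resulting generator can then be passed to tuner.search in place of the x/y arrays.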