通过 Keras 加载自定义数据集
Loading Custom Dataset via Keras
我有一个基于 Keras 的简单 GAN 模型,用于根据 MNIST 数据集生成手写数字图像。我想用 Sokoto Coventry 指纹数据集 (SOCOFing) 的原始图像数据为 Keras 创建一个类似的数据集(该数据集由 6000 个不同的黑白指纹图像样本组成),并将其应用于同一个 GAN 模型。问题是:我卡在了自定义数据集的创建以及加载/处理这一步。
这是我用于 MNIST 的模型的代码:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.layers import Input
from keras.models import Model, Sequential
from keras.layers.core import Dense, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
from keras import initializers
os.environ["KERAS_BACKEND"] = "tensorflow"
np.random.seed(10)
random_dim = 100
def load_mnist_data():
    """Load MNIST and prepare the training images for a dense (flat-input) model.

    The training images are converted to float32, rescaled with
    ``(x - 127.5) / 127.5`` and flattened to one row per sample.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. Only ``x_train`` is
        rescaled and flattened; the other three arrays are returned exactly
        as ``mnist.load_data()`` produced them.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Maps 0..255 pixel values onto [-1, 1] (assumes 8-bit input images).
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5
    # Derive the sample count from the data instead of hard-coding
    # (60000, 784), so the same code works for datasets of any size.
    x_train = x_train.reshape(x_train.shape[0], -1)
    return (x_train, y_train, x_test, y_test)
出于实验目的,我创建了一个较小版本的 SOCOFing 数据集,其中仅包含 500 个样本。数据集生成器的代码如下:
from PIL import Image
import os
import numpy as np
# Build a single .npz archive from every image file in the fingerprint folder.
path_to_files = "./fingerprints/"
vectorized_images_X = []
vectorized_images_Y = []
# os.listdir() returns entries in arbitrary order; sorting makes the archive
# contents reproducible between runs.
for file in sorted(os.listdir(path_to_files)):
    # Image.open is lazy and keeps the file handle open; converting to an
    # array inside the `with` block reads the pixels before the handle closes.
    with Image.open(os.path.join(path_to_files, file)) as image:
        image_array = np.array(image)
    vectorized_images_X.append(image_array)
    # NOTE(review): DataY is stored as an exact copy of DataX — confirm that
    # this is intended rather than real labels or a held-out split.
    vectorized_images_Y.append(image_array)
np.savez(
    "./fingerprints.npz",
    DataX=vectorized_images_X,
    DataY=vectorized_images_Y,
)
import numpy as np
# Re-open the saved archive and print both stored arrays as a sanity check.
path = "./fingerprints.npz"
with np.load(path) as data:
    # Arrays are read from the archive when indexed, so both lookups stay
    # inside the `with` block while the file is still open.
    train_data = data['DataX']
    print(train_data)

    test_data = data['DataY']
    print(test_data)
所以现在我有了一个 *.npz 文件,但不知道如何把它输入到模型中。请指教。
下面是加载任意 .npz 文件的示例代码,您可以参考。
# Example: fetch an .npz archive and read its named arrays.
# The snippet uses `tf` without importing it, so import it here.
import tensorflow as tf

DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'

# get_file returns the local path of the downloaded file.
path = tf.keras.utils.get_file('mnist.npz', DATA_URL)
with np.load(path) as data:
    # Each array is looked up by the key it was saved under.
    train_examples = data['x_train']
    train_labels = data['y_train']
    test_examples = data['x_test']
    test_labels = data['y_test']
更多详情,可以参考这个链接。
我有一个基于 Keras 的简单 GAN 模型,用于根据 MNIST 数据集生成手写数字图像。我想用 Sokoto Coventry 指纹数据集 (SOCOFing) 的原始图像数据为 Keras 创建一个类似的数据集(该数据集由 6000 个不同的黑白指纹图像样本组成),并将其应用于同一个 GAN 模型。问题是:我卡在了自定义数据集的创建以及加载/处理这一步。
这是我用于 MNIST 的模型的代码:
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.layers import Input
from keras.models import Model, Sequential
from keras.layers.core import Dense, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
from keras import initializers
os.environ["KERAS_BACKEND"] = "tensorflow"
np.random.seed(10)
random_dim = 100
def load_mnist_data():
    """Load MNIST and prepare the training images for a dense (flat-input) model.

    The training images are converted to float32, rescaled with
    ``(x - 127.5) / 127.5`` and flattened to one row per sample.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. Only ``x_train`` is
        rescaled and flattened; the other three arrays are returned exactly
        as ``mnist.load_data()`` produced them.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Maps 0..255 pixel values onto [-1, 1] (assumes 8-bit input images).
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5
    # Derive the sample count from the data instead of hard-coding
    # (60000, 784), so the same code works for datasets of any size.
    x_train = x_train.reshape(x_train.shape[0], -1)
    return (x_train, y_train, x_test, y_test)
出于实验目的,我创建了一个较小版本的 SOCOFing 数据集,其中仅包含 500 个样本。数据集生成器的代码如下:
from PIL import Image
import os
import numpy as np
# Build a single .npz archive from every image file in the fingerprint folder.
path_to_files = "./fingerprints/"
vectorized_images_X = []
vectorized_images_Y = []
# os.listdir() returns entries in arbitrary order; sorting makes the archive
# contents reproducible between runs.
for file in sorted(os.listdir(path_to_files)):
    # Image.open is lazy and keeps the file handle open; converting to an
    # array inside the `with` block reads the pixels before the handle closes.
    with Image.open(os.path.join(path_to_files, file)) as image:
        image_array = np.array(image)
    vectorized_images_X.append(image_array)
    # NOTE(review): DataY is stored as an exact copy of DataX — confirm that
    # this is intended rather than real labels or a held-out split.
    vectorized_images_Y.append(image_array)
np.savez(
    "./fingerprints.npz",
    DataX=vectorized_images_X,
    DataY=vectorized_images_Y,
)
import numpy as np
# Re-open the saved archive and print both stored arrays as a sanity check.
path = "./fingerprints.npz"
with np.load(path) as data:
    # Arrays are read from the archive when indexed, so both lookups stay
    # inside the `with` block while the file is still open.
    train_data = data['DataX']
    print(train_data)

    test_data = data['DataY']
    print(test_data)
所以现在我有了一个 *.npz 文件,但不知道如何把它输入到模型中。请指教。
下面是加载任意 .npz 文件的示例代码,您可以参考。
# Example: fetch an .npz archive and read its named arrays.
# The snippet uses `tf` without importing it, so import it here.
import tensorflow as tf

DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'

# get_file returns the local path of the downloaded file.
path = tf.keras.utils.get_file('mnist.npz', DATA_URL)
with np.load(path) as data:
    # Each array is looked up by the key it was saved under.
    train_examples = data['x_train']
    train_labels = data['y_train']
    test_examples = data['x_test']
    test_labels = data['y_test']
更多详情,可以参考这个链接。