如何使 tensorflow cifar10 教程从 numpy 数组读取?
How to make tensorflow cifar10 tutorial read from numpy array?
我正在尝试使用 CIFAR10 教程来创建我自己的训练脚本。我的数据集存储在一个 MAT 文件中,我使用 h5py
将该文件转换为 Numpy 数组。在本教程中,他们使用以下方式读取数据:
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
然而,就我而言,我使用:
images_placeholder = tf.placeholder(tf.float32, shape=shape)
labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
问题是当我尝试运行他们在 CIFAR10 示例中使用的 MonitoredTrainingSession
进行训练时:
def train():
    """Build the VGG16 training graph and run it with a MonitoredTrainingSession.

    Data comes from an HDF5 image database (converted from a MAT file) and is
    fed into the graph through placeholders on every step.
    """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        # Keep the HDF5 file open for the whole training run: DataSet reads
        # batches lazily from it via next_batch().
        with inputs.read_imdb(FLAGS.input_path) as imdb:
            # 'set' marks the split each example belongs to; 1 == training split.
            sets = np.asarray(imdb['images']['set'], dtype=np.int32)
            data_set = inputs.DataSet(imdb, np.where(sets == 1)[0])
            images, labels = inputs.placeholder_inputs(data_set, batch_size=128)

            logits = model.vgg16(images)
            loss = model.loss(logits, labels)
            train_op = model.train(loss, global_step, data_set.num_examples)

            class _LoggerHook(tf.train.SessionRunHook):
                """Logs loss and examples/sec every 10 steps."""

                def begin(self):
                    self._step = -1

                def before_run(self, run_context):
                    self._step += 1
                    self._start_time = time.time()
                    # Ask the session to also fetch the loss tensor.
                    return tf.train.SessionRunArgs(loss)

                def after_run(self, run_context, run_values):
                    duration = time.time() - self._start_time
                    loss_value = run_values.results
                    if self._step % 10 == 0:
                        num_examples_per_step = FLAGS.batch_size
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = float(duration)
                        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                      'sec/batch)')
                        print(format_str % (datetime.now(), self._step, loss_value,
                                            examples_per_sec, sec_per_batch))

            with tf.train.MonitoredTrainingSession(
                    checkpoint_dir=FLAGS.train_dir,
                    hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                           tf.train.NanTensorHook(loss),
                           _LoggerHook()],
                    config=tf.ConfigProto(
                        log_device_placement=FLAGS.log_device_placement)) as mon_sess:
                while not mon_sess.should_stop():
                    # Fix: every run() call must feed the placeholders, otherwise
                    # TF raises "You must feed a value for placeholder tensor ...".
                    mon_sess.run(train_op,
                                 feed_dict=fill_feed_dict(data_set, images, labels))
其中 inputs.DataSet
基于 MNIST 示例。下面是一些辅助函数:
def read_imdb(path):
    """Open the HDF5 image database at *path* and validate its structure.

    Returns the open ``h5py.File``; the caller is responsible for closing it
    (``h5py.File`` is a context manager, so ``with read_imdb(p) as imdb:`` works).
    """
    # Open read-only explicitly: h5py's implicit default mode is deprecated,
    # and a training script should never open its data read-write by accident.
    imdb = h5py.File(path, 'r')
    check_imdb(imdb)
    return imdb
def placeholder_inputs(data_set, batch_size):
    """Create feed placeholders for one batch of images and labels.

    The MAT/HDF5 layout stores the image dimensions reversed relative to
    TensorFlow's convention, hence the ``[::-1]`` on the per-image shape.
    """
    shape = (batch_size,) + data_set.images.shape[1:][::-1]
    # Fix: was tf.floatz32, which raises AttributeError at graph-build time.
    images_placeholder = tf.placeholder(tf.float32, shape=shape)
    labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
    return images_placeholder, labels_placeholder
当我尝试运行时,显然会返回错误 You must feed a value for placeholder tensor 'Placeholder',
因为我没有创建提要。关键是我确实有创建提要的功能,但我不知道应该把它传递到哪里。
def fill_feed_dict(data_set, images, labels):
    """Fetch the next batch from *data_set* and map it onto the placeholders.

    The batch size is taken from the static shape of the *images* placeholder,
    so the feed always matches what the graph expects.
    """
    batch_size = images.get_shape()[0].value
    images_feed, labels_feed = data_set.next_batch(batch_size)
    return {images: images_feed, labels: labels_feed}
有人能帮忙吗?
谢谢,
丹尼尔
每次调用 run
方法时,只需要传入由 fill_feed_dict
创建的 dict
即可:
mon_sess.run(train_op, feed_dict=fill_feed_dict(data_set, images, labels))
我正在尝试使用 CIFAR10 教程来创建我自己的训练脚本。我的数据集存储在一个 MAT 文件中,我使用 h5py
将该文件转换为 Numpy 数组。在本教程中,他们使用以下方式读取数据:
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
然而,就我而言,我使用:
images_placeholder = tf.placeholder(tf.float32, shape=shape)
labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
问题是当我尝试运行他们在 CIFAR10 示例中使用的 MonitoredTrainingSession
进行训练时:
def train():
    """Build the VGG16 training graph and run it with a MonitoredTrainingSession.

    Data comes from an HDF5 image database (converted from a MAT file) and is
    fed into the graph through placeholders on every step.
    """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        # Keep the HDF5 file open for the whole training run: DataSet reads
        # batches lazily from it via next_batch().
        with inputs.read_imdb(FLAGS.input_path) as imdb:
            # 'set' marks the split each example belongs to; 1 == training split.
            sets = np.asarray(imdb['images']['set'], dtype=np.int32)
            data_set = inputs.DataSet(imdb, np.where(sets == 1)[0])
            images, labels = inputs.placeholder_inputs(data_set, batch_size=128)

            logits = model.vgg16(images)
            loss = model.loss(logits, labels)
            train_op = model.train(loss, global_step, data_set.num_examples)

            class _LoggerHook(tf.train.SessionRunHook):
                """Logs loss and examples/sec every 10 steps."""

                def begin(self):
                    self._step = -1

                def before_run(self, run_context):
                    self._step += 1
                    self._start_time = time.time()
                    # Ask the session to also fetch the loss tensor.
                    return tf.train.SessionRunArgs(loss)

                def after_run(self, run_context, run_values):
                    duration = time.time() - self._start_time
                    loss_value = run_values.results
                    if self._step % 10 == 0:
                        num_examples_per_step = FLAGS.batch_size
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = float(duration)
                        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                      'sec/batch)')
                        print(format_str % (datetime.now(), self._step, loss_value,
                                            examples_per_sec, sec_per_batch))

            with tf.train.MonitoredTrainingSession(
                    checkpoint_dir=FLAGS.train_dir,
                    hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                           tf.train.NanTensorHook(loss),
                           _LoggerHook()],
                    config=tf.ConfigProto(
                        log_device_placement=FLAGS.log_device_placement)) as mon_sess:
                while not mon_sess.should_stop():
                    # Fix: every run() call must feed the placeholders, otherwise
                    # TF raises "You must feed a value for placeholder tensor ...".
                    mon_sess.run(train_op,
                                 feed_dict=fill_feed_dict(data_set, images, labels))
其中 inputs.DataSet
基于 MNIST 示例。下面是一些辅助函数:
def read_imdb(path):
    """Open the HDF5 image database at *path* and validate its structure.

    Returns the open ``h5py.File``; the caller is responsible for closing it
    (``h5py.File`` is a context manager, so ``with read_imdb(p) as imdb:`` works).
    """
    # Open read-only explicitly: h5py's implicit default mode is deprecated,
    # and a training script should never open its data read-write by accident.
    imdb = h5py.File(path, 'r')
    check_imdb(imdb)
    return imdb
def placeholder_inputs(data_set, batch_size):
    """Create feed placeholders for one batch of images and labels.

    The MAT/HDF5 layout stores the image dimensions reversed relative to
    TensorFlow's convention, hence the ``[::-1]`` on the per-image shape.
    """
    shape = (batch_size,) + data_set.images.shape[1:][::-1]
    # Fix: was tf.floatz32, which raises AttributeError at graph-build time.
    images_placeholder = tf.placeholder(tf.float32, shape=shape)
    labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
    return images_placeholder, labels_placeholder
当我尝试运行时,显然会返回错误 You must feed a value for placeholder tensor 'Placeholder',
因为我没有创建提要。关键是我确实有创建提要的功能,但我不知道应该把它传递到哪里。
def fill_feed_dict(data_set, images, labels):
    """Fetch the next batch from *data_set* and map it onto the placeholders.

    The batch size is taken from the static shape of the *images* placeholder,
    so the feed always matches what the graph expects.
    """
    batch_size = images.get_shape()[0].value
    images_feed, labels_feed = data_set.next_batch(batch_size)
    return {images: images_feed, labels: labels_feed}
有人能帮忙吗?
谢谢, 丹尼尔
每次调用 run
方法时,只需要传入由 fill_feed_dict
创建的 dict
即可:
mon_sess.run(train_op, feed_dict=fill_feed_dict(data_set, images, labels))