无法从 Tensorflow tfrecord 文件中读取
Unable to read from Tensorflow tfrecord file
我可以使用以下代码创建 tfrecords 文件。
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an int64 list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a bytes list."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def convert_to_tfrecord(images, labels, file_name):
    """Serialize images and their labels into a TFRecord file.

    Each image is stored as raw float32 bytes together with its height,
    width, depth and integer label.

    Args:
        images: numpy array of shape (num_images, channel, rows, column).
        labels: numpy array of shape (num_images,).
        file_name: path of the TFRecord file to write.

    Raises:
        ValueError: if the number of labels differs from the number of images.
    """
    num_images, depth, rows, cols = np.shape(images)
    if len(labels) != num_images:
        raise ValueError(
            "Got %d labels for %d images" % (len(labels), num_images))

    # The context manager closes the writer even if serialization fails.
    with tf.python_io.TFRecordWriter(file_name) as writer:
        for index in range(num_images):
            # tobytes() replaces the deprecated ndarray.tostring().
            image_raw = images[index].astype(np.float32).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(rows),
                'width': _int64_feature(cols),
                'depth': _int64_feature(depth),
                'label': _int64_feature(int(labels[index])),
                'image_raw': _bytes_feature(image_raw),
            }))
            writer.write(example.SerializeToString())
但是,当我使用以下函数从 tfrecord 文件读取数据时:
def read_and_decode(filename_queue):
    """Read one serialized example from the queue and decode it.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        An ``(image, label)`` pair: the float32 image reshaped to
        ``[depth, height, width]`` and the label cast to int32.
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }

    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    image = tf.decode_raw(parsed['image_raw'], tf.float32)
    label = tf.cast(parsed['label'], tf.int32)
    height = tf.cast(parsed['height'], tf.int32)
    width = tf.cast(parsed['width'], tf.int32)
    depth = tf.cast(parsed['depth'], tf.int32)

    # NOTE: the target shape is built from tensors read out of the record,
    # so the reshaped image carries no statically known shape.
    dynamic_shape = tf.stack([depth, height, width])
    return tf.reshape(image, dynamic_shape), label
def inputs(batch_size, num_epochs):
    """Build shuffled batches of images and labels from 'set1.tfrecords'.

    Args:
        batch_size: number of examples per batch.
        num_epochs: number of passes over the input file; forwarded to
            ``tf.train.string_input_producer``.

    Returns:
        An ``(images, sparse_labels)`` pair of batched tensors.
    """
    import os

    # dir_path is a global variable
    file_path = os.path.join(dir_path, 'set1.tfrecords')
    # Honor the caller's num_epochs instead of a hard-coded single epoch.
    filename_queue = tf.train.string_input_producer(
        [file_path], num_epochs=num_epochs)
    image, label = read_and_decode(filename_queue)
    images, sparse_labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size, num_threads=2,
        capacity=1000 + 3 * batch_size, min_after_dequeue=1000)
    return images, sparse_labels
我总是收到以下错误:
images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 1225, in shuffle_batch
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 781, in _shuffle_batch
dtypes=types, shapes=shapes, shared_name=shared_name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 641, in __init__
shapes = _as_shape_list(shapes, dtypes)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 77, in _as_shape_list
raise ValueError("All shapes must be fully defined: %s" % shapes)
ValueError: All shapes must be fully defined: [TensorShape([Dimension(None)]), TensorShape([])]
出现上述错误的原因是什么,该如何解决?(使用 tf.python_io.tf_record_iterator(path=filename) 逐条遍历文件时,我可以正常读取该 tfrecords 文件。)
出现该错误是因为 tf.train.shuffle_batch 需要知道张量的完整形状才能对它们进行批处理(同一批次中的各项必须具有相同的形状)。
但是,原始字节数据原则上可以是任意大小,因此 tf.decode_raw 不会为输出张量设置任何静态形状。
你在评论中提到所有图像的形状都是 (192,81,2),所以只需在 read_and_decode 返回之前为图像张量设置该形状即可(注意:传给 set_shape 的应当是 Python 整数常量,而不是从记录中解析出来的张量):
def read_and_decode(filename_queue):
    # rest of your code here
    # NOTE(review): height, width and depth presumably have to be plain
    # Python ints here (e.g. 192, 81, 2) rather than tensors parsed from the
    # record -- confirm. Also confirm the axis order against how the images
    # were written.
    static_shape = [height, width, depth]
    image = tf.reshape(image, static_shape)
    # Pin the static shape so the image can be batched downstream.
    image.set_shape(static_shape)
    return image, label
我可以使用以下代码创建 tfrecords 文件。
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an int64 list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a bytes list."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def convert_to_tfrecord(images, labels, file_name):
    """Serialize images and their labels into a TFRecord file.

    Each image is stored as raw float32 bytes together with its height,
    width, depth and integer label.

    Args:
        images: numpy array of shape (num_images, channel, rows, column).
        labels: numpy array of shape (num_images,).
        file_name: path of the TFRecord file to write.

    Raises:
        ValueError: if the number of labels differs from the number of images.
    """
    num_images, depth, rows, cols = np.shape(images)
    if len(labels) != num_images:
        raise ValueError(
            "Got %d labels for %d images" % (len(labels), num_images))

    # The context manager closes the writer even if serialization fails.
    with tf.python_io.TFRecordWriter(file_name) as writer:
        for index in range(num_images):
            # tobytes() replaces the deprecated ndarray.tostring().
            image_raw = images[index].astype(np.float32).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(rows),
                'width': _int64_feature(cols),
                'depth': _int64_feature(depth),
                'label': _int64_feature(int(labels[index])),
                'image_raw': _bytes_feature(image_raw),
            }))
            writer.write(example.SerializeToString())
但是,当我使用以下函数从 tfrecord 文件读取数据时:
def read_and_decode(filename_queue):
    """Read one serialized example from the queue and decode it.

    Args:
        filename_queue: queue of TFRecord file names to read from.

    Returns:
        An ``(image, label)`` pair: the float32 image reshaped to
        ``[depth, height, width]`` and the label cast to int32.
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }

    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    image = tf.decode_raw(parsed['image_raw'], tf.float32)
    label = tf.cast(parsed['label'], tf.int32)
    height = tf.cast(parsed['height'], tf.int32)
    width = tf.cast(parsed['width'], tf.int32)
    depth = tf.cast(parsed['depth'], tf.int32)

    # NOTE: the target shape is built from tensors read out of the record,
    # so the reshaped image carries no statically known shape.
    dynamic_shape = tf.stack([depth, height, width])
    return tf.reshape(image, dynamic_shape), label
def inputs(batch_size, num_epochs):
    """Build shuffled batches of images and labels from 'set1.tfrecords'.

    Args:
        batch_size: number of examples per batch.
        num_epochs: number of passes over the input file; forwarded to
            ``tf.train.string_input_producer``.

    Returns:
        An ``(images, sparse_labels)`` pair of batched tensors.
    """
    import os

    # dir_path is a global variable
    file_path = os.path.join(dir_path, 'set1.tfrecords')
    # Honor the caller's num_epochs instead of a hard-coded single epoch.
    filename_queue = tf.train.string_input_producer(
        [file_path], num_epochs=num_epochs)
    image, label = read_and_decode(filename_queue)
    images, sparse_labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size, num_threads=2,
        capacity=1000 + 3 * batch_size, min_after_dequeue=1000)
    return images, sparse_labels
我总是收到以下错误:
images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 1225, in shuffle_batch
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 781, in _shuffle_batch
dtypes=types, shapes=shapes, shared_name=shared_name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 641, in __init__
shapes = _as_shape_list(shapes, dtypes)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 77, in _as_shape_list
raise ValueError("All shapes must be fully defined: %s" % shapes)
ValueError: All shapes must be fully defined: [TensorShape([Dimension(None)]), TensorShape([])]
出现上述错误的原因是什么,该如何解决?(使用 tf.python_io.tf_record_iterator(path=filename) 逐条遍历文件时,我可以正常读取该 tfrecords 文件。)
出现该错误是因为 tf.train.shuffle_batch 需要知道张量的完整形状才能对它们进行批处理(同一批次中的各项必须具有相同的形状)。
但是,原始字节数据原则上可以是任意大小,因此 tf.decode_raw 不会为输出张量设置任何静态形状。
你在评论中提到所有图像的形状都是 (192,81,2),所以只需在 read_and_decode 返回之前为图像张量设置该形状即可(注意:传给 set_shape 的应当是 Python 整数常量,而不是从记录中解析出来的张量):
def read_and_decode(filename_queue):
    # rest of your code here
    # NOTE(review): height, width and depth presumably have to be plain
    # Python ints here (e.g. 192, 81, 2) rather than tensors parsed from the
    # record -- confirm. Also confirm the axis order against how the images
    # were written.
    static_shape = [height, width, depth]
    image = tf.reshape(image, static_shape)
    # Pin the static shape so the image can be batched downstream.
    image.set_shape(static_shape)
    return image, label