从 numpy 数组加载 TensorFlow Record 时遇到的问题
Loading Tensorflow Record from numpy arrays problem
我将预处理后的音频以频谱图的形式保存到 TFRecord 文件中。到目前为止一切顺利。
我的数据(单个样本)的形状为 (64, 23),测试数据集的形状为 (N, 64, 23),其中 N 是样本数。
这是我将数据集保存到 TFRecord 文件的代码:
def folder_to_tfrecord(self, X, Y, output_file):
    """Serialize paired samples and labels into a TFRecord file.

    Each sample is stored as raw float32 bytes together with its height
    and width, so a reader can restore the original 2-D shape.

    Arguments:
        X -- iterable of 2-D numpy arrays, one per sample
        Y -- iterable of integer labels, aligned with X
        output_file -- path of the TFRecord file to write
    """
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    # The context manager closes the writer even if a sample fails to serialize.
    with tf.python_io.TFRecordWriter(output_file) as writer:
        for i, (sample, label) in enumerate(zip(X, Y)):
            print(f'Storing example #{i} with shape {sample.shape} and label {label}')
            # Height and width are stored for future reconstruction
            height, width = sample.shape
            # tobytes() replaces the deprecated ndarray.tostring()
            sample_raw = sample.astype(np.float32).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(height),
                'width': _int64_feature(width),
                'data': _bytes_feature(sample_raw),
                'label': _int64_feature(label),
            }))
            writer.write(example.SerializeToString())
它的输出是正确的:
Storing example #0 with shape (64, 23) and label 0
Storing example #1 with shape (64, 23) and label 0
Storing example #2 with shape (64, 23) and label 0
Storing example #3 with shape (64, 23) and label 0
Storing example #4 with shape (64, 23) and label 0
但是,当我尝试通过 tf.data.Dataset 读取这些记录并启用 eager execution(以便得到真实数据)时,出现了异常(TensorFlow KeyError)。
我是这样读取数据的:
import tensorflow as tf
def _parse_function(example_proto):
    """Parse one serialized Example into (height, width, data, label)."""
    # NOTE(review): 'data' is declared as a [64, 23] string feature, while the
    # writer stores a single bytes value; this looks like the cause of the
    # "Key: data. Can't parse serialized Example." error reported below.
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'data': tf.FixedLenFeature([64, 23], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, feature_spec)
    return tuple(parsed[key] for key in ('height', 'width', 'data', 'label'))
def create_dataset(filepath = './new.tfrecord', paralel: int = 0):
    """Build a tf.data pipeline that parses records from TFRecord file(s).

    Arguments:
        filepath -- path, or list of paths, of the TFRecord file(s) to read
        paralel -- number of parallel calls used to parse records; 0 or
            less keeps the default sequential mapping

    Returns:
        Dataset whose elements are the output of _parse_function
    """
    # TFRecordDataset works with a list of paths as well as a single path
    dataset = tf.data.TFRecordDataset(filepath)
    # `paralel` used to be accepted but ignored; forward it to map() so the
    # number of parallel loaders can actually be set by the caller.
    num_parallel_calls = paralel if paralel and paralel > 0 else None
    return dataset.map(_parse_function, num_parallel_calls=num_parallel_calls)
ds = create_dataset()
it = ds.make_one_shot_iterator()
next_data = it.get_next()
# Using the session as a context manager releases it once the loop ends.
with tf.Session() as sess:
    while True:
        try:
            # Keep the last two fields so this works whether the parser yields
            # (height, width, data, label) or just (data, label); unpacking a
            # 4-tuple into two names would raise ValueError.
            *_, data, label = sess.run(next_data)
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
        print(data)
        print(label)
我遇到了异常 tensorflow.python.framework.errors_impl.InvalidArgumentError: Key: data. Can't parse serialized Example.
我做错了什么?能否根据已存储的 height 和 width 字段对数据重新整形?
编辑:
当我不使用 eager execution 时,数据管道似乎可以工作
>>> print(next_data)
(<tf.Tensor 'IteratorGetNext:0' shape=() dtype=int64>, <tf.Tensor 'IteratorGetNext:1' shape=() dtype=int64>, <tf.Tensor 'IteratorGetNext:2' shape=(64, 23) dtype=string>, <tf.Tensor 'IteratorGetNext:3' shape=() dtype=int64>)
我终于解决了这个问题。我必须修改解析函数:不再把形状传给 FixedLenFeature,而是将形状作为特征的一部分存储,解析后再根据这些值对数据重新整形。
def _parse_function(example_proto: 'Protocol Buffer') -> Tuple[tf.Tensor, tf.Tensor]:
    """Decode one serialized Example into an (image, label) pair.

    Meant to be used as dataset.map(_parse_function).

    Arguments:
        example_proto {[Protocol Buffer]} -- Serialized Example to parse

    Returns:
        Tuple[tf.Tensor, tf.Tensor] -- (image, label); image is the float32
        data reshaped to the height and width stored in the record
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'data': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, feature_spec)

    with tf.variable_scope('decoder'):
        label = parsed['label']
        # The stored dimensions drive the reshape, so no shape is hard-coded
        target_shape = (parsed['height'], parsed['width'])
        # Raw bytes come back as a flat float32 vector
        flat = tf.decode_raw(parsed['data'], tf.float32)
        with tf.variable_scope('image'):
            image = tf.reshape(flat, target_shape)
    return image, label
我将预处理后的音频以频谱图的形式保存到 TFRecord 文件中。到目前为止一切顺利。
我的数据(单个样本)的形状为 (64, 23),测试数据集的形状为 (N, 64, 23),其中 N 是样本数。
这是我将数据集保存到 TFRecord 文件的代码:
def folder_to_tfrecord(self, X, Y, output_file):
    """Serialize paired samples and labels into a TFRecord file.

    Each sample is stored as raw float32 bytes together with its height
    and width, so a reader can restore the original 2-D shape.

    Arguments:
        X -- iterable of 2-D numpy arrays, one per sample
        Y -- iterable of integer labels, aligned with X
        output_file -- path of the TFRecord file to write
    """
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    # The context manager closes the writer even if a sample fails to serialize.
    with tf.python_io.TFRecordWriter(output_file) as writer:
        for i, (sample, label) in enumerate(zip(X, Y)):
            print(f'Storing example #{i} with shape {sample.shape} and label {label}')
            # Height and width are stored for future reconstruction
            height, width = sample.shape
            # tobytes() replaces the deprecated ndarray.tostring()
            sample_raw = sample.astype(np.float32).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(height),
                'width': _int64_feature(width),
                'data': _bytes_feature(sample_raw),
                'label': _int64_feature(label),
            }))
            writer.write(example.SerializeToString())
它的输出是正确的:
Storing example #0 with shape (64, 23) and label 0
Storing example #1 with shape (64, 23) and label 0
Storing example #2 with shape (64, 23) and label 0
Storing example #3 with shape (64, 23) and label 0
Storing example #4 with shape (64, 23) and label 0
但是,当我尝试通过 tf.data.Dataset 读取这些记录并启用 eager execution(以便得到真实数据)时,出现了异常(TensorFlow KeyError)。
我是这样读取数据的:
import tensorflow as tf
def _parse_function(example_proto):
    """Parse one serialized Example into (height, width, data, label)."""
    # NOTE(review): 'data' is declared as a [64, 23] string feature, while the
    # writer stores a single bytes value; this looks like the cause of the
    # "Key: data. Can't parse serialized Example." error reported below.
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'data': tf.FixedLenFeature([64, 23], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, feature_spec)
    return tuple(parsed[key] for key in ('height', 'width', 'data', 'label'))
def create_dataset(filepath = './new.tfrecord', paralel: int = 0):
    """Build a tf.data pipeline that parses records from TFRecord file(s).

    Arguments:
        filepath -- path, or list of paths, of the TFRecord file(s) to read
        paralel -- number of parallel calls used to parse records; 0 or
            less keeps the default sequential mapping

    Returns:
        Dataset whose elements are the output of _parse_function
    """
    # TFRecordDataset works with a list of paths as well as a single path
    dataset = tf.data.TFRecordDataset(filepath)
    # `paralel` used to be accepted but ignored; forward it to map() so the
    # number of parallel loaders can actually be set by the caller.
    num_parallel_calls = paralel if paralel and paralel > 0 else None
    return dataset.map(_parse_function, num_parallel_calls=num_parallel_calls)
ds = create_dataset()
it = ds.make_one_shot_iterator()
next_data = it.get_next()
# Using the session as a context manager releases it once the loop ends.
with tf.Session() as sess:
    while True:
        try:
            # Keep the last two fields so this works whether the parser yields
            # (height, width, data, label) or just (data, label); unpacking a
            # 4-tuple into two names would raise ValueError.
            *_, data, label = sess.run(next_data)
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
        print(data)
        print(label)
我遇到了异常 tensorflow.python.framework.errors_impl.InvalidArgumentError: Key: data. Can't parse serialized Example.
我做错了什么?能否根据已存储的 height 和 width 字段对数据重新整形?
编辑: 当我不使用 eager execution 时,数据管道似乎可以工作
>>> print(next_data)
(<tf.Tensor 'IteratorGetNext:0' shape=() dtype=int64>, <tf.Tensor 'IteratorGetNext:1' shape=() dtype=int64>, <tf.Tensor 'IteratorGetNext:2' shape=(64, 23) dtype=string>, <tf.Tensor 'IteratorGetNext:3' shape=() dtype=int64>)
我终于解决了这个问题。我必须修改解析函数:不再把形状传给 FixedLenFeature,而是将形状作为特征的一部分存储,解析后再根据这些值对数据重新整形。
def _parse_function(example_proto: 'Protocol Buffer') -> Tuple[tf.Tensor, tf.Tensor]:
    """Decode one serialized Example into an (image, label) pair.

    Meant to be used as dataset.map(_parse_function).

    Arguments:
        example_proto {[Protocol Buffer]} -- Serialized Example to parse

    Returns:
        Tuple[tf.Tensor, tf.Tensor] -- (image, label); image is the float32
        data reshaped to the height and width stored in the record
    """
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'data': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, feature_spec)

    with tf.variable_scope('decoder'):
        label = parsed['label']
        # The stored dimensions drive the reshape, so no shape is hard-coded
        target_shape = (parsed['height'], parsed['width'])
        # Raw bytes come back as a flat float32 vector
        flat = tf.decode_raw(parsed['data'], tf.float32)
        with tf.variable_scope('image'):
            image = tf.reshape(flat, target_shape)
    return image, label