Python tensorflow 创建具有多个数组特征的 tfrecord
Python tensorflow creating tfrecord with multiple array features
我正在按照 TensorFlow 文档,从三个 NumPy 数组生成 TFRecord 文件,但在尝试序列化数据时遇到了错误。我希望生成的 TFRecord
包含三个特征。
import numpy as np
import pandas as pd
# some random data
x = np.random.randn(85)
y = np.random.randn(85,2128)
z = np.random.choice(range(10),(85,155))
def _float_feature(value):
    """Return a ``tf.train.Feature`` holding one float / double.

    ``value`` is wrapped in a one-element list, so it must be a single
    scalar; passing an array or tensor raises a ``TypeError``.
    """
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
def _int64_feature(value):
    """Return a ``tf.train.Feature`` holding one bool / enum / int / uint.

    ``value`` is wrapped in a one-element list, so it must be a single
    scalar rather than an array or tensor.
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def serialize_example(feature0, feature1, feature2):
    """Create a serialized ``tf.train.Example`` ready to be written to a file.

    Args:
        feature0: Value stored under ``'feature0'`` as a float feature.
        feature1: Value stored under ``'feature1'`` as a float feature.
        feature2: Value stored under ``'feature2'`` as an int64 feature.

    Returns:
        The result of ``SerializeToString()`` on the built example.
    """
    # Create a dictionary mapping the feature name to the tf.Example-compatible
    # data type.
    feature = {
        'feature0': _float_feature(feature0),
        'feature1': _float_feature(feature1),
        'feature2': _int64_feature(feature2),
    }
    # Create a Features message using tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()
features_dataset = tf.data.Dataset.from_tensor_slices((x, y, z))
features_dataset
<TensorSliceDataset shapes: ((), (2128,), (155,)), types: (tf.float64, tf.float32, tf.int64)>
# Inspect the first element of the dataset; f0, f1 and f2 stay bound
# afterwards and are reused in the call shown further down.
for f0, f1, f2 in features_dataset.take(1):
    print(f0)
    print(f1)
    print(f2)
def tf_serialize_example(f0, f1, f2):
    """Call ``serialize_example`` via ``tf.py_function``.

    Returns:
        The ``tf.string`` result reshaped to a scalar.
    """
    tf_string = tf.py_function(
        serialize_example,
        (f0, f1, f2),  # Pass these args to the above function.
        tf.string)  # The return type is `tf.string`.
    return tf.reshape(tf_string, ())  # The result is a scalar.
然而,当我尝试运行 tf_serialize_example(f0,f1,f2) 时,
收到如下错误:
InvalidArgumentError: TypeError: <tf.Tensor: shape=(2128,), dtype=float32, numpy=
array([-0.5435242 , 0.97947884, -0.74457455, ..., has type tensorflow.python.framework.ops.EagerTensor, but expected one of: int, long, float
Traceback (most recent call last):
我认为原因是:我的特征是数组,而不是单个数值。如何让这段代码适用于数组形式的特征,而不仅仅是单个数值?
好的,我现在有时间仔细看了一下。我注意到 features_dataset
和 tf_serialize_example
的用法来自 TensorFlow 官网上的教程。我不清楚这种方法有什么优点,也不知道在这种方法下该如何解决这个问题。
不过,下面是一个应该适用于你的代码的工作流程(我重新打开了生成的 tfrecords 文件,内容没有问题)。
import numpy as np
import tensorflow as tf
# some random data
x = np.random.randn(85)
y = np.random.randn(85,2128)
z = np.random.choice(range(10),(85,155))
def _float_feature(value):
    """Return a ``tf.train.Feature`` float_list built from ``value``.

    ``value`` may be a scalar, a sequence, or an array of any shape; it is
    flattened to one dimension, so the original shape is not stored.
    """
    # np.asarray also accepts scalars and lists, which have no .flatten();
    # tolist() hands plain Python numbers to the protobuf constructor.
    flat_values = np.asarray(value).ravel().tolist()
    return tf.train.Feature(float_list=tf.train.FloatList(value=flat_values))
def _int64_feature(value):
    """Return a ``tf.train.Feature`` int64_list built from ``value``.

    ``value`` may be a scalar, a sequence, or an array of any shape; it is
    flattened to one dimension, so the original shape is not stored.
    """
    # np.asarray also accepts scalars and lists, which have no .flatten();
    # tolist() hands plain Python numbers to the protobuf constructor.
    flat_values = np.asarray(value).ravel().tolist()
    return tf.train.Feature(int64_list=tf.train.Int64List(value=flat_values))
def serialize_example(feature0, feature1, feature2):
    """Build a ``tf.train.Example`` message from three features.

    Args:
        feature0: Data stored under ``'feature0'`` as a float feature.
        feature1: Data stored under ``'feature1'`` as a float feature.
        feature2: Data stored under ``'feature2'`` as an int64 feature.

    Returns:
        The ``tf.train.Example`` message itself, not yet serialized; the
        caller invokes ``SerializeToString()`` before writing it.
    """
    # Create a dictionary mapping the feature name to the tf.Example-compatible
    # data type.
    feature = {
        'feature0': _float_feature(feature0),
        'feature1': _float_feature(feature1),
        'feature2': _int64_feature(feature2),
    }
    # Create a Features message using tf.train.Example.
    return tf.train.Example(features=tf.train.Features(feature=feature))
# Build the message first so a failure here does not leave an open file.
# Note: the full x, y and z arrays go into a single record.
example = serialize_example(x, y, z)
# tf.io.TFRecordWriter replaces tf.python_io.TFRecordWriter, which is gone in
# TensorFlow 2; the with-block closes the file even if the write raises.
with tf.io.TFRecordWriter('TEST.tfrecords') as writer:
    writer.write(example.SerializeToString())
此代码的主要区别在于:传给 serialize_example 的是 NumPy 数组,而不是 TensorFlow 张量。
希望这对你有帮助。
我正在按照 TensorFlow 文档,从三个 NumPy 数组生成 TFRecord 文件,但在尝试序列化数据时遇到了错误。我希望生成的 TFRecord
包含三个特征。
import numpy as np
import pandas as pd
# some random data
x = np.random.randn(85)
y = np.random.randn(85,2128)
z = np.random.choice(range(10),(85,155))
def _float_feature(value):
    """Return a ``tf.train.Feature`` holding one float / double.

    ``value`` is wrapped in a one-element list, so it must be a single
    scalar; passing an array or tensor raises a ``TypeError``.
    """
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
def _int64_feature(value):
    """Return a ``tf.train.Feature`` holding one bool / enum / int / uint.

    ``value`` is wrapped in a one-element list, so it must be a single
    scalar rather than an array or tensor.
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def serialize_example(feature0, feature1, feature2):
    """Create a serialized ``tf.train.Example`` ready to be written to a file.

    Args:
        feature0: Value stored under ``'feature0'`` as a float feature.
        feature1: Value stored under ``'feature1'`` as a float feature.
        feature2: Value stored under ``'feature2'`` as an int64 feature.

    Returns:
        The result of ``SerializeToString()`` on the built example.
    """
    # Create a dictionary mapping the feature name to the tf.Example-compatible
    # data type.
    feature = {
        'feature0': _float_feature(feature0),
        'feature1': _float_feature(feature1),
        'feature2': _int64_feature(feature2),
    }
    # Create a Features message using tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()
features_dataset = tf.data.Dataset.from_tensor_slices((x, y, z))
features_dataset
<TensorSliceDataset shapes: ((), (2128,), (155,)), types: (tf.float64, tf.float32, tf.int64)>
# Inspect the first element of the dataset; f0, f1 and f2 stay bound
# afterwards and are reused in the call shown further down.
for f0, f1, f2 in features_dataset.take(1):
    print(f0)
    print(f1)
    print(f2)
def tf_serialize_example(f0, f1, f2):
    """Call ``serialize_example`` via ``tf.py_function``.

    Returns:
        The ``tf.string`` result reshaped to a scalar.
    """
    tf_string = tf.py_function(
        serialize_example,
        (f0, f1, f2),  # Pass these args to the above function.
        tf.string)  # The return type is `tf.string`.
    return tf.reshape(tf_string, ())  # The result is a scalar.
然而,当我尝试运行 tf_serialize_example(f0,f1,f2) 时,
收到如下错误:
InvalidArgumentError: TypeError: <tf.Tensor: shape=(2128,), dtype=float32, numpy=
array([-0.5435242 , 0.97947884, -0.74457455, ..., has type tensorflow.python.framework.ops.EagerTensor, but expected one of: int, long, float
Traceback (most recent call last):
我认为原因是:我的特征是数组,而不是单个数值。如何让这段代码适用于数组形式的特征,而不仅仅是单个数值?
好的,我现在有时间仔细看了一下。我注意到 features_dataset
和 tf_serialize_example
的用法来自 TensorFlow 官网上的教程。我不清楚这种方法有什么优点,也不知道在这种方法下该如何解决这个问题。
不过,下面是一个应该适用于你的代码的工作流程(我重新打开了生成的 tfrecords 文件,内容没有问题)。
import numpy as np
import tensorflow as tf
# some random data
x = np.random.randn(85)
y = np.random.randn(85,2128)
z = np.random.choice(range(10),(85,155))
def _float_feature(value):
    """Return a ``tf.train.Feature`` float_list built from ``value``.

    ``value`` may be a scalar, a sequence, or an array of any shape; it is
    flattened to one dimension, so the original shape is not stored.
    """
    # np.asarray also accepts scalars and lists, which have no .flatten();
    # tolist() hands plain Python numbers to the protobuf constructor.
    flat_values = np.asarray(value).ravel().tolist()
    return tf.train.Feature(float_list=tf.train.FloatList(value=flat_values))
def _int64_feature(value):
    """Return a ``tf.train.Feature`` int64_list built from ``value``.

    ``value`` may be a scalar, a sequence, or an array of any shape; it is
    flattened to one dimension, so the original shape is not stored.
    """
    # np.asarray also accepts scalars and lists, which have no .flatten();
    # tolist() hands plain Python numbers to the protobuf constructor.
    flat_values = np.asarray(value).ravel().tolist()
    return tf.train.Feature(int64_list=tf.train.Int64List(value=flat_values))
def serialize_example(feature0, feature1, feature2):
    """Build a ``tf.train.Example`` message from three features.

    Args:
        feature0: Data stored under ``'feature0'`` as a float feature.
        feature1: Data stored under ``'feature1'`` as a float feature.
        feature2: Data stored under ``'feature2'`` as an int64 feature.

    Returns:
        The ``tf.train.Example`` message itself, not yet serialized; the
        caller invokes ``SerializeToString()`` before writing it.
    """
    # Create a dictionary mapping the feature name to the tf.Example-compatible
    # data type.
    feature = {
        'feature0': _float_feature(feature0),
        'feature1': _float_feature(feature1),
        'feature2': _int64_feature(feature2),
    }
    # Create a Features message using tf.train.Example.
    return tf.train.Example(features=tf.train.Features(feature=feature))
# Build the message first so a failure here does not leave an open file.
# Note: the full x, y and z arrays go into a single record.
example = serialize_example(x, y, z)
# tf.io.TFRecordWriter replaces tf.python_io.TFRecordWriter, which is gone in
# TensorFlow 2; the with-block closes the file even if the write raises.
with tf.io.TFRecordWriter('TEST.tfrecords') as writer:
    writer.write(example.SerializeToString())
此代码的主要区别在于:传给 serialize_example 的是 NumPy 数组,而不是 TensorFlow 张量。
希望这对你有帮助。