如何正确组合 tf.data.Dataset 和 tf.estimator.DNNRegressor
How to combine tf.data.Dataset and tf.estimator.DNNRegressor properly
我目前正在学习使用 TensorFlow,但上手时遇到了困难。
我想使用最新的 API,即估算器(Estimator)和数据集(Dataset)。但是,运行下面的代码时会出现错误。
在 tensorflow 页面 https://www.tensorflow.org/api_docs/python/tf/estimator/DNNRegressor 我发现 "The function should construct and return one of the following: * A tf.data.Dataset object: Outputs of Dataset object must be a tuple (features, labels) with same constraints as below."
我以为我的代码已经满足了这些要求,但似乎仍有问题,而我毫无头绪。
import tensorflow as tf
def input_evaluation_set():
    """Build a dataset of (feature, label) pairs for the constant-1 target.

    Each of the 11 sample points in [0, 1] is paired with the label 1.

    NOTE: the features are emitted as a bare tensor rather than as a dict
    keyed by feature-column name. This is the version the question reports
    as failing with a ValueError inside ``estimator.train``.
    """
    data = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    # One label per sample; every target value is the constant 1.
    labels = [1 for _ in data]
    features_tensor = tf.constant(data)
    labels_tensor = tf.constant(labels)
    return tf.data.Dataset.from_tensor_slices((features_tensor, labels_tensor))
# Single numeric feature column named 'points'.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
)

# Train using the dataset returned by the input function.
estimator.train(input_fn=input_evaluation_set)
我期望这段代码能训练一个具有 3 个隐藏层、每层 100 个神经元的深度神经网络,以逼近“常数 1”函数;
但实际上得到了如下错误:"ValueError: features should be a dictionary of `Tensor`s. Given type: <class 'tensorflow.python.framework.ops.Tensor'>"(即:特征应该是由张量组成的字典)。
您代码中的主要问题是:您把数据集中的数据作为普通张量直接传入。但数据集中的输入特征应该是一个字典,其键名必须与特征列中使用的键名相同。除此之外,我还为输入数据添加了一个额外的维度。以下代码可以正常运行。
import tensorflow as tf
import numpy as np
### DEFINE NEW MAP FUNCTION
def map_fn(d, l):
    """Wrap a feature value in a dict keyed by 'points'.

    Args:
        d: The feature value of one dataset element.
        l: The label of the same element; passed through unchanged.

    Returns:
        A ``(features, label)`` tuple where ``features`` is ``{'points': d}``.
    """
    features = {'points': d}
    return features, l
def input_evaluation_set():
    """Build a dataset of ({'points': x}, label) pairs for the constant-1 target.

    Two things are done on top of slicing the raw values: every sample and
    label gets a trailing axis of length 1, and each feature tensor is
    wrapped in a dict keyed by the feature-column name via ``map_fn``.
    """
    raw_points = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    # One label per sample; every target value is the constant 1.
    raw_labels = [1] * len(raw_points)

    ### CHANGE STARTS HERE
    # Append a trailing axis so both arrays have shape (11, 1).
    data = np.expand_dims(np.array(raw_points), axis=-1)
    labels = np.expand_dims(np.array(raw_labels), axis=-1)
    ### CHANGE ENDS HERE

    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.constant(data), tf.constant(labels))
    )

    ### CREATE DICTIONARY PAIR IN INPUT DATA
    return dataset.map(map_fn)
# Numeric feature column; its name matches the 'points' key that map_fn
# puts in the features dict.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
)

# Train using the dataset returned by the input function.
estimator.train(input_fn=input_evaluation_set)
您需要对数据集(Dataset)调用 .batch,才能得到正确的格式。
以下代码在我的电脑上可以正常运行:
import tensorflow as tf
import numpy as np
def basic_dataset(numPoints):
    """Return a dataset of ({'points': x}, 1) pairs.

    Args:
        numPoints: Number of evenly spaced sample points taken from [0, 1].

    Returns:
        A ``tf.data.Dataset`` built by slicing the feature dict and the
        label array along their first axis.
    """
    samples = np.linspace(0, 1, numPoints)
    # Features are keyed by the feature-column name.
    features = {'points': samples}
    # One constant label (1) per sample point.
    targets = np.array([1 for _ in samples])
    return tf.data.Dataset.from_tensor_slices((features, targets))
def input_train_set():
    """Return the training input: 11 points, repeated, shuffled and batched."""
    base = basic_dataset(11)
    repeated = base.repeat(100)
    shuffled = repeated.shuffle(1000)
    # Group elements into batches of size 1.
    return shuffled.batch(1)
# Numeric feature column; its name matches the 'points' key of the
# features dict built in basic_dataset.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each and a
# one-dimensional label.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
    label_dimension=1,
)

# Train using the batched dataset returned by the input function.
estimator.train(input_fn=input_train_set)
我目前正在学习使用 TensorFlow,但上手时遇到了困难。 我想使用最新的 API,即估算器(Estimator)和数据集(Dataset)。但是,运行下面的代码时会出现错误。
在 tensorflow 页面 https://www.tensorflow.org/api_docs/python/tf/estimator/DNNRegressor 我发现 "The function should construct and return one of the following: * A tf.data.Dataset object: Outputs of Dataset object must be a tuple (features, labels) with same constraints as below."
我以为我的代码已经满足了这些要求,但似乎仍有问题,而我毫无头绪。
import tensorflow as tf
def input_evaluation_set():
    """Build a dataset of (feature, label) pairs for the constant-1 target.

    Each of the 11 sample points in [0, 1] is paired with the label 1.

    NOTE: the features are emitted as a bare tensor rather than as a dict
    keyed by feature-column name. This is the version the question reports
    as failing with a ValueError inside ``estimator.train``.
    """
    data = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    # One label per sample; every target value is the constant 1.
    labels = [1 for _ in data]
    features_tensor = tf.constant(data)
    labels_tensor = tf.constant(labels)
    return tf.data.Dataset.from_tensor_slices((features_tensor, labels_tensor))
# Single numeric feature column named 'points'.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
)

# Train using the dataset returned by the input function.
estimator.train(input_fn=input_evaluation_set)
我期望这段代码能训练一个具有 3 个隐藏层、每层 100 个神经元的深度神经网络,以逼近“常数 1”函数; 但实际上得到了如下错误:"ValueError: features should be a dictionary of `Tensor`s. Given type: <class 'tensorflow.python.framework.ops.Tensor'>"(即:特征应该是由张量组成的字典)。
您代码中的主要问题是:您把数据集中的数据作为普通张量直接传入。但数据集中的输入特征应该是一个字典,其键名必须与特征列中使用的键名相同。除此之外,我还为输入数据添加了一个额外的维度。以下代码可以正常运行。
import tensorflow as tf
import numpy as np
### DEFINE NEW MAP FUNCTION
def map_fn(d, l):
    """Wrap a feature value in a dict keyed by 'points'.

    Args:
        d: The feature value of one dataset element.
        l: The label of the same element; passed through unchanged.

    Returns:
        A ``(features, label)`` tuple where ``features`` is ``{'points': d}``.
    """
    features = {'points': d}
    return features, l
def input_evaluation_set():
    """Build a dataset of ({'points': x}, label) pairs for the constant-1 target.

    Two things are done on top of slicing the raw values: every sample and
    label gets a trailing axis of length 1, and each feature tensor is
    wrapped in a dict keyed by the feature-column name via ``map_fn``.
    """
    raw_points = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    # One label per sample; every target value is the constant 1.
    raw_labels = [1] * len(raw_points)

    ### CHANGE STARTS HERE
    # Append a trailing axis so both arrays have shape (11, 1).
    data = np.expand_dims(np.array(raw_points), axis=-1)
    labels = np.expand_dims(np.array(raw_labels), axis=-1)
    ### CHANGE ENDS HERE

    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.constant(data), tf.constant(labels))
    )

    ### CREATE DICTIONARY PAIR IN INPUT DATA
    return dataset.map(map_fn)
# Numeric feature column; its name matches the 'points' key that map_fn
# puts in the features dict.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
)

# Train using the dataset returned by the input function.
estimator.train(input_fn=input_evaluation_set)
您需要对数据集(Dataset)调用 .batch,才能得到正确的格式。
以下代码在我的电脑上可以正常运行:
import tensorflow as tf
import numpy as np
def basic_dataset(numPoints):
    """Return a dataset of ({'points': x}, 1) pairs.

    Args:
        numPoints: Number of evenly spaced sample points taken from [0, 1].

    Returns:
        A ``tf.data.Dataset`` built by slicing the feature dict and the
        label array along their first axis.
    """
    samples = np.linspace(0, 1, numPoints)
    # Features are keyed by the feature-column name.
    features = {'points': samples}
    # One constant label (1) per sample point.
    targets = np.array([1 for _ in samples])
    return tf.data.Dataset.from_tensor_slices((features, targets))
def input_train_set():
    """Return the training input: 11 points, repeated, shuffled and batched."""
    base = basic_dataset(11)
    repeated = base.repeat(100)
    shuffled = repeated.shuffle(1000)
    # Group elements into batches of size 1.
    return shuffled.batch(1)
# Numeric feature column; its name matches the 'points' key of the
# features dict built in basic_dataset.
point = tf.feature_column.numeric_column('points')

# Regressor with three hidden layers of 100 units each and a
# one-dimensional label.
estimator = tf.estimator.DNNRegressor(
    feature_columns=[point],
    hidden_units=[100, 100, 100],
    label_dimension=1,
)

# Train using the batched dataset returned by the input function.
estimator.train(input_fn=input_train_set)