如何*正确*将数据从 csv 读取到 TensorFlow 1
How to *correctly* read data from csv's into TensorFlow 1
我发现 向我们展示了如何设置代码以使用队列读取 csv 文件。但是,每次我运行它时,我运行都会出错。我试过调试它,但无法弄清楚错误的含义。谁能帮帮我?
我使用的代码几乎是逐字逐句地写在上面 post 中 post:
import tensorflow as tf
dataset = '/Users/hdadmin/Data/actions/testing.csv'
def file_len(fname):
with open(fname) as f:
for i, l in enumerate(f):
pass
return i + 1
def read_from_csv(filename_queue):
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
record_defaults = [[0],[0],[0],[0],[0]]
colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([colHour,colQuarter,colAction,colUser])
label = tf.pack([colLabel])
return features, label
def input_pipeline(batch_size, num_epochs=None):
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
example, label = read_from_csv(filename_queue)
min_after_dequeue = 1000
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch(
[example, label], batch_size=batch_size, capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch
file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)
with tf.Session() as sess:
tf.initialize_all_variables().run()
# start populating filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
try:
while not coord.should_stop():
example_batch, label_batch = sess.run([examples, labels])
print(example_batch)
except tf.errors.OutOfRangeError:
print('Done training, epoch reached')
finally:
coord.request_stop()
coord.join(threads)
我得到的错误是:
E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
[[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
[[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
[[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
File "csv_test.py", line 49, in <module>
coord.join(threads)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
six.reraise(*self._exc_info_to_raise)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
sess.run(enqueue_op)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
File "csv_test.py", line 31, in <module>
examples, labels = input_pipeline(file_length, 1)
File "csv_test.py", line 21, in input_pipeline
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
summary_name="fraction_of_%d_full" % capacity)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
input_tensor = limit_epochs(input_tensor, num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
counter = epochs.count_up_to(num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
return state_ops.count_up_to(self._variable, limit=limit)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()
我制作了包含五列的数据以与示例匹配。大致如下:
"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4
提前致谢。
我认为你缺少的是 local 变量的初始化(例如 input_producer/limit_epochs/epochs
)。在 all 变量的初始化过程中,局部变量没有被初始化,顺便说一句,这很令人困惑。
您可以添加以下初始化操作,一次性初始化所有内容:
init_op = tf.group(tf.initialize_all_variables(),
tf.initialize_local_variables())
然后:
sess.run(init_op)
我发现
我使用的代码几乎是逐字逐句地写在上面 post 中 post:
import tensorflow as tf
dataset = '/Users/hdadmin/Data/actions/testing.csv'
def file_len(fname):
with open(fname) as f:
for i, l in enumerate(f):
pass
return i + 1
def read_from_csv(filename_queue):
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
record_defaults = [[0],[0],[0],[0],[0]]
colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([colHour,colQuarter,colAction,colUser])
label = tf.pack([colLabel])
return features, label
def input_pipeline(batch_size, num_epochs=None):
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
example, label = read_from_csv(filename_queue)
min_after_dequeue = 1000
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch(
[example, label], batch_size=batch_size, capacity=capacity,
min_after_dequeue=min_after_dequeue)
return example_batch, label_batch
file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)
with tf.Session() as sess:
tf.initialize_all_variables().run()
# start populating filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
try:
while not coord.should_stop():
example_batch, label_batch = sess.run([examples, labels])
print(example_batch)
except tf.errors.OutOfRangeError:
print('Done training, epoch reached')
finally:
coord.request_stop()
coord.join(threads)
我得到的错误是:
E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
[[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
[[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
[[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
File "csv_test.py", line 49, in <module>
coord.join(threads)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
six.reraise(*self._exc_info_to_raise)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
sess.run(enqueue_op)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
[[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
File "csv_test.py", line 31, in <module>
examples, labels = input_pipeline(file_length, 1)
File "csv_test.py", line 21, in input_pipeline
filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
summary_name="fraction_of_%d_full" % capacity)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
input_tensor = limit_epochs(input_tensor, num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
counter = epochs.count_up_to(num_epochs)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
return state_ops.count_up_to(self._variable, limit=limit)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()
我制作了包含五列的数据以与示例匹配。大致如下:
"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4
提前致谢。
我认为你缺少的是 local 变量的初始化(例如 input_producer/limit_epochs/epochs
)。在 all 变量的初始化过程中,局部变量没有被初始化,顺便说一句,这很令人困惑。
您可以添加以下初始化操作,一次性初始化所有内容:
init_op = tf.group(tf.initialize_all_variables(),
tf.initialize_local_variables())
然后:
sess.run(init_op)