如何*正确*将数据从 csv 读取到 TensorFlow 1

How to *correctly* read data from csv's into TensorFlow 1

我发现 向我们展示了如何设置代码以使用队列读取 csv 文件。但是,每次我运行它时,我运行都会出错。我试过调试它,但无法弄清楚错误的含义。谁能帮帮我?

我使用的代码几乎是逐字逐句地写在上面 post 中 post:

import tensorflow as tf

dataset = '/Users/hdadmin/Data/actions/testing.csv'

def file_len(fname):
    with open(fname) as f:
        for i, l in enumerate(f):
            pass
    return i + 1

def read_from_csv(filename_queue):
  reader = tf.TextLineReader(skip_header_lines=1)
  _, csv_row = reader.read(filename_queue)
  record_defaults = [[0],[0],[0],[0],[0]]
  colHour,colQuarter,colAction,colUser,colLabel = tf.decode_csv(csv_row, record_defaults=record_defaults)
  features = tf.pack([colHour,colQuarter,colAction,colUser])  
  label = tf.pack([colLabel])  
  return features, label

def input_pipeline(batch_size, num_epochs=None):
  filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)  
  example, label = read_from_csv(filename_queue)
  min_after_dequeue = 1000
  capacity = min_after_dequeue + 3 * batch_size
  example_batch, label_batch = tf.train.shuffle_batch(
      [example, label], batch_size=batch_size, capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  return example_batch, label_batch

file_length = file_len(dataset) - 1
examples, labels = input_pipeline(file_length, 1)

with tf.Session() as sess:
  tf.initialize_all_variables().run()

  # start populating filename queue
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)

  try:
    while not coord.should_stop():
      example_batch, label_batch = sess.run([examples, labels])
      print(example_batch)
  except tf.errors.OutOfRangeError:
    print('Done training, epoch reached')
  finally:
    coord.request_stop()

  coord.join(threads) 

我得到的错误是:

E tensorflow/core/client/tensor_c_api.cc:485] Attempting to use uninitialized value input_producer/limit_epochs/epochs
     [[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
E tensorflow/core/client/tensor_c_api.cc:485] RandomShuffleQueue '_2_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10000, current size 0)
     [[Node: shuffle_batch = QueueDequeueMany[_class=["loc:@shuffle_batch/random_shuffle_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]
Done training, epoch reached
E tensorflow/core/client/tensor_c_api.cc:485] FIFOQueue '_0_input_producer' is closed and has insufficient elements (requested 1, current size 0)
     [[Node: ReaderRead = ReaderRead[_class=["loc:@TextLineReader", "loc:@input_producer"], _device="/job:localhost/replica:0/task:0/cpu:0"](TextLineReader, input_producer)]]
E tensorflow/core/client/tensor_c_api.cc:485] Queue '_2_shuffle_batch/random_shuffle_queue' is already closed.
     [[Node: shuffle_batch/random_shuffle_queue_Close = QueueClose[_class=["loc:@shuffle_batch/random_shuffle_queue"], cancel_pending_enqueues=false, _device="/job:localhost/replica:0/task:0/cpu:0"](shuffle_batch/random_shuffle_queue)]]
Traceback (most recent call last):
  File "csv_test.py", line 49, in <module>
    coord.join(threads) 
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/coordinator.py", line 357, in join
    six.reraise(*self._exc_info_to_raise)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/queue_runner.py", line 185, in _run
    sess.run(enqueue_op)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 382, in run
    run_metadata_ptr)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 655, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 723, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 743, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.FailedPreconditionError: Attempting to use uninitialized value input_producer/limit_epochs/epochs
     [[Node: input_producer/limit_epochs/CountUpTo = CountUpTo[T=DT_INT64, _class=["loc:@input_producer/limit_epochs/epochs"], limit=1, _device="/job:localhost/replica:0/task:0/cpu:0"](input_producer/limit_epochs/epochs)]]
Caused by op u'input_producer/limit_epochs/CountUpTo', defined at:
  File "csv_test.py", line 31, in <module>
    examples, labels = input_pipeline(file_length, 1)
  File "csv_test.py", line 21, in input_pipeline
    filename_queue = tf.train.string_input_producer([dataset], num_epochs=num_epochs, shuffle=True)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 194, in string_input_producer
    summary_name="fraction_of_%d_full" % capacity)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 133, in input_producer
    input_tensor = limit_epochs(input_tensor, num_epochs)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 84, in limit_epochs
    counter = epochs.count_up_to(num_epochs)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 577, in count_up_to
    return state_ops.count_up_to(self._variable, limit=limit)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_state_ops.py", line 127, in count_up_to
    result = _op_def_lib.apply_op("CountUpTo", ref=ref, limit=limit, name=name)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
    self._traceback = _extract_stack()

我制作了包含五列的数据以与示例匹配。大致如下:

"v1","v2","v3","v4","v5"
1,1,1,3,10
4,2,1,10,8
1,4,1,9,3
3,3,1,1,5
3,4,1,4,3
3,2,1,5,8
1,1,1,9,7
4,1,1,4,9
2,3,1,8,4

提前致谢。

我认为你缺少的是 local 变量的初始化(例如 input_producer/limit_epochs/epochs)。在 all 变量的初始化过程中,局部变量没有被初始化,顺便说一句,这很令人困惑。

您可以添加以下初始化操作,一次性初始化所有内容:

init_op = tf.group(tf.initialize_all_variables(),
                   tf.initialize_local_variables())

然后:

sess.run(init_op)