ValueError: Can't convert non-rectangular Python sequence to Tensor. (2)

ValueError: Can't convert non-rectangular Python sequence to Tensor. (2)

假设,我的数据如下(我们这里有90041行数据):

 2.268    7.042 5.781 5.399 5.373 5.423  -9.118   5.488   5.166   4.852  7.470  6.452  6.069     0 0 0 1 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.101    5.781 5.399 5.373 5.423 5.247   5.488   5.166   4.852   5.164  6.452  6.069  6.197     0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.222    5.399 5.373 5.423 5.247 5.485   5.166   4.852   5.164   4.943  6.069  6.197  6.434     0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.416    5.373 5.423 5.247 5.485 6.675   4.852   5.164   4.943   8.103  6.197  6.434  8.264     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 3.028    5.423 5.247 5.485 6.675 6.372   5.164   4.943   8.103  -9.152  6.434  8.264  9.047     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-1.235    5.247 5.485 6.675 6.372 5.669   4.943   8.103  -9.152  -8.536  8.264  9.047 11.954     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-0.953    5.485 6.675 6.372 5.669 5.304   8.103  -9.152  -8.536   5.433  9.047 11.954  6.703     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.233    6.675 6.372 5.669 5.304 5.461  -9.152  -8.536   5.433   4.924 11.954  6.703  6.407     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2.313    6.372 5.669 5.304 5.461 5.265  -8.536   5.433   4.924   5.007  6.703  6.407  6.088     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 2.314    5.669 5.304 5.461 5.265 5.379   5.433   4.924   5.007   5.057  6.407  6.088  6.410     0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
... ... ...
... ... ...

现在,让我们看看下面的源代码:

import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import sys, random
import time

import tensorflow as tf
from   tensorflow import keras
from   tensorflow.keras.models import Sequential
from   tensorflow.keras.layers import Dense
from   tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np

def load_data_func(fname: str, yyy_index: int, **selection):
    i = 0
    file = open(fname)
    if "top_n_lines" in selection:
        lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
    elif "random_n_lines" in selection:
        tmp_lines = file.readlines()
        lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
    else:
        lines = file.readlines()

    data_x, data_y = [], []
    for l in lines:
        row = l.strip().split()  # return a list of words from the line.
        x = [float(ix) for ix in row[yyy_index+1:]]  # convert 3rd to 20th word into a vector of float numbers.
        y = float(row[yyy_index])  # select the 7th column.
        data_x.append(x)  # append the vector into 'data_x'
        data_y.append(y)  # append the vector into 'data_y'
    # END for l in lines

    num_rows = len(data_x)

    print("row size = ", len(data_x[0]))

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        valid_x, valid_y = data_x, data_y
    else:
        n = int(num_rows * given_fraction)
        data_x, data_y = data_x[n:], data_y[n:]
        valid_x, valid_y = data_x[:n], data_y[:n]
    # END of if-else block

    print("size of x = ", len(data_x))
    print("size of y = ", len(data_y))

    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
    ty = tf.convert_to_tensor(data_y, dtype=tf.float32)

    vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
    vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)

    return tx, ty, vx, vy
# END of the function

当我这样称呼它时:

train_x, train_y, validate_x, validate_y = \
        load_data_func(
            fname="data_file.dat",
            yyy_index=6,
            random_n_lines=90000,
            validation_part=0.2
        )

print("row count", len(train_x))
print("col count", len(train_x[0]))

我收到以下错误:

my_user@my_remote_server:~/my_project_dir$ python3 load_data_test.py
row size =  40
size of x =  72000
size of y =  72000
Traceback (most recent call last):
  File "load_data_test.py", line 74, in <module>
    validation_part=0.2
  File "load_data_test.py", line 58, in load_data_func
    tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 206, in wrapper
    return target(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1431, in convert_to_tensor_v2_with_dispatch
    value, dtype=dtype, dtype_hint=dtype_hint, name=name)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1441, in convert_to_tensor_v2
    as_ref=False)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1566, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
    allow_broadcast=True)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Can't convert non-rectangular Python sequence to Tensor.
my_user@my_remote_server:~/my_project_dir$

有趣的是,以下调用没有显示任何错误:

train_x, train_y, validate_x, validate_y = \
        load_data_func(
            fname="data_file.dat",
            yyy_index=6,
            top_n_lines=90000, #<============
            validation_part=0.2
        )


train_x, train_y, validate_x, validate_y = \
    load_data_k(
        fname="data_file.dat",
        yyy_index=6,
        random_n_lines=60000,  #<=============
        validation_part=0.2
    )

我做错了什么?

你检查过你数据的最后一行了吗?如果它的行数与其他行的行数不同,则变量 data_x 不再是矩形,这会引发值错误,如 M.Innat 的 link 中所述。
它还可以解释为什么您使用前 n 行和 6000 行随机行没有错误(只要不包含最后一层,您的代码就可以工作)