ValueError: Can't convert non-rectangular Python sequence to Tensor. (2)
ValueError: Can't convert non-rectangular Python sequence to Tensor. (2)
假设,我的数据如下(我们这里有90041行数据):
2.268 7.042 5.781 5.399 5.373 5.423 -9.118 5.488 5.166 4.852 7.470 6.452 6.069 0 0 0 1 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.101 5.781 5.399 5.373 5.423 5.247 5.488 5.166 4.852 5.164 6.452 6.069 6.197 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.222 5.399 5.373 5.423 5.247 5.485 5.166 4.852 5.164 4.943 6.069 6.197 6.434 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.416 5.373 5.423 5.247 5.485 6.675 4.852 5.164 4.943 8.103 6.197 6.434 8.264 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3.028 5.423 5.247 5.485 6.675 6.372 5.164 4.943 8.103 -9.152 6.434 8.264 9.047 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-1.235 5.247 5.485 6.675 6.372 5.669 4.943 8.103 -9.152 -8.536 8.264 9.047 11.954 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-0.953 5.485 6.675 6.372 5.669 5.304 8.103 -9.152 -8.536 5.433 9.047 11.954 6.703 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.233 6.675 6.372 5.669 5.304 5.461 -9.152 -8.536 5.433 4.924 11.954 6.703 6.407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.313 6.372 5.669 5.304 5.461 5.265 -8.536 5.433 4.924 5.007 6.703 6.407 6.088 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
2.314 5.669 5.304 5.461 5.265 5.379 5.433 4.924 5.007 5.057 6.407 6.088 6.410 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
... ... ...
... ... ...
现在,让我们看看下面的源代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
def load_data_func(fname: str, yyy_index: int, **selection):
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y = [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[yyy_index+1:]] # convert 3rd to 20th word into a vector of float numbers.
y = float(row[yyy_index]) # select the 7th column.
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
# END for l in lines
num_rows = len(data_x)
print("row size = ", len(data_x[0]))
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y = data_x, data_y
else:
n = int(num_rows * given_fraction)
data_x, data_y = data_x[n:], data_y[n:]
valid_x, valid_y = data_x[:n], data_y[:n]
# END of if-else block
print("size of x = ", len(data_x))
print("size of y = ", len(data_y))
tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
ty = tf.convert_to_tensor(data_y, dtype=tf.float32)
vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)
return tx, ty, vx, vy
# END of the function
当我这样称呼它时:
train_x, train_y, validate_x, validate_y = \
load_data_func(
fname="data_file.dat",
yyy_index=6,
random_n_lines=90000,
validation_part=0.2
)
print("row count", len(train_x))
print("col count", len(train_x[0]))
我收到以下错误:
my_user@my_remote_server:~/my_project_dir$ python3 load_data_test.py
row size = 40
size of x = 72000
size of y = 72000
Traceback (most recent call last):
File "load_data_test.py", line 74, in <module>
validation_part=0.2
File "load_data_test.py", line 58, in load_data_func
tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 206, in wrapper
return target(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1431, in convert_to_tensor_v2_with_dispatch
value, dtype=dtype, dtype_hint=dtype_hint, name=name)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1441, in convert_to_tensor_v2
as_ref=False)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1566, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
allow_broadcast=True)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Can't convert non-rectangular Python sequence to Tensor.
my_user@my_remote_server:~/my_project_dir$
有趣的是,以下调用没有显示任何错误:
train_x, train_y, validate_x, validate_y = \
load_data_func(
fname="data_file.dat",
yyy_index=6,
top_n_lines=90000, #<============
validation_part=0.2
)
train_x, train_y, validate_x, validate_y = \
load_data_k(
fname="data_file.dat",
yyy_index=6,
random_n_lines=60000, #<=============
validation_part=0.2
)
我做错了什么?
你检查过你数据的最后一行了吗?如果它的行数与其他行的行数不同,则变量 data_x 不再是矩形,这会引发值错误,如 M.Innat 的 link 中所述。
它还可以解释为什么您使用前 n 行和 6000 行随机行没有错误(只要不包含最后一层,您的代码就可以工作)
假设,我的数据如下(我们这里有90041行数据):
2.268 7.042 5.781 5.399 5.373 5.423 -9.118 5.488 5.166 4.852 7.470 6.452 6.069 0 0 0 1 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.101 5.781 5.399 5.373 5.423 5.247 5.488 5.166 4.852 5.164 6.452 6.069 6.197 0 1 1 3 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.222 5.399 5.373 5.423 5.247 5.485 5.166 4.852 5.164 4.943 6.069 6.197 6.434 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.416 5.373 5.423 5.247 5.485 6.675 4.852 5.164 4.943 8.103 6.197 6.434 8.264 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3.028 5.423 5.247 5.485 6.675 6.372 5.164 4.943 8.103 -9.152 6.434 8.264 9.047 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-1.235 5.247 5.485 6.675 6.372 5.669 4.943 8.103 -9.152 -8.536 8.264 9.047 11.954 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-0.953 5.485 6.675 6.372 5.669 5.304 8.103 -9.152 -8.536 5.433 9.047 11.954 6.703 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.233 6.675 6.372 5.669 5.304 5.461 -9.152 -8.536 5.433 4.924 11.954 6.703 6.407 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2.313 6.372 5.669 5.304 5.461 5.265 -8.536 5.433 4.924 5.007 6.703 6.407 6.088 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
2.314 5.669 5.304 5.461 5.265 5.379 5.433 4.924 5.007 5.057 6.407 6.088 6.410 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
... ... ...
... ... ...
现在,让我们看看下面的源代码:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np
def load_data_func(fname: str, yyy_index: int, **selection):
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y = [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[yyy_index+1:]] # convert 3rd to 20th word into a vector of float numbers.
y = float(row[yyy_index]) # select the 7th column.
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
# END for l in lines
num_rows = len(data_x)
print("row size = ", len(data_x[0]))
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y = data_x, data_y
else:
n = int(num_rows * given_fraction)
data_x, data_y = data_x[n:], data_y[n:]
valid_x, valid_y = data_x[:n], data_y[:n]
# END of if-else block
print("size of x = ", len(data_x))
print("size of y = ", len(data_y))
tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
ty = tf.convert_to_tensor(data_y, dtype=tf.float32)
vx = tf.convert_to_tensor(valid_x, dtype=tf.float32)
vy = tf.convert_to_tensor(valid_y, dtype=tf.float32)
return tx, ty, vx, vy
# END of the function
当我这样称呼它时:
train_x, train_y, validate_x, validate_y = \
load_data_func(
fname="data_file.dat",
yyy_index=6,
random_n_lines=90000,
validation_part=0.2
)
print("row count", len(train_x))
print("col count", len(train_x[0]))
我收到以下错误:
my_user@my_remote_server:~/my_project_dir$ python3 load_data_test.py
row size = 40
size of x = 72000
size of y = 72000
Traceback (most recent call last):
File "load_data_test.py", line 74, in <module>
validation_part=0.2
File "load_data_test.py", line 58, in load_data_func
tx = tf.convert_to_tensor(data_x, dtype=tf.float32)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py", line 206, in wrapper
return target(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1431, in convert_to_tensor_v2_with_dispatch
value, dtype=dtype, dtype_hint=dtype_hint, name=name)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1441, in convert_to_tensor_v2
as_ref=False)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1566, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 339, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 265, in constant
allow_broadcast=True)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 276, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 301, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py", line 98, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Can't convert non-rectangular Python sequence to Tensor.
my_user@my_remote_server:~/my_project_dir$
有趣的是,以下调用没有显示任何错误:
train_x, train_y, validate_x, validate_y = \
load_data_func(
fname="data_file.dat",
yyy_index=6,
top_n_lines=90000, #<============
validation_part=0.2
)
train_x, train_y, validate_x, validate_y = \
load_data_k(
fname="data_file.dat",
yyy_index=6,
random_n_lines=60000, #<=============
validation_part=0.2
)
我做错了什么?
你检查过你数据的最后一行了吗?如果它的行数与其他行的行数不同,则变量 data_x 不再是矩形,这会引发值错误,如 M.Innat 的 link 中所述。
它还可以解释为什么您使用前 n 行和 6000 行随机行没有错误(只要不包含最后一层,您的代码就可以工作)