AbortedError: Session 902faae618c6a822 is not found
AbortedError: Session 902faae618c6a822 is not found
这是我的代码。它在 Google Colab 的 CPU/GPU 模式下可以正常运行,但在 TPU 模式下无法运行,我不知道为什么,有人知道原因吗?
我的tensorflow版本是1.15.0
x_train.shape 是 (13887, 3000),y_train.shape 是 (13887,8)
另外,相关参数如下:
# Vocabulary size; passed to the Embedding layer as input_dim.
num_features = 301+1
# Length of every input sequence; used as the Embedding input_length and as
# the height of the reshaped 2-D feature map.
sequence_length = 3000
# Size of each embedding vector; used as the Embedding output_dim and as the
# width of every Conv2D kernel.
embedding_dimension = 1000
非常感谢!
# Kernel heights (number of sequence positions covered) for the parallel
# convolution branches; one Conv2D + MaxPool2D branch is built per entry.
# NOTE(review): 2 appears twice, so two branches share the same kernel size --
# confirm this is intentional.
filter_sizes = [2, 2, 3, 4]


def convolution():
    """Build the multi-branch convolution sub-model.

    The model takes a tensor of shape
    (sequence_length, embedding_dimension, 1) per sample and, for every
    entry in ``filter_sizes``, applies a 64-filter Conv2D whose kernel spans
    the full embedding width, followed by a MaxPool2D whose window covers the
    whole convolution output. The pooled branches are concatenated.

    Returns:
        keras.Model mapping the input tensor to the concatenated branches.
    """
    inn = layers.Input(shape=(sequence_length, embedding_dimension, 1))
    cnns = []
    for size in filter_sizes:
        conv = layers.Conv2D(filters=64, kernel_size=(size, embedding_dimension),
                             strides=1, padding='valid', activation='elu')(inn)
        # With 'valid' padding and stride 1 the conv output is
        # (sequence_length - size + 1) x 1, so this window pools over the
        # entire feature map of each filter.
        pool = layers.MaxPool2D(pool_size=(sequence_length - size + 1, 1),
                                padding='valid')(conv)
        cnns.append(pool)
    outt = layers.concatenate(cnns)
    return keras.Model(inputs=inn, outputs=outt)
def cnn_mulfilter():
    """Build the full multi-filter CNN classifier.

    Pipeline: Embedding -> Reshape to a single-channel 2-D map -> the
    multi-branch model returned by ``convolution()`` -> Flatten ->
    Dense(20, elu) -> Dropout(0.2) -> Dense(8, softmax).

    Returns:
        An uncompiled keras.Sequential model with an 8-way softmax output.
    """
    model = keras.Sequential([
        layers.Embedding(input_dim=num_features, output_dim=embedding_dimension,
                         input_length=sequence_length),
        # Add a trailing channel axis so the embedded sequence can be fed
        # to Conv2D.
        layers.Reshape((sequence_length, embedding_dimension, 1)),
        convolution(),
        layers.Flatten(),
        layers.Dense(20, activation='elu'),
        layers.Dropout(0.2),
        layers.Dense(8, activation='softmax'),
    ])
    return model
import os
# Connect to the Colab-provided TPU (address comes from the COLAB_TPU_ADDR
# environment variable) and create a TPU distribution strategy.
resolver = tf.contrib.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.contrib.distribute.initialize_tpu_system(resolver)
strategy = tf.contrib.distribute.TPUStrategy(resolver)

# Build and compile the model under the strategy scope.
# NOTE(review): the pasted source lost its indentation; placing only model
# construction and compile() inside the scope is a reconstruction -- confirm
# against the original notebook.
with strategy.scope():
    model = cnn_mulfilter()
    model.compile(optimizer=keras.optimizers.Adam(1e-4),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): steps_per_epoch is given the value of batch_size, which is not
# defined in the visible code; the number of steps per epoch and the batch
# size are different quantities -- confirm this is what was intended.
history = model.fit(x_train.astype(np.float32), y_train.astype(np.float32),
                    steps_per_epoch=batch_size,
                    epochs=30,
                    validation_data=(x_test.astype(np.float32), y_test.astype(np.float32)),
                    validation_freq=30)
报错如下
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1364 try:
-> 1365 return fn(*args)
1366 except errors.OpError as e:
10 frames
AbortedError: Session e6d3a6fe721e9009 is not found.
During handling of the above exception, another exception occurred:
AbortedError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
AbortedError: Session e6d3a6fe721e9009 is not found.
问题在于文件位于本地文件系统中,而 TPU 不支持本地文件系统,尽管错误信息看起来很奇怪、与此并无明显关联。
改用 TFRecords 解决了问题。
这是我的代码。它在 Google Colab 的 CPU/GPU 模式下可以正常运行,但在 TPU 模式下无法运行,我不知道为什么,有人知道原因吗?
我的tensorflow版本是1.15.0
x_train.shape 是 (13887, 3000),y_train.shape 是 (13887,8)
另外,相关参数如下:
# Vocabulary size; passed to the Embedding layer as input_dim.
num_features = 301+1
# Length of every input sequence; used as the Embedding input_length and as
# the height of the reshaped 2-D feature map.
sequence_length = 3000
# Size of each embedding vector; used as the Embedding output_dim and as the
# width of every Conv2D kernel.
embedding_dimension = 1000
非常感谢!
# Kernel heights (number of sequence positions covered) for the parallel
# convolution branches; one Conv2D + MaxPool2D branch is built per entry.
# NOTE(review): 2 appears twice, so two branches share the same kernel size --
# confirm this is intentional.
filter_sizes = [2, 2, 3, 4]


def convolution():
    """Build the multi-branch convolution sub-model.

    The model takes a tensor of shape
    (sequence_length, embedding_dimension, 1) per sample and, for every
    entry in ``filter_sizes``, applies a 64-filter Conv2D whose kernel spans
    the full embedding width, followed by a MaxPool2D whose window covers the
    whole convolution output. The pooled branches are concatenated.

    Returns:
        keras.Model mapping the input tensor to the concatenated branches.
    """
    inn = layers.Input(shape=(sequence_length, embedding_dimension, 1))
    cnns = []
    for size in filter_sizes:
        conv = layers.Conv2D(filters=64, kernel_size=(size, embedding_dimension),
                             strides=1, padding='valid', activation='elu')(inn)
        # With 'valid' padding and stride 1 the conv output is
        # (sequence_length - size + 1) x 1, so this window pools over the
        # entire feature map of each filter.
        pool = layers.MaxPool2D(pool_size=(sequence_length - size + 1, 1),
                                padding='valid')(conv)
        cnns.append(pool)
    outt = layers.concatenate(cnns)
    return keras.Model(inputs=inn, outputs=outt)
def cnn_mulfilter():
    """Build the full multi-filter CNN classifier.

    Pipeline: Embedding -> Reshape to a single-channel 2-D map -> the
    multi-branch model returned by ``convolution()`` -> Flatten ->
    Dense(20, elu) -> Dropout(0.2) -> Dense(8, softmax).

    Returns:
        An uncompiled keras.Sequential model with an 8-way softmax output.
    """
    model = keras.Sequential([
        layers.Embedding(input_dim=num_features, output_dim=embedding_dimension,
                         input_length=sequence_length),
        # Add a trailing channel axis so the embedded sequence can be fed
        # to Conv2D.
        layers.Reshape((sequence_length, embedding_dimension, 1)),
        convolution(),
        layers.Flatten(),
        layers.Dense(20, activation='elu'),
        layers.Dropout(0.2),
        layers.Dense(8, activation='softmax'),
    ])
    return model
import os
# Connect to the Colab-provided TPU (address comes from the COLAB_TPU_ADDR
# environment variable) and create a TPU distribution strategy.
resolver = tf.contrib.cluster_resolver.TPUClusterResolver('grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.contrib.distribute.initialize_tpu_system(resolver)
strategy = tf.contrib.distribute.TPUStrategy(resolver)

# Build and compile the model under the strategy scope.
# NOTE(review): the pasted source lost its indentation; placing only model
# construction and compile() inside the scope is a reconstruction -- confirm
# against the original notebook.
with strategy.scope():
    model = cnn_mulfilter()
    model.compile(optimizer=keras.optimizers.Adam(1e-4),
                  loss=keras.losses.CategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): steps_per_epoch is given the value of batch_size, which is not
# defined in the visible code; the number of steps per epoch and the batch
# size are different quantities -- confirm this is what was intended.
history = model.fit(x_train.astype(np.float32), y_train.astype(np.float32),
                    steps_per_epoch=batch_size,
                    epochs=30,
                    validation_data=(x_test.astype(np.float32), y_test.astype(np.float32)),
                    validation_freq=30)
报错如下
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1364 try:
-> 1365 return fn(*args)
1366 except errors.OpError as e:
10 frames
AbortedError: Session e6d3a6fe721e9009 is not found.
During handling of the above exception, another exception occurred:
AbortedError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in _do_call(self, fn, *args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def, op, message)
1385
1386 def _extend_graph(self):
AbortedError: Session e6d3a6fe721e9009 is not found.
问题在于文件位于本地文件系统中,而 TPU 不支持本地文件系统,尽管错误信息看起来很奇怪、与此并无明显关联。
改用 TFRecords 解决了问题。