使用 GPU 时未找到 Tensorflow _tpu_ops.so
Tensorflow _tpu_ops.so not found while using GPU
我将这个 BERT NER 的 GitHub 代码(https://github.com/kyzhouhzau/BERT-NER)移植到了 Google Colab,并在其中手动设置各个标志(flags)来运行它。
我把use_tpu设为False,所以应该是用GPU。
flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
Colab 上使用的 TF 版本是 1.13.1,命令 tf.test.gpu_device_name() 返回 '/device:GPU:0'。
这是我在运行 tf.app.run() 时收到的错误消息。失败是因为它在寻找 TPU 吗?我该如何解决?感谢您的帮助!
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-53-d10a9cf14e41> in <module>()
----> 1 tf.app.run()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/platform/app.py in run(main, argv)
123 # Call the main function, passing through any arguments
124 # to the final program.
--> 125 _sys.exit(main(argv))
126
<ipython-input-26-fed5e3d99ff6> in main(_)
29 FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
30
---> 31 is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
32
33 run_config = tf.contrib.tpu.RunConfig(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/lazy_loader.py in __getattr__(self, item)
59
60 def __getattr__(self, item):
---> 61 module = self._load()
62 return getattr(module, item)
63
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/lazy_loader.py in _load(self)
42 """Load the module and insert it into the parent's globals."""
43 # Import the target module and insert it into the parent's namespace
---> 44 module = importlib.import_module(self.__name__)
45 self._parent_module_globals[self._local_name] = module
46
/usr/lib/python3.6/importlib/__init__.py in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
127
128
/usr/lib/python3.6/importlib/_bootstrap.py in _gcd_import(name, package, level)
/usr/lib/python3.6/importlib/_bootstrap.py in _find_and_load(name, import_)
/usr/lib/python3.6/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
/usr/lib/python3.6/importlib/_bootstrap.py in _load_unlocked(spec)
/usr/lib/python3.6/importlib/_bootstrap_external.py in exec_module(self, module)
/usr/lib/python3.6/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/__init__.py in <module>()
38 from tensorflow.contrib import data
39 from tensorflow.contrib import deprecated
---> 40 from tensorflow.contrib import distribute
41 from tensorflow.contrib import distributions
42 from tensorflow.contrib import estimator
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distribute/__init__.py in <module>()
31 from tensorflow.contrib.distribute.python.parameter_server_strategy import ParameterServerStrategy
32 from tensorflow.contrib.distribute.python.step_fn import *
---> 33 from tensorflow.contrib.distribute.python.tpu_strategy import TPUStrategy
34 from tensorflow.python.distribute.cross_device_ops import *
35 from tensorflow.python.distribute.distribute_config import DistributeConfig
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distribute/python/tpu_strategy.py in <module>()
25 import functools
26
---> 27 from tensorflow.contrib.tpu.python.ops import tpu_ops
28 from tensorflow.contrib.tpu.python.tpu import tpu
29 from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/__init__.py in <module>()
67 # pylint: disable=wildcard-import,unused-import
68 from tensorflow.contrib.tpu.python import profiler
---> 69 from tensorflow.contrib.tpu.python.ops.tpu_ops import *
70 from tensorflow.contrib.tpu.python.tpu.async_checkpoint import *
71 from tensorflow.contrib.tpu.python.tpu.bfloat16 import *
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/python/ops/tpu_ops.py in <module>()
37
38 _tpu_ops = loader.load_op_library(
---> 39 resource_loader.get_path_to_datafile("_tpu_ops.so"))
40
41 def _create_default_group_assignment():
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/util/loader.py in load_op_library(path)
54 return None
55 path = resource_loader.get_path_to_datafile(path)
---> 56 ret = load_library.load_op_library(path)
57 assert ret, 'Could not load %s' % path
58 return ret
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/load_library.py in load_op_library(library_filename)
59 RuntimeError: when unable to load the library or get the python wrappers.
60 """
---> 61 lib_handle = py_tf.TF_LoadLibrary(library_filename)
62
63 op_list_str = py_tf.TF_GetOpList(lib_handle)
NotFoundError: /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/python/ops/_tpu_ops.so: undefined symbol: _ZN6google8protobuf5Arena18CreateMaybeMessageIN10tensorflow9AttrValueEIEEEPT_PS1_DpOT0_
我找到原因了。当我使用 !pip install git+https://github.com/guillaumegenthial/tf_metrics.git 从 https://github.com/guillaumegenthial/tf_metrics.git 安装 tf_metrics 库时,它不知为何重新安装了 tensorflow-gpu,我猜这破坏了原有的 TensorFlow 安装。
我改为单独下载 tf_metrics.py,现在代码可以在 Google Colab 上正常运行了。
我将这个 BERT NER 的 GitHub 代码(https://github.com/kyzhouhzau/BERT-NER)移植到了 Google Colab,并在其中手动设置各个标志(flags)来运行它。
我把use_tpu设为False,所以应该是用GPU。
flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
Colab 上使用的 TF 版本是 1.13.1,命令 tf.test.gpu_device_name() 返回 '/device:GPU:0'。
这是我在运行 tf.app.run() 时收到的错误消息。失败是因为它在寻找 TPU 吗?我该如何解决?感谢您的帮助!
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
<ipython-input-53-d10a9cf14e41> in <module>()
----> 1 tf.app.run()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/platform/app.py in run(main, argv)
123 # Call the main function, passing through any arguments
124 # to the final program.
--> 125 _sys.exit(main(argv))
126
<ipython-input-26-fed5e3d99ff6> in main(_)
29 FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
30
---> 31 is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
32
33 run_config = tf.contrib.tpu.RunConfig(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/lazy_loader.py in __getattr__(self, item)
59
60 def __getattr__(self, item):
---> 61 module = self._load()
62 return getattr(module, item)
63
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/lazy_loader.py in _load(self)
42 """Load the module and insert it into the parent's globals."""
43 # Import the target module and insert it into the parent's namespace
---> 44 module = importlib.import_module(self.__name__)
45 self._parent_module_globals[self._local_name] = module
46
/usr/lib/python3.6/importlib/__init__.py in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
127
128
/usr/lib/python3.6/importlib/_bootstrap.py in _gcd_import(name, package, level)
/usr/lib/python3.6/importlib/_bootstrap.py in _find_and_load(name, import_)
/usr/lib/python3.6/importlib/_bootstrap.py in _find_and_load_unlocked(name, import_)
/usr/lib/python3.6/importlib/_bootstrap.py in _load_unlocked(spec)
/usr/lib/python3.6/importlib/_bootstrap_external.py in exec_module(self, module)
/usr/lib/python3.6/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/__init__.py in <module>()
38 from tensorflow.contrib import data
39 from tensorflow.contrib import deprecated
---> 40 from tensorflow.contrib import distribute
41 from tensorflow.contrib import distributions
42 from tensorflow.contrib import estimator
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distribute/__init__.py in <module>()
31 from tensorflow.contrib.distribute.python.parameter_server_strategy import ParameterServerStrategy
32 from tensorflow.contrib.distribute.python.step_fn import *
---> 33 from tensorflow.contrib.distribute.python.tpu_strategy import TPUStrategy
34 from tensorflow.python.distribute.cross_device_ops import *
35 from tensorflow.python.distribute.distribute_config import DistributeConfig
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/distribute/python/tpu_strategy.py in <module>()
25 import functools
26
---> 27 from tensorflow.contrib.tpu.python.ops import tpu_ops
28 from tensorflow.contrib.tpu.python.tpu import tpu
29 from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/__init__.py in <module>()
67 # pylint: disable=wildcard-import,unused-import
68 from tensorflow.contrib.tpu.python import profiler
---> 69 from tensorflow.contrib.tpu.python.ops.tpu_ops import *
70 from tensorflow.contrib.tpu.python.tpu.async_checkpoint import *
71 from tensorflow.contrib.tpu.python.tpu.bfloat16 import *
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/python/ops/tpu_ops.py in <module>()
37
38 _tpu_ops = loader.load_op_library(
---> 39 resource_loader.get_path_to_datafile("_tpu_ops.so"))
40
41 def _create_default_group_assignment():
/usr/local/lib/python3.6/dist-packages/tensorflow/contrib/util/loader.py in load_op_library(path)
54 return None
55 path = resource_loader.get_path_to_datafile(path)
---> 56 ret = load_library.load_op_library(path)
57 assert ret, 'Could not load %s' % path
58 return ret
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/load_library.py in load_op_library(library_filename)
59 RuntimeError: when unable to load the library or get the python wrappers.
60 """
---> 61 lib_handle = py_tf.TF_LoadLibrary(library_filename)
62
63 op_list_str = py_tf.TF_GetOpList(lib_handle)
NotFoundError: /usr/local/lib/python3.6/dist-packages/tensorflow/contrib/tpu/python/ops/_tpu_ops.so: undefined symbol: _ZN6google8protobuf5Arena18CreateMaybeMessageIN10tensorflow9AttrValueEIEEEPT_PS1_DpOT0_
我找到原因了。当我使用 !pip install git+https://github.com/guillaumegenthial/tf_metrics.git 从 https://github.com/guillaumegenthial/tf_metrics.git 安装 tf_metrics 库时,它不知为何重新安装了 tensorflow-gpu,我猜这破坏了原有的 TensorFlow 安装。
我改为单独下载 tf_metrics.py,现在代码可以在 Google Colab 上正常运行了。