为什么在 WSL 中使用 RAPIDS 时会出现 CUDA 内存错误?
Why do I get a CUDA memory error when using RAPIDS in WSL?
我在 Windows 21H2 (19044.1348) 下安装了 WSL 2 (5.10.60.1-microsoft-standard-WSL2),使用 Pascal 架构的 GPU (1070) 和 NVIDIA 驱动程序 510.06。
我在 WSL 中使用默认的 Ubuntu 版本 (20.04.3 LTS)。
我尝试了 docker 和 anaconda 两种安装方式。我可以运行 Jupyter Notebook 并导入这些库,也可以创建一个 cudf DataFrame。但是向其中写入数据或执行任何其他操作都会导致内存错误。
buf = rmm.DeviceBuffer(size=100)
会报如下错误(有一次运行没有报错,但现在不行了)
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
/tmp/ipykernel_2220/3317065296.py in <module>
1 import rmm
----> 2 buf = rmm.DeviceBuffer(size=100)
rmm/_lib/device_buffer.pyx in rmm._lib.device_buffer.DeviceBuffer.__cinit__()
MemoryError: std::bad_alloc: CUDA error at: /home/user/miniconda3/envs/rapids-21.10/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorNotSupported operation not supported
和
gdf_float = cudf.DataFrame()
gdf_float['0'] = [1.0, 2.0, 5.0]
gdf_float['1'] = [4.0, 2.0, 1.0]
gdf_float['2'] = [4.0, 2.0, 1.0]
会报如下错误
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
2026 data = as_column(
-> 2027 memoryview(arbitrary), dtype=dtype, nan_as_null=nan_as_null
2028 )
TypeError: memoryview: a bytes-like object is required, not 'list'
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_2220/2068985133.py in <module>
1 gdf_float = cudf.DataFrame()
----> 2 gdf_float['0'] = [1.0, 2.0, 5.0]
3 gdf_float['1'] = [4.0, 2.0, 1.0]
4 gdf_float['2'] = [4.0, 2.0, 1.0]
~/miniconda3/envs/rapids-21.10/lib/python3.7/contextlib.py in inner(*args, **kwds)
72 def inner(*args, **kwds):
73 with self._recreate_cm():
---> 74 return func(*args, **kwds)
75 return inner
76
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/dataframe.py in __setitem__(self, arg, value)
766 # disc. with pandas here
767 # pandas raises key error here
--> 768 self.insert(len(self._data), arg, value)
769
770 elif can_convert_to_column(arg):
~/miniconda3/envs/rapids-21.10/lib/python3.7/contextlib.py in inner(*args, **kwds)
72 def inner(*args, **kwds):
73 with self._recreate_cm():
---> 74 return func(*args, **kwds)
75 return inner
76
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/dataframe.py in insert(self, loc, name, value)
3276 )
3277
-> 3278 value = column.as_column(value)
3279
3280 self._data.insert(name, value, loc=loc)
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
2100 ),
2101 dtype=dtype,
-> 2102 nan_as_null=nan_as_null,
2103 )
2104 except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
1794 "https://issues.apache.org/jira/browse/ARROW-3802"
1795 )
-> 1796 col = ColumnBase.from_arrow(arbitrary)
1797 if isinstance(arbitrary, pa.NullArray):
1798 if type(dtype) == str and dtype == "empty":
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in from_arrow(cls, array)
305 return cudf.core.column.Decimal64Column.from_arrow(array)
306
--> 307 result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"]
308
309 result = result._with_type_metadata(
cudf/_lib/interop.pyx in cudf._lib.interop.from_arrow()
RuntimeError: CUDA error encountered at: ../src/bitmask/null_mask.cu:93: 801 cudaErrorNotSupported operation not supported
以下信息可能相关:
我的系统内存总是接近占满 (16GB),其中 Vmmem 占用大约 10GB
我的显存只用了 1.4/8GB
遗憾的是,在 Pascal GPU 上,WSL2 中的 RAPIDS 只有 21.08 版本可以运行,21.10 及更高版本都不行。请尝试 21.08。这些版本仍处于试验阶段,因此实际效果可能因人而异 (YMMV)。
我在 Windows 21H2 (19044.1348) 下安装了 WSL 2 (5.10.60.1-microsoft-standard-WSL2),使用 Pascal 架构的 GPU (1070) 和 NVIDIA 驱动程序 510.06。 我在 WSL 中使用默认的 Ubuntu 版本 (20.04.3 LTS)。 我尝试了 docker 和 anaconda 两种安装方式。我可以运行 Jupyter Notebook 并导入这些库,也可以创建一个 cudf DataFrame。但是向其中写入数据或执行任何其他操作都会导致内存错误。
buf = rmm.DeviceBuffer(size=100)
会报如下错误(有一次运行没有报错,但现在不行了)
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
/tmp/ipykernel_2220/3317065296.py in <module>
1 import rmm
----> 2 buf = rmm.DeviceBuffer(size=100)
rmm/_lib/device_buffer.pyx in rmm._lib.device_buffer.DeviceBuffer.__cinit__()
MemoryError: std::bad_alloc: CUDA error at: /home/user/miniconda3/envs/rapids-21.10/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorNotSupported operation not supported
和
gdf_float = cudf.DataFrame()
gdf_float['0'] = [1.0, 2.0, 5.0]
gdf_float['1'] = [4.0, 2.0, 1.0]
gdf_float['2'] = [4.0, 2.0, 1.0]
会报如下错误
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
2026 data = as_column(
-> 2027 memoryview(arbitrary), dtype=dtype, nan_as_null=nan_as_null
2028 )
TypeError: memoryview: a bytes-like object is required, not 'list'
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_2220/2068985133.py in <module>
1 gdf_float = cudf.DataFrame()
----> 2 gdf_float['0'] = [1.0, 2.0, 5.0]
3 gdf_float['1'] = [4.0, 2.0, 1.0]
4 gdf_float['2'] = [4.0, 2.0, 1.0]
~/miniconda3/envs/rapids-21.10/lib/python3.7/contextlib.py in inner(*args, **kwds)
72 def inner(*args, **kwds):
73 with self._recreate_cm():
---> 74 return func(*args, **kwds)
75 return inner
76
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/dataframe.py in __setitem__(self, arg, value)
766 # disc. with pandas here
767 # pandas raises key error here
--> 768 self.insert(len(self._data), arg, value)
769
770 elif can_convert_to_column(arg):
~/miniconda3/envs/rapids-21.10/lib/python3.7/contextlib.py in inner(*args, **kwds)
72 def inner(*args, **kwds):
73 with self._recreate_cm():
---> 74 return func(*args, **kwds)
75 return inner
76
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/dataframe.py in insert(self, loc, name, value)
3276 )
3277
-> 3278 value = column.as_column(value)
3279
3280 self._data.insert(name, value, loc=loc)
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
2100 ),
2101 dtype=dtype,
-> 2102 nan_as_null=nan_as_null,
2103 )
2104 except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in as_column(arbitrary, nan_as_null, dtype, length)
1794 "https://issues.apache.org/jira/browse/ARROW-3802"
1795 )
-> 1796 col = ColumnBase.from_arrow(arbitrary)
1797 if isinstance(arbitrary, pa.NullArray):
1798 if type(dtype) == str and dtype == "empty":
~/miniconda3/envs/rapids-21.10/lib/python3.7/site-packages/cudf/core/column/column.py in from_arrow(cls, array)
305 return cudf.core.column.Decimal64Column.from_arrow(array)
306
--> 307 result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"]
308
309 result = result._with_type_metadata(
cudf/_lib/interop.pyx in cudf._lib.interop.from_arrow()
RuntimeError: CUDA error encountered at: ../src/bitmask/null_mask.cu:93: 801 cudaErrorNotSupported operation not supported
以下信息可能相关: 我的系统内存总是接近占满 (16GB),其中 Vmmem 占用大约 10GB; 我的显存只用了 1.4/8GB
遗憾的是,在 Pascal GPU 上,WSL2 中的 RAPIDS 只有 21.08 版本可以运行,21.10 及更高版本都不行。请尝试 21.08。这些版本仍处于试验阶段,因此实际效果可能因人而异 (YMMV)。