Cupy 适用于 TITAN V,但不适用于 TITAN RTX
Cupy works well with TITAN V, but not with TITAN RTX
我正在使用 cupy 运行 带有 pytorch 的 cuda 代码。
我的环境是
ubuntu 20, anaconda-python 3.7.6, nvidia-driver 440, cuda 10.2, cupy-cuda102, torch 1.4.0
首先我写了一个简单的主代码
import data_load_test
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
def main():
dataset = data_load_test.DataLoadTest()
training_loader = DataLoader(dataset, batch_size=1)
with torch.cuda.device(0):
pbar = tqdm(training_loader)
for epoch in range(3):
for i, img in enumerate(pbar):
print("see the message")
if __name__ == "__main__":
main()
和这样的数据加载器。
from torch.utils.data import Dataset
import cv2
import cupy as cp
def read_cuda_file(cuda_path):
f = open(cuda_path, 'r')
source_line = ""
while True:
line = f.readline()
if not line: break
source_line = source_line + line
f.close()
return source_line
class DataLoadTest(Dataset):
def __init__(self):
source = read_cuda_file("cuda/cuda_code.cu")
cuda_source = '''{}'''.format(source)
module = cp.RawModule(code=cuda_source)
self.myfunc = module.get_function('myfunc')
self.input = cp.asarray(cv2.imread("hi.png",-1), cp.uint8)
h, w, c = self.input.shape
self.h = h
self.w = w
self.output = cp.zeros((w, h, 3), dtype=cp.uint8)
self.block_size = (32, 32)
self.grid_size = (h // self.block_size[1], w // self.block_size[0])
def __len__(self):
return 1
def __getitem__(self, idx):
self.myfunc(self.grid_size, self.block_size, (self.input, self.output, self.h, self.w))
return cp.asnumpy(self.output)
我的 cuda 代码是,
#define PI 3.14159265358979323846f
extern "C"{
__global__ void myfunc(const unsigned char* refImg, unsigned char* warpImg, const long long cols, const long long rows)
{
long long x = blockDim.x * blockIdx.x + threadIdx.x;
long long y = blockDim.y * blockIdx.y + threadIdx.y;
long long indexImg = y * cols + x;
warpImg[indexImg * 3] = 0;
warpImg[indexImg * 3 + 1] = 1;
warpImg[indexImg * 3 + 2] = 2;
}
}
我有两个 GPU TITAN V(设备 0)和 TITAN RTX(设备 1)
当我运行此代码与TITAN V时,(主函数第3行)
with torch.cuda.device(0):
它工作正常,但是
使用 TITAN RTX,
with torch.cuda.device(1):
它给出了这样的错误信息。
File "cupy/core/raw.pyx", line 66, in cupy.core.raw.RawKernel.__call__
File "cupy/cuda/function.pyx", line 162, in cupy.cuda.function.Function.__call__
File "cupy/cuda/function.pyx", line 144, in cupy.cuda.function._launch
File "cupy/cuda/driver.pyx", line 293, in cupy.cuda.driver.launchKernel
File "cupy/cuda/driver.pyx", line 118, in cupy.cuda.driver.check_status
cupy.cuda.driver.CUDADriverError: CUDA_ERROR_CONTEXT_IS_DESTROYED: context is destroyed
请帮忙。
在 main() 中,当 dataLoadTest() class 被实例化时,它发生在默认设备 0 上,因此 cuPy 正在那里编译 myFunc()。
下一行“with torch.cuda.device(0):”是你在失败的版本中切换到设备1的地方?
如果你打电话会发生什么
cuPy.cuda.Device(1).use()
作为 main() 中的第一行,以确保 myFunc() 在设备 1 上被实例化?
我正在使用 cupy 运行 带有 pytorch 的 cuda 代码。
我的环境是 ubuntu 20, anaconda-python 3.7.6, nvidia-driver 440, cuda 10.2, cupy-cuda102, torch 1.4.0
首先我写了一个简单的主代码
import data_load_test
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
def main():
dataset = data_load_test.DataLoadTest()
training_loader = DataLoader(dataset, batch_size=1)
with torch.cuda.device(0):
pbar = tqdm(training_loader)
for epoch in range(3):
for i, img in enumerate(pbar):
print("see the message")
if __name__ == "__main__":
main()
和这样的数据加载器。
from torch.utils.data import Dataset
import cv2
import cupy as cp
def read_cuda_file(cuda_path):
f = open(cuda_path, 'r')
source_line = ""
while True:
line = f.readline()
if not line: break
source_line = source_line + line
f.close()
return source_line
class DataLoadTest(Dataset):
def __init__(self):
source = read_cuda_file("cuda/cuda_code.cu")
cuda_source = '''{}'''.format(source)
module = cp.RawModule(code=cuda_source)
self.myfunc = module.get_function('myfunc')
self.input = cp.asarray(cv2.imread("hi.png",-1), cp.uint8)
h, w, c = self.input.shape
self.h = h
self.w = w
self.output = cp.zeros((w, h, 3), dtype=cp.uint8)
self.block_size = (32, 32)
self.grid_size = (h // self.block_size[1], w // self.block_size[0])
def __len__(self):
return 1
def __getitem__(self, idx):
self.myfunc(self.grid_size, self.block_size, (self.input, self.output, self.h, self.w))
return cp.asnumpy(self.output)
我的 cuda 代码是,
#define PI 3.14159265358979323846f
extern "C"{
__global__ void myfunc(const unsigned char* refImg, unsigned char* warpImg, const long long cols, const long long rows)
{
long long x = blockDim.x * blockIdx.x + threadIdx.x;
long long y = blockDim.y * blockIdx.y + threadIdx.y;
long long indexImg = y * cols + x;
warpImg[indexImg * 3] = 0;
warpImg[indexImg * 3 + 1] = 1;
warpImg[indexImg * 3 + 2] = 2;
}
}
我有两个 GPU TITAN V(设备 0)和 TITAN RTX(设备 1)
当我运行此代码与TITAN V时,(主函数第3行)
with torch.cuda.device(0):
它工作正常,但是
使用 TITAN RTX,
with torch.cuda.device(1):
它给出了这样的错误信息。
File "cupy/core/raw.pyx", line 66, in cupy.core.raw.RawKernel.__call__
File "cupy/cuda/function.pyx", line 162, in cupy.cuda.function.Function.__call__
File "cupy/cuda/function.pyx", line 144, in cupy.cuda.function._launch
File "cupy/cuda/driver.pyx", line 293, in cupy.cuda.driver.launchKernel
File "cupy/cuda/driver.pyx", line 118, in cupy.cuda.driver.check_status
cupy.cuda.driver.CUDADriverError: CUDA_ERROR_CONTEXT_IS_DESTROYED: context is destroyed
请帮忙。
在 main() 中,当 dataLoadTest() class 被实例化时,它发生在默认设备 0 上,因此 cuPy 正在那里编译 myFunc()。
下一行“with torch.cuda.device(0):”是你在失败的版本中切换到设备1的地方?
如果你打电话会发生什么
cuPy.cuda.Device(1).use()
作为 main() 中的第一行,以确保 myFunc() 在设备 1 上被实例化?