如何在pytorch中使用GPU制作class
How to make a class in pytorch use GPU
所以我 运行 一些代码并在 Pytorch 中得到以下错误:"RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"
据我了解,这意味着我的模型可能不会被推送到 GPU,而输入数据已经在使用 GPU。如果有帮助,我可以分享我的代码(我现在不这样做,因为它比一小段代码长)。
我知道我可以做类似
的事情
myModel=Model()
myModel.cuda()
但是,我正在制作一个 class 作为将添加到顺序包装器的自定义模块的一部分。所以,我真的不能用它来制作一个对象(我不擅长 OOP 术语,所以对于任何技术写作错误我深表歉意)。我想知道是否有办法解决这个问题,并使 class 始终使用 GPU,即使我从未明确定义对象?
如果这还不够清楚,我可以 post 我的代码,但如前所述,可能需要一些时间才能完成(不会太长,但也不是很方便)。
非常感谢任何帮助。
编辑:这是代码,我认为问题出在 RLSTM class,因为在我添加它之前没有错误。
class VGG(nn.Module):
'''
VGG model
'''
def __init__(self, features): # features represents the layers array
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512,512),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(512, 512),
nn.ReLU(True),
nn.Linear(512, 10),
)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
m.bias.data.zero_()
def forward(self, x): # x is the image, we run x through the layers
print(x.size())
x = self.features(x) # runs through all features, where each feature is a function
x = x.view(x.size(0), -1)
# after running through features, does sequential steps to finally classify
x = self.classifier(x)
# print(x)
return x
def make_layers(cfg, batch_norm=False):
# print("Making layers!")
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
layers+=[RLSTM()]
return nn.Sequential(*layers)
class RLSTM(nn.Module):
def __init__(self):
super(RLSTM,self).__init__()
def forward(self, image):
print("going in rowlstm")
global current
global _layer
global isgates
size = image.size()
b = size[0]
indvs = list(image.split(1,0)) # split up the batch into individual images
#print(indvs[0].size())
tensor_array = []
for i in range(b):
current = 0
_layer = []
isgates = []
tensor_array.append(self.RowLSTM(indvs[i]))
seq=tuple(tensor_array)
trans = torch.cat(seq,0)
return trans.cuda() # trying to make floattensor error go away
def RowLSTM(self, image):
global current
global _layer
global isgates
# input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info
# the input to state convolution should only be computed one time
if current==0:
n = image.size()[2]
ch=image.size()[1]
input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
# now have dummy, learnable variables for first row
_layer.append(cell)
else:
Cell_prev = _layer[current-1] # access previous row
hidPrev = Cell_prev.getHiddenState()
ch = image.size()[1]
# print("about to apply conv1d")
state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# print("applied conv1d")
prevHid=Cell_prev.getHiddenState()
ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
gates = self.addGates(isgates,ssgates,current)
# split gates
ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
cell.compute()
_layer.append(cell)
# attempting to eliminate requirement of getting size
#print(current)
try:
current+=1
y=(isgates[0][0][1][current])
return self.RowLSTM(image)
except Exception as error:
concats=[]
for cell in _layer:
tensor=torch.unsqueeze(cell.h,0)
concats.append(tensor)
seq=tuple(concats)
tensor=torch.cat(seq,3)
return tensor
def splitIS(tensor): #always going to be splitting into 4 pieces, so no need to add extra parameters
inputStateGates={}
size=tensor.size() # 1 x 4h x n x n
out_ft=size[1] # get 4h for the nxnx4h tensor
num=size[2] # get n for the nxn image
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = torch.squeeze(tensor) # 4h x n x n
# First, split by row: Creates n tensors of 4h x n x 1
rows = list(tensor.split(1,2))
for i in range(num):
# Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
row=rows[i]
inputStateGates[i]=list(row.split(hh,0))
return inputStateGates
def splitSS(tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1
size=tensor.size()
out_ft=size[1] # get 4h for the 1x4hxn tensor
num=size[2] # get n for the 1xhxn row
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = tensor.squeeze(0) # 4h x n x 1
splitted=list(tensor.split(hh,0))
return splitted
def addGates(i2s,s2s,key):
""" these dictionaries are of form {key : [[i], [o], [f], [g]]}
we want to add pairwise elemeents """
# i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
# s2s is of form [[h,n],[h,n],[h,n], [h,n]]
gateSum = []
for i in range(4): # always of length 4, representing the gates
gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))
return gateSum
您必须在 函数 __init__
中定义子模块,以便它们可以注册为模块的参数。如果它们不是参数,当您为父级调用 .cuda()
时,.cuda()
将不会调用它们。
如果您确实需要动态 parameters/modules 声明,请查看 here. The key is apaszke's answer。
使用GPU时需要将数据集改为Cuda浮点型张量。
在放置简单的 if 条件之后,到目前为止我从未见过同样的错误。
希望我的示例代码对您有所帮助。
# Firstly, you need to network model to cuda.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = VGG()
net.to(device) # or net.cuda()
# Secondly, you also need to change your dataset to Cuda Float Tensor.
if "GPU" in device:
images = images.type(torch.cuda.FloatTensor)
else:
images = images.type(torch.FloatTensor)
所以我 运行 一些代码并在 Pytorch 中得到以下错误:"RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"
据我了解,这意味着我的模型可能不会被推送到 GPU,而输入数据已经在使用 GPU。如果有帮助,我可以分享我的代码(我现在不这样做,因为它比一小段代码长)。
我知道我可以做类似
的事情 myModel=Model()
myModel.cuda()
但是,我正在制作一个 class 作为将添加到顺序包装器的自定义模块的一部分。所以,我真的不能用它来制作一个对象(我不擅长 OOP 术语,所以对于任何技术写作错误我深表歉意)。我想知道是否有办法解决这个问题,并使 class 始终使用 GPU,即使我从未明确定义对象?
如果这还不够清楚,我可以 post 我的代码,但如前所述,可能需要一些时间才能完成(不会太长,但也不是很方便)。
非常感谢任何帮助。
编辑:这是代码,我认为问题出在 RLSTM class,因为在我添加它之前没有错误。
class VGG(nn.Module):
'''
VGG model
'''
def __init__(self, features): # features represents the layers array
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512,512),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(512, 512),
nn.ReLU(True),
nn.Linear(512, 10),
)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
m.bias.data.zero_()
def forward(self, x): # x is the image, we run x through the layers
print(x.size())
x = self.features(x) # runs through all features, where each feature is a function
x = x.view(x.size(0), -1)
# after running through features, does sequential steps to finally classify
x = self.classifier(x)
# print(x)
return x
def make_layers(cfg, batch_norm=False):
# print("Making layers!")
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
layers+=[RLSTM()]
return nn.Sequential(*layers)
class RLSTM(nn.Module):
def __init__(self):
super(RLSTM,self).__init__()
def forward(self, image):
print("going in rowlstm")
global current
global _layer
global isgates
size = image.size()
b = size[0]
indvs = list(image.split(1,0)) # split up the batch into individual images
#print(indvs[0].size())
tensor_array = []
for i in range(b):
current = 0
_layer = []
isgates = []
tensor_array.append(self.RowLSTM(indvs[i]))
seq=tuple(tensor_array)
trans = torch.cat(seq,0)
return trans.cuda() # trying to make floattensor error go away
def RowLSTM(self, image):
global current
global _layer
global isgates
# input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info
# the input to state convolution should only be computed one time
if current==0:
n = image.size()[2]
ch=image.size()[1]
input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
# now have dummy, learnable variables for first row
_layer.append(cell)
else:
Cell_prev = _layer[current-1] # access previous row
hidPrev = Cell_prev.getHiddenState()
ch = image.size()[1]
# print("about to apply conv1d")
state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# print("applied conv1d")
prevHid=Cell_prev.getHiddenState()
ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
gates = self.addGates(isgates,ssgates,current)
# split gates
ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
cell.compute()
_layer.append(cell)
# attempting to eliminate requirement of getting size
#print(current)
try:
current+=1
y=(isgates[0][0][1][current])
return self.RowLSTM(image)
except Exception as error:
concats=[]
for cell in _layer:
tensor=torch.unsqueeze(cell.h,0)
concats.append(tensor)
seq=tuple(concats)
tensor=torch.cat(seq,3)
return tensor
def splitIS(tensor): #always going to be splitting into 4 pieces, so no need to add extra parameters
inputStateGates={}
size=tensor.size() # 1 x 4h x n x n
out_ft=size[1] # get 4h for the nxnx4h tensor
num=size[2] # get n for the nxn image
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = torch.squeeze(tensor) # 4h x n x n
# First, split by row: Creates n tensors of 4h x n x 1
rows = list(tensor.split(1,2))
for i in range(num):
# Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
row=rows[i]
inputStateGates[i]=list(row.split(hh,0))
return inputStateGates
def splitSS(tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1
size=tensor.size()
out_ft=size[1] # get 4h for the 1x4hxn tensor
num=size[2] # get n for the 1xhxn row
hh=out_ft/4 # we want to split the tensor into 4, for the gates
tensor = tensor.squeeze(0) # 4h x n x 1
splitted=list(tensor.split(hh,0))
return splitted
def addGates(i2s,s2s,key):
""" these dictionaries are of form {key : [[i], [o], [f], [g]]}
we want to add pairwise elemeents """
# i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
# s2s is of form [[h,n],[h,n],[h,n], [h,n]]
gateSum = []
for i in range(4): # always of length 4, representing the gates
gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))
return gateSum
您必须在 函数 __init__
中定义子模块,以便它们可以注册为模块的参数。如果它们不是参数,当您为父级调用 .cuda()
时,.cuda()
将不会调用它们。
如果您确实需要动态 parameters/modules 声明,请查看 here. The key is apaszke's answer。
使用GPU时需要将数据集改为Cuda浮点型张量。 在放置简单的 if 条件之后,到目前为止我从未见过同样的错误。 希望我的示例代码对您有所帮助。
# Firstly, you need to network model to cuda.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = VGG()
net.to(device) # or net.cuda()
# Secondly, you also need to change your dataset to Cuda Float Tensor.
if "GPU" in device:
images = images.type(torch.cuda.FloatTensor)
else:
images = images.type(torch.FloatTensor)