How to make a class in pytorch use GPU

I ran some code and got the following error in PyTorch: "RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same"

As I understand it, this means my model is probably not being pushed to the GPU, while the input data already is. I can share my code if it helps (I'm not doing so right now because it is longer than a small snippet).

I know I can do something like:

    myModel=Model()
    myModel.cuda()

However, I am writing a class as part of a custom module that will be added to a sequential wrapper, so I can't really use that approach of creating an object (I'm not good with OOP terminology, so apologies for any technical writing errors). I'm wondering if there is a way around this, so that the class always uses the GPU even though I never explicitly create an object from it?

If this isn't clear enough, I can post my code, but as mentioned it would take a little while to put together (not too long, but not very convenient either).

Any help is greatly appreciated.

Edit: here is the code. I think the problem is in the RLSTM class, because there was no error before I added it.

class VGG(nn.Module):
    '''
    VGG model
    '''
    def __init__(self, features): # features represents the layers array
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, 10),
        )
        # Initialize weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                m.bias.data.zero_()

    def forward(self, x): # x is the image, we run x through the layers
        print(x.size())
        x = self.features(x) # runs through all features, where each feature is a function
        x = x.view(x.size(0), -1)
        # after running through features, does sequential steps to finally classify
        x = self.classifier(x)
        # print(x)
        return x


def make_layers(cfg, batch_norm=False):
   # print("Making layers!")
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
            layers += [RLSTM()]

    return nn.Sequential(*layers)

class RLSTM(nn.Module):
    def __init__(self):
        super(RLSTM, self).__init__()

    def forward(self, image):
        print("going in rowlstm")
        global current
        global _layer
        global isgates
        size = image.size()
        b = size[0]
        indvs = list(image.split(1, 0)) # split up the batch into individual images
        # print(indvs[0].size())
        tensor_array = []
        for i in range(b):
            current = 0
            _layer = []
            isgates = []
            tensor_array.append(self.RowLSTM(indvs[i]))

        seq = tuple(tensor_array)
        trans = torch.cat(seq, 0)
        return trans.cuda() # trying to make floattensor error go away

    def RowLSTM(self, image):
        global current
        global _layer
        global isgates

        # input-to-state (K_is * x_i): 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info

        # the input to state convolution should only be computed one time
        if current == 0:
            n = image.size()[2]
            ch = image.size()[1]
            input_to_state = torch.nn.Conv2d(ch, 4*ch, kernel_size=(1, 3), padding=(0, 1))
            isgates = self.splitIS(input_to_state(image)) # convolve, then split into gates (4 per row)
            cell = RowLSTMCell(0, torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1))
            # now have dummy, learnable variables for first row
            _layer.append(cell)
        else:
            Cell_prev = _layer[current-1] # access previous row
            hidPrev = Cell_prev.getHiddenState()
            ch = image.size()[1]
            # print("about to apply conv1d")
            state_to_state = torch.nn.Conv2d(ch, 4*ch, kernel_size=(1, 3), padding=(0, 1)) # error is here: hidPrev is an array - not a valid number of input channel
            # print("applied conv1d")
            prevHid = Cell_prev.getHiddenState()
            ssgates = self.splitSS(state_to_state(prevHid.unsqueeze(0))) # need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
            gates = self.addGates(isgates, ssgates, current)
            # split gates
            ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
            cell = RowLSTMCell(Cell_prev, ig, og, fg, gg, 0, 0)
            cell.compute()
            _layer.append(cell)
        # attempting to eliminate requirement of getting size

        # print(current)
        try:
            current += 1
            y = (isgates[0][0][1][current])
            return self.RowLSTM(image)
        except Exception as error:
            concats = []
            for cell in _layer:
                tensor = torch.unsqueeze(cell.h, 0)
                concats.append(tensor)
            seq = tuple(concats)
            tensor = torch.cat(seq, 3)
            return tensor

    def splitIS(self, tensor): # always going to be splitting into 4 pieces, so no need to add extra parameters
        inputStateGates = {}
        size = tensor.size() # 1 x 4h x n x n
        out_ft = size[1] # get 4h for the nxnx4h tensor
        num = size[2] # get n for the nxn image
        hh = out_ft // 4 # we want to split the tensor into 4, for the gates (integer division so split() gets an int)
        tensor = torch.squeeze(tensor) # 4h x n x n

        # First, split by row: creates n tensors of 4h x n x 1
        rows = list(tensor.split(1, 2))

        for i in range(num):
            # Each row is a tensor of 4h x n x 1, split it into 4 of h x n x 1
            row = rows[i]
            inputStateGates[i] = list(row.split(hh, 0))

        return inputStateGates


    def splitSS(self, tensor): # 1 x 4h x n x 1, create 4 of 1 x h x n x 1
        size = tensor.size()
        out_ft = size[1] # get 4h for the 1x4hxn tensor
        num = size[2] # get n for the 1xhxn row
        hh = out_ft // 4 # we want to split the tensor into 4, for the gates (integer division so split() gets an int)
        tensor = tensor.squeeze(0) # 4h x n x 1
        splitted = list(tensor.split(hh, 0))
        return splitted


    def addGates(self, i2s, s2s, key):
        """ these dictionaries are of form {key : [[i], [o], [f], [g]]}
            we want to add pairwise elements """

        # i2s is of form key: [[i], [o], [f], [g]] where each gate is hxn
        # s2s is of form [[h,n],[h,n],[h,n],[h,n]]
        gateSum = []
        for i in range(4): # always of length 4, representing the gates
            gateSum.append(torch.sigmoid(i2s[key][i] + s2s[i]))

        return gateSum

You have to define your submodules in __init__ so that they are registered as part of the module's parameters. If they are not registered, calling .cuda() on the parent will not apply .cuda() to them.
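For example, in your case that means building the convolutions once in RLSTM.__init__ instead of inside RowLSTM. A minimal sketch of the idea, assuming the channel count is known when the block is constructed and passed in as a hypothetical ch argument (the rest of your row-by-row logic stays where it is):

import torch
import torch.nn as nn

class RLSTM(nn.Module):
    def __init__(self, ch):
        super(RLSTM, self).__init__()
        # defined in __init__, so they are registered submodules and
        # model.cuda() / model.to(device) moves their weights as well
        self.input_to_state = nn.Conv2d(ch, 4 * ch, kernel_size=(1, 3), padding=(0, 1))
        self.state_to_state = nn.Conv2d(ch, 4 * ch, kernel_size=(1, 3), padding=(0, 1))

    def forward(self, image):
        # use the registered layer instead of creating a new Conv2d on every call
        isgates_raw = self.input_to_state(image)
        # ... the rest of the row-by-row logic from the question goes here ...
        return isgates_raw

Since make_layers already knows the channel count v when it appends the block, it could construct it as layers += [RLSTM(v)].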

If you really do need dynamic parameter/module declaration, have a look here. The key is apaszke's answer.
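One standard way to keep dynamically created layers registered is to hold them in an nn.ModuleList (or nn.ParameterList for bare parameters), which registers everything it contains with the parent module. A rough, self-contained sketch; the DynamicBlock name and its arguments are just placeholders:

import torch
import torch.nn as nn

class DynamicBlock(nn.Module):
    def __init__(self, ch, n_layers):
        super(DynamicBlock, self).__init__()
        # modules stored in an nn.ModuleList are registered with the parent,
        # so a single net.cuda() / net.to(device) moves all of their weights
        self.convs = nn.ModuleList(
            [nn.Conv2d(ch, ch, kernel_size=3, padding=1) for _ in range(n_layers)]
        )

    def forward(self, x):
        for conv in self.convs:
            x = torch.relu(conv(x))
        return x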

When using the GPU you also need to convert your dataset to CUDA float tensors. After adding a simple if condition, I have not seen this error again so far. I hope my sample code helps you.

# First, move the network model to the GPU (or keep it on the CPU if no GPU is available).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = VGG()
net.to(device) # or net.cuda()

# Second, you also need to convert your dataset to CUDA float tensors.
if device.type == "cuda":
    images = images.type(torch.cuda.FloatTensor)
else:
    images = images.type(torch.FloatTensor)
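Equivalently, a pattern that avoids the explicit if/else is to move each batch with .to(device), the same call used for the model. A minimal sketch, assuming a hypothetical train_loader and the net from above:

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for images, labels in train_loader:   # hypothetical DataLoader
    images = images.to(device)        # no-op on CPU, copies to GPU otherwise
    labels = labels.to(device)
    outputs = net(images)             # model and data now live on the same device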