在 ResNet50 中对 pytorch 中包含 10 类 的图像进行分类时遇到此错误。我的代码是:

Facing this error while classifying Images, containing 10 classes in pytorch, in ResNet50. My code is:

这是我正在实施的代码:我正在使用 CalTech256 数据集的一个子集对 10 种不同动物的图像进行分类。我们将介绍数据集准备、数据扩充以及构建分类器的步骤。

def train_and_validate(model, loss_criterion, optimizer, epochs=25):
    '''
    Function to train and validate
    Parameters
        :param model: Model to train and validate
        :param loss_criterion: Loss Criterion to minimize
        :param optimizer: Optimizer for computing gradients
        :param epochs: Number of epochs (default=25)

    Returns
        model: Trained Model with best validation accuracy
        history: (dict object): Having training loss, accuracy and validation loss, accuracy
    '''

    start = time.time()
    history = []
    best_acc = 0.0

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))

        # Set to training mode
        model.train()

        # Loss and Accuracy within the epoch
        train_loss = 0.0
        train_acc = 0.0

        valid_loss = 0.0
        valid_acc = 0.0

        for i, (inputs, labels) in enumerate(train_data_loader):

            inputs = inputs.to(device)
            labels = labels.to(device)

            # Clean existing gradients
            optimizer.zero_grad()

            # Forward pass - compute outputs on input data using the model
            outputs = model(inputs)

            # Compute loss
            loss = loss_criterion(outputs, labels)

            # Backpropagate the gradients
            loss.backward()

            # Update the parameters
            optimizer.step()

            # Compute the total loss for the batch and add it to train_loss
            train_loss += loss.item() * inputs.size(0)

            # Compute the accuracy
            ret, predictions = torch.max(outputs.data, 1)
            correct_counts = predictions.eq(labels.data.view_as(predictions))

            # Convert correct_counts to float and then compute the mean
            acc = torch.mean(correct_counts.type(torch.FloatTensor))

            # Compute total accuracy in the whole batch and add to train_acc
            train_acc += acc.item() * inputs.size(0)

            #print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item()))


        # Validation - No gradient tracking needed
        with torch.no_grad():

            # Set to evaluation mode
            model.eval()

            # Validation loop
            for j, (inputs, labels) in enumerate(valid_data_loader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Forward pass - compute outputs on input data using the model
                outputs = model(inputs)

                # Compute loss
                loss = loss_criterion(outputs, labels)

                # Compute the total loss for the batch and add it to valid_loss
                valid_loss += loss.item() * inputs.size(0)

                # Calculate validation accuracy
                ret, predictions = torch.max(outputs.data, 1)
                correct_counts = predictions.eq(labels.data.view_as(predictions))

                # Convert correct_counts to float and then compute the mean
                acc = torch.mean(correct_counts.type(torch.FloatTensor))

                # Compute total accuracy in the whole batch and add to valid_acc
                valid_acc += acc.item() * inputs.size(0)

                #print("Validation Batch number: {:03d}, Validation: Loss: {:.4f}, Accuracy: {:.4f}".format(j, loss.item(), acc.item()))

        # Find average training loss and training accuracy
        avg_train_loss = train_loss/train_data_size 
        avg_train_acc = train_acc/train_data_size

        # Find average training loss and training accuracy
        avg_valid_loss = valid_loss/valid_data_size 
        avg_valid_acc = valid_acc/valid_data_size

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])

        epoch_end = time.time()

        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))

        # Save if the model has best accuracy till now
        torch.save(model, dataset+'_model_'+str(epoch)+'.pt')

    return model, history

# Load pretrained ResNet50 Model
resnet50 = models.resnet50(pretrained=True)
#resnet50 = resnet50.to('cuda:0')


# Freeze model parameters
for param in resnet50.parameters():
    param.requires_grad = False
# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features

resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes), # Since 10 possible outputs
    nn.LogSoftmax(dim=1) # For using NLLLoss()
)

# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')

# Change the final layer of ResNet50 Model for Transfer Learning
fc_inputs = resnet50.fc.in_features

resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, num_classes), # Since 10 possible outputs
    nn.LogSoftmax(dienter code herem=1) # For using NLLLoss()
)

# Convert model to be used on GPU
# resnet50 = resnet50.to('cuda:0')`enter code here`

错误是这样的:


RuntimeError Traceback (most recent call last) in () 6 # Train the model for 25 epochs 7 num_epochs = 30 ----> 8 trained_model, history = train_and_validate(resnet50, loss_func, optimizer, num_epochs) 9 10 torch.save(history, dataset+'_history.pt')

in train_and_validate(model, loss_criterion, optimizer, epochs) 43 44 # Compute loss ---> 45 loss = loss_criterion(outputs, labels) 46 47 # Backpropagate the gradients

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in call(self, *input, **kwargs) 539 result = self._slow_forward(*input, **kwargs) 540 else: --> 541 result = self.forward(*input, **kwargs) 542 for hook in self._forward_hooks.values(): 543 hook_result = hook(self, input, result)

~\Anaconda3\lib\site-packages\torch\nn\modules\loss.py in forward(self, input, target) 202 203 def forward(self, input, target): --> 204 return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction) 205 206

~\Anaconda3\lib\site-packages\torch\nn\functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction) 1836 .format(input.size(0), target.size(0))) 1837 if dim == 2: -> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index) 1839 elif dim == 4: 1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at C:\Users\builder\AppData\Local\Temp\pip-req-build-0i480kur\aten\src\THNN/generic/ClassNLLCriterion.c:97

当您的数据集中有不正确的标签,或者标签是 1 索引(而不是 0 索引)时,就会发生这种情况。从错误消息来看,cur_target 必须小于 类 的总数(10)。要验证问题,请检查数据集中的最大和最小标签。如果数据确实是 1 索引的,只需从所有注释中减去一个,你应该没问题。

注意,另一个可能的原因是数据中存在一些-1 标签。一些(尤其是较旧的)数据集使用 -1 作为 wrong/dubious 标签的指示。如果您发现此类标签,请将其丢弃。