如何将 PyTorch 图转换为 ONNX,然后从 OpenCV 进行推理?

How to convert PyTorch graph to ONNX and then inference from OpenCV?

我正在尝试通过 torch.onnx.export 函数将 PyTorch 图转换为 ONNX,然后使用 OpenCV 函数 blobFromImagesetInputforward 进行推理转换后的图形。我认为我在正确的轨道上,但我一直 运行 犯错误,而且我能找到的关于如何做到这一点的有用示例很少。

我意识到一般的堆栈溢出策略是 post 只有代码的相关部分,但是我得到的错误似乎是细节中的魔鬼所以我怀疑我必须 post 一个完整的例子来明确错误的原因。

这是我的训练网(MNIST 的相当标准):

# MnistNet.py

# Net Layout:
# batchSize x 1 x 28 x 28
#     conv1 Conv2d(1, 6, 5)
# batchSize x 6 x 24 x 24
#     relu(x)
#     max_pool2d(x, kernel_size=2)
# batchSize x 6 x 12 x 12
#     conv2 Conv2d(6, 16, 5)
# batchSize x 16 x 8 x 8
#     relu(x)
#     max_pool2d(x, kernel_size=2)
# batchSize x 16 x 4 x 4
#     view(-1, 16 * 4 * 4)    Note: 16 * 4 * 4 = 256
# batchSize x 1 x 256
#     fc1 Linear(256, 120)
#     relu(x)
# batchSize x 1 x 120
#     fc2 Linear(120, 84)
#     relu(x)
# batchSize x 1 x 84
#     fc3 Linear(84, 10)
# batchSize x 1 x 10

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

class MnistNet(nn.Module):

    TRANSFORM = torchvision.transforms.Compose([
        torchvision.transforms.Resize((28, 28)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.5], [0.5])
    ])

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    # end function

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = x.view(-1, 256)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    # end function

# end class

这是我的训练脚本(同样,对于 MNIST 来说非常标准):

# 1_train.py

from MnistNet import MnistNet

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision

from termcolor import colored

BATCH_SIZE = 64
NUM_EPOCHS = 10

GRAPH_NAME = 'MNIST.pt'

def main():
    trainDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=True, transform=MnistNet.TRANSFORM, download=True)

    trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)

    # declare net, loss function, and optimizer
    mnistNet = MnistNet()
    lossFunction = nn.CrossEntropyLoss()
    optimizer = optim.Adam(mnistNet.parameters())

    # get device (cuda or cpu)
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print(colored('using cuda', 'green'))
    else:
        device = torch.device('cpu')
        print(colored('GPU does not seem to be available, using CPU', 'red'))
    # end if

    # set network to device
    mnistNet.to(device)

    # set network to train mode
    mnistNet.train()

    print('beginning training . . .')

    # for each epoch . . .
    for epoch in range(1, NUM_EPOCHS+1):

        # variables to calculate loss and accuracy within the epoch
        epochLosses = []
        epochAccuracies = []

        # for each batch . . .
        for i, element in enumerate(trainDataLoader):
            # break out the input images and labels, note these are Tensors
            inputImages, labels = element

            inputImages = inputImages.to(device)
            labels = labels.to(device)

            # clear gradients from the previous step
            optimizer.zero_grad()

            # get net output
            outputs = mnistNet(inputImages)
            # calculate loss
            loss = lossFunction(outputs, labels)
            # call backward() to compute gradients
            loss.backward()
            # update parameters using gradients
            optimizer.step()

            # append the current classification loss to the list of epoch losses
            epochLosses.append(loss.item())

            # calculate current classification accuracy

            # get the highest scoring classification for each prediction
            _, predictions = torch.max(outputs.data, 1)

            # number of labels and predictions should always be the same, log an error if this is not the case
            if labels.size(0) != predictions.size(0):
                print(colored('ERROR: labels.size(0) != predictions.size(0)', 'red'))
            # end if

            # determine the number of correct predictions for the current batch
            correctPredictions = 0
            for j in range(len(labels)):
                if predictions[j].item() == labels[j].item():
                    correctPredictions += 1
                # end if
            # end for

            # append the current batch accuracy to the list of accuracies
            epochAccuracies.append(correctPredictions / labels.size(0))
        # end for

        # calculate epoch loss and accuracy from the respective lists
        epochLoss = sum(epochLosses) / len(epochLosses)
        epochAccuracy = sum(epochAccuracies) / len(epochAccuracies)

        print('epoch ' + str(epoch) + ', epochLoss = ' + '{:.4f}'.format(epochLoss) +
              ', epochAccuracy = ' + '{:.4f}'.format(epochAccuracy * 100) + '%')
    # end for

    print('finished training')

    # save the model
    torch.save(mnistNet.state_dict(), GRAPH_NAME)

    print('saved graph as ' + str(GRAPH_NAME))

# end function

if __name__ == '__main__':
    main()

这是我迄今为止在将保存的图形从 PyTorch 转换为 ONNX 的脚本方面的最佳尝试(我不确定这是否正确,我至少可以说它运行时没有错误):

# 3_convert_graph_to_onnx.py

from MnistNet import MnistNet

import torch

GRAPH_NAME = 'MNIST.pt'
ONNX_GRAPH_NAME = 'MNIST.onnx'

def main():

    net = MnistNet()
    net.load_state_dict(torch.load(GRAPH_NAME))

    net.eval()

    # make a dummy input with a batch size of 1, 1 channel, 28 x 28
    dummyInput = torch.randn(10, 1, 28, 28)

    torch.onnx.export(net, dummyInput, ONNX_GRAPH_NAME, verbose=True)

# end function

if __name__ == '__main__':
    main()

这里是我尝试用OpenCV推断ONNX图(注意PyTorch是包含的,但只用于加载测试MNIST数据集,推断前图像被转换为​​OpenCV格式):

# 4_onnx_opencv_inf.py

from MnistNet import MnistNet

import torchvision

import cv2
import numpy as np
from termcolor import colored

ONNX_GRAPH_NAME = 'MNIST.onnx'

def main():
    testDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=False, transform=MnistNet.TRANSFORM, download=True)

    labels = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]

    net = cv2.dnn.readNetFromONNX(ONNX_GRAPH_NAME)

    # test on 3 images
    for i in range(3):
        # get PyTorch tensor image and ground truth index from dataset
        ptImage, gndTrIdx = testDataset[i]
        # convert to PIL image
        pilImage = torchvision.transforms.ToPILImage()(ptImage)
        # convert to OpenCV image, would convert RGB to BGR here if image was color
        openCvImage = np.array(pilImage)

        gndTr = labels[gndTrIdx]

        # can show OpenCV image here if desired
        # cv2.imshow('openCvImage', openCvImage)
        # cv2.waitKey()

        blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))

        net.setInput(blob)
        preds = net.forward()

        predIdx = np.array(preds)[0].argmax()

        prediction = str(predIdx)
        if prediction == gndTr:
            print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', correct answer', 'green'))
        else:
            print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', incorrect answer', 'red'))
        # end if

    # end for

# end function

if __name__ == '__main__':
    main()

目前这个最终脚本因以下错误而崩溃:

$ python3 4_onnx_opencv_inf.py 
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3441) getLayerShapesRecursively OPENCV/DNN: [Reshape]:(18): getMemoryShapes() throws exception. inputs=1 outputs=1/1 blobs=0
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3447) getLayerShapesRecursively     input[0] = [ 1 16 13 13 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3451) getLayerShapesRecursively     output[0] = [ 1 256 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3457) getLayerShapesRecursively Exception message: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'

Traceback (most recent call last):
  File "4_onnx_opencv_inf.py", line 54, in <module>
    main()
  File "4_onnx_opencv_inf.py", line 38, in main
    preds = net.forward()
cv2.error: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'

根据这个错误,我不太确定下一步该怎么做,有人可以就此提出建议吗?我怀疑我至少大体上正确地执行了程序并遗漏了一些小细节。

我在 ONNX 推理脚本中使用了错误的大小。

4_onnx_opencv_inf.py变化中:

blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))

blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(28, 28))

使其成为 运行(我使用的是 Ubuntu 20.04 和 PyTorch 1.7.0),但准确性更差。使用上面的常规 PyTorch 推理(第二个脚本),我获得了 98.5% 的准确率,使用 OpenCV ONNX 版本,我获得了 95% 的准确率。

我怀疑差异是由于 cv2.dnn.blobFromImage 中的参数未设置为正确处理规范化,但这是完全不同的 post。