如何将 PyTorch 图转换为 ONNX,然后从 OpenCV 进行推理?
How to convert PyTorch graph to ONNX and then inference from OpenCV?
我正在尝试通过 torch.onnx.export
函数将 PyTorch 图转换为 ONNX,然后使用 OpenCV 函数 blobFromImage
、setInput
和 forward
进行推理转换后的图形。我认为我在正确的轨道上,但我一直 运行 犯错误,而且我能找到的关于如何做到这一点的有用示例很少。
我意识到一般的堆栈溢出策略是 post 只有代码的相关部分,但是我得到的错误似乎是细节中的魔鬼所以我怀疑我必须 post 一个完整的例子来明确错误的原因。
这是我的训练网(MNIST 的相当标准):
# MnistNet.py
# Net Layout:
# batchSize x 1 x 28 x 28
# conv1 Conv2d(1, 6, 5)
# batchSize x 6 x 24 x 24
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 6 x 12 x 12
# conv2 Conv2d(6, 16, 5)
# batchSize x 16 x 8 x 8
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 16 x 4 x 4
# view(-1, 16 * 4 * 4) Note: 16 * 4 * 4 = 256
# batchSize x 1 x 256
# fc1 Linear(256, 120)
# relu(x)
# batchSize x 1 x 120
# fc2 Linear(120, 84)
# relu(x)
# batchSize x 1 x 84
# fc3 Linear(84, 10)
# batchSize x 1 x 10
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
class MnistNet(nn.Module):
TRANSFORM = torchvision.transforms.Compose([
torchvision.transforms.Resize((28, 28)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.5], [0.5])
])
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(256, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
# end function
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
x = x.view(-1, 256)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
# end function
# end class
这是我的训练脚本(同样,对于 MNIST 来说非常标准):
# 1_train.py
from MnistNet import MnistNet
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from termcolor import colored
BATCH_SIZE = 64
NUM_EPOCHS = 10
GRAPH_NAME = 'MNIST.pt'
def main():
trainDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=True, transform=MnistNet.TRANSFORM, download=True)
trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)
# declare net, loss function, and optimizer
mnistNet = MnistNet()
lossFunction = nn.CrossEntropyLoss()
optimizer = optim.Adam(mnistNet.parameters())
# get device (cuda or cpu)
if torch.cuda.is_available():
device = torch.device('cuda')
print(colored('using cuda', 'green'))
else:
device = torch.device('cpu')
print(colored('GPU does not seem to be available, using CPU', 'red'))
# end if
# set network to device
mnistNet.to(device)
# set network to train mode
mnistNet.train()
print('beginning training . . .')
# for each epoch . . .
for epoch in range(1, NUM_EPOCHS+1):
# variables to calculate loss and accuracy within the epoch
epochLosses = []
epochAccuracies = []
# for each batch . . .
for i, element in enumerate(trainDataLoader):
# break out the input images and labels, note these are Tensors
inputImages, labels = element
inputImages = inputImages.to(device)
labels = labels.to(device)
# clear gradients from the previous step
optimizer.zero_grad()
# get net output
outputs = mnistNet(inputImages)
# calculate loss
loss = lossFunction(outputs, labels)
# call backward() to compute gradients
loss.backward()
# update parameters using gradients
optimizer.step()
# append the current classification loss to the list of epoch losses
epochLosses.append(loss.item())
# calculate current classification accuracy
# get the highest scoring classification for each prediction
_, predictions = torch.max(outputs.data, 1)
# number of labels and predictions should always be the same, log an error if this is not the case
if labels.size(0) != predictions.size(0):
print(colored('ERROR: labels.size(0) != predictions.size(0)', 'red'))
# end if
# determine the number of correct predictions for the current batch
correctPredictions = 0
for j in range(len(labels)):
if predictions[j].item() == labels[j].item():
correctPredictions += 1
# end if
# end for
# append the current batch accuracy to the list of accuracies
epochAccuracies.append(correctPredictions / labels.size(0))
# end for
# calculate epoch loss and accuracy from the respective lists
epochLoss = sum(epochLosses) / len(epochLosses)
epochAccuracy = sum(epochAccuracies) / len(epochAccuracies)
print('epoch ' + str(epoch) + ', epochLoss = ' + '{:.4f}'.format(epochLoss) +
', epochAccuracy = ' + '{:.4f}'.format(epochAccuracy * 100) + '%')
# end for
print('finished training')
# save the model
torch.save(mnistNet.state_dict(), GRAPH_NAME)
print('saved graph as ' + str(GRAPH_NAME))
# end function
if __name__ == '__main__':
main()
这是我迄今为止在将保存的图形从 PyTorch 转换为 ONNX 的脚本方面的最佳尝试(我不确定这是否正确,我至少可以说它运行时没有错误):
# 3_convert_graph_to_onnx.py
from MnistNet import MnistNet
import torch
GRAPH_NAME = 'MNIST.pt'
ONNX_GRAPH_NAME = 'MNIST.onnx'
def main():
net = MnistNet()
net.load_state_dict(torch.load(GRAPH_NAME))
net.eval()
# make a dummy input with a batch size of 1, 1 channel, 28 x 28
dummyInput = torch.randn(10, 1, 28, 28)
torch.onnx.export(net, dummyInput, ONNX_GRAPH_NAME, verbose=True)
# end function
if __name__ == '__main__':
main()
这里是我尝试用OpenCV推断ONNX图(注意PyTorch是包含的,但只用于加载测试MNIST数据集,推断前图像被转换为OpenCV格式):
# 4_onnx_opencv_inf.py
from MnistNet import MnistNet
import torchvision
import cv2
import numpy as np
from termcolor import colored
ONNX_GRAPH_NAME = 'MNIST.onnx'
def main():
testDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=False, transform=MnistNet.TRANSFORM, download=True)
labels = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
net = cv2.dnn.readNetFromONNX(ONNX_GRAPH_NAME)
# test on 3 images
for i in range(3):
# get PyTorch tensor image and ground truth index from dataset
ptImage, gndTrIdx = testDataset[i]
# convert to PIL image
pilImage = torchvision.transforms.ToPILImage()(ptImage)
# convert to OpenCV image, would convert RGB to BGR here if image was color
openCvImage = np.array(pilImage)
gndTr = labels[gndTrIdx]
# can show OpenCV image here if desired
# cv2.imshow('openCvImage', openCvImage)
# cv2.waitKey()
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))
net.setInput(blob)
preds = net.forward()
predIdx = np.array(preds)[0].argmax()
prediction = str(predIdx)
if prediction == gndTr:
print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', correct answer', 'green'))
else:
print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', incorrect answer', 'red'))
# end if
# end for
# end function
if __name__ == '__main__':
main()
目前这个最终脚本因以下错误而崩溃:
$ python3 4_onnx_opencv_inf.py
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3441) getLayerShapesRecursively OPENCV/DNN: [Reshape]:(18): getMemoryShapes() throws exception. inputs=1 outputs=1/1 blobs=0
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3447) getLayerShapesRecursively input[0] = [ 1 16 13 13 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3451) getLayerShapesRecursively output[0] = [ 1 256 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3457) getLayerShapesRecursively Exception message: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'
Traceback (most recent call last):
File "4_onnx_opencv_inf.py", line 54, in <module>
main()
File "4_onnx_opencv_inf.py", line 38, in main
preds = net.forward()
cv2.error: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'
根据这个错误,我不太确定下一步该怎么做,有人可以就此提出建议吗?我怀疑我至少大体上正确地执行了程序并遗漏了一些小细节。
我在 ONNX 推理脚本中使用了错误的大小。
在4_onnx_opencv_inf.py
变化中:
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))
到
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(28, 28))
使其成为 运行(我使用的是 Ubuntu 20.04 和 PyTorch 1.7.0),但准确性更差。使用上面的常规 PyTorch 推理(第二个脚本),我获得了 98.5% 的准确率,使用 OpenCV ONNX 版本,我获得了 95% 的准确率。
我怀疑差异是由于 cv2.dnn.blobFromImage
中的参数未设置为正确处理规范化,但这是完全不同的 post。
我正在尝试通过 torch.onnx.export
函数将 PyTorch 图转换为 ONNX,然后使用 OpenCV 函数 blobFromImage
、setInput
和 forward
进行推理转换后的图形。我认为我在正确的轨道上,但我一直 运行 犯错误,而且我能找到的关于如何做到这一点的有用示例很少。
我意识到一般的堆栈溢出策略是 post 只有代码的相关部分,但是我得到的错误似乎是细节中的魔鬼所以我怀疑我必须 post 一个完整的例子来明确错误的原因。
这是我的训练网(MNIST 的相当标准):
# MnistNet.py
# Net Layout:
# batchSize x 1 x 28 x 28
# conv1 Conv2d(1, 6, 5)
# batchSize x 6 x 24 x 24
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 6 x 12 x 12
# conv2 Conv2d(6, 16, 5)
# batchSize x 16 x 8 x 8
# relu(x)
# max_pool2d(x, kernel_size=2)
# batchSize x 16 x 4 x 4
# view(-1, 16 * 4 * 4) Note: 16 * 4 * 4 = 256
# batchSize x 1 x 256
# fc1 Linear(256, 120)
# relu(x)
# batchSize x 1 x 120
# fc2 Linear(120, 84)
# relu(x)
# batchSize x 1 x 84
# fc3 Linear(84, 10)
# batchSize x 1 x 10
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
class MnistNet(nn.Module):
TRANSFORM = torchvision.transforms.Compose([
torchvision.transforms.Resize((28, 28)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.5], [0.5])
])
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(256, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
# end function
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
x = x.view(-1, 256)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
# end function
# end class
这是我的训练脚本(同样,对于 MNIST 来说非常标准):
# 1_train.py
from MnistNet import MnistNet
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from termcolor import colored
BATCH_SIZE = 64
NUM_EPOCHS = 10
GRAPH_NAME = 'MNIST.pt'
def main():
trainDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=True, transform=MnistNet.TRANSFORM, download=True)
trainDataLoader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)
# declare net, loss function, and optimizer
mnistNet = MnistNet()
lossFunction = nn.CrossEntropyLoss()
optimizer = optim.Adam(mnistNet.parameters())
# get device (cuda or cpu)
if torch.cuda.is_available():
device = torch.device('cuda')
print(colored('using cuda', 'green'))
else:
device = torch.device('cpu')
print(colored('GPU does not seem to be available, using CPU', 'red'))
# end if
# set network to device
mnistNet.to(device)
# set network to train mode
mnistNet.train()
print('beginning training . . .')
# for each epoch . . .
for epoch in range(1, NUM_EPOCHS+1):
# variables to calculate loss and accuracy within the epoch
epochLosses = []
epochAccuracies = []
# for each batch . . .
for i, element in enumerate(trainDataLoader):
# break out the input images and labels, note these are Tensors
inputImages, labels = element
inputImages = inputImages.to(device)
labels = labels.to(device)
# clear gradients from the previous step
optimizer.zero_grad()
# get net output
outputs = mnistNet(inputImages)
# calculate loss
loss = lossFunction(outputs, labels)
# call backward() to compute gradients
loss.backward()
# update parameters using gradients
optimizer.step()
# append the current classification loss to the list of epoch losses
epochLosses.append(loss.item())
# calculate current classification accuracy
# get the highest scoring classification for each prediction
_, predictions = torch.max(outputs.data, 1)
# number of labels and predictions should always be the same, log an error if this is not the case
if labels.size(0) != predictions.size(0):
print(colored('ERROR: labels.size(0) != predictions.size(0)', 'red'))
# end if
# determine the number of correct predictions for the current batch
correctPredictions = 0
for j in range(len(labels)):
if predictions[j].item() == labels[j].item():
correctPredictions += 1
# end if
# end for
# append the current batch accuracy to the list of accuracies
epochAccuracies.append(correctPredictions / labels.size(0))
# end for
# calculate epoch loss and accuracy from the respective lists
epochLoss = sum(epochLosses) / len(epochLosses)
epochAccuracy = sum(epochAccuracies) / len(epochAccuracies)
print('epoch ' + str(epoch) + ', epochLoss = ' + '{:.4f}'.format(epochLoss) +
', epochAccuracy = ' + '{:.4f}'.format(epochAccuracy * 100) + '%')
# end for
print('finished training')
# save the model
torch.save(mnistNet.state_dict(), GRAPH_NAME)
print('saved graph as ' + str(GRAPH_NAME))
# end function
if __name__ == '__main__':
main()
这是我迄今为止在将保存的图形从 PyTorch 转换为 ONNX 的脚本方面的最佳尝试(我不确定这是否正确,我至少可以说它运行时没有错误):
# 3_convert_graph_to_onnx.py
from MnistNet import MnistNet
import torch
GRAPH_NAME = 'MNIST.pt'
ONNX_GRAPH_NAME = 'MNIST.onnx'
def main():
net = MnistNet()
net.load_state_dict(torch.load(GRAPH_NAME))
net.eval()
# make a dummy input with a batch size of 1, 1 channel, 28 x 28
dummyInput = torch.randn(10, 1, 28, 28)
torch.onnx.export(net, dummyInput, ONNX_GRAPH_NAME, verbose=True)
# end function
if __name__ == '__main__':
main()
这里是我尝试用OpenCV推断ONNX图(注意PyTorch是包含的,但只用于加载测试MNIST数据集,推断前图像被转换为OpenCV格式):
# 4_onnx_opencv_inf.py
from MnistNet import MnistNet
import torchvision
import cv2
import numpy as np
from termcolor import colored
ONNX_GRAPH_NAME = 'MNIST.onnx'
def main():
testDataset = torchvision.datasets.MNIST('built_in_mnist_download', train=False, transform=MnistNet.TRANSFORM, download=True)
labels = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
net = cv2.dnn.readNetFromONNX(ONNX_GRAPH_NAME)
# test on 3 images
for i in range(3):
# get PyTorch tensor image and ground truth index from dataset
ptImage, gndTrIdx = testDataset[i]
# convert to PIL image
pilImage = torchvision.transforms.ToPILImage()(ptImage)
# convert to OpenCV image, would convert RGB to BGR here if image was color
openCvImage = np.array(pilImage)
gndTr = labels[gndTrIdx]
# can show OpenCV image here if desired
# cv2.imshow('openCvImage', openCvImage)
# cv2.waitKey()
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))
net.setInput(blob)
preds = net.forward()
predIdx = np.array(preds)[0].argmax()
prediction = str(predIdx)
if prediction == gndTr:
print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', correct answer', 'green'))
else:
print(colored('i = ' + str(i) + ', predIdx = ' + str(predIdx) + ', gndTrIdx = ' + str(gndTrIdx) + ', incorrect answer', 'red'))
# end if
# end for
# end function
if __name__ == '__main__':
main()
目前这个最终脚本因以下错误而崩溃:
$ python3 4_onnx_opencv_inf.py
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3441) getLayerShapesRecursively OPENCV/DNN: [Reshape]:(18): getMemoryShapes() throws exception. inputs=1 outputs=1/1 blobs=0
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3447) getLayerShapesRecursively input[0] = [ 1 16 13 13 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3451) getLayerShapesRecursively output[0] = [ 1 256 ]
[ERROR:0] global /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/dnn.cpp (3457) getLayerShapesRecursively Exception message: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'
Traceback (most recent call last):
File "4_onnx_opencv_inf.py", line 54, in <module>
main()
File "4_onnx_opencv_inf.py", line 38, in main
preds = net.forward()
cv2.error: OpenCV(4.4.0) /tmp/pip-req-build-99ib2vsi/opencv/modules/dnn/src/layers/reshape_layer.cpp:154: error: (-1:Backtrace) Can't infer a dim denoted by -1 in function 'computeShapeByReshapeMask'
根据这个错误,我不太确定下一步该怎么做,有人可以就此提出建议吗?我怀疑我至少大体上正确地执行了程序并遗漏了一些小细节。
我在 ONNX 推理脚本中使用了错误的大小。
在4_onnx_opencv_inf.py
变化中:
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(64, 64))
到
blob = cv2.dnn.blobFromImage(image=openCvImage, scalefactor=1.0/255.0, size=(28, 28))
使其成为 运行(我使用的是 Ubuntu 20.04 和 PyTorch 1.7.0),但准确性更差。使用上面的常规 PyTorch 推理(第二个脚本),我获得了 98.5% 的准确率,使用 OpenCV ONNX 版本,我获得了 95% 的准确率。
我怀疑差异是由于 cv2.dnn.blobFromImage
中的参数未设置为正确处理规范化,但这是完全不同的 post。